gitextract_ldfkme3g/

├── .github/
│   ├── CODEOWNERS
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── documentation.md
│   │   ├── feature_request.md
│   │   └── how-to-question.md
│   ├── ISSUE_TEMPLATE.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── stale.yml
│   └── workflows/
│       ├── build.yml
│       ├── depreview.yml
│       └── release.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── RELEASE.md
├── docs/
│   ├── Makefile
│   ├── command_line_tools.rst
│   ├── conf.py
│   ├── criterions.rst
│   ├── data.rst
│   ├── docutils.conf
│   ├── getting_started.rst
│   ├── hydra_integration.md
│   ├── index.rst
│   ├── lr_scheduler.rst
│   ├── make.bat
│   ├── models.rst
│   ├── modules.rst
│   ├── optim.rst
│   ├── overview.rst
│   ├── tasks.rst
│   ├── tutorial_classifying_names.rst
│   └── tutorial_simple_lstm.rst
├── examples/
│   ├── .gitignore
│   ├── MMPT/
│   │   ├── .gitignore
│   │   ├── CONFIG.md
│   │   ├── DATASET.md
│   │   ├── README.md
│   │   ├── endtask.md
│   │   ├── locallaunch.py
│   │   ├── mmpt/
│   │   │   ├── __init__.py
│   │   │   ├── datasets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fairseqmmdataset.py
│   │   │   │   └── mmdataset.py
│   │   │   ├── evaluators/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   ├── metric.py
│   │   │   │   └── predictor.py
│   │   │   ├── losses/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fairseqmmloss.py
│   │   │   │   ├── loss.py
│   │   │   │   └── nce.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fairseqmmmodel.py
│   │   │   │   ├── mmfusion.py
│   │   │   │   ├── mmfusionnlg.py
│   │   │   │   └── transformermodel.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mm.py
│   │   │   │   ├── retri.py
│   │   │   │   └── vectorpool.py
│   │   │   ├── processors/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dedupprocessor.py
│   │   │   │   ├── dsprocessor.py
│   │   │   │   ├── how2processor.py
│   │   │   │   ├── how2retriprocessor.py
│   │   │   │   ├── models/
│   │   │   │   │   └── s3dg.py
│   │   │   │   └── processor.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fairseqmmtask.py
│   │   │   │   ├── milncetask.py
│   │   │   │   ├── retritask.py
│   │   │   │   ├── task.py
│   │   │   │   └── vlmtask.py
│   │   │   └── utils/
│   │   │       ├── __init__.py
│   │   │       ├── load_config.py
│   │   │       └── shardedtensor.py
│   │   ├── mmpt_cli/
│   │   │   ├── localjob.py
│   │   │   └── predict.py
│   │   ├── pretraining.md
│   │   ├── projects/
│   │   │   ├── mfmmlm.yaml
│   │   │   ├── mtm/
│   │   │   │   ├── mmfusionmtm.yaml
│   │   │   │   ├── vlm/
│   │   │   │   │   ├── coin.yaml
│   │   │   │   │   ├── crosstask.yaml
│   │   │   │   │   ├── how2.yaml
│   │   │   │   │   ├── test_coin.yaml
│   │   │   │   │   ├── test_crosstask.yaml
│   │   │   │   │   ├── test_crosstask_zs.yaml
│   │   │   │   │   ├── test_vtt.yaml
│   │   │   │   │   ├── test_vttqa.yaml
│   │   │   │   │   ├── test_youcook.yaml
│   │   │   │   │   ├── test_youcookcap.yaml
│   │   │   │   │   ├── vtt.yaml
│   │   │   │   │   ├── vttqa.yaml
│   │   │   │   │   ├── youcook.yaml
│   │   │   │   │   └── youcookcap.yaml
│   │   │   │   └── vlm.yaml
│   │   │   ├── retri/
│   │   │   │   ├── videoclip/
│   │   │   │   │   ├── coin_videoclip.yaml
│   │   │   │   │   ├── crosstask_videoclip.yaml
│   │   │   │   │   ├── how2.yaml
│   │   │   │   │   ├── test_coin_videoclip.yaml
│   │   │   │   │   ├── test_coin_zs.yaml
│   │   │   │   │   ├── test_crosstask_videoclip.yaml
│   │   │   │   │   ├── test_crosstask_zs_videoclip.yaml
│   │   │   │   │   ├── test_didemo_zs.yaml
│   │   │   │   │   ├── test_vtt_videoclip.yaml
│   │   │   │   │   ├── test_vtt_zs.yaml
│   │   │   │   │   ├── test_vttqa_videoclip.yaml
│   │   │   │   │   ├── test_vttqa_zs.yaml
│   │   │   │   │   ├── test_youcook_videoclip.yaml
│   │   │   │   │   ├── test_youcook_zs.yaml
│   │   │   │   │   ├── vtt_videoclip.yaml
│   │   │   │   │   ├── vttqa_videoclip.yaml
│   │   │   │   │   └── youcook_videoclip.yaml
│   │   │   │   ├── videoclip.yaml
│   │   │   │   └── videoretri.yaml
│   │   │   └── task/
│   │   │       ├── coin.yaml
│   │   │       ├── coin_videoclip.yaml
│   │   │       ├── crosstask.yaml
│   │   │       ├── crosstask_videoclip.yaml
│   │   │       ├── default.yaml
│   │   │       ├── ft.yaml
│   │   │       ├── how2.yaml
│   │   │       ├── test.yaml
│   │   │       ├── test_coin.yaml
│   │   │       ├── test_coin_videoclip.yaml
│   │   │       ├── test_coin_zs.yaml
│   │   │       ├── test_crosstask.yaml
│   │   │       ├── test_crosstask_videoclip.yaml
│   │   │       ├── test_crosstask_zs.yaml
│   │   │       ├── test_crosstask_zs_videoclip.yaml
│   │   │       ├── test_didemo_zs.yaml
│   │   │       ├── test_vtt.yaml
│   │   │       ├── test_vtt_videoclip.yaml
│   │   │       ├── test_vtt_zs.yaml
│   │   │       ├── test_vttqa.yaml
│   │   │       ├── test_vttqa_videoclip.yaml
│   │   │       ├── test_vttqa_zs.yaml
│   │   │       ├── test_youcook.yaml
│   │   │       ├── test_youcook_videoclip.yaml
│   │   │       ├── test_youcook_zs.yaml
│   │   │       ├── test_youcookcap.yaml
│   │   │       ├── vtt.yaml
│   │   │       ├── vtt_videoclip.yaml
│   │   │       ├── vttqa.yaml
│   │   │       ├── vttqa_videoclip.yaml
│   │   │       ├── youcook.yaml
│   │   │       ├── youcook_videoclip.yaml
│   │   │       └── youcookcap.yaml
│   │   ├── scripts/
│   │   │   ├── text_token_extractor/
│   │   │   │   ├── configs/
│   │   │   │   │   └── bert-base-uncased.yaml
│   │   │   │   └── pretokenization.py
│   │   │   └── video_feature_extractor/
│   │   │       ├── extract.py
│   │   │       ├── how2/
│   │   │       │   └── s3d.sh
│   │   │       ├── model.py
│   │   │       ├── pathbuilder.py
│   │   │       ├── preprocessing.py
│   │   │       ├── random_sequence_shuffler.py
│   │   │       ├── shard_feature.py
│   │   │       └── videoreader.py
│   │   └── setup.py
│   ├── __init__.py
│   ├── adaptive_span/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── adagrad_with_grad_clip.py
│   │   ├── adaptive_span_attention.py
│   │   ├── adaptive_span_loss.py
│   │   ├── adaptive_span_model.py
│   │   └── adaptive_span_model_wrapper.py
│   ├── attention_head_selection/
│   │   ├── README.md
│   │   └── src/
│   │       ├── __init__.py
│   │       ├── data/
│   │       │   ├── __init__.py
│   │       │   └── speech_to_text_dataset_with_domain.py
│   │       ├── loss/
│   │       │   ├── __init__.py
│   │       │   └── attention_head_selection.py
│   │       ├── models/
│   │       │   ├── __init__.py
│   │       │   ├── head_selection_s2t_transformer.py
│   │       │   └── head_selection_transformer.py
│   │       ├── modules/
│   │       │   ├── __init__.py
│   │       │   ├── attn_head_selector.py
│   │       │   ├── head_selection_transformer_layer.py
│   │       │   ├── multihead_attention_selection.py
│   │       │   └── multihead_functional.py
│   │       └── speech_to_text_head_selection.py
│   ├── audio_nlp/
│   │   └── nlu/
│   │       ├── README.md
│   │       ├── configs/
│   │       │   └── nlu_finetuning.yaml
│   │       ├── create_dict_stop.sh
│   │       └── generate_manifests.py
│   ├── backtranslation/
│   │   ├── README.md
│   │   ├── deduplicate_lines.py
│   │   ├── extract_bt_data.py
│   │   ├── prepare-de-monolingual.sh
│   │   ├── prepare-wmt18en2de.sh
│   │   ├── sacrebleu.sh
│   │   └── tokenized_bleu.sh
│   ├── bart/
│   │   ├── README.glue.md
│   │   ├── README.md
│   │   ├── README.summarization.md
│   │   └── summarize.py
│   ├── byte_level_bpe/
│   │   ├── README.md
│   │   ├── get_bitext.py
│   │   ├── get_data.sh
│   │   └── gru_transformer.py
│   ├── camembert/
│   │   └── README.md
│   ├── constrained_decoding/
│   │   ├── README.md
│   │   ├── normalize.py
│   │   └── tok.py
│   ├── conv_seq2seq/
│   │   └── README.md
│   ├── criss/
│   │   ├── README.md
│   │   ├── download_and_preprocess_flores_test.sh
│   │   ├── download_and_preprocess_tatoeba.sh
│   │   ├── mining/
│   │   │   ├── mine.py
│   │   │   └── mine_example.sh
│   │   ├── save_encoder.py
│   │   ├── sentence_retrieval/
│   │   │   ├── encoder_analysis.py
│   │   │   └── sentence_retrieval_tatoeba.sh
│   │   └── unsupervised_mt/
│   │       └── eval.sh
│   ├── cross_lingual_language_model/
│   │   └── README.md
│   ├── data2vec/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── audio/
│   │   │   │   ├── classification/
│   │   │   │   │   ├── base_classification.yaml
│   │   │   │   │   └── run_config/
│   │   │   │   │       ├── slurm_1.yaml
│   │   │   │   │       ├── slurm_1g.yaml
│   │   │   │   │       └── slurm_2.yaml
│   │   │   │   └── pretraining/
│   │   │   │       ├── audioset.yaml
│   │   │   │       ├── base_librispeech.yaml
│   │   │   │       └── run_config/
│   │   │   │           ├── local.yaml
│   │   │   │           ├── slurm_1.yaml
│   │   │   │           ├── slurm_1_aws.yaml
│   │   │   │           ├── slurm_2.yaml
│   │   │   │           ├── slurm_2_aws.yaml
│   │   │   │           ├── slurm_3.yaml
│   │   │   │           ├── slurm_4.yaml
│   │   │   │           ├── slurm_4_aws.yaml
│   │   │   │           ├── slurm_6_aws.yaml
│   │   │   │           └── slurm_8_aws.yaml
│   │   │   ├── text/
│   │   │   │   └── pretraining/
│   │   │   │       ├── base.yaml
│   │   │   │       └── run_config/
│   │   │   │           ├── local.yaml
│   │   │   │           ├── slurm_1_aws.yaml
│   │   │   │           ├── slurm_2.yaml
│   │   │   │           ├── slurm_2_aws.yaml
│   │   │   │           ├── slurm_3.yaml
│   │   │   │           ├── slurm_4.yaml
│   │   │   │           ├── slurm_4_aws.yaml
│   │   │   │           └── slurm_8_aws.yaml
│   │   │   ├── v2/
│   │   │   │   ├── base_audio_only_task.yaml
│   │   │   │   ├── base_images_only_task.yaml
│   │   │   │   ├── base_text_only_task.yaml
│   │   │   │   ├── huge_images14_only_task.yaml
│   │   │   │   ├── huge_images_only_task.yaml
│   │   │   │   ├── large_audio_only_task.yaml
│   │   │   │   ├── large_images_only_task.yaml
│   │   │   │   ├── large_text_only_task.yaml
│   │   │   │   ├── large_text_only_task_pgrp_1M.yaml
│   │   │   │   ├── run_config/
│   │   │   │   │   ├── local.yaml
│   │   │   │   │   ├── slurm_1.yaml
│   │   │   │   │   ├── slurm_1_aws.yaml
│   │   │   │   │   ├── slurm_2.yaml
│   │   │   │   │   ├── slurm_2_aws.yaml
│   │   │   │   │   ├── slurm_3.yaml
│   │   │   │   │   ├── slurm_4.yaml
│   │   │   │   │   ├── slurm_4_aws.yaml
│   │   │   │   │   ├── slurm_6_aws.yaml
│   │   │   │   │   ├── slurm_8.yaml
│   │   │   │   │   └── slurm_8_aws.yaml
│   │   │   │   └── text_finetuning/
│   │   │   │       ├── cola.yaml
│   │   │   │       ├── mnli.yaml
│   │   │   │       ├── mrpc.yaml
│   │   │   │       ├── qnli.yaml
│   │   │   │       ├── qqp.yaml
│   │   │   │       ├── rte.yaml
│   │   │   │       ├── run_config/
│   │   │   │       │   └── local.yaml
│   │   │   │       ├── sst_2.yaml
│   │   │   │       └── sts_b.yaml
│   │   │   └── vision/
│   │   │       ├── finetuning/
│   │   │       │   ├── imagenet.yaml
│   │   │       │   ├── mae_imagenet_clean.yaml
│   │   │       │   ├── mae_imagenet_huge_clean.yaml
│   │   │       │   ├── mae_imagenet_large_clean.yaml
│   │   │       │   └── run_config/
│   │   │       │       ├── local.yaml
│   │   │       │       ├── slurm_1.yaml
│   │   │       │       ├── slurm_1_aws.yaml
│   │   │       │       ├── slurm_2.yaml
│   │   │       │       ├── slurm_2_aws.yaml
│   │   │       │       ├── slurm_3.yaml
│   │   │       │       ├── slurm_4.yaml
│   │   │       │       ├── slurm_4_aws.yaml
│   │   │       │       ├── slurm_6_aws.yaml
│   │   │       │       └── slurm_8_aws.yaml
│   │   │       └── pretraining/
│   │   │           ├── base_imagenet.yaml
│   │   │           ├── base_imagenet_d2v1.yaml
│   │   │           ├── base_mae_imagenet.yaml
│   │   │           └── run_config/
│   │   │               ├── local.yaml
│   │   │               ├── slurm_1.yaml
│   │   │               ├── slurm_1_aws.yaml
│   │   │               ├── slurm_2.yaml
│   │   │               ├── slurm_2_aws.yaml
│   │   │               ├── slurm_3.yaml
│   │   │               ├── slurm_4.yaml
│   │   │               ├── slurm_4_aws.yaml
│   │   │               ├── slurm_6_aws.yaml
│   │   │               └── slurm_8_aws.yaml
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── add_class_target_dataset.py
│   │   │   ├── image_dataset.py
│   │   │   ├── mae_finetuning_image_dataset.py
│   │   │   ├── mae_image_dataset.py
│   │   │   ├── modality.py
│   │   │   └── path_dataset.py
│   │   ├── fb_convert_beit_cp.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── audio_classification.py
│   │   │   ├── data2vec2.py
│   │   │   ├── data2vec_audio.py
│   │   │   ├── data2vec_image_classification.py
│   │   │   ├── data2vec_text.py
│   │   │   ├── data2vec_text_classification.py
│   │   │   ├── data2vec_vision.py
│   │   │   ├── mae.py
│   │   │   ├── mae_image_classification.py
│   │   │   ├── modalities/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio.py
│   │   │   │   ├── base.py
│   │   │   │   ├── images.py
│   │   │   │   ├── modules.py
│   │   │   │   └── text.py
│   │   │   └── utils.py
│   │   ├── scripts/
│   │   │   ├── convert_audioset_labels.py
│   │   │   ├── multi/
│   │   │   │   ├── finetune_all_fair_aws_local_lr.sh
│   │   │   │   ├── finetune_all_fair_aws_local_lr_nodep.sh
│   │   │   │   └── finetune_all_fair_local_lr.sh
│   │   │   └── text/
│   │   │       ├── finetune_all_char_fair_aws_local_lr.sh
│   │   │       ├── finetune_all_fair.sh
│   │   │       ├── finetune_all_fair_aws.sh
│   │   │       ├── finetune_all_fair_aws_local_lr.sh
│   │   │       ├── finetune_all_fair_aws_lr.sh
│   │   │       ├── finetune_all_fair_local_lr.sh
│   │   │       ├── finetune_all_fair_nodep.sh
│   │   │       ├── finetune_all_fair_nodep_aws.sh
│   │   │       ├── finetune_all_fair_nodep_aws_local_lr.sh
│   │   │       ├── finetune_all_fair_nodep_aws_lr.sh
│   │   │       ├── finetune_all_fair_nodep_aws_lr_nopos.sh
│   │   │       ├── finetune_all_large_fair_aws_local_lr.sh
│   │   │       ├── finetune_all_large_fair_local_lr.sh
│   │   │       ├── finetune_all_large_fair_nodep_aws_local_lr.sh
│   │   │       ├── finetune_sst2_qnli_sweep_fair_nodep.sh
│   │   │       ├── glue.py
│   │   │       ├── glue_lr.py
│   │   │       ├── unprocess_data.py
│   │   │       └── valids.py
│   │   └── tasks/
│   │       ├── __init__.py
│   │       ├── audio_classification.py
│   │       ├── image_classification.py
│   │       ├── image_pretraining.py
│   │       ├── mae_image_classification.py
│   │       ├── mae_image_pretraining.py
│   │       └── multimodal.py
│   ├── discriminative_reranking_nmt/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   └── deen.yaml
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   └── discriminative_reranking_criterion.py
│   │   ├── drnmt_rerank.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   └── discriminative_reranking_model.py
│   │   ├── scripts/
│   │   │   └── prep_data.py
│   │   └── tasks/
│   │       ├── __init__.py
│   │       └── discriminative_reranking_task.py
│   ├── emotion_conversion/
│   │   ├── README.md
│   │   ├── emotion_models/
│   │   │   ├── __init__.py
│   │   │   ├── duration_predictor.py
│   │   │   ├── duration_predictor.yaml
│   │   │   ├── pitch_predictor.py
│   │   │   ├── pitch_predictor.yaml
│   │   │   └── utils.py
│   │   ├── fairseq_models/
│   │   │   └── __init__.py
│   │   ├── preprocess/
│   │   │   ├── __init__.py
│   │   │   ├── build_hifigan_manifest.py
│   │   │   ├── build_translation_manifests.py
│   │   │   ├── create_core_manifest.py
│   │   │   ├── extract_f0.py
│   │   │   ├── process_km.py
│   │   │   ├── split_emov_km_tsv_by_uttid.py
│   │   │   ├── split_km.py
│   │   │   └── split_km_tsv.py
│   │   ├── requirements.txt
│   │   └── synthesize.py
│   ├── fast_noisy_channel/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── noisy_channel_beam_search.py
│   │   ├── noisy_channel_sequence_generator.py
│   │   └── noisy_channel_translation.py
│   ├── flores101/
│   │   └── README.md
│   ├── fully_sharded_data_parallel/
│   │   └── README.md
│   ├── gottbert/
│   │   └── README.md
│   ├── hubert/
│   │   ├── README.md
│   │   ├── config/
│   │   │   ├── decode/
│   │   │   │   ├── ax_sweep/
│   │   │   │   │   ├── ngram.yaml
│   │   │   │   │   └── transformer.yaml
│   │   │   │   ├── infer_fsqlm.yaml
│   │   │   │   ├── infer_kenlm.yaml
│   │   │   │   ├── infer_viterbi.yaml
│   │   │   │   └── run/
│   │   │   │       ├── submitit_slurm.yaml
│   │   │   │       └── submitit_slurm_8gpu.yaml
│   │   │   ├── finetune/
│   │   │   │   ├── base_10h.yaml
│   │   │   │   ├── ckpt/
│   │   │   │   │   └── it1.yaml
│   │   │   │   ├── lm/
│   │   │   │   │   └── ls_4gram.yaml
│   │   │   │   └── run/
│   │   │   │       └── submitit_reg.yaml
│   │   │   └── pretrain/
│   │   │       ├── data/
│   │   │       │   ├── iter1.yaml
│   │   │       │   └── iter2.yaml
│   │   │       ├── hubert_base_librispeech.yaml
│   │   │       ├── hubert_large_librivox.yaml
│   │   │       ├── hubert_xlarge_librivox.yaml
│   │   │       └── run/
│   │   │           └── submitit_reg.yaml
│   │   ├── measure_teacher_quality.py
│   │   ├── simple_kmeans/
│   │   │   ├── README.md
│   │   │   ├── dump_hubert_feature.py
│   │   │   ├── dump_hubert_feature_s2t.py
│   │   │   ├── dump_km_label.py
│   │   │   ├── dump_mfcc_feature.py
│   │   │   ├── dump_w2v2_feature.py
│   │   │   ├── feature_utils.py
│   │   │   └── learn_kmeans.py
│   │   ├── tests/
│   │   │   ├── 6313-76958-0021.flac
│   │   │   ├── sample.base.L9.km500.km
│   │   │   ├── sample.base.L9.len
│   │   │   ├── sample.base.L9.npy
│   │   │   ├── sample.large.L20.len
│   │   │   ├── sample.large.L20.npy
│   │   │   ├── sample.large.hypo.word
│   │   │   ├── sample.xlarge.L30.len
│   │   │   ├── sample.xlarge.L30.npy
│   │   │   ├── sample.xlarge.hypo.word
│   │   │   ├── test_feature_and_unit.sh
│   │   │   └── test_finetuned_asr.sh
│   │   └── update_ckpt.py
│   ├── joint_alignment_translation/
│   │   ├── README.md
│   │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
│   ├── language_model/
│   │   ├── README.adaptive_inputs.md
│   │   ├── README.conv.md
│   │   ├── README.md
│   │   └── prepare-wikitext-103.sh
│   ├── laser/
│   │   ├── README.md
│   │   └── laser_src/
│   │       ├── __init__.py
│   │       ├── laser_lstm.py
│   │       ├── laser_task.py
│   │       ├── laser_transformer.py
│   │       └── multitask_data_utils.py
│   ├── latent_depth/
│   │   ├── README.md
│   │   └── latent_depth_src/
│   │       ├── __init__.py
│   │       ├── loss/
│   │       │   ├── __init__.py
│   │       │   └── latent_depth.py
│   │       ├── models/
│   │       │   ├── __init__.py
│   │       │   ├── latent_multilingual_transformer.py
│   │       │   └── latent_transformer.py
│   │       ├── modules/
│   │       │   ├── __init__.py
│   │       │   └── latent_layers.py
│   │       └── multilingual_translation_latent_depth.py
│   ├── layerdrop/
│   │   └── README.md
│   ├── linformer/
│   │   ├── README.md
│   │   └── linformer_src/
│   │       ├── __init__.py
│   │       ├── models/
│   │       │   ├── __init__.py
│   │       │   └── linformer_roberta.py
│   │       └── modules/
│   │           ├── __init__.py
│   │           ├── linformer_sentence_encoder.py
│   │           ├── linformer_sentence_encoder_layer.py
│   │           └── multihead_linear_attention.py
│   ├── m2m_100/
│   │   ├── README.md
│   │   ├── install_dependecies.sh
│   │   ├── process_data/
│   │   │   ├── clean_histogram.py
│   │   │   ├── dedup_data.py
│   │   │   └── remove_too_much_punc.py
│   │   ├── tok.sh
│   │   └── tokenizers/
│   │       ├── README.md
│   │       ├── seg_ja.sh
│   │       ├── seg_ko.sh
│   │       ├── thirdparty/
│   │       │   └── .gitignore
│   │       ├── tokenize_indic.py
│   │       ├── tokenize_thai.py
│   │       ├── tokenize_zh.py
│   │       └── tokenizer_ar.sh
│   ├── mbart/
│   │   └── README.md
│   ├── megatron_11b/
│   │   ├── README.md
│   │   └── detok.py
│   ├── mms/
│   │   ├── MODEL_CARD.md
│   │   ├── README.md
│   │   ├── asr/
│   │   │   ├── config/
│   │   │   │   └── infer_common.yaml
│   │   │   ├── infer/
│   │   │   │   ├── example_infer_adapter.sh
│   │   │   │   └── mms_infer.py
│   │   │   └── tutorial/
│   │   │       └── MMS_ASR_Inference_Colab.ipynb
│   │   ├── data_prep/
│   │   │   ├── README.md
│   │   │   ├── align_and_segment.py
│   │   │   ├── align_utils.py
│   │   │   ├── norm_config.py
│   │   │   ├── punctuations.lst
│   │   │   └── text_normalization.py
│   │   ├── lid/
│   │   │   ├── infer.py
│   │   │   └── tutorial/
│   │   │       └── MMS_LID_Inference_Colab.ipynb
│   │   ├── lid_rerank/
│   │   │   ├── README.md
│   │   │   ├── cer_langs.txt
│   │   │   ├── mala/
│   │   │   │   └── infer.py
│   │   │   ├── mms/
│   │   │   │   ├── make_parallel_single_runs.py
│   │   │   │   ├── merge_by_lang.py
│   │   │   │   ├── prep_wav_list.py
│   │   │   │   ├── run_single_lang.py
│   │   │   │   └── split_by_lang.py
│   │   │   ├── mms-zs/
│   │   │   │   ├── falign.py
│   │   │   │   ├── lib.py
│   │   │   │   └── uromanize.py
│   │   │   ├── nllb/
│   │   │   │   └── infer.py
│   │   │   ├── requirements.txt
│   │   │   ├── rerank/
│   │   │   │   ├── rerank.py
│   │   │   │   └── tune_coefficients.py
│   │   │   └── whisper/
│   │   │       ├── infer_asr.py
│   │   │       ├── infer_lid.py
│   │   │       └── lid_mapping.txt
│   │   ├── misc/
│   │   │   └── get_sample_size.py
│   │   ├── tts/
│   │   │   ├── infer.py
│   │   │   └── tutorial/
│   │   │       └── MMS_TTS_Inference_Colab.ipynb
│   │   └── zero_shot/
│   │       └── README.md
│   ├── moe_lm/
│   │   ├── README.md
│   │   ├── data_card.md
│   │   └── model_card.md
│   ├── mr_hubert/
│   │   ├── README.md
│   │   ├── config/
│   │   │   ├── decode/
│   │   │   │   ├── infer.yaml
│   │   │   │   ├── infer_lm.yaml
│   │   │   │   └── run/
│   │   │   │       ├── submitit_slurm.yaml
│   │   │   │       └── submitit_slurm_8gpu.yaml
│   │   │   ├── finetune/
│   │   │   │   ├── base_100h.yaml
│   │   │   │   ├── base_100h_large.yaml
│   │   │   │   ├── base_10h.yaml
│   │   │   │   ├── base_10h_large.yaml
│   │   │   │   ├── base_1h.yaml
│   │   │   │   └── base_1h_large.yaml
│   │   │   └── pretrain/
│   │   │       ├── mrhubert_base_librispeech.yaml
│   │   │       ├── mrhubert_large_librilight.yaml
│   │   │       └── run/
│   │   │           └── submitit_reg.yaml
│   │   ├── decode.sh
│   │   ├── finetune.sh
│   │   └── train.sh
│   ├── multilingual/
│   │   ├── ML50_langs.txt
│   │   ├── README.md
│   │   ├── data_scripts/
│   │   │   ├── README.md
│   │   │   ├── binarize.py
│   │   │   ├── check_iswlt_test_data.py
│   │   │   ├── check_self_overlaps.py
│   │   │   ├── check_valid_test_overlaps.py
│   │   │   ├── dedup_all.py
│   │   │   ├── download_ML50_v1.sh
│   │   │   ├── download_af_xh.sh
│   │   │   ├── download_flores_data.sh
│   │   │   ├── download_iitb.sh
│   │   │   ├── download_iwslt_and_extract.sh
│   │   │   ├── download_lotus.sh
│   │   │   ├── download_ted_and_extract.py
│   │   │   ├── download_wat19_my.sh
│   │   │   ├── download_wmt19_and_before.py
│   │   │   ├── download_wmt20.sh
│   │   │   ├── preprocess_ML50_v1.sh
│   │   │   ├── remove_valid_test_in_train.py
│   │   │   ├── requirement.txt
│   │   │   └── utils/
│   │   │       ├── dedup.py
│   │   │       ├── fasttext_multi_filter.py
│   │   │       └── strip_sgm.sh
│   │   ├── finetune_multilingual_model.sh
│   │   ├── multilingual_fairseq_gen.sh
│   │   └── train_multilingual_model.sh
│   ├── noisychannel/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── rerank.py
│   │   ├── rerank_generate.py
│   │   ├── rerank_options.py
│   │   ├── rerank_score_bw.py
│   │   ├── rerank_score_lm.py
│   │   ├── rerank_tune.py
│   │   └── rerank_utils.py
│   ├── nonautoregressive_translation/
│   │   ├── README.md
│   │   └── scripts.md
│   ├── normformer/
│   │   ├── README.md
│   │   └── train_lm.sh
│   ├── operators/
│   │   ├── alignment_train_cpu.cpp
│   │   ├── alignment_train_cuda.cpp
│   │   ├── alignment_train_cuda.h
│   │   ├── alignment_train_kernel.cu
│   │   └── utils.h
│   ├── paraphraser/
│   │   ├── README.md
│   │   └── paraphrase.py
│   ├── pay_less_attention_paper/
│   │   └── README.md
│   ├── pointer_generator/
│   │   ├── README.md
│   │   ├── README.xsum.md
│   │   ├── pointer_generator_src/
│   │   │   ├── __init__.py
│   │   │   └── transformer_pg.py
│   │   ├── postprocess.py
│   │   └── preprocess.py
│   ├── quant_noise/
│   │   ├── README.md
│   │   └── transformer_quantization_config.yaml
│   ├── roberta/
│   │   ├── README.custom_classification.md
│   │   ├── README.glue.md
│   │   ├── README.md
│   │   ├── README.pretraining.md
│   │   ├── README.race.md
│   │   ├── commonsense_qa/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── commonsense_qa_task.py
│   │   │   └── download_cqa_data.sh
│   │   ├── config/
│   │   │   ├── finetuning/
│   │   │   │   ├── cola.yaml
│   │   │   │   ├── mnli.yaml
│   │   │   │   ├── mrpc.yaml
│   │   │   │   ├── qnli.yaml
│   │   │   │   ├── qqp.yaml
│   │   │   │   ├── rte.yaml
│   │   │   │   ├── run_config/
│   │   │   │   │   ├── local.yaml
│   │   │   │   │   ├── slurm_1g.yaml
│   │   │   │   │   └── slurm_1g_aws.yaml
│   │   │   │   ├── sst_2.yaml
│   │   │   │   └── sts_b.yaml
│   │   │   └── pretraining/
│   │   │       ├── base.yaml
│   │   │       └── run_config/
│   │   │           ├── local.yaml
│   │   │           ├── slurm_2.yaml
│   │   │           ├── slurm_2_aws.yaml
│   │   │           ├── slurm_3.yaml
│   │   │           └── slurm_4.yaml
│   │   ├── fb_multilingual/
│   │   │   └── README.multilingual.pretraining.md
│   │   ├── multiprocessing_bpe_encoder.py
│   │   ├── preprocess_GLUE_tasks.sh
│   │   ├── preprocess_RACE.py
│   │   ├── preprocess_RACE.sh
│   │   └── wsc/
│   │       ├── README.md
│   │       ├── __init__.py
│   │       ├── wsc_criterion.py
│   │       ├── wsc_task.py
│   │       └── wsc_utils.py
│   ├── rxf/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── rxf_src/
│   │       ├── __init__.py
│   │       ├── label_smoothed_cross_entropy_r3f.py
│   │       └── sentence_prediction_r3f.py
│   ├── scaling_nmt/
│   │   └── README.md
│   ├── shuffled_word_order/
│   │   ├── README.finetuning.md
│   │   └── README.md
│   ├── simultaneous_translation/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── docs/
│   │   │   ├── ende-mma.md
│   │   │   └── enja-waitk.md
│   │   ├── eval/
│   │   │   └── agents/
│   │   │       └── simul_t2t_enja.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── convtransformer_simul_trans.py
│   │   │   └── transformer_monotonic_attention.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── fixed_pre_decision.py
│   │   │   ├── monotonic_multihead_attention.py
│   │   │   └── monotonic_transformer_layer.py
│   │   ├── tests/
│   │   │   ├── test_alignment_train.py
│   │   │   └── test_text_models.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── functions.py
│   │       ├── monotonic_attention.py
│   │       └── p_choose_strategy.py
│   ├── speech_recognition/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── criterions/
│   │   │   ├── ASG_loss.py
│   │   │   ├── __init__.py
│   │   │   └── cross_entropy_acc.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── asr_dataset.py
│   │   │   ├── collaters.py
│   │   │   ├── data_utils.py
│   │   │   └── replabels.py
│   │   ├── datasets/
│   │   │   ├── asr_prep_json.py
│   │   │   └── prepare-librispeech.sh
│   │   ├── infer.py
│   │   ├── kaldi/
│   │   │   ├── __init__.py
│   │   │   ├── add-self-loop-simple.cc
│   │   │   ├── config/
│   │   │   │   └── kaldi_initializer.yaml
│   │   │   ├── kaldi_decoder.py
│   │   │   └── kaldi_initializer.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── vggtransformer.py
│   │   │   └── w2l_conv_glu_enc.py
│   │   ├── new/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── conf/
│   │   │   │   ├── hydra/
│   │   │   │   │   └── sweeper/
│   │   │   │   │       ├── ax.yaml
│   │   │   │   │       └── ax_sil.yaml
│   │   │   │   ├── infer.yaml
│   │   │   │   └── run_config/
│   │   │   │       ├── fb_slurm_1.yaml
│   │   │   │       └── fb_slurm_2g.yaml
│   │   │   ├── decoders/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_decoder.py
│   │   │   │   ├── decoder.py
│   │   │   │   ├── decoder_config.py
│   │   │   │   ├── flashlight_decoder.py
│   │   │   │   └── viterbi_decoder.py
│   │   │   └── infer.py
│   │   ├── tasks/
│   │   │   ├── __init__.py
│   │   │   └── speech_recognition.py
│   │   ├── utils/
│   │   │   └── wer_utils.py
│   │   └── w2l_decoder.py
│   ├── speech_synthesis/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── data_utils.py
│   │   ├── docs/
│   │   │   ├── common_voice_example.md
│   │   │   ├── ljspeech_example.md
│   │   │   └── vctk_example.md
│   │   ├── evaluation/
│   │   │   ├── __init__.py
│   │   │   ├── eval_asr.py
│   │   │   ├── eval_f0.py
│   │   │   ├── eval_sp.py
│   │   │   └── get_eval_manifest.py
│   │   ├── generate_waveform.py
│   │   ├── preprocessing/
│   │   │   ├── __init__.py
│   │   │   ├── denoise_and_vad_audio.py
│   │   │   ├── denoiser/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── demucs.py
│   │   │   │   ├── pretrained.py
│   │   │   │   ├── resample.py
│   │   │   │   └── utils.py
│   │   │   ├── get_common_voice_audio_manifest.py
│   │   │   ├── get_feature_manifest.py
│   │   │   ├── get_ljspeech_audio_manifest.py
│   │   │   ├── get_speaker_embedding.py
│   │   │   ├── get_vctk_audio_manifest.py
│   │   │   ├── speaker_embedder/
│   │   │   │   └── __init__.py
│   │   │   └── vad/
│   │   │       └── __init__.py
│   │   └── utils.py
│   ├── speech_text_joint_to_text/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── configs/
│   │   │   └── mustc_noise.list
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   ├── multi_modality_compound.py
│   │   │   ├── multi_modality_cross_entropy.py
│   │   │   └── text_guide_cross_entropy_acc.py
│   │   ├── data/
│   │   │   └── pair_denoising_dataset.py
│   │   ├── docs/
│   │   │   ├── ende-mustc.md
│   │   │   ├── iwslt2021.md
│   │   │   └── pre-training.md
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── joint_speech_text_pretrain_transformer.py
│   │   │   ├── s2t_dualinputtransformer.py
│   │   │   ├── s2t_dualinputwavtransformer.py
│   │   │   └── s2t_dualinputxmtransformer.py
│   │   ├── scripts/
│   │   │   ├── convert_model.py
│   │   │   └── g2p_encode.py
│   │   └── tasks/
│   │       ├── __init__.py
│   │       ├── pair_denoising.py
│   │       ├── speech_text_denoise_pretrain.py
│   │       └── speech_text_joint.py
│   ├── speech_to_speech/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── asr_bleu/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── asr_model_cfgs.json
│   │   │   ├── compute_asr_bleu.py
│   │   │   ├── requirements.txt
│   │   │   └── utils.py
│   │   ├── benchmarking/
│   │   │   ├── README.md
│   │   │   ├── configs/
│   │   │   │   ├── 2StageS2ST.yaml
│   │   │   │   ├── 3StageS2ST.yaml
│   │   │   │   ├── DirectS2U.yaml
│   │   │   │   └── S2T.yaml
│   │   │   ├── core.py
│   │   │   ├── data_utils.py
│   │   │   └── get_metrics.py
│   │   ├── docs/
│   │   │   ├── data_augmentation.md
│   │   │   ├── direct_s2st_discrete_units.md
│   │   │   ├── enhanced_direct_s2st_discrete_units.md
│   │   │   └── textless_s2st_real_data.md
│   │   ├── generate_waveform_from_code.py
│   │   ├── preprocessing/
│   │   │   ├── __init__.py
│   │   │   ├── data_utils.py
│   │   │   ├── prep_s2spect_data.py
│   │   │   ├── prep_s2ut_data.py
│   │   │   ├── prep_sn_data.py
│   │   │   └── prep_sn_output_data.py
│   │   └── unity/
│   │       ├── __init__.py
│   │       ├── sequence_generator.py
│   │       └── sequence_generator_multi_decoder.py
│   ├── speech_to_text/
│   │   ├── README.md
│   │   ├── data_utils.py
│   │   ├── docs/
│   │   │   ├── covost_example.md
│   │   │   ├── librispeech_example.md
│   │   │   ├── mtedx_example.md
│   │   │   ├── mustc_example.md
│   │   │   └── simulst_mustc_example.md
│   │   ├── prep_covost_data.py
│   │   ├── prep_librispeech_data.py
│   │   ├── prep_mtedx_data.py
│   │   ├── prep_mustc_data.py
│   │   ├── seg_mustc_data.py
│   │   └── simultaneous_translation/
│   │       └── agents/
│   │           └── fairseq_simul_st_agent.py
│   ├── stories/
│   │   └── README.md
│   ├── textless_nlp/
│   │   ├── dgslm/
│   │   │   ├── README.md
│   │   │   ├── create_code_file.py
│   │   │   ├── dgslm_utils.py
│   │   │   ├── hubert_fisher/
│   │   │   │   └── README.md
│   │   │   ├── sample_speech_dlm.py
│   │   │   └── vocoder_hifigan/
│   │   │       ├── README.md
│   │   │       └── generate_stereo_waveform.py
│   │   ├── gslm/
│   │   │   ├── README.md
│   │   │   ├── metrics/
│   │   │   │   ├── README.md
│   │   │   │   ├── abx_metrics/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── dump_abx_feats.py
│   │   │   │   └── asr_metrics/
│   │   │   │       ├── README.md
│   │   │   │       ├── continuation_eval.py
│   │   │   │       ├── misc/
│   │   │   │       │   ├── bleu_utils.py
│   │   │   │       │   ├── cut_as.py
│   │   │   │       │   └── dict.ltr.txt
│   │   │   │       ├── ppx.py
│   │   │   │       └── self_auto_bleu.py
│   │   │   ├── speech2unit/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clustering/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cluster_kmeans.py
│   │   │   │   │   ├── dump_feats.py
│   │   │   │   │   ├── quantize_with_kmeans.py
│   │   │   │   │   └── utils.py
│   │   │   │   └── pretrained/
│   │   │   │       ├── cpc_feature_reader.py
│   │   │   │       ├── hubert_feature_reader.py
│   │   │   │       ├── logmel_feature_reader.py
│   │   │   │       ├── utils.py
│   │   │   │       └── w2v2_feature_reader.py
│   │   │   ├── tools/
│   │   │   │   ├── README.md
│   │   │   │   └── resynthesize_speech.py
│   │   │   ├── ulm/
│   │   │   │   ├── README.md
│   │   │   │   └── sample.py
│   │   │   └── unit2speech/
│   │   │       ├── README.md
│   │   │       ├── convert_to_16k.py
│   │   │       ├── glow.py
│   │   │       ├── multiproc.py
│   │   │       ├── synthesize_audio_from_units.py
│   │   │       ├── tacotron2/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── audio_processing.py
│   │   │       │   ├── cleaners.py
│   │   │       │   ├── cmudict.py
│   │   │       │   ├── layers.py
│   │   │       │   ├── model.py
│   │   │       │   ├── numbers.py
│   │   │       │   ├── stft.py
│   │   │       │   ├── symbols.py
│   │   │       │   ├── text.py
│   │   │       │   ├── utils.py
│   │   │       │   └── waveglow_denoiser.py
│   │   │       ├── tts_data.py
│   │   │       └── utils.py
│   │   ├── pgslm/
│   │   │   ├── README.md
│   │   │   ├── data_utils.py
│   │   │   ├── eval/
│   │   │   │   ├── __init__.py
│   │   │   │   └── cont_metrics.py
│   │   │   ├── generate_waveform.py
│   │   │   ├── inference_dataset.py
│   │   │   ├── naive_decoder.py
│   │   │   ├── prepare_dataset.py
│   │   │   ├── preprocess_f0.py
│   │   │   ├── quantize_f0.py
│   │   │   ├── sample/
│   │   │   │   ├── __init__.py
│   │   │   │   └── sample.py
│   │   │   ├── scripts/
│   │   │   │   ├── join_units_manifest.py
│   │   │   │   ├── prepare_data.sh
│   │   │   │   └── prepare_f0_quantization.sh
│   │   │   └── truncated_laplace.py
│   │   └── speech-resynth/
│   │       └── README.md
│   ├── translation/
│   │   ├── README.md
│   │   ├── prepare-iwslt14.sh
│   │   ├── prepare-iwslt17-multilingual.sh
│   │   ├── prepare-wmt14en2de.sh
│   │   └── prepare-wmt14en2fr.sh
│   ├── translation_moe/
│   │   ├── README.md
│   │   ├── score.py
│   │   └── translation_moe_src/
│   │       ├── __init__.py
│   │       ├── logsumexp_moe.py
│   │       ├── mean_pool_gating_network.py
│   │       └── translation_moe.py
│   ├── truncated_bptt/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── transformer_xl_model.py
│   │   └── truncated_bptt_lm_task.py
│   ├── unsupervised_quality_estimation/
│   │   ├── README.md
│   │   ├── aggregate_scores.py
│   │   ├── meteor.py
│   │   └── repeat_lines.py
│   ├── wav2vec/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── finetuning/
│   │   │   │   ├── base_100h.yaml
│   │   │   │   ├── base_10h.yaml
│   │   │   │   ├── base_10m.yaml
│   │   │   │   ├── base_1h.yaml
│   │   │   │   ├── base_960h.yaml
│   │   │   │   ├── run_config/
│   │   │   │   │   ├── slurm_1.yaml
│   │   │   │   │   ├── slurm_16.yaml
│   │   │   │   │   ├── slurm_1_aws.yaml
│   │   │   │   │   ├── slurm_1_old.yaml
│   │   │   │   │   ├── slurm_2.yaml
│   │   │   │   │   ├── slurm_2_aws.yaml
│   │   │   │   │   ├── slurm_2g.yaml
│   │   │   │   │   ├── slurm_3.yaml
│   │   │   │   │   ├── slurm_4g.yaml
│   │   │   │   │   ├── slurm_4g_aws.yaml
│   │   │   │   │   └── slurm_8.yaml
│   │   │   │   ├── vox_100h.yaml
│   │   │   │   ├── vox_100h_2.yaml
│   │   │   │   ├── vox_100h_2_aws.yaml
│   │   │   │   ├── vox_100h_3.yaml
│   │   │   │   ├── vox_10h.yaml
│   │   │   │   ├── vox_10h_2.yaml
│   │   │   │   ├── vox_10h_2_aws.yaml
│   │   │   │   ├── vox_10h_aws.yaml
│   │   │   │   ├── vox_10h_aws_v100.yaml
│   │   │   │   ├── vox_10m.yaml
│   │   │   │   ├── vox_10m_2.yaml
│   │   │   │   ├── vox_10m_2_aws.yaml
│   │   │   │   ├── vox_10m_3.yaml
│   │   │   │   ├── vox_1h.yaml
│   │   │   │   ├── vox_1h_2.yaml
│   │   │   │   ├── vox_1h_2_aws.yaml
│   │   │   │   ├── vox_1h_3.yaml
│   │   │   │   ├── vox_1h_4.yaml
│   │   │   │   ├── vox_1h_aws.yaml
│   │   │   │   ├── vox_960h.yaml
│   │   │   │   ├── vox_960h_2.yaml
│   │   │   │   ├── vox_960h_2_aws.yaml
│   │   │   │   └── vox_960h_3.yaml
│   │   │   └── pretraining/
│   │   │       ├── wav2vec2_base_librispeech.yaml
│   │   │       ├── wav2vec2_conformer_base_librispeech.yaml
│   │   │       ├── wav2vec2_conformer_large_librivox.yaml
│   │   │       ├── wav2vec2_large_librivox.yaml
│   │   │       ├── wav2vec2_large_librivox_tpu-pod.yaml
│   │   │       └── wav2vec2_large_librivox_tpu.yaml
│   │   ├── libri_labels.py
│   │   ├── scripts/
│   │   │   └── binarize_manifest.sh
│   │   ├── unsupervised/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── config/
│   │   │   │   ├── finetuning/
│   │   │   │   │   └── w2v_finetune.yaml
│   │   │   │   ├── gan/
│   │   │   │   │   ├── w2vu.yaml
│   │   │   │   │   └── w2vu2.yaml
│   │   │   │   ├── generate/
│   │   │   │   │   └── viterbi.yaml
│   │   │   │   ├── timit_matched/
│   │   │   │   │   ├── test.uid
│   │   │   │   │   ├── train.uid
│   │   │   │   │   ├── train_text.uid
│   │   │   │   │   └── valid.uid
│   │   │   │   └── timit_unmatched/
│   │   │   │       ├── test.uid
│   │   │   │       ├── train.uid
│   │   │   │       ├── train_text.uid
│   │   │   │       └── valid.uid
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── extracted_features_dataset.py
│   │   │   │   └── random_input_dataset.py
│   │   │   ├── kaldi_self_train/
│   │   │   │   ├── README.md
│   │   │   │   └── st/
│   │   │   │       ├── cmd.sh
│   │   │   │       ├── decode_phone.sh
│   │   │   │       ├── decode_word_step1.sh
│   │   │   │       ├── decode_word_step2.sh
│   │   │   │       ├── local/
│   │   │   │       │   ├── copy_aligned_text.py
│   │   │   │       │   ├── decode.sh
│   │   │   │       │   ├── prepare_data_from_w2v.py
│   │   │   │       │   ├── prepare_lang.sh
│   │   │   │       │   ├── prepare_lang_word.sh
│   │   │   │       │   ├── prepare_lm.sh
│   │   │   │       │   ├── score.sh
│   │   │   │       │   ├── show_wer.sh
│   │   │   │       │   ├── train_subset_lgbeam.sh
│   │   │   │       │   ├── unsup_select.py
│   │   │   │       │   ├── unsup_select_decode.sh
│   │   │   │       │   └── unsup_select_decode_word.sh
│   │   │   │       ├── path.sh
│   │   │   │       ├── steps_gan/
│   │   │   │       │   ├── train_deltas.sh
│   │   │   │       │   ├── train_lda_mllt.sh
│   │   │   │       │   └── train_sat.sh
│   │   │   │       └── train.sh
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   └── wav2vec_u.py
│   │   │   ├── scripts/
│   │   │   │   ├── apply_pca.py
│   │   │   │   ├── copy_labels.py
│   │   │   │   ├── filter_lexicon.py
│   │   │   │   ├── filter_tsv.py
│   │   │   │   ├── g2p_wrd_to_phn.py
│   │   │   │   ├── ltr_to_wrd.py
│   │   │   │   ├── mean_pool.py
│   │   │   │   ├── merge_clusters.py
│   │   │   │   ├── normalize_and_filter_text.py
│   │   │   │   ├── normalize_text.py
│   │   │   │   ├── pca.py
│   │   │   │   ├── phonemize_with_sil.py
│   │   │   │   ├── prepare_audio.sh
│   │   │   │   ├── prepare_audio_v2.sh
│   │   │   │   ├── prepare_text.sh
│   │   │   │   ├── prepare_timit.sh
│   │   │   │   ├── remove_silence.py
│   │   │   │   ├── vads.py
│   │   │   │   ├── wav2vec_apply_cluster_faiss.py
│   │   │   │   ├── wav2vec_cluster_faiss.py
│   │   │   │   ├── wav2vec_extract_features.py
│   │   │   │   ├── wer.py
│   │   │   │   └── wrd_to_ltr.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   └── unpaired_audio_text.py
│   │   │   └── w2vu_generate.py
│   │   ├── vq-wav2vec_featurize.py
│   │   ├── wav2vec_featurize.py
│   │   ├── wav2vec_manifest.py
│   │   └── xlsr/
│   │       ├── README.md
│   │       ├── config/
│   │       │   └── finetune.yaml
│   │       └── scripts/
│   │           ├── eval_speaker_clf_task.py
│   │           └── gen_audio_embedding.py
│   ├── wmt19/
│   │   └── README.md
│   ├── wmt20/
│   │   └── README.md
│   ├── wmt21/
│   │   ├── README.md
│   │   ├── eval.sh
│   │   └── scripts/
│   │       ├── normalize-punctuation.perl
│   │       └── replace-unicode-punctuation.perl
│   ├── womens_bios/
│   │   ├── README.md
│   │   └── query_occupations_from_wikidata.py
│   ├── xformers/
│   │   └── README.md
│   ├── xglm/
│   │   ├── README.md
│   │   ├── XStoryCloze.md
│   │   └── model_card.md
│   ├── xlmr/
│   │   └── README.md
│   └── xmod/
│       ├── README.md
│       └── preprocess_nli.py
├── fairseq/
│   ├── __init__.py
│   ├── benchmark/
│   │   ├── __init__.py
│   │   ├── benchmark_multihead_attention.py
│   │   ├── dummy_dataset.py
│   │   ├── dummy_lm.py
│   │   ├── dummy_masked_lm.py
│   │   ├── dummy_model.py
│   │   └── dummy_mt.py
│   ├── binarizer.py
│   ├── checkpoint_utils.py
│   ├── clib/
│   │   ├── cuda/
│   │   │   ├── ngram_repeat_block_cuda.cpp
│   │   │   └── ngram_repeat_block_cuda_kernel.cu
│   │   ├── libbase/
│   │   │   └── balanced_assignment.cpp
│   │   ├── libbleu/
│   │   │   ├── libbleu.cpp
│   │   │   └── module.cpp
│   │   ├── libnat/
│   │   │   └── edit_dist.cpp
│   │   └── libnat_cuda/
│   │       ├── binding.cpp
│   │       ├── edit_dist.cu
│   │       └── edit_dist.h
│   ├── config/
│   │   ├── __init__.py
│   │   ├── config.yaml
│   │   ├── fb_run_config/
│   │   │   └── slurm.yaml
│   │   └── model/
│   │       ├── transformer_lm/
│   │       │   ├── transformer_lm_baevski_gbw.yaml
│   │       │   ├── transformer_lm_baevski_wiki103.yaml
│   │       │   ├── transformer_lm_big.yaml
│   │       │   ├── transformer_lm_gbw.yaml
│   │       │   ├── transformer_lm_gpt.yaml
│   │       │   ├── transformer_lm_gpt2_big.yaml
│   │       │   ├── transformer_lm_gpt2_medium.yaml
│   │       │   ├── transformer_lm_gpt2_small.yaml
│   │       │   └── transformer_lm_wiki103.yaml
│   │       ├── wav2vec/
│   │       │   └── vq_wav2vec_gumbel.yaml
│   │       └── wav2vec2/
│   │           ├── wav2vec2_base.yaml
│   │           └── wav2vec2_large.yaml
│   ├── criterions/
│   │   ├── __init__.py
│   │   ├── adaptive_loss.py
│   │   ├── composite_loss.py
│   │   ├── cross_entropy.py
│   │   ├── ctc.py
│   │   ├── fairseq_criterion.py
│   │   ├── fastspeech2_loss.py
│   │   ├── hubert_criterion.py
│   │   ├── label_smoothed_cross_entropy.py
│   │   ├── label_smoothed_cross_entropy_latency_augmented.py
│   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   ├── label_smoothed_cross_entropy_with_ctc.py
│   │   ├── label_smoothed_cross_entropy_with_rdrop.py
│   │   ├── legacy_masked_lm.py
│   │   ├── masked_lm.py
│   │   ├── model_criterion.py
│   │   ├── nat_loss.py
│   │   ├── sentence_prediction.py
│   │   ├── sentence_prediction_adapters.py
│   │   ├── sentence_ranking.py
│   │   ├── speech_dlm_criterion.py
│   │   ├── speech_to_speech_criterion.py
│   │   ├── speech_ulm_criterion.py
│   │   ├── tacotron2_loss.py
│   │   └── wav2vec_criterion.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── add_class_target_dataset.py
│   │   ├── add_target_dataset.py
│   │   ├── append_token_dataset.py
│   │   ├── audio/
│   │   │   ├── __init__.py
│   │   │   ├── audio_utils.py
│   │   │   ├── data_cfg.py
│   │   │   ├── dataset_transforms/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── concataugment.py
│   │   │   │   └── noisyoverlapaugment.py
│   │   │   ├── feature_transforms/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── delta_deltas.py
│   │   │   │   ├── global_cmvn.py
│   │   │   │   ├── specaugment.py
│   │   │   │   └── utterance_cmvn.py
│   │   │   ├── frm_text_to_speech_dataset.py
│   │   │   ├── hubert_dataset.py
│   │   │   ├── multi_modality_dataset.py
│   │   │   ├── raw_audio_dataset.py
│   │   │   ├── speech_to_speech_dataset.py
│   │   │   ├── speech_to_text_dataset.py
│   │   │   ├── speech_to_text_joint_dataset.py
│   │   │   ├── text_to_speech_dataset.py
│   │   │   └── waveform_transforms/
│   │   │       ├── __init__.py
│   │   │       └── noiseaugment.py
│   │   ├── backtranslation_dataset.py
│   │   ├── base_wrapper_dataset.py
│   │   ├── bucket_pad_length_dataset.py
│   │   ├── codedataset.py
│   │   ├── colorize_dataset.py
│   │   ├── concat_dataset.py
│   │   ├── concat_sentences_dataset.py
│   │   ├── data_utils.py
│   │   ├── data_utils_fast.pyx
│   │   ├── denoising_dataset.py
│   │   ├── dictionary.py
│   │   ├── encoders/
│   │   │   ├── __init__.py
│   │   │   ├── byte_bpe.py
│   │   │   ├── byte_utils.py
│   │   │   ├── bytes.py
│   │   │   ├── characters.py
│   │   │   ├── fastbpe.py
│   │   │   ├── gpt2_bpe.py
│   │   │   ├── gpt2_bpe_utils.py
│   │   │   ├── hf_bert_bpe.py
│   │   │   ├── hf_byte_bpe.py
│   │   │   ├── moses_tokenizer.py
│   │   │   ├── nltk_tokenizer.py
│   │   │   ├── sentencepiece_bpe.py
│   │   │   ├── space_tokenizer.py
│   │   │   ├── subword_nmt_bpe.py
│   │   │   └── utils.py
│   │   ├── fairseq_dataset.py
│   │   ├── fasta_dataset.py
│   │   ├── huffman/
│   │   │   ├── __init__.py
│   │   │   ├── huffman_coder.py
│   │   │   └── huffman_mmap_indexed_dataset.py
│   │   ├── id_dataset.py
│   │   ├── indexed_dataset.py
│   │   ├── iterators.py
│   │   ├── language_pair_dataset.py
│   │   ├── legacy/
│   │   │   ├── __init__.py
│   │   │   ├── block_pair_dataset.py
│   │   │   ├── masked_lm_dataset.py
│   │   │   └── masked_lm_dictionary.py
│   │   ├── list_dataset.py
│   │   ├── lm_context_window_dataset.py
│   │   ├── lru_cache_dataset.py
│   │   ├── mask_tokens_dataset.py
│   │   ├── monolingual_dataset.py
│   │   ├── multi_corpus_dataset.py
│   │   ├── multi_corpus_sampled_dataset.py
│   │   ├── multilingual/
│   │   │   ├── __init__.py
│   │   │   ├── multilingual_data_manager.py
│   │   │   ├── multilingual_utils.py
│   │   │   ├── sampled_multi_dataset.py
│   │   │   ├── sampled_multi_epoch_dataset.py
│   │   │   └── sampling_method.py
│   │   ├── nested_dictionary_dataset.py
│   │   ├── noising.py
│   │   ├── num_samples_dataset.py
│   │   ├── numel_dataset.py
│   │   ├── offset_tokens_dataset.py
│   │   ├── pad_dataset.py
│   │   ├── padding_mask_dataset.py
│   │   ├── plasma_utils.py
│   │   ├── prepend_dataset.py
│   │   ├── prepend_token_dataset.py
│   │   ├── raw_label_dataset.py
│   │   ├── replace_dataset.py
│   │   ├── resampling_dataset.py
│   │   ├── roll_dataset.py
│   │   ├── round_robin_zip_datasets.py
│   │   ├── shorten_dataset.py
│   │   ├── sort_dataset.py
│   │   ├── span_mask_tokens_dataset.py
│   │   ├── speech_dlm_dataset.py
│   │   ├── strip_token_dataset.py
│   │   ├── subsample_dataset.py
│   │   ├── text_compressor.py
│   │   ├── token_block_dataset.py
│   │   ├── token_block_utils_fast.pyx
│   │   ├── transform_eos_concat_langpair_dataset.py
│   │   ├── transform_eos_dataset.py
│   │   └── transform_eos_lang_pair_dataset.py
│   ├── dataclass/
│   │   ├── __init__.py
│   │   ├── configs.py
│   │   ├── constants.py
│   │   ├── initialize.py
│   │   └── utils.py
│   ├── distributed/
│   │   ├── __init__.py
│   │   ├── distributed_timeout_wrapper.py
│   │   ├── fully_sharded_data_parallel.py
│   │   ├── legacy_distributed_data_parallel.py
│   │   ├── module_proxy_wrapper.py
│   │   ├── tpu_distributed_data_parallel.py
│   │   └── utils.py
│   ├── file_chunker_utils.py
│   ├── file_io.py
│   ├── file_utils.py
│   ├── hub_utils.py
│   ├── incremental_decoding_utils.py
│   ├── iterative_refinement_generator.py
│   ├── logging/
│   │   ├── __init__.py
│   │   ├── meters.py
│   │   ├── metrics.py
│   │   └── progress_bar.py
│   ├── model_parallel/
│   │   ├── __init__.py
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   └── vocab_parallel_cross_entropy.py
│   │   ├── megatron_trainer.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── pipeline_parallel_transformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── layers.py
│   │   │   │   └── model.py
│   │   │   ├── roberta/
│   │   │   │   ├── __init__.py
│   │   │   │   └── model.py
│   │   │   ├── transformer.py
│   │   │   └── transformer_lm.py
│   │   └── modules/
│   │       ├── __init__.py
│   │       ├── multihead_attention.py
│   │       └── transformer_layer.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── bart/
│   │   │   ├── __init__.py
│   │   │   ├── hub_interface.py
│   │   │   └── model.py
│   │   ├── composite_encoder.py
│   │   ├── distributed_fairseq_model.py
│   │   ├── ema/
│   │   │   ├── __init__.py
│   │   │   └── ema.py
│   │   ├── fairseq_decoder.py
│   │   ├── fairseq_encoder.py
│   │   ├── fairseq_incremental_decoder.py
│   │   ├── fairseq_model.py
│   │   ├── fconv.py
│   │   ├── fconv_lm.py
│   │   ├── fconv_self_att.py
│   │   ├── hubert/
│   │   │   ├── __init__.py
│   │   │   ├── hubert.py
│   │   │   └── hubert_asr.py
│   │   ├── huggingface/
│   │   │   ├── __init__.py
│   │   │   └── hf_gpt2.py
│   │   ├── lightconv.py
│   │   ├── lightconv_lm.py
│   │   ├── lstm.py
│   │   ├── lstm_lm.py
│   │   ├── masked_lm.py
│   │   ├── model_utils.py
│   │   ├── multilingual_transformer.py
│   │   ├── multires_hubert/
│   │   │   ├── __init__.py
│   │   │   ├── multires_hubert.py
│   │   │   └── multires_hubert_asr.py
│   │   ├── nat/
│   │   │   ├── __init__.py
│   │   │   ├── cmlm_transformer.py
│   │   │   ├── fairseq_nat_model.py
│   │   │   ├── insertion_transformer.py
│   │   │   ├── iterative_nonautoregressive_transformer.py
│   │   │   ├── levenshtein_transformer.py
│   │   │   ├── levenshtein_utils.py
│   │   │   ├── nat_crf_transformer.py
│   │   │   ├── nonautoregressive_ensembles.py
│   │   │   └── nonautoregressive_transformer.py
│   │   ├── roberta/
│   │   │   ├── __init__.py
│   │   │   ├── alignment_utils.py
│   │   │   ├── enc_dec.py
│   │   │   ├── hub_interface.py
│   │   │   ├── model.py
│   │   │   ├── model_camembert.py
│   │   │   ├── model_gottbert.py
│   │   │   └── model_xlmr.py
│   │   ├── speech_dlm/
│   │   │   ├── __init__.py
│   │   │   ├── hub_interface.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── speech_dlm_decoder.py
│   │   │   │   └── speech_dlm_decoder_layer.py
│   │   │   ├── sequence_generator/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── multichannel_search.py
│   │   │   │   └── multichannel_sequence_generator.py
│   │   │   └── speech_dlm.py
│   │   ├── speech_to_speech/
│   │   │   ├── __init__.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ctc_decoder.py
│   │   │   │   ├── stacked_embedding.py
│   │   │   │   ├── transformer_decoder_aug.py
│   │   │   │   └── transformer_encoder.py
│   │   │   ├── s2s_conformer.py
│   │   │   ├── s2s_conformer_translatotron2.py
│   │   │   ├── s2s_conformer_unity.py
│   │   │   └── s2s_transformer.py
│   │   ├── speech_to_text/
│   │   │   ├── __init__.py
│   │   │   ├── berard.py
│   │   │   ├── convtransformer.py
│   │   │   ├── hub_interface.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── augmented_memory_attention.py
│   │   │   │   ├── convolution.py
│   │   │   │   └── emformer.py
│   │   │   ├── multi_modality_model.py
│   │   │   ├── s2t_conformer.py
│   │   │   ├── s2t_transformer.py
│   │   │   ├── s2t_wav_transformer.py
│   │   │   ├── utils.py
│   │   │   ├── xm_transformer.py
│   │   │   └── xm_transformer_unity.py
│   │   ├── text_to_speech/
│   │   │   ├── __init__.py
│   │   │   ├── codehifigan.py
│   │   │   ├── fastspeech2.py
│   │   │   ├── hifigan.py
│   │   │   ├── hub_interface.py
│   │   │   ├── tacotron2.py
│   │   │   ├── tts_transformer.py
│   │   │   └── vocoder.py
│   │   ├── transformer/
│   │   │   ├── __init__.py
│   │   │   ├── transformer_base.py
│   │   │   ├── transformer_config.py
│   │   │   ├── transformer_decoder.py
│   │   │   ├── transformer_decoder_aug.py
│   │   │   ├── transformer_encoder.py
│   │   │   └── transformer_legacy.py
│   │   ├── transformer_align.py
│   │   ├── transformer_from_pretrained_xlm.py
│   │   ├── transformer_lm.py
│   │   ├── transformer_ulm.py
│   │   ├── wav2vec/
│   │   │   ├── __init__.py
│   │   │   ├── utils.py
│   │   │   ├── wav2vec.py
│   │   │   ├── wav2vec2.py
│   │   │   ├── wav2vec2_asr.py
│   │   │   ├── wav2vec2_classification.py
│   │   │   └── wav2vec2_laser.py
│   │   └── xmod/
│   │       ├── __init__.py
│   │       ├── hub_interface.py
│   │       ├── model.py
│   │       └── transformer_layer_xmod.py
│   ├── modules/
│   │   ├── __init__.py
│   │   ├── adaptive_input.py
│   │   ├── adaptive_softmax.py
│   │   ├── base_layer.py
│   │   ├── beamable_mm.py
│   │   ├── character_token_embedder.py
│   │   ├── checkpoint_activations.py
│   │   ├── conformer_layer.py
│   │   ├── conv_tbc.py
│   │   ├── cross_entropy.py
│   │   ├── cuda_utils.cu
│   │   ├── downsampled_multihead_attention.py
│   │   ├── dynamic_convolution.py
│   │   ├── dynamic_crf_layer.py
│   │   ├── dynamicconv_layer/
│   │   │   ├── __init__.py
│   │   │   ├── cuda_function_gen.py
│   │   │   ├── dynamicconv_cuda.cpp
│   │   │   ├── dynamicconv_cuda.cuh
│   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   ├── dynamicconv_layer.py
│   │   │   ├── dynamiconv_cpu.cpp
│   │   │   └── setup.py
│   │   ├── ema_module.py
│   │   ├── espnet_multihead_attention.py
│   │   ├── fairseq_dropout.py
│   │   ├── fp32_batch_norm.py
│   │   ├── fp32_group_norm.py
│   │   ├── fp32_instance_norm.py
│   │   ├── gelu.py
│   │   ├── grad_multiply.py
│   │   ├── gumbel_vector_quantizer.py
│   │   ├── kmeans_attention.py
│   │   ├── kmeans_vector_quantizer.py
│   │   ├── layer_drop.py
│   │   ├── layer_norm.py
│   │   ├── learned_positional_embedding.py
│   │   ├── lightconv_layer/
│   │   │   ├── __init__.py
│   │   │   ├── cuda_function_gen.py
│   │   │   ├── lightconv_cuda.cpp
│   │   │   ├── lightconv_cuda.cuh
│   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   ├── lightconv_layer.py
│   │   │   └── setup.py
│   │   ├── lightweight_convolution.py
│   │   ├── linearized_convolution.py
│   │   ├── location_attention.py
│   │   ├── lstm_cell_with_zoneout.py
│   │   ├── multihead_attention.py
│   │   ├── positional_embedding.py
│   │   ├── positional_encoding.py
│   │   ├── quant_noise.py
│   │   ├── quantization/
│   │   │   ├── __init__.py
│   │   │   ├── pq/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── em.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── qconv.py
│   │   │   │   │   ├── qemb.py
│   │   │   │   │   └── qlinear.py
│   │   │   │   ├── pq.py
│   │   │   │   └── utils.py
│   │   │   ├── quantization_options.py
│   │   │   └── scalar/
│   │   │       ├── __init__.py
│   │   │       ├── modules/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── qact.py
│   │   │       │   ├── qconv.py
│   │   │       │   ├── qemb.py
│   │   │       │   └── qlinear.py
│   │   │       ├── ops.py
│   │   │       └── utils.py
│   │   ├── rotary_positional_embedding.py
│   │   ├── same_pad.py
│   │   ├── scalar_bias.py
│   │   ├── sinusoidal_positional_embedding.py
│   │   ├── sparse_multihead_attention.py
│   │   ├── sparse_transformer_sentence_encoder.py
│   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   ├── transformer_layer.py
│   │   ├── transformer_layer_aug.py
│   │   ├── transformer_sentence_encoder.py
│   │   ├── transformer_sentence_encoder_layer.py
│   │   ├── transpose_last.py
│   │   ├── unfold.py
│   │   └── vggblock.py
│   ├── nan_detector.py
│   ├── ngram_repeat_block.py
│   ├── optim/
│   │   ├── __init__.py
│   │   ├── adadelta.py
│   │   ├── adafactor.py
│   │   ├── adagrad.py
│   │   ├── adam.py
│   │   ├── adamax.py
│   │   ├── amp_optimizer.py
│   │   ├── bmuf.py
│   │   ├── composite.py
│   │   ├── cpu_adam.py
│   │   ├── dynamic_loss_scaler.py
│   │   ├── fairseq_optimizer.py
│   │   ├── fp16_optimizer.py
│   │   ├── fused_adam.py
│   │   ├── fused_lamb.py
│   │   ├── lr_scheduler/
│   │   │   ├── __init__.py
│   │   │   ├── cosine_lr_scheduler.py
│   │   │   ├── fairseq_lr_scheduler.py
│   │   │   ├── fixed_schedule.py
│   │   │   ├── inverse_square_root_schedule.py
│   │   │   ├── manual_lr_scheduler.py
│   │   │   ├── pass_through.py
│   │   │   ├── polynomial_decay_schedule.py
│   │   │   ├── reduce_lr_on_plateau.py
│   │   │   ├── step_lr_scheduler.py
│   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   └── triangular_lr_scheduler.py
│   │   ├── nag.py
│   │   ├── sgd.py
│   │   └── shard.py
│   ├── options.py
│   ├── pdb.py
│   ├── quantization_utils.py
│   ├── registry.py
│   ├── scoring/
│   │   ├── __init__.py
│   │   ├── bertscore.py
│   │   ├── bleu.py
│   │   ├── chrf.py
│   │   ├── meteor.py
│   │   ├── tokenizer.py
│   │   └── wer.py
│   ├── search.py
│   ├── sequence_generator.py
│   ├── sequence_scorer.py
│   ├── speech_generator.py
│   ├── tasks/
│   │   ├── __init__.py
│   │   ├── audio_classification.py
│   │   ├── audio_finetuning.py
│   │   ├── audio_pretraining.py
│   │   ├── cross_lingual_lm.py
│   │   ├── denoising.py
│   │   ├── fairseq_task.py
│   │   ├── frm_text_to_speech.py
│   │   ├── hubert_pretraining.py
│   │   ├── language_modeling.py
│   │   ├── legacy_masked_lm.py
│   │   ├── masked_lm.py
│   │   ├── multilingual_denoising.py
│   │   ├── multilingual_language_modeling.py
│   │   ├── multilingual_masked_lm.py
│   │   ├── multilingual_translation.py
│   │   ├── multires_hubert_pretraining.py
│   │   ├── nlu_finetuning.py
│   │   ├── online_backtranslation.py
│   │   ├── semisupervised_translation.py
│   │   ├── sentence_prediction.py
│   │   ├── sentence_prediction_adapters.py
│   │   ├── sentence_ranking.py
│   │   ├── simultaneous_translation.py
│   │   ├── span_masked_lm.py
│   │   ├── speech_dlm_task.py
│   │   ├── speech_to_speech.py
│   │   ├── speech_to_text.py
│   │   ├── speech_ulm_task.py
│   │   ├── text_to_speech.py
│   │   ├── translation.py
│   │   ├── translation_from_pretrained_bart.py
│   │   ├── translation_from_pretrained_xlm.py
│   │   ├── translation_lev.py
│   │   └── translation_multi_simple_epoch.py
│   ├── token_generation_constraints.py
│   ├── tokenizer.py
│   ├── trainer.py
│   ├── utils.py
│   └── version.txt
├── fairseq_cli/
│   ├── __init__.py
│   ├── eval_lm.py
│   ├── generate.py
│   ├── hydra_train.py
│   ├── hydra_validate.py
│   ├── interactive.py
│   ├── preprocess.py
│   ├── score.py
│   ├── train.py
│   └── validate.py
├── hubconf.py
├── hydra_plugins/
│   └── dependency_submitit_launcher/
│       ├── hydra_plugins/
│       │   └── dependency_submitit_launcher/
│       │       ├── __init__.py
│       │       ├── config.py
│       │       └── launcher.py
│       └── setup.py
├── pyproject.toml
├── release_utils.py
├── scripts/
│   ├── __init__.py
│   ├── average_checkpoints.py
│   ├── build_sym_alignment.py
│   ├── check_installation.py
│   ├── compare_namespaces.py
│   ├── compound_split_bleu.sh
│   ├── constraints/
│   │   ├── extract.py
│   │   └── validate.py
│   ├── convert_dictionary.lua
│   ├── convert_model.lua
│   ├── count_docs.py
│   ├── read_binarized.py
│   ├── rm_pt.py
│   ├── sacrebleu.sh
│   ├── shard_docs.py
│   ├── split_train_valid_docs.py
│   ├── spm_decode.py
│   ├── spm_encode.py
│   ├── spm_train.py
│   └── test_fsdp.sh
├── setup.cfg
├── setup.py
├── tests/
│   ├── __init__.py
│   ├── distributed/
│   │   ├── __init__.py
│   │   ├── test_bmuf.py
│   │   ├── test_distributed_timeout_wrapper.py
│   │   ├── test_module_proxy_wrapper.py
│   │   ├── test_utils.py
│   │   └── utils.py
│   ├── gpu/
│   │   ├── __init__.py
│   │   ├── test_binaries_gpu.py
│   │   ├── test_ema_gpu.py
│   │   └── transformer_quantization_config.yaml
│   ├── speech/
│   │   ├── __init__.py
│   │   ├── test_convtransformer_simul_trans.py
│   │   ├── test_dual_input_wav_transformer.py
│   │   ├── test_dualinput_s2t_transformer.py
│   │   ├── test_fastspeech2.py
│   │   ├── test_s2s_transformer.py
│   │   ├── test_s2t_conformer.py
│   │   ├── test_s2t_transformer.py
│   │   ├── test_tts_transformer.py
│   │   ├── test_wav2vec2.py
│   │   └── test_xm_transformer.py
│   ├── speech_recognition/
│   │   ├── __init__.py
│   │   ├── asr_test_base.py
│   │   ├── test_collaters.py
│   │   ├── test_cross_entropy.py
│   │   ├── test_data_utils.py
│   │   └── test_vggtransformer.py
│   ├── tasks/
│   │   ├── test_denoising.py
│   │   ├── test_masked_lm.py
│   │   ├── test_multilingual_denoising.py
│   │   └── test_span_masked_lm.py
│   ├── test_activation_checkpointing.py
│   ├── test_amp_optimizer.py
│   ├── test_average_checkpoints.py
│   ├── test_backtranslation_dataset.py
│   ├── test_binaries.py
│   ├── test_binarizer.py
│   ├── test_character_token_embedder.py
│   ├── test_checkpoint_utils.py
│   ├── test_checkpoint_utils_for_task_level_attributes.py
│   ├── test_concat_dataset.py
│   ├── test_constraints.py
│   ├── test_convtbc.py
│   ├── test_data_utils.py
│   ├── test_dataclass_utils.py
│   ├── test_dataset.py
│   ├── test_dictionary.py
│   ├── test_ema.py
│   ├── test_espnet_multihead_attention.py
│   ├── test_export.py
│   ├── test_file_chunker_utils.py
│   ├── test_file_io.py
│   ├── test_fp16_optimizer.py
│   ├── test_hf_hub.py
│   ├── test_huffman.py
│   ├── test_inference_dropout.py
│   ├── test_iopath.py
│   ├── test_iterators.py
│   ├── test_label_smoothing.py
│   ├── test_lm_context_window.py
│   ├── test_lstm_jitable.py
│   ├── test_memory_efficient_fp16.py
│   ├── test_metrics.py
│   ├── test_multi_corpus_dataset.py
│   ├── test_multi_corpus_sampled_dataset.py
│   ├── test_multihead_attention.py
│   ├── test_noising.py
│   ├── test_online_backtranslation.py
│   ├── test_plasma_utils.py
│   ├── test_positional_encoding.py
│   ├── test_reproducibility.py
│   ├── test_resampling_dataset.py
│   ├── test_roberta.py
│   ├── test_rotary_positional_embedding.py
│   ├── test_sequence_generator.py
│   ├── test_sequence_scorer.py
│   ├── test_sparse_multihead_attention.py
│   ├── test_token_block_dataset.py
│   ├── test_train.py
│   ├── test_transformer.py
│   ├── test_utils.py
│   ├── test_valid_subset_checks.py
│   └── utils.py
└── train.py