gitextract_cuit3xwy/

├── .github/
│   └── ISSUE_TEMPLATE/
│       ├── bug_report.md
│       └── custom.md
├── .gitignore
├── .gitmodules
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Diff-Transformer/
│   ├── Diff-Transformer-V2/
│   │   ├── README.md
│   │   └── multihead_flashdiffv2.py
│   ├── README.md
│   ├── example.py
│   ├── kernel/
│   │   └── rotary.py
│   ├── multihead_attention.py
│   ├── multihead_diffattn.py
│   ├── multihead_flashdiff_1.py
│   ├── multihead_flashdiff_2.py
│   └── rms_norm.py
├── LICENSE
├── LatentLM/
│   ├── README.md
│   ├── evaluate_fid.py
│   ├── evaluate_fid_fidelity.py
│   ├── inference_speed.py
│   ├── metrics/
│   │   ├── IS.py
│   │   ├── __init__.py
│   │   ├── fid.py
│   │   └── inception.py
│   ├── models/
│   │   ├── DiT.py
│   │   ├── EMA.py
│   │   ├── RMSNorm.py
│   │   ├── Transformer.py
│   │   ├── __init__.py
│   │   └── kernel/
│   │       ├── rotary.py
│   │       └── swiglu.py
│   ├── sample_hf.py
│   ├── sample_many.py
│   ├── schedule/
│   │   ├── __init__.py
│   │   ├── ddpm.py
│   │   └── dpm_solver.py
│   ├── tokenizer_models/
│   │   ├── __init__.py
│   │   ├── modeling_beit3_vision.py
│   │   ├── modeling_common.py
│   │   ├── modeling_sigma_vae.py
│   │   ├── modeling_utils.py
│   │   └── vae.py
│   ├── train_hf.py
│   └── utils.py
├── NOTICE.md
├── PFPO/
│   ├── README.md
│   ├── apps_train_sub_val_ids.json
│   ├── conf/
│   │   ├── api/
│   │   │   └── vllm/
│   │   │       ├── apps/
│   │   │       │   ├── deepseek_coder/
│   │   │       │   │   ├── dev_v1_0.yaml
│   │   │       │   │   ├── dev_v1_0_fix_bos.yaml
│   │   │       │   │   ├── dev_v1_1.yaml
│   │   │       │   │   ├── dev_v1_1_sample.yaml
│   │   │       │   │   ├── dev_v2_0.yaml
│   │   │       │   │   ├── r2c/
│   │   │       │   │   │   ├── dev_v1_0.yaml
│   │   │       │   │   │   ├── dev_v1_1.yaml
│   │   │       │   │   │   ├── dev_v1_1_sample.yaml
│   │   │       │   │   │   ├── dev_v2_0.yaml
│   │   │       │   │   │   ├── dev_v2_0_sample.yaml
│   │   │       │   │   │   ├── general_combine_train_v2_0.yaml
│   │   │       │   │   │   ├── general_combine_train_v2_0_prefix_completion.yaml
│   │   │       │   │   │   ├── general_combine_train_v2_1_4o_non_sc.yaml
│   │   │       │   │   │   ├── sub_dev_v1_1.yaml
│   │   │       │   │   │   ├── sub_dev_v2_0.yaml
│   │   │       │   │   │   ├── train_v1_0.yaml
│   │   │       │   │   │   ├── train_v1_0_s43.yaml
│   │   │       │   │   │   ├── train_v2_0.yaml
│   │   │       │   │   │   ├── train_v2_0_prefix_completion.yaml
│   │   │       │   │   │   └── xcode_train_v2_0.yaml
│   │   │       │   │   ├── sub_dev_v1_1.yaml
│   │   │       │   │   ├── sub_dev_v2_0.yaml
│   │   │       │   │   ├── test_inputs_gen/
│   │   │       │   │   │   ├── sub_dev_v1_0.yaml
│   │   │       │   │   │   └── test_v1_0.yaml
│   │   │       │   │   ├── train_v1_0.yaml
│   │   │       │   │   └── train_v2_0.yaml
│   │   │       │   └── general_eval/
│   │   │       │       ├── dev_v2_0.yaml
│   │   │       │       ├── dev_v2_1.yaml
│   │   │       │       └── dev_v2_2.yaml
│   │   │       ├── human_eval/
│   │   │       │   ├── ds_coder/
│   │   │       │   │   ├── r2c/
│   │   │       │   │   │   ├── test_v1_0.yaml
│   │   │       │   │   │   ├── test_v1_0_local.yaml
│   │   │       │   │   │   ├── test_v2_0_local.yaml
│   │   │       │   │   │   ├── test_v2_1_local.yaml
│   │   │       │   │   │   └── test_v2_2_local.yaml
│   │   │       │   │   ├── test_v1_0_local.yaml
│   │   │       │   │   └── test_v2_0.yaml
│   │   │       │   ├── test_v2_1.yaml
│   │   │       │   └── test_v2_2.yaml
│   │   │       ├── magicoder/
│   │   │       │   ├── llama3/
│   │   │       │   │   └── test_case_input_gen_v1_0.yaml
│   │   │       │   └── mistral/
│   │   │       │       ├── func_head_extract_v1_0.yaml
│   │   │       │       └── test_case_input_gen_v1_0.yaml
│   │   │       ├── mathscale/
│   │   │       │   ├── 4o_mathstral_train_0shot_v1_0.yaml
│   │   │       │   ├── 4o_mathstral_train_0shot_v1_0_completion.yaml
│   │   │       │   ├── 4o_mathstral_train_0shot_v1_1.yaml
│   │   │       │   ├── 4o_mathstral_train_0shot_v1_1_completion.yaml
│   │   │       │   ├── 4o_mathstral_train_half_0shot_v1_0.yaml
│   │   │       │   ├── 4o_mathstral_train_half_0shot_v1_0_completion.yaml
│   │   │       │   ├── mathstral/
│   │   │       │   │   ├── deepseek_test_0shot_tem_v1_1.yaml
│   │   │       │   │   ├── mistral_mathscale4o_labeling.yaml
│   │   │       │   │   ├── mistral_train_0shot_iter0_v1_0.yaml
│   │   │       │   │   ├── test_0shot_tem_v1_1.yaml
│   │   │       │   │   ├── test_0shot_tem_v1_1_step.yaml
│   │   │       │   │   ├── test_0shot_tem_v1_1_step_seed.yaml
│   │   │       │   │   ├── test_0shot_tem_v2_0_step.yaml
│   │   │       │   │   └── test_0shot_tem_v3_0_step.yaml
│   │   │       │   ├── mistral_train_0shot_v1_0.yaml
│   │   │       │   ├── mistral_train_0shot_v1_1.yaml
│   │   │       │   ├── mistral_train_0shot_v1_2.yaml
│   │   │       │   ├── numina_hard_train_0shot_v1_0_completion.yaml
│   │   │       │   ├── numina_hard_train_0shot_v1_0_seed.yaml
│   │   │       │   ├── numina_rewrite_qwen25_0shot_v1_0.yaml
│   │   │       │   ├── numina_train_0shot_v1_0.yaml
│   │   │       │   ├── numina_train_0shot_v1_0_completion.yaml
│   │   │       │   ├── test_0shot_tem_v1_1.yaml
│   │   │       │   └── test_0shot_tem_v1_1_step.yaml
│   │   │       ├── mbpp_sanitized/
│   │   │       │   ├── r2c/
│   │   │       │   │   ├── test_3shot_v2_0.yaml
│   │   │       │   │   ├── test_v1_0.yaml
│   │   │       │   │   └── test_v1_0_local.yaml
│   │   │       │   ├── test_3shot_v1_0.yaml
│   │   │       │   ├── test_3shot_v1_0_local.yaml
│   │   │       │   ├── test_v1_0_local.yaml
│   │   │       │   ├── test_v1_1_local.yaml
│   │   │       │   ├── test_v2_0_local.yaml
│   │   │       │   ├── test_v2_1_local.yaml
│   │   │       │   └── test_v2_2_local.yaml
│   │   │       ├── mwp-bench/
│   │   │       │   ├── deepseek_test_0shot_v1_1.yaml
│   │   │       │   ├── llama_base/
│   │   │       │   │   └── college_math_test_4shot_v1_0.yaml
│   │   │       │   ├── llama_chat/
│   │   │       │   │   ├── dev_0shot_v1_0.yaml
│   │   │       │   │   ├── math_test_0shot_v1_0.yaml
│   │   │       │   │   ├── math_test_0shot_v3_0.yaml
│   │   │       │   │   └── test_0shot_v1_0.yaml
│   │   │       │   ├── mathstral_dev_0shot_self_correct_v1_0.yaml
│   │   │       │   ├── mathstral_dev_0shot_v1_0.yaml
│   │   │       │   ├── mathstral_test_0shot_self_correct_v1_0.yaml
│   │   │       │   ├── mathstral_test_0shot_v1_0.yaml
│   │   │       │   ├── mathstral_test_gaokao_2023_0shot_v1_0.yaml
│   │   │       │   ├── mathstral_test_gsm8k_0shot_v1_0.yaml
│   │   │       │   ├── mistral/
│   │   │       │   │   ├── dev_0shot_v1_0.yaml
│   │   │       │   │   └── test_0shot_v1_0.yaml
│   │   │       │   └── mistral_dev_0shot_v1_0.yaml
│   │   │       └── vllm_params/
│   │   │           ├── sampling_param_greedy.yaml
│   │   │           └── sampling_param_sample.yaml
│   │   ├── deepspeed/
│   │   │   ├── fp16.yaml
│   │   │   ├── train_hybrid_engine_zero0.yaml
│   │   │   ├── train_hybrid_engine_zero1.yaml
│   │   │   ├── train_hybrid_engine_zero1_cosine.yaml
│   │   │   ├── train_hybrid_engine_zero1_lr.yaml
│   │   │   ├── train_hybrid_engine_zero1_optim_offload.yaml
│   │   │   ├── train_hybrid_engine_zero1_optim_offload_cosine.yaml
│   │   │   ├── train_hybrid_engine_zero1_optim_offload_lr.yaml
│   │   │   ├── train_hybrid_engine_zero1_wo_optim.yaml
│   │   │   ├── train_hybrid_engine_zero2.yaml
│   │   │   ├── train_hybrid_engine_zero2_cosine.yaml
│   │   │   ├── train_hybrid_engine_zero2_lr.yaml
│   │   │   ├── train_hybrid_engine_zero2_optim_offload.yaml
│   │   │   ├── train_hybrid_engine_zero2_optim_offload_cosine.yaml
│   │   │   ├── train_hybrid_engine_zero3.yaml
│   │   │   ├── train_hybrid_engine_zero3_cosine.yaml
│   │   │   ├── train_hybrid_engine_zero3_optim_offload.yaml
│   │   │   └── train_hybrid_engine_zero3_optim_offload_cosine.yaml
│   │   ├── exp/
│   │   │   ├── apps/
│   │   │   │   ├── code_gen/
│   │   │   │   │   └── deepseek_coder/
│   │   │   │   │       ├── dpo/
│   │   │   │   │       │   ├── orig-pseudo-v1.0-a100.yaml
│   │   │   │   │       │   ├── orig-v1.0-v100.yaml
│   │   │   │   │       │   ├── orig-v1.1-v100-tp2.yaml
│   │   │   │   │       │   ├── orig-v1.1-v100-tp4.yaml
│   │   │   │   │       │   ├── orig-v1.1-v100.yaml
│   │   │   │   │       │   ├── orig-v1.2-v100-tp4.yaml
│   │   │   │   │       │   ├── orig-v1.3-a100.yaml
│   │   │   │   │       │   ├── orig-v1.3-v100-tp4.yaml
│   │   │   │   │       │   ├── orig-v1.4-a100.yaml
│   │   │   │   │       │   ├── orig-v1.4-v100-tp4.yaml
│   │   │   │   │       │   ├── pseudo-sc-dpo-v1.0-v100-tp8.yaml
│   │   │   │   │       │   ├── pseudo-sc-dpo-v1.1-h100.yaml
│   │   │   │   │       │   ├── pseudo-sc-dpo-v1.1-v100-tp8.yaml
│   │   │   │   │       │   ├── pseudo-sc-dpo-v1.2-a100.yaml
│   │   │   │   │       │   └── pseudo-sc-dpo-v1.2-v100-tp8.yaml
│   │   │   │   │       └── sft/
│   │   │   │   │           ├── v1.0-a100.yaml
│   │   │   │   │           └── v1.0-v100.yaml
│   │   │   │   ├── r2c_generation/
│   │   │   │   │   └── deepseek_coder/
│   │   │   │   │       ├── dpo/
│   │   │   │   │       │   ├── deprecated/
│   │   │   │   │       │   │   └── sft-v1.0-v100-tp4.yaml
│   │   │   │   │       │   ├── gpt4o-distil-4o-ps-test-pdpo-h100-v1.0.yaml
│   │   │   │   │       │   ├── gpt4o-distil-4o-ps-test-pdpo-h100-v1.1.yaml
│   │   │   │   │       │   ├── gpt4o-distil-4o-self-mix-ps-test-v1.0-mi300x-dp16.yaml
│   │   │   │   │       │   ├── gpt4o-distil-4o-self-mix-ps-test-v1.0-mi300x.yaml
│   │   │   │   │       │   ├── gpt4o-distil-4o-self-mix-ps-test-v1.1-mi300x.yaml
│   │   │   │   │       │   ├── gpt4o-distil-ps-pdpo-ctr-ts-num-v1.0-mi300x-dp32.yaml
│   │   │   │   │       │   ├── gpt4o-distil-pseudo-v1.0-a100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-self-pseudo-v1.0-a100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-self-pseudo-v1.0-v100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v1.0-H100-4o-ps-test.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v2.0-v100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v3.0-a100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v3.1-rm-a100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v3.2-v100.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.0-v100-ps-test.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.1-H100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.10-V100-ps-pdpo-rerun.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.2-H100-gd-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.2-v100-gd-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.3-H100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.3-V100-ps-pdpo-rerun.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.3-v100-gd-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.4-H100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.4-V100-ps-pdpo-rerun.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.5-A100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.5-v100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.6-v100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.7-A100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.8-A100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.9-V100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.9.1-V100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.9.2-V100-ps-pdpo.yaml
│   │   │   │   │       │   ├── gpt4o-distil-v4.9.3-A100-ps-pdpo.yaml
│   │   │   │   │       │   ├── iter1/
│   │   │   │   │       │   │   ├── gpt4o-distil-apps-mc-v1.0-mi300x-hybrid.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-apps-mc-v1.1-mi300x-hybrid.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.0-a100-40-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.1-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.2-h100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.0-H100-4o-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.0-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.1-H100-4o-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.1-a100-40-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.1-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.2-H100-4o-ps-test.yaml
│   │   │   │   │       │   │   └── gpt4o-distil-combine-v1.2-a100-40-ps-test.yaml
│   │   │   │   │       │   ├── iter2/
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.0-A100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.1-A100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.2-A100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.2-V100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.3-A100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-dpo-n64sc-v1.4-A100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.0-h100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.1-h100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.1-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.2-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.3-h100-fix-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.3-v100-fix-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v1.3-v100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v2.0-h100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-pdpo-v2.1-h100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.0-H100-ps-test.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.0-mi300x-hybrid.yaml
│   │   │   │   │       │   │   ├── gpt4o-distil-combine-v1.1-mi300x-hybrid.yaml
│   │   │   │   │       │   │   └── gpt4o-distil-combine-v1.2-mi300x-hybrid.yaml
│   │   │   │   │       │   └── iter3/
│   │   │   │   │       │       ├── gpt4o-distil-combine-pdpo-v1.0-h100-ps-test.yaml
│   │   │   │   │       │       ├── gpt4o-distil-combine-pdpo-v1.1-h100-ps-test.yaml
│   │   │   │   │       │       └── gpt4o-distil-combine-pdpo-v1.2-h100-ps-test.yaml
│   │   │   │   │       └── sft/
│   │   │   │   │           ├── deprecated/
│   │   │   │   │           │   ├── gpt4o-distil-v1.0-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v1.1-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.0-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.1-v100-tp.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.1-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.2-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.3-v100.yaml
│   │   │   │   │           │   ├── gpt4o-distil-v2.5-v100.yaml
│   │   │   │   │           │   └── gpt4o-distil-v2.6-v100.yaml
│   │   │   │   │           ├── gpt4o-distil-v2.4-a100.yaml
│   │   │   │   │           ├── gpt4o-distil-v2.4-v100-fix-2node-test.yaml
│   │   │   │   │           ├── gpt4o-distil-v2.4-v100-fix.yaml
│   │   │   │   │           ├── gpt4o-distil-v2.4-v100.yaml
│   │   │   │   │           ├── gpt4o-distil-v3.0-a100.yaml
│   │   │   │   │           ├── gpt4o-distil-v3.0-v100.yaml
│   │   │   │   │           ├── gpt4o-distil-v3.1-v100-test.yaml
│   │   │   │   │           └── gpt4o-distil-v3.1-v100.yaml
│   │   │   │   └── test_input_gen/
│   │   │   │       └── deepseek_coder/
│   │   │   │           └── sft/
│   │   │   │               └── v1.0-a100.yaml
│   │   │   └── mathscale/
│   │   │       ├── llama/
│   │   │       │   ├── dpo/
│   │   │       │   │   ├── iter1/
│   │   │       │   │   │   ├── llama3.1-dpo-4o-iter0-v1.0-H100.yaml
│   │   │       │   │   │   ├── llama3.1-dpo-4o-iter0-v1.1-A100-40.yaml
│   │   │       │   │   │   ├── llama3.1-pdpo-4o-iter1-1.0-A100.yaml
│   │   │       │   │   │   └── llama3.1-pdpo-4o-iter1-1.1-v100.yaml
│   │   │       │   │   ├── llama3.1-dpo-4o-iter0-v1.0-v100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v1.0-v100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v1.1-H100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v1.2-V100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v1.2-a100-40.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v2.0-v100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v2.1-a100-40.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v2.1-v100.yaml
│   │   │       │   │   ├── llama3.1-pdpo-4o-iter0-v2.2-A100.yaml
│   │   │       │   │   └── numina-co/
│   │   │       │   │       ├── llama3.1-pdpo-iter1-1.0-split01-p0.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-1.0-split01-p0.5-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split0123-cross2-p0.5-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split0123-cross2-p0.5-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split0123-p0.5-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split0123-p0.5-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split23-p0.0-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split23-p0.0-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split23-p0.5-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter1-split23-p0.5-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.2-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.3-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.4-a100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.4-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter2-split01-23-p0.5-v1.4-v100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.2-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.3-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.3-v100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.4-a100-dp16.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter3-split01-23-45-p0.5-v1.4-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.2-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.3-v100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.4-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.5-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.6-a100-40.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.6-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-67-p0.0-v1.7-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.2-a100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.3-v100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.5-h100-dp16.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.5-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-6789-p0.0-v1.5-v100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter4-split01-23-45-p0.0-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter5-split01-23-45-67-89-p0.2-v1.0-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter5-split01-23-45-67-89-p0.2-v1.1-h100.yaml
│   │   │       │   │       ├── llama3.1-pdpo-iter5-split01-23-45-67-89-p0.4-v1.2-h100.yaml
│   │   │       │   │       └── llama3.1-pdpo-iter5-split01-23-45-67-89-p0.5-v1.3-a100-40.yaml
│   │   │       │   └── sft/
│   │   │       │       ├── 70b-sft-v1.0-mi300x.yaml
│   │   │       │       ├── 70b-sft-v1.1-mi300x.yaml
│   │   │       │       ├── 70b-sft-v1.2-mi300x.yaml
│   │   │       │       └── 70b-sft-v2.0-mi300x.yaml
│   │   │       └── mistral/
│   │   │           ├── dpo/
│   │   │           │   ├── co-half-0/
│   │   │           │   │   ├── mathstral-co-pdpo-half0-iter0-v1.0-a100.yaml
│   │   │           │   │   ├── mathstral-co-pdpo-half0-iter0-v1.1-h100.yaml
│   │   │           │   │   ├── mathstral-co-pdpo-half0-iter0-v1.2-h100.yaml
│   │   │           │   │   ├── mathstral-co-pdpo-half0-iter0-v1.3-a100.yaml
│   │   │           │   │   └── mathstral-co-pdpo-sc-half0-iter0-p0.0-v1.0-a100.yaml
│   │   │           │   ├── co-half-1/
│   │   │           │   │   ├── mathstral-co-pdpo-half1-iter0-v1.0-a100.yaml
│   │   │           │   │   └── mathstral-co-pdpo-sc-half1-iter1-p0.0-v1.0-a100.yaml
│   │   │           │   ├── iter-2-mscale-v0.1/
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.0-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.0-V100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.1-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.1-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.1-V100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.2-V100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.3-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.3-A100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.3-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter2-v1.3-V100.yaml
│   │   │           │   │   └── mathstral-pdpo-mscale300k-iter2-v1.3.1-A100-40.yaml
│   │   │           │   ├── iter-3-mscale-v0.1/
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter3-v1.0-V100.yaml
│   │   │           │   │   └── mathstral-pdpo-mscale300k-iter3-v1.1-A100.yaml
│   │   │           │   ├── iter1/
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.0-v100.yaml
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.1-a100.yaml
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.2-h100.yaml
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.3-h100.yaml
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.4-v100.yaml
│   │   │           │   │   ├── mathstral-dpo-4o-iter1-v1.5-h100.yaml
│   │   │           │   │   ├── mathstral-pdpo-4o-iter1-v1.0-H100.yaml
│   │   │           │   │   ├── mathstral-raft-dpo-4o-iter1-v2.0-h100.yaml
│   │   │           │   │   ├── mathstral-raft-dpo-4o-iter1-v2.1-h100.yaml
│   │   │           │   │   ├── mathstral-sc-dpo-4o-iter1-v1.0-a100-40.yaml
│   │   │           │   │   ├── mathstral-sc-dpo-4o-iter1-v1.1-a100.yaml
│   │   │           │   │   ├── mathstral-sc-dpo-4o-iter1-v1.2-a100-40.yaml
│   │   │           │   │   ├── mathstral-sc-dpo-numina-iter1-v1.0-h100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.0-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.1-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.2-A100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.3-A100-40.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.4-A100-40.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v1.4-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v2.0-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v2.1-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-4o-iter1-v2.2-H100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.0-h100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.1-A100-40.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.1-a100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.2-h100.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.3-A100-40.yaml
│   │   │           │   │   ├── mathstral-sc-pdpo-numina-iter1-v2.4-A100-40.yaml
│   │   │           │   │   ├── mathstral-sc-prm-4o-iter1-v1.0-H100.yaml
│   │   │           │   │   └── mathstral-sc-prm-4o-iter1-v1.1-A100-40.yaml
│   │   │           │   ├── iter1-mscale-v0.1/
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-4o-iter1-v1.0-MI300x.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.0-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.1-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.2-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.3-A100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.4-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.4-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.5-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.5-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.6-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v1.7-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v2.0-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v2.1-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.0-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.1-A100-40.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.1-H100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.1-V100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.2-A100.yaml
│   │   │           │   │   ├── mathstral-pdpo-mscale300k-iter1-v3.2-v100.yaml
│   │   │           │   │   └── mathstral-sc-dpo-mscale300k-iter1-v1.0-H100.yaml
│   │   │           │   ├── mathstral-dpo-4o-iter0-v1.0-a100.yaml
│   │   │           │   ├── mathstral-dpo-4o-iter0-v1.1-a100.yaml
│   │   │           │   ├── mathstral-dpo-4o-iter0-v1.2-a100.yaml
│   │   │           │   ├── mathstral-dpo-full-v1.0-a100.yaml
│   │   │           │   ├── mathstral-dpo-split1-v1.0-a100.yaml
│   │   │           │   ├── mathstral-dpo-split1-v1.0-v100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v1.1-a100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v1.2-a100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v1.3-a100-40.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.0-A100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.1-A100-40-tp2.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.1-A100-40.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.1-H100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.1-V100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.2-V100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.2.1-H100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.2.2-A100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.3-H100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.4-H100.yaml
│   │   │           │   ├── mathstral-pdpo-4o-iter0-v2.4-V100.yaml
│   │   │           │   ├── mathstral-pdpo-sc-iter0-v1.0-H100.yaml
│   │   │           │   ├── mathstral-pdpo-sc-iter0-v1.1-H100.yaml
│   │   │           │   ├── mathstral-pdpo-sc-iter0-v2.0-H100.yaml
│   │   │           │   ├── mathstral-pdpo-sc-iter0-v2.1-A100.yaml
│   │   │           │   ├── ms-mistral-dpo-split1-v1.0-v100.yaml
│   │   │           │   ├── ms-mistral-dpo-split1-v1.1-v100.yaml
│   │   │           │   ├── ms-mistral-dpo-split1-v1.2-v100.yaml
│   │   │           │   ├── ms-mistral-dpo-split2-v1.0-v100.yaml
│   │   │           │   ├── ms-mistral-dpo-split2-v1.1-v100.yaml
│   │   │           │   └── reverse_order/
│   │   │           │       ├── mathstral-pre-sc-pdpo-4o-iter1-v1.0-H100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-4o-iter1-v1.0-V100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter1-v1.0-H100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter1-v1.1-H100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter1-v1.2-H100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter1-v1.3-H100-dp8.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter1-v1.3-H100.yaml
│   │   │           │       ├── mathstral-pre-sc-pdpo-mscale-iter2-4o-gd-v1.0-H100-dp16.yaml
│   │   │           │       └── mathstral-pre-sc-pdpo-mscale-iter2-4o-gd-v1.1-H100-dp16.yaml
│   │   │           ├── reward/
│   │   │           │   └── iter1/
│   │   │           │       ├── mathstral-sc-prm-4o-iter1-v1.0-h100.yaml
│   │   │           │       ├── mathstral-sc-prm-4o-iter1-v1.0-v100.yaml
│   │   │           │       ├── mathstral-sc-prm-mscale-iter2-v1.0-v100.yaml
│   │   │           │       ├── mathstral-sc-prm-mscale-iter3-v1.0-v100.yaml
│   │   │           │       ├── process-rm-predict-flat.yaml
│   │   │           │       └── process-rm-predict-single.yaml
│   │   │           └── sft/
│   │   │               ├── co-half-0/
│   │   │               │   └── mathstral-mathscale4o-sft-v1.0-v100.yaml
│   │   │               ├── co-half-1/
│   │   │               │   └── mathstral-mathscale4o-sft-v1.0-v100.yaml
│   │   │               ├── iter1/
│   │   │               │   ├── mathstral-mathscale4o-raft-v1.0-h100.yaml
│   │   │               │   ├── mathstral-mathscale4o-raft-v1.1-a100-40.yaml
│   │   │               │   └── mathstral-mathscale4o-raft-v1.1-h100.yaml
│   │   │               ├── mathstral-mathscale4o-sft-v1.0-a100.yaml
│   │   │               ├── mathstral-mathscale4o-sft-v1.1-v100.yaml
│   │   │               ├── mathstral-mathscale4o-sft-v1.2-v100.yaml
│   │   │               ├── mathstral-mathscale4o-sft-v2.0-v100.yaml
│   │   │               └── mistral-mathscale4o-sft-v1.0-v100.yaml
│   │   ├── hydra/
│   │   │   └── default.yaml
│   │   └── post_process/
│   │       ├── deepseek.yaml
│   │       ├── gsm8k.yaml
│   │       ├── math.yaml
│   │       ├── openai_cot.yaml
│   │       └── openai_react.yaml
│   ├── data/
│   │   ├── apps.py
│   │   ├── code_contest.py
│   │   ├── combine_dataset.py
│   │   ├── deepseek_math_utils/
│   │   │   ├── answer_extraction.py
│   │   │   ├── eval_script.py
│   │   │   ├── eval_utils.py
│   │   │   └── ocwcourses_eval_utils.py
│   │   ├── general_collator.py
│   │   ├── human_eval.py
│   │   ├── input_aligner.py
│   │   ├── input_utils.py
│   │   ├── math.py
│   │   ├── math_reader.py
│   │   ├── math_util.py
│   │   ├── mathscale/
│   │   │   └── util.py
│   │   ├── numina_math.py
│   │   ├── openai_api_caller.py
│   │   ├── qwen25math/
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── data_loader.py
│   │   │   ├── evaluate.py
│   │   │   ├── examples.py
│   │   │   ├── grader.py
│   │   │   ├── math_eval.py
│   │   │   ├── math_utils.py
│   │   │   ├── model_utils.py
│   │   │   ├── parser.py
│   │   │   ├── python_executor.py
│   │   │   ├── requirements.txt
│   │   │   ├── trajectory.py
│   │   │   └── utils.py
│   │   └── vllm.py
│   ├── eval/
│   │   ├── codex_humaneval/
│   │   │   ├── data.py
│   │   │   ├── evaluation.py
│   │   │   ├── execution.py
│   │   │   └── run_eval.py
│   │   ├── dispatch_openai_requests.py
│   │   ├── mbpp_eval/
│   │   │   ├── execute.py
│   │   │   ├── run_eval.py
│   │   │   └── utils.py
│   │   └── utils.py
│   ├── general_util/
│   │   ├── __init__.py
│   │   ├── average_meter.py
│   │   ├── dist_utils.py
│   │   ├── evaluator.py
│   │   ├── fs_tp_utils.py
│   │   ├── fsdp_utils.py
│   │   ├── lightseq_utils.py
│   │   ├── logger.py
│   │   ├── mixin.py
│   │   ├── mpu_proxy.py
│   │   ├── tensorboard_helper.py
│   │   ├── tokenization_utils.py
│   │   ├── torch_fsdp_utils.py
│   │   ├── training_utils.py
│   │   └── transformer_engine.py
│   ├── models/
│   │   ├── dpo_utils.py
│   │   ├── ds_utils.py
│   │   ├── fs_tp_mixin.py
│   │   ├── llama.py
│   │   ├── llama_megatron_tp.py
│   │   ├── llama_tp.py
│   │   ├── megatron_tp_mixin.py
│   │   ├── mistral.py
│   │   ├── mistral_tp.py
│   │   ├── mixin.py
│   │   ├── qwen2.py
│   │   ├── qwen2_megatron_tp.py
│   │   ├── qwen2_tp.py
│   │   └── utils.py
│   ├── openai_api_caller_v1.py
│   ├── post_inference.py
│   ├── post_processors/
│   │   ├── code/
│   │   │   ├── clean.py
│   │   │   ├── code.py
│   │   │   └── evaluator.py
│   │   ├── dist_mixin.py
│   │   ├── dpo.py
│   │   ├── openai_api_callback.py
│   │   ├── pattern/
│   │   │   └── tags.py
│   │   └── qwen25_math_callback.py
│   ├── prompts/
│   │   ├── apps/
│   │   │   ├── critique_0shot_v1.0.txt
│   │   │   ├── magicoder_cls_2shot.txt
│   │   │   ├── r2c_prompt_0shot_v1.0.txt
│   │   │   ├── r2c_prompt_1shot_v1.0.txt
│   │   │   ├── test_case_simulate.v1.0.txt
│   │   │   ├── test_input_gen_0shot_v1.0.txt
│   │   │   ├── test_input_gen_2shot_v2.0.txt
│   │   │   ├── test_input_gen_2shot_v2.1.txt
│   │   │   ├── worsen_0shot_v1.0.txt
│   │   │   └── worsen_from_feedback_0shot_v1.0.txt
│   │   ├── human_eval/
│   │   │   ├── ds_coder_prompt_v1_0.txt
│   │   │   ├── r2c_prompt_0shot_v1.0.txt
│   │   │   ├── r2c_prompt_0shot_v1.1.txt
│   │   │   ├── r2c_prompt_0shot_v1.2.txt
│   │   │   └── r2c_prompt_0shot_v1.3.txt
│   │   ├── magicoder/
│   │   │   ├── oss_has_function_head_v1_0.txt
│   │   │   └── test_input_gen_2shot_v1.0.txt
│   │   ├── math/
│   │   │   └── college_math_4shot.txt
│   │   └── mbpp/
│   │       ├── r2c_prompt_0shot_v1.0.txt
│   │       ├── r2c_prompt_3shot_v1.0.txt
│   │       └── r2c_prompt_3shot_v2.0.txt
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── __init__.py
│   │   ├── apps/
│   │   │   ├── __init__.py
│   │   │   ├── analyze/
│   │   │   │   ├── freq2image.py
│   │   │   │   ├── get_output_frequency.py
│   │   │   │   └── pipeline.sh
│   │   │   ├── code_flaw/
│   │   │   │   └── pipeline_v1.0.sh
│   │   │   ├── construct_prefer_pair.py
│   │   │   ├── construct_prefer_pair_rm.py
│   │   │   ├── construct_prefer_pair_soft.py
│   │   │   ├── eval_gpt4_outputs.py
│   │   │   ├── execute_gold_sol_on_test_case.py
│   │   │   ├── execute_gold_sol_on_test_case.sh
│   │   │   ├── extract_pseudo_outputs_as_label.py
│   │   │   ├── get_output_frequency.py
│   │   │   ├── gpt4o_to_normal_pred_format.py
│   │   │   ├── merge_dp_predictions.py
│   │   │   ├── merge_dp_solutions.sh
│   │   │   ├── pp_critique_difficulty.py
│   │   │   ├── pp_eval_gpt4.py
│   │   │   ├── pp_eval_gpt4_general_combine.py
│   │   │   ├── pp_solution_gen_inputs.py
│   │   │   ├── pp_test_case.py
│   │   │   ├── pp_test_case_gen_inputs.py
│   │   │   ├── pp_test_case_gen_inputs_v2.0.py
│   │   │   ├── pp_test_case_gen_outputs.py
│   │   │   ├── pp_test_case_gen_public_outputs.py
│   │   │   ├── pp_test_case_gen_public_outputs_few_shot.py
│   │   │   ├── pp_test_case_gen_public_outputs_few_shot_verify.py
│   │   │   ├── pp_worsen_inputs.py
│   │   │   ├── prm/
│   │   │   │   ├── construct_process_rm_sample.py
│   │   │   │   ├── construct_process_rm_sample_fix.py
│   │   │   │   └── sample_steps.py
│   │   │   ├── pseudo_test_cases/
│   │   │   │   ├── 4o_pseudo_baseline.sh
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clean_oss_mistral_data.py
│   │   │   │   ├── clean_xcode_4o_test_inputs_data.py
│   │   │   │   ├── collect_pseudo_outputs.py
│   │   │   │   ├── combine_gpt_raw_requests.py
│   │   │   │   ├── combine_pseudo_test_inputs.py
│   │   │   │   ├── construct_dpo_pairs.sh
│   │   │   │   ├── control_test_case_num_baseline.sh
│   │   │   │   ├── control_test_case_num_baseline_pipeline.sh
│   │   │   │   ├── extract_4o_combine_outputs_as_label.sh
│   │   │   │   ├── oss_combine_collect_pseudo_outputs.py
│   │   │   │   ├── oss_combine_collect_pseudo_outputs_mp.py
│   │   │   │   ├── oss_combine_collect_pseudo_outputs_mp_compress.py
│   │   │   │   ├── oss_combine_collect_pseudo_outputs_takes_extra.py
│   │   │   │   ├── oss_combine_prefix_fail_extract_pseudo_label.py
│   │   │   │   ├── oss_combine_run_extract_pseudo_label.py
│   │   │   │   ├── pipeline.sh
│   │   │   │   ├── pp_inputs_pick_problem_evol.py
│   │   │   │   ├── pp_inputs_pick_problem_oss.py
│   │   │   │   ├── prefix_fail_extract_pseudo_label.py
│   │   │   │   ├── prefix_fail_extract_pseudo_label_align_ts_num.py
│   │   │   │   ├── run_outputs_local.sh
│   │   │   │   ├── xcode_pipeline.sh
│   │   │   │   └── xcode_pp_test_case_gen.py
│   │   │   ├── re_verify_solutions.py
│   │   │   ├── rerank_code_rm.py
│   │   │   ├── solution_fail_extract.py
│   │   │   ├── solution_fail_extract_critique.py
│   │   │   ├── solution_fail_extract_pseudo_label.py
│   │   │   ├── solution_run_outputs.py
│   │   │   ├── solution_run_outputs_local.py
│   │   │   ├── solution_run_pseudo_outputs_local.py
│   │   │   ├── utils_execute.py
│   │   │   └── worsen_gpt4_combine.py
│   │   ├── collect_mbpp_test_cases_outputs_sc_v1.0.py
│   │   ├── eval_mbpp_judgement.py
│   │   ├── eval_mbpp_judgement_v2.py
│   │   ├── execute_mbpp_intermediate_res.py
│   │   ├── execute_mbpp_intermediate_res_mp.py
│   │   ├── inference/
│   │   │   └── vllm_dp_mul_node.sh
│   │   ├── math/
│   │   │   ├── analyze_sc.py
│   │   │   ├── deepseek_math_sample_steps.py
│   │   │   ├── estimate_state_value.py
│   │   │   ├── merge_dp_multi_solution.py
│   │   │   ├── merge_dp_predictions.py
│   │   │   ├── merge_dp_predictions.sh
│   │   │   ├── merge_incomplete_predictions.py
│   │   │   ├── merge_rm_dp_multi_solution.py
│   │   │   ├── rerank_w_orm.py
│   │   │   ├── rerank_w_prm.py
│   │   │   └── rerank_w_prm_combine.py
│   │   ├── math_scale/
│   │   │   ├── __init__.py
│   │   │   ├── analyze/
│   │   │   │   ├── compute_acc_by_id.py
│   │   │   │   ├── draw_sc.py
│   │   │   │   ├── extract_hard_questions.py
│   │   │   │   ├── freq2image.py
│   │   │   │   ├── get_output_frequency.py
│   │   │   │   ├── hard_change.sh
│   │   │   │   └── pipeline.sh
│   │   │   ├── concat_data.py
│   │   │   ├── construct_prefer_pair.py
│   │   │   ├── construct_prefer_pair_sc.py
│   │   │   ├── construct_prm_pair.sh
│   │   │   ├── construct_process_rm_sample_gd.py
│   │   │   ├── construct_process_rm_sample_sc.py
│   │   │   ├── exclude_unused_data.py
│   │   │   ├── extract_content_from_orig_format.py
│   │   │   ├── extract_mathscale_v2_box_answer.py
│   │   │   ├── extract_numina_math_box_answer.py
│   │   │   ├── fix_answer_extract_and_verify.py
│   │   │   ├── fix_answer_extract_and_verify_v2.py
│   │   │   ├── llama_numina_co_train/
│   │   │   │   ├── construct_prm_sc_pair.sh
│   │   │   │   └── pipeline.sh
│   │   │   ├── math_scale_offline_gpt_eval.py
│   │   │   ├── mathstral_mathscale_co_train/
│   │   │   │   ├── construct_prm_gd_pair.sh
│   │   │   │   ├── construct_prm_sc_pair.sh
│   │   │   │   └── pipeline.sh
│   │   │   ├── merge_dp_predictions.py
│   │   │   ├── merge_dp_predictions.sh
│   │   │   ├── merge_dp_seed_predictions.py
│   │   │   ├── merge_dp_seed_predictions_by_split.sh
│   │   │   ├── merge_math500_predictions.sh
│   │   │   ├── merge_mwpbench_predictions.sh
│   │   │   ├── merge_mwpbench_sympy_predictions.sh
│   │   │   ├── merge_qwen2_dp_math_dev_predictions_v0.0.sh
│   │   │   ├── merge_qwen2_dp_math_dev_predictions_v1.3.sh
│   │   │   ├── merge_qwen2_dp_predictions_v1.1.sh
│   │   │   ├── merge_qwen2_dp_predictions_v1.2.sh
│   │   │   ├── merge_qwen2_dp_predictions_v1.3.sh
│   │   │   ├── mscale/
│   │   │   │   ├── 4o_pipeline.sh
│   │   │   │   ├── construct_prm_sc_pair.sh
│   │   │   │   ├── pipeline.sh
│   │   │   │   └── rerank.sh
│   │   │   ├── pipeline.sh
│   │   │   ├── pp_gpt_inputs.py
│   │   │   ├── process_4o.py
│   │   │   ├── process_raw_4o.py
│   │   │   ├── process_raw_4o_labeling.py
│   │   │   ├── qwen25math_style_eval.py
│   │   │   ├── qwen25math_style_eval.sh
│   │   │   ├── qwen25math_style_eval_math.py
│   │   │   ├── qwen25math_style_eval_v2.0.py
│   │   │   ├── qwen25math_style_preprocess_pred_label.py
│   │   │   ├── reject_sampling_pipeline.sh
│   │   │   ├── rerank_w_prm_math.py
│   │   │   ├── rerank_w_prm_math_scale_save.py
│   │   │   ├── rerank_w_prm_math_scale_save_pair.py
│   │   │   ├── rerank_w_prm_math_scale_save_pair_margin.py
│   │   │   └── split_data.py
│   │   ├── mbpp/
│   │   │   ├── eval_human_eval_gpt_outputs.py
│   │   │   ├── eval_mbpp_gpt_outputs.py
│   │   │   ├── pp_eval_gpt4_human_eval.py
│   │   │   ├── pp_eval_gpt4_mbpp.py
│   │   │   ├── prepare_mbpp_test_cases_inputs_v1.0.py
│   │   │   ├── print_human_eval_mbpp_res.sh
│   │   │   ├── process_mbpp_test_cases_inputs.py
│   │   │   └── run_test_case_v1.0.py
│   │   ├── model_converts/
│   │   │   ├── llama_hf_mp_split.py
│   │   │   └── pad_model_embedding.py
│   │   ├── prepare_code_contests_decompose.py
│   │   ├── prepare_code_contests_decompose_verification.py
│   │   ├── prepare_code_contests_decompose_verification_v2.0.py
│   │   ├── prepare_code_contests_judgement.py
│   │   ├── prepare_mbpp_desc2code_inputs_v1.0.py
│   │   ├── prepare_mbpp_inputs_v1.0.py
│   │   ├── prepare_mbpp_intermediate_print_v1.0.py
│   │   ├── prepare_mbpp_predict_judgement.py
│   │   ├── prepare_mbpp_test_cases_inputs_v1.0.py
│   │   ├── prepare_mbpp_test_cases_outputs_v1.0.py
│   │   ├── prepare_mbpp_test_cases_outputs_v1.1.py
│   │   ├── split_data_according_to_id.py
│   │   └── verify_mbpp_test_cases.py
│   ├── service_api_caller_v1.py
│   ├── trainer_base_ds_mul_fs_tp.py
│   ├── trainer_ds_megatron_mul.py
│   ├── visualize/
│   │   ├── length_distribution.py
│   │   ├── reward_histogram.py
│   │   └── test_response_length.py
│   ├── vllm_inference.py
│   └── vllm_inference_dp.py
├── README.md
├── ReSA/
│   ├── README.md
│   ├── figures/
│   │   └── figure.py
│   ├── llm/
│   │   ├── __init__.py
│   │   ├── arch/
│   │   │   ├── __init__.py
│   │   │   ├── context_manager.py
│   │   │   └── model.py
│   │   ├── config.py
│   │   ├── data/
│   │   │   └── tokenizer.py
│   │   ├── eval.py
│   │   ├── eval_math.py
│   │   ├── kernel/
│   │   │   ├── __init__.py
│   │   │   ├── flash_attention_with_kv_cache.py
│   │   │   ├── flash_sparse_decoding.py
│   │   │   ├── rotary.py
│   │   │   ├── tilelang_attention_with_kv_cache.py
│   │   │   └── tilelang_sparse_decoding.py
│   │   └── utils/
│   │       └── math_utils.py
│   ├── math_data/
│   │   ├── aime24/
│   │   │   └── test.jsonl
│   │   ├── amc23/
│   │   │   └── test.jsonl
│   │   ├── aqua/
│   │   │   └── test.jsonl
│   │   ├── asdiv/
│   │   │   └── test.jsonl
│   │   ├── carp_en/
│   │   │   ├── demo.json
│   │   │   └── test.jsonl
│   │   ├── cmath/
│   │   │   └── test.jsonl
│   │   ├── cn_middle_school/
│   │   │   └── test.jsonl
│   │   ├── college_math/
│   │   │   └── test.jsonl
│   │   ├── eval_rm_maj_example/
│   │   │   └── math_cot_100.jsonl
│   │   ├── gaokao2023en/
│   │   │   └── test.jsonl
│   │   ├── gaokao2024_I/
│   │   │   └── test.jsonl
│   │   ├── gaokao2024_II/
│   │   │   └── test.jsonl
│   │   ├── gaokao2024_mix/
│   │   │   └── test.jsonl
│   │   ├── gaokao_math_cloze/
│   │   │   └── test.jsonl
│   │   ├── gaokao_math_qa/
│   │   │   └── test.jsonl
│   │   ├── gsm8k/
│   │   │   ├── test.jsonl
│   │   │   └── train.jsonl
│   │   ├── math/
│   │   │   ├── test.jsonl
│   │   │   └── train.jsonl
│   │   ├── mawps/
│   │   │   ├── addsub.jsonl
│   │   │   ├── multiarith.jsonl
│   │   │   ├── singleeq.jsonl
│   │   │   ├── singleop.jsonl
│   │   │   └── test.jsonl
│   │   ├── minerva_math/
│   │   │   ├── README.md
│   │   │   └── test.jsonl
│   │   ├── mmlu_stem/
│   │   │   └── test.jsonl
│   │   ├── olympiadbench/
│   │   │   ├── test.json
│   │   │   └── test.jsonl
│   │   ├── sat_math/
│   │   │   └── test.jsonl
│   │   ├── svamp/
│   │   │   └── test.jsonl
│   │   └── tabmwp/
│   │       └── test.jsonl
│   └── scripts/
│       ├── local_eval_math.sh
│       ├── math_eval_result.sh
│       ├── math_eval_result_length.py
│       ├── math_utils.py
│       └── setup_math_eval.sh
├── SECURITY.md
├── YOCO/
│   ├── README.md
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── eval_needle.sh
│   │   ├── eval_task.sh
│   │   └── train.sh
│   └── yoco/
│       ├── __init__.py
│       ├── criterions/
│       │   ├── __init__.py
│       │   ├── harness_eval.py
│       │   ├── multi_needle.py
│       │   └── needle_haystack.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── decoder/
│       │   │   ├── __init__.py
│       │   │   ├── cross_attention.py
│       │   │   ├── feedforward_network.py
│       │   │   ├── gate_retention.py
│       │   │   ├── kernel/
│       │   │   │   ├── gate_recurrent.py
│       │   │   │   ├── rotary.py
│       │   │   │   └── swiglu.py
│       │   │   ├── model_parallel_init.py
│       │   │   ├── rms_norm.py
│       │   │   ├── sliding_window_attention.py
│       │   │   ├── transformer.py
│       │   │   └── yoco.py
│       │   ├── transformer.py
│       │   └── yoco.py
│       ├── tasks/
│       │   ├── __init__.py
│       │   ├── data/
│       │   │   ├── __init__.py
│       │   │   ├── basic_loader.py
│       │   │   ├── llama_tokenizer.py
│       │   │   ├── lm_loader.py
│       │   │   ├── tiktoken_tokenizer.py
│       │   │   └── utils.py
│       │   ├── gpt.py
│       │   ├── harness_eval.py
│       │   ├── harness_task.py
│       │   ├── mmlu_task.py
│       │   └── pseudo.py
│       ├── train.py
│       └── validate.py
├── adalm/
│   ├── README.md
│   ├── finetune/
│   │   ├── __init__.py
│   │   ├── run_classifier.py
│   │   ├── run_ner.py
│   │   ├── run_pico.py
│   │   ├── utils_for_glue.py
│   │   └── utils_ner.py
│   ├── incr_bpe/
│   │   ├── README.md
│   │   ├── subword_builder.py
│   │   ├── test_data/
│   │   │   ├── chem.txt
│   │   │   └── vocab.txt
│   │   ├── text_encoder.py
│   │   ├── tokenizer.py
│   │   └── vocab_extend.py
│   ├── requirements.txt
│   └── setup.py
├── beats/
│   ├── BEATs.py
│   ├── README.md
│   ├── Tokenizers.py
│   ├── backbone.py
│   ├── beats_README.md
│   ├── modules.py
│   └── quantizer.py
├── beit/
│   ├── .gitignore
│   ├── README.md
│   ├── dall_e/
│   │   ├── __init__.py
│   │   ├── decoder.py
│   │   ├── encoder.py
│   │   └── utils.py
│   ├── dataset_folder.py
│   ├── datasets.py
│   ├── engine_for_finetuning.py
│   ├── engine_for_pretraining.py
│   ├── get_started_for_image_classification.md
│   ├── masking_generator.py
│   ├── modeling_discrete_vae.py
│   ├── modeling_finetune.py
│   ├── modeling_pretrain.py
│   ├── optim_factory.py
│   ├── requirements.txt
│   ├── run_beit_pretraining.py
│   ├── run_class_finetuning.py
│   ├── run_linear_eval.py
│   ├── semantic_segmentation/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   └── beit.py
│   │   ├── configs/
│   │   │   ├── _base_/
│   │   │   │   ├── datasets/
│   │   │   │   │   ├── ade20k.py
│   │   │   │   │   └── ade20k_640x640.py
│   │   │   │   ├── default_runtime.py
│   │   │   │   ├── models/
│   │   │   │   │   └── upernet_beit.py
│   │   │   │   └── schedules/
│   │   │   │       ├── schedule_160k.py
│   │   │   │       └── schedule_320k.py
│   │   │   └── beit/
│   │   │       └── upernet/
│   │   │           ├── upernet_beit_base_12_512_slide_160k_ade20k_ms.py
│   │   │           ├── upernet_beit_base_12_512_slide_160k_ade20k_pt.py
│   │   │           ├── upernet_beit_base_12_512_slide_160k_ade20k_pt2ft.py
│   │   │           ├── upernet_beit_base_12_640_slide_160k_ade20k_ms.py
│   │   │           ├── upernet_beit_base_12_640_slide_160k_ade20k_pt2ft.py
│   │   │           ├── upernet_beit_large_24_512_slide_160k_ade20k_ms.py
│   │   │           ├── upernet_beit_large_24_512_slide_160k_ade20k_pt2ft.py
│   │   │           ├── upernet_beit_large_24_640_slide_160k_ade20k_ms.py
│   │   │           └── upernet_beit_large_24_640_slide_160k_ade20k_pt2ft.py
│   │   ├── mmcv_custom/
│   │   │   ├── __init__.py
│   │   │   ├── apex_runner/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── apex_iter_based_runner.py
│   │   │   │   ├── checkpoint.py
│   │   │   │   └── optimizer.py
│   │   │   ├── checkpoint.py
│   │   │   ├── layer_decay_optimizer_constructor.py
│   │   │   ├── resize_transform.py
│   │   │   └── train_api.py
│   │   └── tools/
│   │       ├── dist_test.sh
│   │       ├── dist_train.sh
│   │       ├── test.py
│   │       └── train.py
│   ├── transforms.py
│   └── utils.py
├── beit2/
│   ├── .gitignore
│   ├── PRETRAINING.md
│   ├── README.md
│   ├── TOKENIZER.md
│   ├── dataset_folder.py
│   ├── datasets.py
│   ├── engine_for_finetuning.py
│   ├── engine_for_pretraining.py
│   ├── engine_for_vqkd.py
│   ├── get_started_for_image_classification.md
│   ├── imagenet_a_r_indices.py
│   ├── masking_generator.py
│   ├── modeling_finetune.py
│   ├── modeling_pretrain.py
│   ├── modeling_vqkd.py
│   ├── norm_ema_quantizer.py
│   ├── optim_factory.py
│   ├── requirements.txt
│   ├── run_beitv2_pretraining.py
│   ├── run_class_finetuning.py
│   ├── run_vqkd_training.py
│   ├── semantic_segmentation/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   └── beit.py
│   │   ├── configs/
│   │   │   ├── _base_/
│   │   │   │   ├── datasets/
│   │   │   │   │   ├── ade20k.py
│   │   │   │   │   └── ade20k_640x640.py
│   │   │   │   ├── default_runtime.py
│   │   │   │   ├── models/
│   │   │   │   │   └── upernet_beit.py
│   │   │   │   └── schedules/
│   │   │   │       ├── schedule_160k.py
│   │   │   │       └── schedule_320k.py
│   │   │   └── beit/
│   │   │       └── upernet/
│   │   │           ├── upernet_beit_base_12_512_slide_160k_21ktoade20k.py
│   │   │           ├── upernet_beit_base_12_512_slide_160k_ade20k.py
│   │   │           ├── upernet_beit_large_24_512_slide_160k_21ktoade20k.py
│   │   │           └── upernet_beit_large_24_512_slide_160k_ade20k.py
│   │   ├── mmcv_custom/
│   │   │   ├── __init__.py
│   │   │   ├── apex_runner/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── apex_iter_based_runner.py
│   │   │   │   ├── checkpoint.py
│   │   │   │   └── optimizer.py
│   │   │   ├── checkpoint.py
│   │   │   ├── layer_decay_optimizer_constructor.py
│   │   │   ├── resize_transform.py
│   │   │   └── train_api.py
│   │   └── tools/
│   │       ├── dist_test.sh
│   │       ├── dist_train.sh
│   │       ├── test.py
│   │       └── train.py
│   ├── test_get_code.py
│   ├── transforms.py
│   ├── utils.py
│   ├── visualize_attention.py
│   └── vqkd_teacher/
│       ├── __init__.py
│       ├── clip/
│       │   ├── __init__.py
│       │   ├── clip.py
│       │   ├── model.py
│       │   └── simple_tokenizer.py
│       └── dino.py
├── beit3/
│   ├── README.md
│   ├── datasets.py
│   ├── engine_for_finetuning.py
│   ├── get_started/
│   │   ├── get_started_for_captioning.md
│   │   ├── get_started_for_image_classification.md
│   │   ├── get_started_for_nlvr2.md
│   │   ├── get_started_for_retrieval.md
│   │   └── get_started_for_vqav2.md
│   ├── glossary.py
│   ├── modeling_finetune.py
│   ├── modeling_utils.py
│   ├── optim_factory.py
│   ├── randaug.py
│   ├── requirements.txt
│   ├── run_beit3_finetuning.py
│   └── utils.py
├── bitnet/
│   └── README.md
├── decoding/
│   ├── GAD/
│   │   ├── block_plugins/
│   │   │   ├── __init__.py
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── glat_loss.py
│   │   │   ├── models/
│   │   │   │   ├── BlockNAT.py
│   │   │   │   └── __init__.py
│   │   │   └── tasks/
│   │   │       ├── __init__.py
│   │   │       └── translation_lev_modified.py
│   │   ├── data/
│   │   │   ├── test.de.compound.ref
│   │   │   ├── wmt14.en-de/
│   │   │   │   ├── bpe.32000
│   │   │   │   ├── dict.de.txt
│   │   │   │   └── dict.en.txt
│   │   │   └── wmt16.en-ro/
│   │   │       ├── dict.en.txt
│   │   │       ├── dict.ro.txt
│   │   │       └── get_data.sh
│   │   ├── fairseq/
│   │   │   ├── __init__.py
│   │   │   ├── benchmark/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dummy_lm.py
│   │   │   │   ├── dummy_masked_lm.py
│   │   │   │   ├── dummy_model.py
│   │   │   │   └── dummy_mt.py
│   │   │   ├── binarizer.py
│   │   │   ├── checkpoint_utils.py
│   │   │   ├── clib/
│   │   │   │   ├── cuda/
│   │   │   │   │   ├── ngram_repeat_block_cuda.cpp
│   │   │   │   │   └── ngram_repeat_block_cuda_kernel.cu
│   │   │   │   ├── libbleu/
│   │   │   │   │   ├── libbleu.cpp
│   │   │   │   │   └── module.cpp
│   │   │   │   ├── libnat/
│   │   │   │   │   └── edit_dist.cpp
│   │   │   │   └── libnat_cuda/
│   │   │   │       ├── binding.cpp
│   │   │   │       ├── edit_dist.cu
│   │   │   │       └── edit_dist.h
│   │   │   ├── config/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.yaml
│   │   │   │   └── model/
│   │   │   │       ├── transformer_lm/
│   │   │   │       │   ├── transformer_lm_baevski_gbw.yaml
│   │   │   │       │   ├── transformer_lm_baevski_wiki103.yaml
│   │   │   │       │   ├── transformer_lm_big.yaml
│   │   │   │       │   ├── transformer_lm_gbw.yaml
│   │   │   │       │   ├── transformer_lm_gpt.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_big.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_medium.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_small.yaml
│   │   │   │       │   └── transformer_lm_wiki103.yaml
│   │   │   │       ├── wav2vec/
│   │   │   │       │   └── vq_wav2vec_gumbel.yaml
│   │   │   │       └── wav2vec2/
│   │   │   │           ├── wav2vec2_base.yaml
│   │   │   │           └── wav2vec2_large.yaml
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_loss.py
│   │   │   │   ├── composite_loss.py
│   │   │   │   ├── cross_entropy.py
│   │   │   │   ├── ctc.py
│   │   │   │   ├── fairseq_criterion.py
│   │   │   │   ├── label_smoothed_cross_entropy.py
│   │   │   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── model_criterion.py
│   │   │   │   ├── nat_loss.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   ├── sentence_ranking.py
│   │   │   │   └── wav2vec_criterion.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── add_target_dataset.py
│   │   │   │   ├── append_token_dataset.py
│   │   │   │   ├── audio/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── audio_utils.py
│   │   │   │   │   ├── feature_transforms/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── global_cmvn.py
│   │   │   │   │   │   ├── specaugment.py
│   │   │   │   │   │   └── utterance_cmvn.py
│   │   │   │   │   ├── raw_audio_dataset.py
│   │   │   │   │   └── speech_to_text_dataset.py
│   │   │   │   ├── backtranslation_dataset.py
│   │   │   │   ├── base_wrapper_dataset.py
│   │   │   │   ├── bucket_pad_length_dataset.py
│   │   │   │   ├── colorize_dataset.py
│   │   │   │   ├── concat_dataset.py
│   │   │   │   ├── concat_sentences_dataset.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── data_utils_fast.cpp
│   │   │   │   ├── data_utils_fast.pyx
│   │   │   │   ├── denoising_dataset.py
│   │   │   │   ├── dictionary.py
│   │   │   │   ├── encoders/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── byte_bpe.py
│   │   │   │   │   ├── byte_utils.py
│   │   │   │   │   ├── bytes.py
│   │   │   │   │   ├── characters.py
│   │   │   │   │   ├── fastbpe.py
│   │   │   │   │   ├── gpt2_bpe.py
│   │   │   │   │   ├── gpt2_bpe_utils.py
│   │   │   │   │   ├── hf_bert_bpe.py
│   │   │   │   │   ├── hf_byte_bpe.py
│   │   │   │   │   ├── moses_tokenizer.py
│   │   │   │   │   ├── nltk_tokenizer.py
│   │   │   │   │   ├── sentencepiece_bpe.py
│   │   │   │   │   ├── space_tokenizer.py
│   │   │   │   │   ├── subword_nmt_bpe.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── fairseq_dataset.py
│   │   │   │   ├── fasta_dataset.py
│   │   │   │   ├── id_dataset.py
│   │   │   │   ├── indexed_dataset.py
│   │   │   │   ├── iterators.py
│   │   │   │   ├── language_pair_dataset.py
│   │   │   │   ├── legacy/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── block_pair_dataset.py
│   │   │   │   │   ├── masked_lm_dataset.py
│   │   │   │   │   └── masked_lm_dictionary.py
│   │   │   │   ├── list_dataset.py
│   │   │   │   ├── lm_context_window_dataset.py
│   │   │   │   ├── lru_cache_dataset.py
│   │   │   │   ├── mask_tokens_dataset.py
│   │   │   │   ├── monolingual_dataset.py
│   │   │   │   ├── multi_corpus_dataset.py
│   │   │   │   ├── multi_corpus_sampled_dataset.py
│   │   │   │   ├── multilingual/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── multilingual_data_manager.py
│   │   │   │   │   ├── multilingual_utils.py
│   │   │   │   │   ├── sampled_multi_dataset.py
│   │   │   │   │   ├── sampled_multi_epoch_dataset.py
│   │   │   │   │   └── sampling_method.py
│   │   │   │   ├── nested_dictionary_dataset.py
│   │   │   │   ├── noising.py
│   │   │   │   ├── num_samples_dataset.py
│   │   │   │   ├── numel_dataset.py
│   │   │   │   ├── offset_tokens_dataset.py
│   │   │   │   ├── pad_dataset.py
│   │   │   │   ├── plasma_utils.py
│   │   │   │   ├── prepend_dataset.py
│   │   │   │   ├── prepend_token_dataset.py
│   │   │   │   ├── raw_label_dataset.py
│   │   │   │   ├── replace_dataset.py
│   │   │   │   ├── resampling_dataset.py
│   │   │   │   ├── roll_dataset.py
│   │   │   │   ├── round_robin_zip_datasets.py
│   │   │   │   ├── shorten_dataset.py
│   │   │   │   ├── sort_dataset.py
│   │   │   │   ├── strip_token_dataset.py
│   │   │   │   ├── subsample_dataset.py
│   │   │   │   ├── token_block_dataset.py
│   │   │   │   ├── token_block_utils_fast.cpp
│   │   │   │   ├── token_block_utils_fast.pyx
│   │   │   │   ├── transform_eos_dataset.py
│   │   │   │   └── transform_eos_lang_pair_dataset.py
│   │   │   ├── dataclass/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configs.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── initialize.py
│   │   │   │   └── utils.py
│   │   │   ├── distributed/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── distributed_timeout_wrapper.py
│   │   │   │   ├── legacy_distributed_data_parallel.py
│   │   │   │   ├── module_proxy_wrapper.py
│   │   │   │   ├── tpu_distributed_data_parallel.py
│   │   │   │   └── utils.py
│   │   │   ├── file_io.py
│   │   │   ├── file_utils.py
│   │   │   ├── hub_utils.py
│   │   │   ├── incremental_decoding_utils.py
│   │   │   ├── iterative_refinement_generator.py
│   │   │   ├── logging/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── meters.py
│   │   │   │   ├── metrics.py
│   │   │   │   └── progress_bar.py
│   │   │   ├── model_parallel/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── vocab_parallel_cross_entropy.py
│   │   │   │   ├── megatron_trainer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── pipeline_parallel_transformer/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── layers.py
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── roberta/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── transformer.py
│   │   │   │   │   └── transformer_lm.py
│   │   │   │   └── modules/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── multihead_attention.py
│   │   │   │       └── transformer_layer.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bart/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── composite_encoder.py
│   │   │   │   ├── distributed_fairseq_model.py
│   │   │   │   ├── fairseq_decoder.py
│   │   │   │   ├── fairseq_encoder.py
│   │   │   │   ├── fairseq_incremental_decoder.py
│   │   │   │   ├── fairseq_model.py
│   │   │   │   ├── fconv.py
│   │   │   │   ├── fconv_lm.py
│   │   │   │   ├── fconv_self_att.py
│   │   │   │   ├── huggingface/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── hf_gpt2.py
│   │   │   │   ├── lightconv.py
│   │   │   │   ├── lightconv_lm.py
│   │   │   │   ├── lstm.py
│   │   │   │   ├── lstm_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── model_utils.py
│   │   │   │   ├── multilingual_transformer.py
│   │   │   │   ├── nat/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fairseq_nat_model.py
│   │   │   │   │   ├── nonautoregressive_ensembles.py
│   │   │   │   │   └── nonautoregressive_transformer.py
│   │   │   │   ├── roberta/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alignment_utils.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   ├── model.py
│   │   │   │   │   ├── model_camembert.py
│   │   │   │   │   ├── model_gottbert.py
│   │   │   │   │   └── model_xlmr.py
│   │   │   │   ├── speech_to_text/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── berard.py
│   │   │   │   │   ├── convtransformer.py
│   │   │   │   │   └── s2t_transformer.py
│   │   │   │   ├── transformer.py
│   │   │   │   ├── transformer_align.py
│   │   │   │   ├── transformer_from_pretrained_xlm.py
│   │   │   │   ├── transformer_lm.py
│   │   │   │   └── wav2vec/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── wav2vec.py
│   │   │   │       ├── wav2vec2.py
│   │   │   │       └── wav2vec2_asr.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_input.py
│   │   │   │   ├── adaptive_softmax.py
│   │   │   │   ├── beamable_mm.py
│   │   │   │   ├── character_token_embedder.py
│   │   │   │   ├── checkpoint_activations.py
│   │   │   │   ├── conv_tbc.py
│   │   │   │   ├── cross_entropy.py
│   │   │   │   ├── cuda_utils.cu
│   │   │   │   ├── downsampled_multihead_attention.py
│   │   │   │   ├── dynamic_convolution.py
│   │   │   │   ├── dynamic_crf_layer.py
│   │   │   │   ├── dynamicconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── dynamicconv_cuda.cpp
│   │   │   │   │   ├── dynamicconv_cuda.cuh
│   │   │   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   │   │   ├── dynamicconv_layer.py
│   │   │   │   │   ├── dynamiconv_cpu.cpp
│   │   │   │   │   └── setup.py
│   │   │   │   ├── fairseq_dropout.py
│   │   │   │   ├── fp32_group_norm.py
│   │   │   │   ├── gelu.py
│   │   │   │   ├── grad_multiply.py
│   │   │   │   ├── gumbel_vector_quantizer.py
│   │   │   │   ├── kmeans_vector_quantizer.py
│   │   │   │   ├── layer_drop.py
│   │   │   │   ├── layer_norm.py
│   │   │   │   ├── learned_positional_embedding.py
│   │   │   │   ├── lightconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── lightconv_cuda.cpp
│   │   │   │   │   ├── lightconv_cuda.cuh
│   │   │   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   │   │   ├── lightconv_layer.py
│   │   │   │   │   └── setup.py
│   │   │   │   ├── lightweight_convolution.py
│   │   │   │   ├── linearized_convolution.py
│   │   │   │   ├── multihead_attention.py
│   │   │   │   ├── positional_embedding.py
│   │   │   │   ├── quant_noise.py
│   │   │   │   ├── quantization/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── pq/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── em.py
│   │   │   │   │   │   ├── modules/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── qconv.py
│   │   │   │   │   │   │   ├── qemb.py
│   │   │   │   │   │   │   └── qlinear.py
│   │   │   │   │   │   ├── pq.py
│   │   │   │   │   │   └── utils.py
│   │   │   │   │   ├── quantization_options.py
│   │   │   │   │   └── scalar/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── modules/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   ├── qact.py
│   │   │   │   │       │   ├── qconv.py
│   │   │   │   │       │   ├── qemb.py
│   │   │   │   │       │   └── qlinear.py
│   │   │   │   │       ├── ops.py
│   │   │   │   │       └── utils.py
│   │   │   │   ├── same_pad.py
│   │   │   │   ├── scalar_bias.py
│   │   │   │   ├── sinusoidal_positional_embedding.py
│   │   │   │   ├── sparse_multihead_attention.py
│   │   │   │   ├── sparse_transformer_sentence_encoder.py
│   │   │   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   │   │   ├── transformer_layer.py
│   │   │   │   ├── transformer_sentence_encoder.py
│   │   │   │   ├── transformer_sentence_encoder_layer.py
│   │   │   │   ├── transpose_last.py
│   │   │   │   ├── unfold.py
│   │   │   │   └── vggblock.py
│   │   │   ├── nan_detector.py
│   │   │   ├── ngram_repeat_block.py
│   │   │   ├── optim/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adadelta.py
│   │   │   │   ├── adafactor.py
│   │   │   │   ├── adagrad.py
│   │   │   │   ├── adam.py
│   │   │   │   ├── adamax.py
│   │   │   │   ├── bmuf.py
│   │   │   │   ├── composite.py
│   │   │   │   ├── cpu_adam.py
│   │   │   │   ├── dynamic_loss_scaler.py
│   │   │   │   ├── fairseq_optimizer.py
│   │   │   │   ├── fp16_optimizer.py
│   │   │   │   ├── fused_adam.py
│   │   │   │   ├── fused_lamb.py
│   │   │   │   ├── lr_scheduler/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cosine_lr_scheduler.py
│   │   │   │   │   ├── fairseq_lr_scheduler.py
│   │   │   │   │   ├── fixed_schedule.py
│   │   │   │   │   ├── inverse_square_root_schedule.py
│   │   │   │   │   ├── manual_lr_scheduler.py
│   │   │   │   │   ├── pass_through.py
│   │   │   │   │   ├── polynomial_decay_schedule.py
│   │   │   │   │   ├── reduce_lr_on_plateau.py
│   │   │   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   │   │   └── triangular_lr_scheduler.py
│   │   │   │   ├── nag.py
│   │   │   │   ├── sgd.py
│   │   │   │   └── shard.py
│   │   │   ├── options.py
│   │   │   ├── pdb.py
│   │   │   ├── quantization_utils.py
│   │   │   ├── registry.py
│   │   │   ├── scoring/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bleu.py
│   │   │   │   ├── chrf.py
│   │   │   │   ├── tokenizer.py
│   │   │   │   └── wer.py
│   │   │   ├── search.py
│   │   │   ├── sequence_generator.py
│   │   │   ├── sequence_scorer.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_pretraining.py
│   │   │   │   ├── cross_lingual_lm.py
│   │   │   │   ├── denoising.py
│   │   │   │   ├── fairseq_task.py
│   │   │   │   ├── language_modeling.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── multilingual_denoising.py
│   │   │   │   ├── multilingual_masked_lm.py
│   │   │   │   ├── multilingual_translation.py
│   │   │   │   ├── semisupervised_translation.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   ├── sentence_ranking.py
│   │   │   │   ├── speech_to_text.py
│   │   │   │   ├── translation.py
│   │   │   │   ├── translation_from_pretrained_bart.py
│   │   │   │   ├── translation_from_pretrained_xlm.py
│   │   │   │   ├── translation_lev.py
│   │   │   │   └── translation_multi_simple_epoch.py
│   │   │   ├── token_generation_constraints.py
│   │   │   ├── tokenizer.py
│   │   │   ├── trainer.py
│   │   │   ├── utils.py
│   │   │   ├── version.py
│   │   │   └── version.txt
│   │   ├── fairseq_cli/
│   │   │   ├── __init__.py
│   │   │   ├── eval_lm.py
│   │   │   ├── generate.py
│   │   │   ├── hydra_train.py
│   │   │   ├── interactive.py
│   │   │   ├── preprocess.py
│   │   │   ├── score.py
│   │   │   ├── train.py
│   │   │   └── validate.py
│   │   ├── hubconf.py
│   │   ├── inference.py
│   │   ├── inference.sh
│   │   ├── inference_paper.py
│   │   ├── pyproject.toml
│   │   ├── readme.md
│   │   ├── ref.sh
│   │   ├── scripts/
│   │   │   ├── __init__.py
│   │   │   ├── average_checkpoints.py
│   │   │   ├── build_sym_alignment.py
│   │   │   ├── compare_namespaces.py
│   │   │   ├── compound_split_bleu.sh
│   │   │   ├── constraints/
│   │   │   │   ├── extract.py
│   │   │   │   └── validate.py
│   │   │   ├── convert_dictionary.lua
│   │   │   ├── convert_model.lua
│   │   │   ├── count_docs.py
│   │   │   ├── read_binarized.py
│   │   │   ├── rm_pt.py
│   │   │   ├── sacrebleu.sh
│   │   │   ├── shard_docs.py
│   │   │   ├── split_train_valid_docs.py
│   │   │   ├── spm_decode.py
│   │   │   ├── spm_encode.py
│   │   │   └── spm_train.py
│   │   ├── setup.py
│   │   ├── train.py
│   │   └── train.sh
│   ├── IAD/
│   │   ├── README.md
│   │   ├── fairseq/
│   │   │   ├── .github/
│   │   │   │   ├── ISSUE_TEMPLATE/
│   │   │   │   │   ├── bug_report.md
│   │   │   │   │   ├── documentation.md
│   │   │   │   │   ├── feature_request.md
│   │   │   │   │   └── how-to-question.md
│   │   │   │   ├── ISSUE_TEMPLATE.md
│   │   │   │   ├── PULL_REQUEST_TEMPLATE.md
│   │   │   │   ├── stale.yml
│   │   │   │   └── workflows/
│   │   │   │       ├── build.yml
│   │   │   │       └── build_wheels.yml
│   │   │   ├── .gitignore
│   │   │   ├── .gitmodules
│   │   │   ├── CODE_OF_CONDUCT.md
│   │   │   ├── CONTRIBUTING.md
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── README_FAIRSEQ.md
│   │   │   ├── docs/
│   │   │   │   ├── Makefile
│   │   │   │   ├── _static/
│   │   │   │   │   └── theme_overrides.css
│   │   │   │   ├── command_line_tools.rst
│   │   │   │   ├── conf.py
│   │   │   │   ├── criterions.rst
│   │   │   │   ├── data.rst
│   │   │   │   ├── docutils.conf
│   │   │   │   ├── getting_started.rst
│   │   │   │   ├── hydra_integration.md
│   │   │   │   ├── index.rst
│   │   │   │   ├── lr_scheduler.rst
│   │   │   │   ├── make.bat
│   │   │   │   ├── models.rst
│   │   │   │   ├── modules.rst
│   │   │   │   ├── optim.rst
│   │   │   │   ├── overview.rst
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── tasks.rst
│   │   │   │   ├── tutorial_classifying_names.rst
│   │   │   │   └── tutorial_simple_lstm.rst
│   │   │   ├── examples/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_span/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── adagrad_with_grad_clip.py
│   │   │   │   │   ├── adaptive_span_attention.py
│   │   │   │   │   ├── adaptive_span_loss.py
│   │   │   │   │   ├── adaptive_span_model.py
│   │   │   │   │   ├── adaptive_span_model_wrapper.py
│   │   │   │   │   └── truncated_bptt_lm_task.py
│   │   │   │   ├── backtranslation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── deduplicate_lines.py
│   │   │   │   │   ├── extract_bt_data.py
│   │   │   │   │   ├── prepare-de-monolingual.sh
│   │   │   │   │   ├── prepare-wmt18en2de.sh
│   │   │   │   │   ├── sacrebleu.sh
│   │   │   │   │   └── tokenized_bleu.sh
│   │   │   │   ├── bart/
│   │   │   │   │   ├── README.glue.md
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── README.summarization.md
│   │   │   │   ├── byte_level_bpe/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── get_bitext.py
│   │   │   │   │   ├── get_data.sh
│   │   │   │   │   └── gru_transformer.py
│   │   │   │   ├── camembert/
│   │   │   │   │   └── README.md
│   │   │   │   ├── constrained_decoding/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── normalize.py
│   │   │   │   │   └── tok.py
│   │   │   │   ├── conv_seq2seq/
│   │   │   │   │   └── README.md
│   │   │   │   ├── criss/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── download_and_preprocess_flores_test.sh
│   │   │   │   │   ├── download_and_preprocess_tatoeba.sh
│   │   │   │   │   ├── mining/
│   │   │   │   │   │   ├── mine.py
│   │   │   │   │   │   └── mine_example.sh
│   │   │   │   │   ├── save_encoder.py
│   │   │   │   │   ├── sentence_retrieval/
│   │   │   │   │   │   ├── encoder_analysis.py
│   │   │   │   │   │   └── sentence_retrieval_tatoeba.sh
│   │   │   │   │   └── unsupervised_mt/
│   │   │   │   │       └── eval.sh
│   │   │   │   ├── cross_lingual_language_model/
│   │   │   │   │   └── README.md
│   │   │   │   ├── fast_noisy_channel/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── noisy_channel_beam_search.py
│   │   │   │   │   ├── noisy_channel_sequence_generator.py
│   │   │   │   │   └── noisy_channel_translation.py
│   │   │   │   ├── gottbert/
│   │   │   │   │   └── README.md
│   │   │   │   ├── joint_alignment_translation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
│   │   │   │   ├── language_model/
│   │   │   │   │   ├── README.adaptive_inputs.md
│   │   │   │   │   ├── README.conv.md
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── prepare-wikitext-103.sh
│   │   │   │   ├── latent_depth/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── latent_depth_src/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── loss/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   └── latent_depth.py
│   │   │   │   │       ├── models/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   ├── latent_multilingual_transformer.py
│   │   │   │   │       │   └── latent_transformer.py
│   │   │   │   │       ├── modules/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   └── latent_layers.py
│   │   │   │   │       └── multilingual_translation_latent_depth.py
│   │   │   │   ├── layerdrop/
│   │   │   │   │   └── README.md
│   │   │   │   ├── linformer/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── linformer_src/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── models/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   └── linformer_roberta.py
│   │   │   │   │       └── modules/
│   │   │   │   │           ├── __init__.py
│   │   │   │   │           ├── linformer_sentence_encoder.py
│   │   │   │   │           ├── linformer_sentence_encoder_layer.py
│   │   │   │   │           └── multihead_linear_attention.py
│   │   │   │   ├── m2m_100/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── install_dependecies.sh
│   │   │   │   │   ├── process_data/
│   │   │   │   │   │   ├── clean_histogram.py
│   │   │   │   │   │   ├── dedup_data.py
│   │   │   │   │   │   └── remove_too_much_punc.py
│   │   │   │   │   ├── tok.sh
│   │   │   │   │   └── tokenizers/
│   │   │   │   │       ├── README.md
│   │   │   │   │       ├── seg_ja.sh
│   │   │   │   │       ├── seg_ko.sh
│   │   │   │   │       ├── thirdparty/
│   │   │   │   │       │   └── .gitignore
│   │   │   │   │       ├── tokenize_indic.py
│   │   │   │   │       ├── tokenize_thai.py
│   │   │   │   │       ├── tokenize_zh.py
│   │   │   │   │       └── tokenizer_ar.sh
│   │   │   │   ├── mbart/
│   │   │   │   │   └── README.md
│   │   │   │   ├── megatron_11b/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── detok.py
│   │   │   │   ├── multilingual/
│   │   │   │   │   ├── ML50_langs.txt
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── data_scripts/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── binarize.py
│   │   │   │   │   │   ├── check_iswlt_test_data.py
│   │   │   │   │   │   ├── check_self_overlaps.py
│   │   │   │   │   │   ├── check_valid_test_overlaps.py
│   │   │   │   │   │   ├── dedup_all.py
│   │   │   │   │   │   ├── download_ML50_v1.sh
│   │   │   │   │   │   ├── download_af_xh.sh
│   │   │   │   │   │   ├── download_flores_data.sh
│   │   │   │   │   │   ├── download_iitb.sh
│   │   │   │   │   │   ├── download_iwslt_and_extract.sh
│   │   │   │   │   │   ├── download_lotus.sh
│   │   │   │   │   │   ├── download_ted_and_extract.py
│   │   │   │   │   │   ├── download_wat19_my.sh
│   │   │   │   │   │   ├── download_wmt19_and_before.py
│   │   │   │   │   │   ├── download_wmt20.sh
│   │   │   │   │   │   ├── preprocess_ML50_v1.sh
│   │   │   │   │   │   ├── remove_valid_test_in_train.py
│   │   │   │   │   │   ├── requirement.txt
│   │   │   │   │   │   └── utils/
│   │   │   │   │   │       ├── dedup.py
│   │   │   │   │   │       ├── fasttext_multi_filter.py
│   │   │   │   │   │       └── strip_sgm.sh
│   │   │   │   │   ├── finetune_multilingual_model.sh
│   │   │   │   │   ├── multilingual_fairseq_gen.sh
│   │   │   │   │   └── train_multilingual_model.sh
│   │   │   │   ├── noisychannel/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── rerank.py
│   │   │   │   │   ├── rerank_generate.py
│   │   │   │   │   ├── rerank_options.py
│   │   │   │   │   ├── rerank_score_bw.py
│   │   │   │   │   ├── rerank_score_lm.py
│   │   │   │   │   ├── rerank_tune.py
│   │   │   │   │   └── rerank_utils.py
│   │   │   │   ├── nonautoregressive_translation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── scripts.md
│   │   │   │   ├── paraphraser/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── paraphrase.py
│   │   │   │   ├── pay_less_attention_paper/
│   │   │   │   │   └── README.md
│   │   │   │   ├── pointer_generator/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── README.xsum.md
│   │   │   │   │   ├── pointer_generator_src/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── transformer_pg.py
│   │   │   │   │   ├── postprocess.py
│   │   │   │   │   └── preprocess.py
│   │   │   │   ├── quant_noise/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── transformer_quantization_config.yaml
│   │   │   │   ├── roberta/
│   │   │   │   │   ├── README.custom_classification.md
│   │   │   │   │   ├── README.glue.md
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── README.pretraining.md
│   │   │   │   │   ├── README.race.md
│   │   │   │   │   ├── commonsense_qa/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── commonsense_qa_task.py
│   │   │   │   │   │   └── download_cqa_data.sh
│   │   │   │   │   ├── multiprocessing_bpe_encoder.py
│   │   │   │   │   ├── preprocess_GLUE_tasks.sh
│   │   │   │   │   ├── preprocess_RACE.py
│   │   │   │   │   ├── preprocess_RACE.sh
│   │   │   │   │   └── wsc/
│   │   │   │   │       ├── README.md
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── wsc_criterion.py
│   │   │   │   │       ├── wsc_task.py
│   │   │   │   │       └── wsc_utils.py
│   │   │   │   ├── rxf/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── rxf_src/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── label_smoothed_cross_entropy_r3f.py
│   │   │   │   │       └── sentence_prediction_r3f.py
│   │   │   │   ├── scaling_nmt/
│   │   │   │   │   └── README.md
│   │   │   │   ├── simultaneous_translation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── criterions/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── label_smoothed_cross_entropy_latency_augmented.py
│   │   │   │   │   ├── docs/
│   │   │   │   │   │   ├── baseline.md
│   │   │   │   │   │   └── evaluation.md
│   │   │   │   │   ├── eval/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── agents/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── agent.py
│   │   │   │   │   │   │   ├── simul_trans_agent.py
│   │   │   │   │   │   │   ├── simul_trans_text_agent.py
│   │   │   │   │   │   │   └── word_splitter.py
│   │   │   │   │   │   ├── client.py
│   │   │   │   │   │   ├── eval_latency.py
│   │   │   │   │   │   ├── evaluate.py
│   │   │   │   │   │   ├── scorers/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── scorer.py
│   │   │   │   │   │   │   └── text_scorer.py
│   │   │   │   │   │   └── server.py
│   │   │   │   │   ├── models/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── transformer_monotonic_attention.py
│   │   │   │   │   ├── modules/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── monotonic_multihead_attention.py
│   │   │   │   │   │   └── monotonic_transformer_layer.py
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── functions.py
│   │   │   │   │       └── latency.py
│   │   │   │   ├── speech_recognition/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── criterions/
│   │   │   │   │   │   ├── ASG_loss.py
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── cross_entropy_acc.py
│   │   │   │   │   ├── data/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── asr_dataset.py
│   │   │   │   │   │   ├── collaters.py
│   │   │   │   │   │   ├── data_utils.py
│   │   │   │   │   │   └── replabels.py
│   │   │   │   │   ├── datasets/
│   │   │   │   │   │   ├── asr_prep_json.py
│   │   │   │   │   │   └── prepare-librispeech.sh
│   │   │   │   │   ├── infer.py
│   │   │   │   │   ├── models/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── vggtransformer.py
│   │   │   │   │   │   └── w2l_conv_glu_enc.py
│   │   │   │   │   ├── tasks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── speech_recognition.py
│   │   │   │   │   ├── utils/
│   │   │   │   │   │   └── wer_utils.py
│   │   │   │   │   └── w2l_decoder.py
│   │   │   │   ├── speech_to_text/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── data_utils.py
│   │   │   │   │   ├── docs/
│   │   │   │   │   │   ├── covost_example.md
│   │   │   │   │   │   ├── librispeech_example.md
│   │   │   │   │   │   └── mustc_example.md
│   │   │   │   │   ├── prep_covost_data.py
│   │   │   │   │   ├── prep_librispeech_data.py
│   │   │   │   │   └── prep_mustc_data.py
│   │   │   │   ├── stories/
│   │   │   │   │   └── README.md
│   │   │   │   ├── translation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── prepare-iwslt14.sh
│   │   │   │   │   ├── prepare-iwslt17-multilingual.sh
│   │   │   │   │   ├── prepare-wmt14en2de.sh
│   │   │   │   │   └── prepare-wmt14en2fr.sh
│   │   │   │   ├── translation_moe/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── score.py
│   │   │   │   │   └── translation_moe_src/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── logsumexp_moe.py
│   │   │   │   │       ├── mean_pool_gating_network.py
│   │   │   │   │       └── translation_moe.py
│   │   │   │   ├── truncated_bptt/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── transformer_xl_model.py
│   │   │   │   │   └── truncated_bptt_lm_task.py
│   │   │   │   ├── unsupervised_quality_estimation/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── aggregate_scores.py
│   │   │   │   │   ├── meteor.py
│   │   │   │   │   └── repeat_lines.py
│   │   │   │   ├── wav2vec/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── config/
│   │   │   │   │   │   ├── finetuning/
│   │   │   │   │   │   │   ├── base_100h.yaml
│   │   │   │   │   │   │   ├── base_10h.yaml
│   │   │   │   │   │   │   ├── base_10m.yaml
│   │   │   │   │   │   │   ├── base_1h.yaml
│   │   │   │   │   │   │   ├── base_960h.yaml
│   │   │   │   │   │   │   ├── vox_100h.yaml
│   │   │   │   │   │   │   ├── vox_10h.yaml
│   │   │   │   │   │   │   ├── vox_10m.yaml
│   │   │   │   │   │   │   ├── vox_1h.yaml
│   │   │   │   │   │   │   └── vox_960h.yaml
│   │   │   │   │   │   └── pretraining/
│   │   │   │   │   │       ├── wav2vec2_base_librispeech.yaml
│   │   │   │   │   │       └── wav2vec2_large_librivox.yaml
│   │   │   │   │   ├── libri_labels.py
│   │   │   │   │   ├── vq-wav2vec_featurize.py
│   │   │   │   │   ├── wav2vec_featurize.py
│   │   │   │   │   └── wav2vec_manifest.py
│   │   │   │   ├── wmt19/
│   │   │   │   │   └── README.md
│   │   │   │   ├── wmt20/
│   │   │   │   │   └── README.md
│   │   │   │   └── xlmr/
│   │   │   │       └── README.md
│   │   │   ├── fairseq/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── benchmark/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── dummy_lm.py
│   │   │   │   │   ├── dummy_masked_lm.py
│   │   │   │   │   ├── dummy_model.py
│   │   │   │   │   └── dummy_mt.py
│   │   │   │   ├── binarizer.py
│   │   │   │   ├── checkpoint_utils.py
│   │   │   │   ├── clib/
│   │   │   │   │   ├── cuda/
│   │   │   │   │   │   ├── ngram_repeat_block_cuda.cpp
│   │   │   │   │   │   └── ngram_repeat_block_cuda_kernel.cu
│   │   │   │   │   ├── libbleu/
│   │   │   │   │   │   ├── libbleu.cpp
│   │   │   │   │   │   └── module.cpp
│   │   │   │   │   ├── libnat/
│   │   │   │   │   │   └── edit_dist.cpp
│   │   │   │   │   └── libnat_cuda/
│   │   │   │   │       ├── binding.cpp
│   │   │   │   │       ├── edit_dist.cu
│   │   │   │   │       └── edit_dist.h
│   │   │   │   ├── config/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── config.yaml
│   │   │   │   │   └── model/
│   │   │   │   │       ├── transformer_lm/
│   │   │   │   │       │   ├── transformer_lm_baevski_gbw.yaml
│   │   │   │   │       │   ├── transformer_lm_baevski_wiki103.yaml
│   │   │   │   │       │   ├── transformer_lm_big.yaml
│   │   │   │   │       │   ├── transformer_lm_gbw.yaml
│   │   │   │   │       │   ├── transformer_lm_gpt.yaml
│   │   │   │   │       │   ├── transformer_lm_gpt2_big.yaml
│   │   │   │   │       │   ├── transformer_lm_gpt2_medium.yaml
│   │   │   │   │       │   ├── transformer_lm_gpt2_small.yaml
│   │   │   │   │       │   └── transformer_lm_wiki103.yaml
│   │   │   │   │       ├── wav2vec/
│   │   │   │   │       │   └── vq_wav2vec_gumbel.yaml
│   │   │   │   │       └── wav2vec2/
│   │   │   │   │           ├── wav2vec2_base.yaml
│   │   │   │   │           └── wav2vec2_large.yaml
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── adaptive_loss.py
│   │   │   │   │   ├── composite_loss.py
│   │   │   │   │   ├── cross_entropy.py
│   │   │   │   │   ├── ctc.py
│   │   │   │   │   ├── fairseq_criterion.py
│   │   │   │   │   ├── label_smoothed_cross_entropy.py
│   │   │   │   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   │   ├── masked_lm.py
│   │   │   │   │   ├── model_criterion.py
│   │   │   │   │   ├── nat_loss.py
│   │   │   │   │   ├── sentence_prediction.py
│   │   │   │   │   ├── sentence_ranking.py
│   │   │   │   │   └── wav2vec_criterion.py
│   │   │   │   ├── data/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── add_target_dataset.py
│   │   │   │   │   ├── append_token_dataset.py
│   │   │   │   │   ├── audio/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── audio_utils.py
│   │   │   │   │   │   ├── feature_transforms/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── global_cmvn.py
│   │   │   │   │   │   │   ├── specaugment.py
│   │   │   │   │   │   │   └── utterance_cmvn.py
│   │   │   │   │   │   ├── raw_audio_dataset.py
│   │   │   │   │   │   └── speech_to_text_dataset.py
│   │   │   │   │   ├── backtranslation_dataset.py
│   │   │   │   │   ├── base_wrapper_dataset.py
│   │   │   │   │   ├── bucket_pad_length_dataset.py
│   │   │   │   │   ├── colorize_dataset.py
│   │   │   │   │   ├── concat_dataset.py
│   │   │   │   │   ├── concat_sentences_dataset.py
│   │   │   │   │   ├── data_utils.py
│   │   │   │   │   ├── data_utils_fast.pyx
│   │   │   │   │   ├── denoising_dataset.py
│   │   │   │   │   ├── dictionary.py
│   │   │   │   │   ├── encoders/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── byte_bpe.py
│   │   │   │   │   │   ├── byte_utils.py
│   │   │   │   │   │   ├── bytes.py
│   │   │   │   │   │   ├── characters.py
│   │   │   │   │   │   ├── fastbpe.py
│   │   │   │   │   │   ├── gpt2_bpe.py
│   │   │   │   │   │   ├── gpt2_bpe_utils.py
│   │   │   │   │   │   ├── hf_bert_bpe.py
│   │   │   │   │   │   ├── hf_byte_bpe.py
│   │   │   │   │   │   ├── moses_tokenizer.py
│   │   │   │   │   │   ├── nltk_tokenizer.py
│   │   │   │   │   │   ├── sentencepiece_bpe.py
│   │   │   │   │   │   ├── space_tokenizer.py
│   │   │   │   │   │   ├── subword_nmt_bpe.py
│   │   │   │   │   │   └── utils.py
│   │   │   │   │   ├── fairseq_dataset.py
│   │   │   │   │   ├── fasta_dataset.py
│   │   │   │   │   ├── id_dataset.py
│   │   │   │   │   ├── indexed_dataset.py
│   │   │   │   │   ├── iterators.py
│   │   │   │   │   ├── language_pair_dataset.py
│   │   │   │   │   ├── legacy/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── block_pair_dataset.py
│   │   │   │   │   │   ├── masked_lm_dataset.py
│   │   │   │   │   │   └── masked_lm_dictionary.py
│   │   │   │   │   ├── list_dataset.py
│   │   │   │   │   ├── lm_context_window_dataset.py
│   │   │   │   │   ├── lru_cache_dataset.py
│   │   │   │   │   ├── mask_tokens_dataset.py
│   │   │   │   │   ├── monolingual_dataset.py
│   │   │   │   │   ├── multi_corpus_dataset.py
│   │   │   │   │   ├── multi_corpus_sampled_dataset.py
│   │   │   │   │   ├── multilingual/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── multilingual_data_manager.py
│   │   │   │   │   │   ├── multilingual_utils.py
│   │   │   │   │   │   ├── sampled_multi_dataset.py
│   │   │   │   │   │   ├── sampled_multi_epoch_dataset.py
│   │   │   │   │   │   └── sampling_method.py
│   │   │   │   │   ├── nested_dictionary_dataset.py
│   │   │   │   │   ├── noising.py
│   │   │   │   │   ├── num_samples_dataset.py
│   │   │   │   │   ├── numel_dataset.py
│   │   │   │   │   ├── offset_tokens_dataset.py
│   │   │   │   │   ├── pad_dataset.py
│   │   │   │   │   ├── plasma_utils.py
│   │   │   │   │   ├── prepend_dataset.py
│   │   │   │   │   ├── prepend_token_dataset.py
│   │   │   │   │   ├── raw_label_dataset.py
│   │   │   │   │   ├── replace_dataset.py
│   │   │   │   │   ├── resampling_dataset.py
│   │   │   │   │   ├── roll_dataset.py
│   │   │   │   │   ├── round_robin_zip_datasets.py
│   │   │   │   │   ├── shorten_dataset.py
│   │   │   │   │   ├── sort_dataset.py
│   │   │   │   │   ├── strip_token_dataset.py
│   │   │   │   │   ├── subsample_dataset.py
│   │   │   │   │   ├── token_block_dataset.py
│   │   │   │   │   ├── token_block_utils_fast.pyx
│   │   │   │   │   ├── transform_eos_dataset.py
│   │   │   │   │   └── transform_eos_lang_pair_dataset.py
│   │   │   │   ├── dataclass/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── configs.py
│   │   │   │   │   ├── constants.py
│   │   │   │   │   ├── initialize.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── distributed_utils.py
│   │   │   │   ├── file_io.py
│   │   │   │   ├── file_utils.py
│   │   │   │   ├── hub_utils.py
│   │   │   │   ├── incremental_decoding_utils.py
│   │   │   │   ├── iterative_refinement_generator.py
│   │   │   │   ├── legacy_distributed_data_parallel.py
│   │   │   │   ├── logging/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── meters.py
│   │   │   │   │   ├── metrics.py
│   │   │   │   │   └── progress_bar.py
│   │   │   │   ├── model_parallel/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── criterions/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── vocab_parallel_cross_entropy.py
│   │   │   │   │   ├── megatron_trainer.py
│   │   │   │   │   ├── models/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── pipeline_parallel_transformer/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── layers.py
│   │   │   │   │   │   │   └── model.py
│   │   │   │   │   │   ├── roberta/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   └── model.py
│   │   │   │   │   │   ├── transformer.py
│   │   │   │   │   │   └── transformer_lm.py
│   │   │   │   │   └── modules/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── multihead_attention.py
│   │   │   │   │       ├── transformer_layer.py
│   │   │   │   │       ├── transformer_sentence_encoder.py
│   │   │   │   │       └── transformer_sentence_encoder_layer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── bart/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── composite_encoder.py
│   │   │   │   │   ├── distributed_fairseq_model.py
│   │   │   │   │   ├── fairseq_decoder.py
│   │   │   │   │   ├── fairseq_encoder.py
│   │   │   │   │   ├── fairseq_incremental_decoder.py
│   │   │   │   │   ├── fairseq_model.py
│   │   │   │   │   ├── fconv.py
│   │   │   │   │   ├── fconv_lm.py
│   │   │   │   │   ├── fconv_self_att.py
│   │   │   │   │   ├── huggingface/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── hf_gpt2.py
│   │   │   │   │   ├── lightconv.py
│   │   │   │   │   ├── lightconv_lm.py
│   │   │   │   │   ├── lstm.py
│   │   │   │   │   ├── lstm_lm.py
│   │   │   │   │   ├── masked_lm.py
│   │   │   │   │   ├── model_utils.py
│   │   │   │   │   ├── multilingual_transformer.py
│   │   │   │   │   ├── nat/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── cmlm_transformer.py
│   │   │   │   │   │   ├── fairseq_nat_model.py
│   │   │   │   │   │   ├── insertion_transformer.py
│   │   │   │   │   │   ├── iterative_nonautoregressive_transformer.py
│   │   │   │   │   │   ├── levenshtein_transformer.py
│   │   │   │   │   │   ├── levenshtein_utils.py
│   │   │   │   │   │   ├── nat_crf_transformer.py
│   │   │   │   │   │   ├── nonautoregressive_ensembles.py
│   │   │   │   │   │   └── nonautoregressive_transformer.py
│   │   │   │   │   ├── roberta/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── alignment_utils.py
│   │   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   │   ├── model.py
│   │   │   │   │   │   ├── model_camembert.py
│   │   │   │   │   │   ├── model_gottbert.py
│   │   │   │   │   │   └── model_xlmr.py
│   │   │   │   │   ├── speech_to_text/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── berard.py
│   │   │   │   │   │   └── s2t_transformer.py
│   │   │   │   │   ├── transformer.py
│   │   │   │   │   ├── transformer_align.py
│   │   │   │   │   ├── transformer_from_pretrained_xlm.py
│   │   │   │   │   ├── transformer_lm.py
│   │   │   │   │   └── wav2vec/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── wav2vec.py
│   │   │   │   │       ├── wav2vec2.py
│   │   │   │   │       └── wav2vec2_asr.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── adaptive_input.py
│   │   │   │   │   ├── adaptive_softmax.py
│   │   │   │   │   ├── beamable_mm.py
│   │   │   │   │   ├── character_token_embedder.py
│   │   │   │   │   ├── checkpoint_activations.py
│   │   │   │   │   ├── conv_tbc.py
│   │   │   │   │   ├── cross_entropy.py
│   │   │   │   │   ├── cuda_utils.cu
│   │   │   │   │   ├── downsampled_multihead_attention.py
│   │   │   │   │   ├── dynamic_convolution.py
│   │   │   │   │   ├── dynamic_crf_layer.py
│   │   │   │   │   ├── dynamicconv_layer/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   │   ├── dynamicconv_cuda.cpp
│   │   │   │   │   │   ├── dynamicconv_cuda.cuh
│   │   │   │   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   │   │   │   ├── dynamicconv_layer.py
│   │   │   │   │   │   ├── dynamiconv_cpu.cpp
│   │   │   │   │   │   └── setup.py
│   │   │   │   │   ├── fairseq_dropout.py
│   │   │   │   │   ├── fp32_group_norm.py
│   │   │   │   │   ├── gelu.py
│   │   │   │   │   ├── grad_multiply.py
│   │   │   │   │   ├── gumbel_vector_quantizer.py
│   │   │   │   │   ├── kmeans_vector_quantizer.py
│   │   │   │   │   ├── layer_drop.py
│   │   │   │   │   ├── layer_norm.py
│   │   │   │   │   ├── learned_positional_embedding.py
│   │   │   │   │   ├── lightconv_layer/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   │   ├── lightconv_cuda.cpp
│   │   │   │   │   │   ├── lightconv_cuda.cuh
│   │   │   │   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   │   │   │   ├── lightconv_layer.py
│   │   │   │   │   │   └── setup.py
│   │   │   │   │   ├── lightweight_convolution.py
│   │   │   │   │   ├── linearized_convolution.py
│   │   │   │   │   ├── multihead_attention.py
│   │   │   │   │   ├── positional_embedding.py
│   │   │   │   │   ├── quant_noise.py
│   │   │   │   │   ├── quantization/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── pq/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── em.py
│   │   │   │   │   │   │   ├── modules/
│   │   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   │   ├── qconv.py
│   │   │   │   │   │   │   │   ├── qemb.py
│   │   │   │   │   │   │   │   └── qlinear.py
│   │   │   │   │   │   │   ├── pq.py
│   │   │   │   │   │   │   └── utils.py
│   │   │   │   │   │   ├── quantization_options.py
│   │   │   │   │   │   └── scalar/
│   │   │   │   │   │       ├── __init__.py
│   │   │   │   │   │       ├── modules/
│   │   │   │   │   │       │   ├── __init__.py
│   │   │   │   │   │       │   ├── qact.py
│   │   │   │   │   │       │   ├── qconv.py
│   │   │   │   │   │       │   ├── qemb.py
│   │   │   │   │   │       │   └── qlinear.py
│   │   │   │   │   │       ├── ops.py
│   │   │   │   │   │       └── utils.py
│   │   │   │   │   ├── same_pad.py
│   │   │   │   │   ├── scalar_bias.py
│   │   │   │   │   ├── sinusoidal_positional_embedding.py
│   │   │   │   │   ├── sparse_multihead_attention.py
│   │   │   │   │   ├── sparse_transformer_sentence_encoder.py
│   │   │   │   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   │   │   │   ├── transformer_layer.py
│   │   │   │   │   ├── transformer_sentence_encoder.py
│   │   │   │   │   ├── transformer_sentence_encoder_layer.py
│   │   │   │   │   ├── transpose_last.py
│   │   │   │   │   ├── unfold.py
│   │   │   │   │   └── vggblock.py
│   │   │   │   ├── nan_detector.py
│   │   │   │   ├── ngram_repeat_block.py
│   │   │   │   ├── optim/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── adadelta.py
│   │   │   │   │   ├── adafactor.py
│   │   │   │   │   ├── adagrad.py
│   │   │   │   │   ├── adam.py
│   │   │   │   │   ├── adamax.py
│   │   │   │   │   ├── bmuf.py
│   │   │   │   │   ├── composite.py
│   │   │   │   │   ├── dynamic_loss_scaler.py
│   │   │   │   │   ├── fairseq_optimizer.py
│   │   │   │   │   ├── fp16_optimizer.py
│   │   │   │   │   ├── fused_adam.py
│   │   │   │   │   ├── fused_lamb.py
│   │   │   │   │   ├── lr_scheduler/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── cosine_lr_scheduler.py
│   │   │   │   │   │   ├── fairseq_lr_scheduler.py
│   │   │   │   │   │   ├── fixed_schedule.py
│   │   │   │   │   │   ├── inverse_square_root_schedule.py
│   │   │   │   │   │   ├── manual_lr_scheduler.py
│   │   │   │   │   │   ├── pass_through.py
│   │   │   │   │   │   ├── polynomial_decay_schedule.py
│   │   │   │   │   │   ├── reduce_lr_on_plateau.py
│   │   │   │   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   │   │   │   └── triangular_lr_scheduler.py
│   │   │   │   │   ├── nag.py
│   │   │   │   │   ├── sgd.py
│   │   │   │   │   └── shard.py
│   │   │   │   ├── options.py
│   │   │   │   ├── pdb.py
│   │   │   │   ├── quantization_utils.py
│   │   │   │   ├── registry.py
│   │   │   │   ├── scoring/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── bleu.py
│   │   │   │   │   ├── chrf.py
│   │   │   │   │   ├── tokenizer.py
│   │   │   │   │   └── wer.py
│   │   │   │   ├── search.py
│   │   │   │   ├── sequence_generator.py
│   │   │   │   ├── sequence_scorer.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── audio_pretraining.py
│   │   │   │   │   ├── cross_lingual_lm.py
│   │   │   │   │   ├── denoising.py
│   │   │   │   │   ├── fairseq_task.py
│   │   │   │   │   ├── language_modeling.py
│   │   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   │   ├── masked_lm.py
│   │   │   │   │   ├── multilingual_denoising.py
│   │   │   │   │   ├── multilingual_masked_lm.py
│   │   │   │   │   ├── multilingual_translation.py
│   │   │   │   │   ├── semisupervised_translation.py
│   │   │   │   │   ├── sentence_prediction.py
│   │   │   │   │   ├── sentence_ranking.py
│   │   │   │   │   ├── speech_to_text.py
│   │   │   │   │   ├── translation.py
│   │   │   │   │   ├── translation_from_pretrained_bart.py
│   │   │   │   │   ├── translation_from_pretrained_xlm.py
│   │   │   │   │   ├── translation_lev.py
│   │   │   │   │   └── translation_multi_simple_epoch.py
│   │   │   │   ├── token_generation_constraints.py
│   │   │   │   ├── tokenizer.py
│   │   │   │   ├── trainer.py
│   │   │   │   ├── utils.py
│   │   │   │   └── version.txt
│   │   │   ├── fairseq_cli/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── eval_lm.py
│   │   │   │   ├── generate.py
│   │   │   │   ├── hydra_train.py
│   │   │   │   ├── interactive.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── score.py
│   │   │   │   ├── train.py
│   │   │   │   └── validate.py
│   │   │   ├── hubconf.py
│   │   │   ├── pyproject.toml
│   │   │   ├── scripts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── average_checkpoints.py
│   │   │   │   ├── build_sym_alignment.py
│   │   │   │   ├── compare_namespaces.py
│   │   │   │   ├── compound_split_bleu.sh
│   │   │   │   ├── constraints/
│   │   │   │   │   ├── extract.py
│   │   │   │   │   └── validate.py
│   │   │   │   ├── convert_dictionary.lua
│   │   │   │   ├── convert_model.lua
│   │   │   │   ├── count_docs.py
│   │   │   │   ├── read_binarized.py
│   │   │   │   ├── rm_pt.py
│   │   │   │   ├── sacrebleu.sh
│   │   │   │   ├── shard_docs.py
│   │   │   │   ├── split_train_valid_docs.py
│   │   │   │   ├── spm_decode.py
│   │   │   │   ├── spm_encode.py
│   │   │   │   └── spm_train.py
│   │   │   ├── setup.py
│   │   │   ├── tests/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── distributed/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test_distributed_utils.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── gpu/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test_binaries_gpu.py
│   │   │   │   │   └── transformer_quantization_config.yaml
│   │   │   │   ├── speech_recognition/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── asr_test_base.py
│   │   │   │   │   ├── test_collaters.py
│   │   │   │   │   ├── test_cross_entropy.py
│   │   │   │   │   ├── test_data_utils.py
│   │   │   │   │   └── test_vggtransformer.py
│   │   │   │   ├── test_average_checkpoints.py
│   │   │   │   ├── test_backtranslation_dataset.py
│   │   │   │   ├── test_binaries.py
│   │   │   │   ├── test_bmuf.py
│   │   │   │   ├── test_character_token_embedder.py
│   │   │   │   ├── test_checkpoint_utils.py
│   │   │   │   ├── test_concat_dataset.py
│   │   │   │   ├── test_constraints.py
│   │   │   │   ├── test_convtbc.py
│   │   │   │   ├── test_data_utils.py
│   │   │   │   ├── test_dictionary.py
│   │   │   │   ├── test_export.py
│   │   │   │   ├── test_file_io.py
│   │   │   │   ├── test_fp16_optimizer.py
│   │   │   │   ├── test_inference_dropout.py
│   │   │   │   ├── test_iopath.py
│   │   │   │   ├── test_iterators.py
│   │   │   │   ├── test_label_smoothing.py
│   │   │   │   ├── test_lm_context_window.py
│   │   │   │   ├── test_lstm_jitable.py
│   │   │   │   ├── test_memory_efficient_fp16.py
│   │   │   │   ├── test_metrics.py
│   │   │   │   ├── test_multi_corpus_sampled_dataset.py
│   │   │   │   ├── test_multihead_attention.py
│   │   │   │   ├── test_noising.py
│   │   │   │   ├── test_reproducibility.py
│   │   │   │   ├── test_resampling_dataset.py
│   │   │   │   ├── test_sequence_generator.py
│   │   │   │   ├── test_sequence_scorer.py
│   │   │   │   ├── test_sparse_multihead_attention.py
│   │   │   │   ├── test_token_block_dataset.py
│   │   │   │   ├── test_train.py
│   │   │   │   ├── test_utils.py
│   │   │   │   └── utils.py
│   │   │   ├── tmp.txt
│   │   │   └── train.py
│   │   ├── inference.py
│   │   ├── inference_batch.py
│   │   └── interactive.sh
│   └── readme.md
├── deepnet/
│   └── README.md
├── deltalm/
│   ├── README.md
│   ├── deltalm/
│   │   ├── __init__.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── deltalm.py
│   ├── examples/
│   │   ├── binary_iwslt14.sh
│   │   ├── evaluate_iwslt14.sh
│   │   ├── prepare_iwslt14.sh
│   │   ├── spm_iwslt14.sh
│   │   └── train_iwslt14.sh
│   ├── generate.py
│   ├── interactive.py
│   ├── preprocess.py
│   └── train.py
├── dit/
│   ├── README.md
│   ├── classification/
│   │   ├── README.md
│   │   ├── dataset_folder.py
│   │   ├── datasets.py
│   │   ├── deepspeed_configs/
│   │   │   └── config.json
│   │   ├── engine_for_finetuning.py
│   │   ├── modeling_finetune.py
│   │   ├── optim_factory.py
│   │   ├── requirements.txt
│   │   ├── run_class_finetuning.py
│   │   ├── transforms.py
│   │   └── utils.py
│   ├── object_detection/
│   │   ├── README.md
│   │   ├── adaptive_binarize.py
│   │   ├── convert_to_coco_format.py
│   │   ├── ditod/
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── beit.py
│   │   │   ├── config.py
│   │   │   ├── dataset_mapper.py
│   │   │   ├── deit.py
│   │   │   ├── icdar_evaluation.py
│   │   │   ├── mycheckpointer.py
│   │   │   ├── mytrainer.py
│   │   │   └── table_evaluation/
│   │   │       ├── __init__.py
│   │   │       ├── data_structure.py
│   │   │       └── evaluate.py
│   │   ├── icdar19_configs/
│   │   │   ├── Base-RCNN-FPN.yaml
│   │   │   ├── cascade/
│   │   │   │   ├── cascade_dit_base.yaml
│   │   │   │   └── cascade_dit_large.yaml
│   │   │   └── maskrcnn/
│   │   │       ├── maskrcnn_dit_base.yaml
│   │   │       └── maskrcnn_dit_large.yaml
│   │   ├── inference.py
│   │   ├── publaynet_configs/
│   │   │   ├── Base-RCNN-FPN.yaml
│   │   │   ├── cascade/
│   │   │   │   ├── cascade_dit_base.yaml
│   │   │   │   └── cascade_dit_large.yaml
│   │   │   └── maskrcnn/
│   │   │       ├── maskrcnn_dit_base.yaml
│   │   │       └── maskrcnn_dit_large.yaml
│   │   └── train_net.py
│   ├── requirements.txt
│   └── text_detection/
│       ├── README.md
│       ├── configs/
│       │   ├── Base-RCNN-FPN.yaml
│       │   ├── mask_rcnn_dit_base.yaml
│       │   └── mask_rcnn_dit_large.yaml
│       ├── ditod/
│       │   ├── __init__.py
│       │   ├── backbone.py
│       │   ├── beit.py
│       │   ├── concern/
│       │   │   ├── __init__.py
│       │   │   ├── average_meter.py
│       │   │   ├── box2seg.py
│       │   │   ├── config.py
│       │   │   ├── convert.py
│       │   │   ├── icdar2015_eval/
│       │   │   │   ├── __init__.py
│       │   │   │   └── detection/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── deteval.py
│       │   │   │       ├── icdar2013.py
│       │   │   │       ├── iou.py
│       │   │   │       └── mtwi2018.py
│       │   │   ├── log.py
│       │   │   ├── signal_monitor.py
│       │   │   ├── visualizer.py
│       │   │   └── webcv2/
│       │   │       ├── __init__.py
│       │   │       ├── manager.py
│       │   │       ├── server.py
│       │   │       └── templates/
│       │   │           └── index.html
│       │   ├── config.py
│       │   ├── dataset_mapper.py
│       │   ├── deit.py
│       │   ├── funsd_evaluation.py
│       │   ├── mycheckpointer.py
│       │   └── mytrainer.py
│       └── train_net.py
├── e5/
│   ├── README.md
│   ├── model_config.py
│   ├── mteb_beir_eval.py
│   ├── mteb_except_retrieval_eval.py
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── eval_mteb_beir.sh
│   │   └── eval_mteb_except_retrieval.sh
│   └── utils.py
├── edgelm/
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── LICENSE
│   ├── README.md
│   ├── docs/
│   │   ├── Makefile
│   │   ├── _static/
│   │   │   └── theme_overrides.css
│   │   ├── command_line_tools.rst
│   │   ├── conf.py
│   │   ├── criterions.rst
│   │   ├── data.rst
│   │   ├── docutils.conf
│   │   ├── getting_started.rst
│   │   ├── hydra_integration.md
│   │   ├── index.rst
│   │   ├── lr_scheduler.rst
│   │   ├── make.bat
│   │   ├── models.rst
│   │   ├── modules.rst
│   │   ├── optim.rst
│   │   ├── overview.rst
│   │   ├── requirements.txt
│   │   ├── tasks.rst
│   │   ├── tutorial_classifying_names.rst
│   │   └── tutorial_simple_lstm.rst
│   ├── examples/
│   │   ├── .gitignore
│   │   ├── MMPT/
│   │   │   ├── .gitignore
│   │   │   ├── CONFIG.md
│   │   │   ├── DATASET.md
│   │   │   ├── README.md
│   │   │   ├── endtask.md
│   │   │   ├── locallaunch.py
│   │   │   ├── mmpt/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── datasets/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fairseqmmdataset.py
│   │   │   │   │   └── mmdataset.py
│   │   │   │   ├── evaluators/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── evaluator.py
│   │   │   │   │   ├── metric.py
│   │   │   │   │   └── predictor.py
│   │   │   │   ├── losses/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fairseqmmloss.py
│   │   │   │   │   ├── loss.py
│   │   │   │   │   └── nce.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fairseqmmmodel.py
│   │   │   │   │   ├── mmfusion.py
│   │   │   │   │   ├── mmfusionnlg.py
│   │   │   │   │   └── transformermodel.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── mm.py
│   │   │   │   │   ├── retri.py
│   │   │   │   │   └── vectorpool.py
│   │   │   │   ├── processors/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── dedupprocessor.py
│   │   │   │   │   ├── dsprocessor.py
│   │   │   │   │   ├── how2processor.py
│   │   │   │   │   ├── how2retriprocessor.py
│   │   │   │   │   ├── models/
│   │   │   │   │   │   └── s3dg.py
│   │   │   │   │   └── processor.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fairseqmmtask.py
│   │   │   │   │   ├── milncetask.py
│   │   │   │   │   ├── retritask.py
│   │   │   │   │   ├── task.py
│   │   │   │   │   └── vlmtask.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── load_config.py
│   │   │   │       └── shardedtensor.py
│   │   │   ├── mmpt_cli/
│   │   │   │   ├── localjob.py
│   │   │   │   └── predict.py
│   │   │   ├── pretraining.md
│   │   │   ├── projects/
│   │   │   │   ├── mfmmlm.yaml
│   │   │   │   ├── mtm/
│   │   │   │   │   ├── mmfusionmtm.yaml
│   │   │   │   │   ├── vlm/
│   │   │   │   │   │   ├── coin.yaml
│   │   │   │   │   │   ├── crosstask.yaml
│   │   │   │   │   │   ├── how2.yaml
│   │   │   │   │   │   ├── test_coin.yaml
│   │   │   │   │   │   ├── test_crosstask.yaml
│   │   │   │   │   │   ├── test_crosstask_zs.yaml
│   │   │   │   │   │   ├── test_vtt.yaml
│   │   │   │   │   │   ├── test_vttqa.yaml
│   │   │   │   │   │   ├── test_youcook.yaml
│   │   │   │   │   │   ├── test_youcookcap.yaml
│   │   │   │   │   │   ├── vtt.yaml
│   │   │   │   │   │   ├── vttqa.yaml
│   │   │   │   │   │   ├── youcook.yaml
│   │   │   │   │   │   └── youcookcap.yaml
│   │   │   │   │   └── vlm.yaml
│   │   │   │   ├── retri/
│   │   │   │   │   ├── videoclip/
│   │   │   │   │   │   ├── coin_videoclip.yaml
│   │   │   │   │   │   ├── crosstask_videoclip.yaml
│   │   │   │   │   │   ├── how2.yaml
│   │   │   │   │   │   ├── test_coin_videoclip.yaml
│   │   │   │   │   │   ├── test_coin_zs.yaml
│   │   │   │   │   │   ├── test_crosstask_videoclip.yaml
│   │   │   │   │   │   ├── test_crosstask_zs_videoclip.yaml
│   │   │   │   │   │   ├── test_didemo_zs.yaml
│   │   │   │   │   │   ├── test_vtt_videoclip.yaml
│   │   │   │   │   │   ├── test_vtt_zs.yaml
│   │   │   │   │   │   ├── test_vttqa_videoclip.yaml
│   │   │   │   │   │   ├── test_vttqa_zs.yaml
│   │   │   │   │   │   ├── test_youcook_videoclip.yaml
│   │   │   │   │   │   ├── test_youcook_zs.yaml
│   │   │   │   │   │   ├── vtt_videoclip.yaml
│   │   │   │   │   │   ├── vttqa_videoclip.yaml
│   │   │   │   │   │   └── youcook_videoclip.yaml
│   │   │   │   │   ├── videoclip.yaml
│   │   │   │   │   └── videoretri.yaml
│   │   │   │   └── task/
│   │   │   │       ├── coin.yaml
│   │   │   │       ├── coin_videoclip.yaml
│   │   │   │       ├── crosstask.yaml
│   │   │   │       ├── crosstask_videoclip.yaml
│   │   │   │       ├── default.yaml
│   │   │   │       ├── ft.yaml
│   │   │   │       ├── how2.yaml
│   │   │   │       ├── test.yaml
│   │   │   │       ├── test_coin.yaml
│   │   │   │       ├── test_coin_videoclip.yaml
│   │   │   │       ├── test_coin_zs.yaml
│   │   │   │       ├── test_crosstask.yaml
│   │   │   │       ├── test_crosstask_videoclip.yaml
│   │   │   │       ├── test_crosstask_zs.yaml
│   │   │   │       ├── test_crosstask_zs_videoclip.yaml
│   │   │   │       ├── test_didemo_zs.yaml
│   │   │   │       ├── test_vtt.yaml
│   │   │   │       ├── test_vtt_videoclip.yaml
│   │   │   │       ├── test_vtt_zs.yaml
│   │   │   │       ├── test_vttqa.yaml
│   │   │   │       ├── test_vttqa_videoclip.yaml
│   │   │   │       ├── test_vttqa_zs.yaml
│   │   │   │       ├── test_youcook.yaml
│   │   │   │       ├── test_youcook_videoclip.yaml
│   │   │   │       ├── test_youcook_zs.yaml
│   │   │   │       ├── test_youcookcap.yaml
│   │   │   │       ├── vtt.yaml
│   │   │   │       ├── vtt_videoclip.yaml
│   │   │   │       ├── vttqa.yaml
│   │   │   │       ├── vttqa_videoclip.yaml
│   │   │   │       ├── youcook.yaml
│   │   │   │       ├── youcook_videoclip.yaml
│   │   │   │       └── youcookcap.yaml
│   │   │   ├── scripts/
│   │   │   │   ├── text_token_extractor/
│   │   │   │   │   ├── configs/
│   │   │   │   │   │   └── bert-base-uncased.yaml
│   │   │   │   │   └── pretokenization.py
│   │   │   │   └── video_feature_extractor/
│   │   │   │       ├── extract.py
│   │   │   │       ├── how2/
│   │   │   │       │   └── s3d.sh
│   │   │   │       ├── model.py
│   │   │   │       ├── pathbuilder.py
│   │   │   │       ├── preprocessing.py
│   │   │   │       ├── random_sequence_shuffler.py
│   │   │   │       ├── shard_feature.py
│   │   │   │       └── videoreader.py
│   │   │   └── setup.py
│   │   ├── __init__.py
│   │   ├── adaptive_span/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── adagrad_with_grad_clip.py
│   │   │   ├── adaptive_span_attention.py
│   │   │   ├── adaptive_span_loss.py
│   │   │   ├── adaptive_span_model.py
│   │   │   ├── adaptive_span_model_wrapper.py
│   │   │   └── truncated_bptt_lm_task.py
│   │   ├── backtranslation/
│   │   │   ├── README.md
│   │   │   ├── deduplicate_lines.py
│   │   │   ├── extract_bt_data.py
│   │   │   ├── prepare-de-monolingual.sh
│   │   │   ├── prepare-wmt18en2de.sh
│   │   │   ├── sacrebleu.sh
│   │   │   └── tokenized_bleu.sh
│   │   ├── bart/
│   │   │   ├── README.glue.md
│   │   │   ├── README.md
│   │   │   ├── README.summarization.md
│   │   │   └── summarize.py
│   │   ├── byte_level_bpe/
│   │   │   ├── README.md
│   │   │   ├── get_bitext.py
│   │   │   ├── get_data.sh
│   │   │   └── gru_transformer.py
│   │   ├── camembert/
│   │   │   └── README.md
│   │   ├── constrained_decoding/
│   │   │   ├── README.md
│   │   │   ├── normalize.py
│   │   │   └── tok.py
│   │   ├── conv_seq2seq/
│   │   │   └── README.md
│   │   ├── criss/
│   │   │   ├── README.md
│   │   │   ├── download_and_preprocess_flores_test.sh
│   │   │   ├── download_and_preprocess_tatoeba.sh
│   │   │   ├── mining/
│   │   │   │   ├── mine.py
│   │   │   │   └── mine_example.sh
│   │   │   ├── save_encoder.py
│   │   │   ├── sentence_retrieval/
│   │   │   │   ├── encoder_analysis.py
│   │   │   │   └── sentence_retrieval_tatoeba.sh
│   │   │   └── unsupervised_mt/
│   │   │       └── eval.sh
│   │   ├── cross_lingual_language_model/
│   │   │   └── README.md
│   │   ├── discriminative_reranking_nmt/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── config/
│   │   │   │   └── deen.yaml
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── discriminative_reranking_criterion.py
│   │   │   ├── drnmt_rerank.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   └── discriminative_reranking_model.py
│   │   │   ├── scripts/
│   │   │   │   └── prep_data.py
│   │   │   └── tasks/
│   │   │       ├── __init__.py
│   │   │       └── discriminative_reranking_task.py
│   │   ├── fast_noisy_channel/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── noisy_channel_beam_search.py
│   │   │   ├── noisy_channel_sequence_generator.py
│   │   │   └── noisy_channel_translation.py
│   │   ├── flores101/
│   │   │   └── README.md
│   │   ├── fully_sharded_data_parallel/
│   │   │   └── README.md
│   │   ├── gottbert/
│   │   │   └── README.md
│   │   ├── hubert/
│   │   │   ├── README.md
│   │   │   ├── config/
│   │   │   │   ├── decode/
│   │   │   │   │   ├── ax_sweep/
│   │   │   │   │   │   ├── ngram.yaml
│   │   │   │   │   │   └── transformer.yaml
│   │   │   │   │   ├── infer_fsqlm.yaml
│   │   │   │   │   ├── infer_kenlm.yaml
│   │   │   │   │   ├── infer_viterbi.yaml
│   │   │   │   │   └── run/
│   │   │   │   │       ├── submitit_slurm.yaml
│   │   │   │   │       └── submitit_slurm_8gpu.yaml
│   │   │   │   ├── finetune/
│   │   │   │   │   ├── base_10h.yaml
│   │   │   │   │   ├── ckpt/
│   │   │   │   │   │   └── it1.yaml
│   │   │   │   │   ├── lm/
│   │   │   │   │   │   └── ls_4gram.yaml
│   │   │   │   │   └── run/
│   │   │   │   │       └── submitit_reg.yaml
│   │   │   │   └── pretrain/
│   │   │   │       ├── data/
│   │   │   │       │   ├── iter1.yaml
│   │   │   │       │   └── iter2.yaml
│   │   │   │       ├── hubert_base_librispeech.yaml
│   │   │   │       ├── hubert_large_librivox.yaml
│   │   │   │       ├── hubert_xlarge_librivox.yaml
│   │   │   │       └── run/
│   │   │   │           └── submitit_reg.yaml
│   │   │   ├── measure_teacher_quality.py
│   │   │   ├── simple_kmeans/
│   │   │   │   ├── README.md
│   │   │   │   ├── dump_hubert_feature.py
│   │   │   │   ├── dump_hubert_feature_s2t.py
│   │   │   │   ├── dump_km_label.py
│   │   │   │   ├── dump_mfcc_feature.py
│   │   │   │   ├── dump_w2v2_feature.py
│   │   │   │   ├── feature_utils.py
│   │   │   │   └── learn_kmeans.py
│   │   │   └── update_ckpt.py
│   │   ├── joint_alignment_translation/
│   │   │   ├── README.md
│   │   │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
│   │   ├── language_model/
│   │   │   ├── README.adaptive_inputs.md
│   │   │   ├── README.conv.md
│   │   │   ├── README.md
│   │   │   └── prepare-wikitext-103.sh
│   │   ├── laser/
│   │   │   ├── README.md
│   │   │   └── laser_src/
│   │   │       ├── __init__.py
│   │   │       ├── laser_lstm.py
│   │   │       ├── laser_task.py
│   │   │       ├── laser_transformer.py
│   │   │       └── multitask_data_utils.py
│   │   ├── latent_depth/
│   │   │   ├── README.md
│   │   │   └── latent_depth_src/
│   │   │       ├── __init__.py
│   │   │       ├── loss/
│   │   │       │   ├── __init__.py
│   │   │       │   └── latent_depth.py
│   │   │       ├── models/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── latent_multilingual_transformer.py
│   │   │       │   └── latent_transformer.py
│   │   │       ├── modules/
│   │   │       │   ├── __init__.py
│   │   │       │   └── latent_layers.py
│   │   │       └── multilingual_translation_latent_depth.py
│   │   ├── layerdrop/
│   │   │   └── README.md
│   │   ├── linformer/
│   │   │   ├── README.md
│   │   │   └── linformer_src/
│   │   │       ├── __init__.py
│   │   │       ├── models/
│   │   │       │   ├── __init__.py
│   │   │       │   └── linformer_roberta.py
│   │   │       └── modules/
│   │   │           ├── __init__.py
│   │   │           ├── linformer_sentence_encoder.py
│   │   │           ├── linformer_sentence_encoder_layer.py
│   │   │           └── multihead_linear_attention.py
│   │   ├── m2m_100/
│   │   │   ├── README.md
│   │   │   ├── install_dependecies.sh
│   │   │   ├── process_data/
│   │   │   │   ├── clean_histogram.py
│   │   │   │   ├── dedup_data.py
│   │   │   │   └── remove_too_much_punc.py
│   │   │   ├── tok.sh
│   │   │   └── tokenizers/
│   │   │       ├── README.md
│   │   │       ├── seg_ja.sh
│   │   │       ├── seg_ko.sh
│   │   │       ├── thirdparty/
│   │   │       │   └── .gitignore
│   │   │       ├── tokenize_indic.py
│   │   │       ├── tokenize_thai.py
│   │   │       ├── tokenize_zh.py
│   │   │       └── tokenizer_ar.sh
│   │   ├── mbart/
│   │   │   └── README.md
│   │   ├── megatron_11b/
│   │   │   ├── README.md
│   │   │   └── detok.py
│   │   ├── multilingual/
│   │   │   ├── ML50_langs.txt
│   │   │   ├── README.md
│   │   │   ├── data_scripts/
│   │   │   │   ├── README.md
│   │   │   │   ├── binarize.py
│   │   │   │   ├── check_iswlt_test_data.py
│   │   │   │   ├── check_self_overlaps.py
│   │   │   │   ├── check_valid_test_overlaps.py
│   │   │   │   ├── dedup_all.py
│   │   │   │   ├── download_ML50_v1.sh
│   │   │   │   ├── download_af_xh.sh
│   │   │   │   ├── download_flores_data.sh
│   │   │   │   ├── download_iitb.sh
│   │   │   │   ├── download_iwslt_and_extract.sh
│   │   │   │   ├── download_lotus.sh
│   │   │   │   ├── download_ted_and_extract.py
│   │   │   │   ├── download_wat19_my.sh
│   │   │   │   ├── download_wmt19_and_before.py
│   │   │   │   ├── download_wmt20.sh
│   │   │   │   ├── preprocess_ML50_v1.sh
│   │   │   │   ├── remove_valid_test_in_train.py
│   │   │   │   ├── requirement.txt
│   │   │   │   └── utils/
│   │   │   │       ├── dedup.py
│   │   │   │       ├── fasttext_multi_filter.py
│   │   │   │       └── strip_sgm.sh
│   │   │   ├── finetune_multilingual_model.sh
│   │   │   ├── multilingual_fairseq_gen.sh
│   │   │   └── train_multilingual_model.sh
│   │   ├── noisychannel/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── rerank.py
│   │   │   ├── rerank_generate.py
│   │   │   ├── rerank_options.py
│   │   │   ├── rerank_score_bw.py
│   │   │   ├── rerank_score_lm.py
│   │   │   ├── rerank_tune.py
│   │   │   └── rerank_utils.py
│   │   ├── nonautoregressive_translation/
│   │   │   ├── README.md
│   │   │   └── scripts.md
│   │   ├── normformer/
│   │   │   ├── README.md
│   │   │   └── train_lm.sh
│   │   ├── operators/
│   │   │   ├── alignment_train_cpu.cpp
│   │   │   ├── alignment_train_cuda.cpp
│   │   │   ├── alignment_train_cuda.h
│   │   │   ├── alignment_train_kernel.cu
│   │   │   └── utils.h
│   │   ├── paraphraser/
│   │   │   ├── README.md
│   │   │   └── paraphrase.py
│   │   ├── pay_less_attention_paper/
│   │   │   └── README.md
│   │   ├── pointer_generator/
│   │   │   ├── README.md
│   │   │   ├── README.xsum.md
│   │   │   ├── pointer_generator_src/
│   │   │   │   ├── __init__.py
│   │   │   │   └── transformer_pg.py
│   │   │   ├── postprocess.py
│   │   │   └── preprocess.py
│   │   ├── quant_noise/
│   │   │   ├── README.md
│   │   │   └── transformer_quantization_config.yaml
│   │   ├── roberta/
│   │   │   ├── README.custom_classification.md
│   │   │   ├── README.glue.md
│   │   │   ├── README.md
│   │   │   ├── README.pretraining.md
│   │   │   ├── README.race.md
│   │   │   ├── commonsense_qa/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── commonsense_qa_task.py
│   │   │   │   └── download_cqa_data.sh
│   │   │   ├── config/
│   │   │   │   ├── finetuning/
│   │   │   │   │   ├── cola.yaml
│   │   │   │   │   ├── mnli.yaml
│   │   │   │   │   ├── mrpc.yaml
│   │   │   │   │   ├── qnli.yaml
│   │   │   │   │   ├── qqp.yaml
│   │   │   │   │   ├── rte.yaml
│   │   │   │   │   ├── sst_2.yaml
│   │   │   │   │   └── sts_b.yaml
│   │   │   │   └── pretraining/
│   │   │   │       └── base.yaml
│   │   │   ├── multiprocessing_bpe_encoder.py
│   │   │   ├── preprocess_GLUE_tasks.sh
│   │   │   ├── preprocess_RACE.py
│   │   │   ├── preprocess_RACE.sh
│   │   │   └── wsc/
│   │   │       ├── README.md
│   │   │       ├── __init__.py
│   │   │       ├── wsc_criterion.py
│   │   │       ├── wsc_task.py
│   │   │       └── wsc_utils.py
│   │   ├── rxf/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── rxf_src/
│   │   │       ├── __init__.py
│   │   │       ├── label_smoothed_cross_entropy_r3f.py
│   │   │       └── sentence_prediction_r3f.py
│   │   ├── scaling_nmt/
│   │   │   └── README.md
│   │   ├── shuffled_word_order/
│   │   │   ├── README.finetuning.md
│   │   │   └── README.md
│   │   ├── simultaneous_translation/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── docs/
│   │   │   │   ├── ende-mma.md
│   │   │   │   └── enja-waitk.md
│   │   │   ├── eval/
│   │   │   │   └── agents/
│   │   │   │       └── simul_t2t_enja.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── convtransformer_simul_trans.py
│   │   │   │   └── transformer_monotonic_attention.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fixed_pre_decision.py
│   │   │   │   ├── monotonic_multihead_attention.py
│   │   │   │   └── monotonic_transformer_layer.py
│   │   │   ├── tests/
│   │   │   │   ├── test_alignment_train.py
│   │   │   │   └── test_text_models.py
│   │   │   └── utils/
│   │   │       ├── __init__.py
│   │   │       ├── functions.py
│   │   │       ├── monotonic_attention.py
│   │   │       └── p_choose_strategy.py
│   │   ├── speech_recognition/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── criterions/
│   │   │   │   ├── ASG_loss.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── cross_entropy_acc.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── asr_dataset.py
│   │   │   │   ├── collaters.py
│   │   │   │   ├── data_utils.py
│   │   │   │   └── replabels.py
│   │   │   ├── datasets/
│   │   │   │   ├── asr_prep_json.py
│   │   │   │   └── prepare-librispeech.sh
│   │   │   ├── infer.py
│   │   │   ├── kaldi/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── add-self-loop-simple.cc
│   │   │   │   ├── config/
│   │   │   │   │   └── kaldi_initializer.yaml
│   │   │   │   ├── kaldi_decoder.py
│   │   │   │   └── kaldi_initializer.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── vggtransformer.py
│   │   │   │   └── w2l_conv_glu_enc.py
│   │   │   ├── new/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── conf/
│   │   │   │   │   ├── hydra/
│   │   │   │   │   │   └── sweeper/
│   │   │   │   │   │       └── ax.yaml
│   │   │   │   │   └── infer.yaml
│   │   │   │   ├── decoders/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base_decoder.py
│   │   │   │   │   ├── decoder.py
│   │   │   │   │   ├── decoder_config.py
│   │   │   │   │   ├── flashlight_decoder.py
│   │   │   │   │   └── viterbi_decoder.py
│   │   │   │   └── infer.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   └── speech_recognition.py
│   │   │   ├── utils/
│   │   │   │   └── wer_utils.py
│   │   │   └── w2l_decoder.py
│   │   ├── speech_synthesis/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── data_utils.py
│   │   │   ├── docs/
│   │   │   │   ├── common_voice_example.md
│   │   │   │   ├── ljspeech_example.md
│   │   │   │   └── vctk_example.md
│   │   │   ├── evaluation/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── eval_asr.py
│   │   │   │   ├── eval_f0.py
│   │   │   │   ├── eval_sp.py
│   │   │   │   └── get_eval_manifest.py
│   │   │   ├── generate_waveform.py
│   │   │   ├── preprocessing/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── denoise_and_vad_audio.py
│   │   │   │   ├── denoiser/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── demucs.py
│   │   │   │   │   ├── pretrained.py
│   │   │   │   │   ├── resample.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── get_common_voice_audio_manifest.py
│   │   │   │   ├── get_feature_manifest.py
│   │   │   │   ├── get_ljspeech_audio_manifest.py
│   │   │   │   ├── get_speaker_embedding.py
│   │   │   │   ├── get_vctk_audio_manifest.py
│   │   │   │   ├── speaker_embedder/
│   │   │   │   │   └── __init__.py
│   │   │   │   └── vad/
│   │   │   │       └── __init__.py
│   │   │   └── utils.py
│   │   ├── speech_text_joint_to_text/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── configs/
│   │   │   │   └── mustc_noise.list
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── text_guide_cross_entropy_acc.py
│   │   │   ├── docs/
│   │   │   │   ├── ende-mustc.md
│   │   │   │   └── iwslt2021.md
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── s2t_dualinputtransformer.py
│   │   │   │   └── s2t_dualinputxmtransformer.py
│   │   │   ├── scripts/
│   │   │   │   └── g2p_encode.py
│   │   │   └── tasks/
│   │   │       ├── __init__.py
│   │   │       └── speech_text_joint.py
│   │   ├── speech_to_text/
│   │   │   ├── README.md
│   │   │   ├── data_utils.py
│   │   │   ├── docs/
│   │   │   │   ├── covost_example.md
│   │   │   │   ├── librispeech_example.md
│   │   │   │   ├── mtedx_example.md
│   │   │   │   ├── mustc_example.md
│   │   │   │   └── simulst_mustc_example.md
│   │   │   ├── prep_covost_data.py
│   │   │   ├── prep_librispeech_data.py
│   │   │   ├── prep_mtedx_data.py
│   │   │   ├── prep_mustc_data.py
│   │   │   ├── seg_mustc_data.py
│   │   │   └── simultaneous_translation/
│   │   │       └── agents/
│   │   │           └── fairseq_simul_st_agent.py
│   │   ├── stories/
│   │   │   └── README.md
│   │   ├── textless_nlp/
│   │   │   └── gslm/
│   │   │       ├── README.md
│   │   │       ├── metrics/
│   │   │       │   ├── README.md
│   │   │       │   ├── abx_metrics/
│   │   │       │   │   ├── README.md
│   │   │       │   │   └── dump_abx_feats.py
│   │   │       │   └── asr_metrics/
│   │   │       │       ├── README.md
│   │   │       │       ├── continuation_eval.py
│   │   │       │       ├── misc/
│   │   │       │       │   ├── bleu_utils.py
│   │   │       │       │   ├── cut_as.py
│   │   │       │       │   └── dict.ltr.txt
│   │   │       │       ├── ppx.py
│   │   │       │       └── self_auto_bleu.py
│   │   │       ├── speech2unit/
│   │   │       │   ├── README.md
│   │   │       │   ├── __init__.py
│   │   │       │   ├── clustering/
│   │   │       │   │   ├── __init__.py
│   │   │       │   │   ├── cluster_kmeans.py
│   │   │       │   │   ├── dump_feats.py
│   │   │       │   │   ├── quantize_with_kmeans.py
│   │   │       │   │   └── utils.py
│   │   │       │   └── pretrained/
│   │   │       │       ├── cpc_feature_reader.py
│   │   │       │       ├── hubert_feature_reader.py
│   │   │       │       ├── logmel_feature_reader.py
│   │   │       │       ├── utils.py
│   │   │       │       └── w2v2_feature_reader.py
│   │   │       ├── tools/
│   │   │       │   ├── README.md
│   │   │       │   └── resynthesize_speech.py
│   │   │       ├── ulm/
│   │   │       │   ├── README.md
│   │   │       │   └── sample.py
│   │   │       └── unit2speech/
│   │   │           ├── README.md
│   │   │           ├── convert_to_16k.py
│   │   │           ├── glow.py
│   │   │           ├── multiproc.py
│   │   │           ├── synthesize_audio_from_units.py
│   │   │           ├── tacotron2/
│   │   │           │   ├── __init__.py
│   │   │           │   ├── audio_processing.py
│   │   │           │   ├── cleaners.py
│   │   │           │   ├── cmudict.py
│   │   │           │   ├── layers.py
│   │   │           │   ├── model.py
│   │   │           │   ├── numbers.py
│   │   │           │   ├── stft.py
│   │   │           │   ├── symbols.py
│   │   │           │   ├── text.py
│   │   │           │   ├── utils.py
│   │   │           │   └── waveglow_denoiser.py
│   │   │           ├── tts_data.py
│   │   │           └── utils.py
│   │   ├── translation/
│   │   │   ├── README.md
│   │   │   ├── prepare-iwslt14.sh
│   │   │   ├── prepare-iwslt17-multilingual.sh
│   │   │   ├── prepare-wmt14en2de.sh
│   │   │   └── prepare-wmt14en2fr.sh
│   │   ├── translation_moe/
│   │   │   ├── README.md
│   │   │   ├── score.py
│   │   │   └── translation_moe_src/
│   │   │       ├── __init__.py
│   │   │       ├── logsumexp_moe.py
│   │   │       ├── mean_pool_gating_network.py
│   │   │       └── translation_moe.py
│   │   ├── truncated_bptt/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── transformer_xl_model.py
│   │   │   └── truncated_bptt_lm_task.py
│   │   ├── unsupervised_quality_estimation/
│   │   │   ├── README.md
│   │   │   ├── aggregate_scores.py
│   │   │   ├── meteor.py
│   │   │   └── repeat_lines.py
│   │   ├── wav2vec/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── config/
│   │   │   │   ├── finetuning/
│   │   │   │   │   ├── base_100h.yaml
│   │   │   │   │   ├── base_10h.yaml
│   │   │   │   │   ├── base_10m.yaml
│   │   │   │   │   ├── base_1h.yaml
│   │   │   │   │   ├── base_960h.yaml
│   │   │   │   │   ├── vox_100h.yaml
│   │   │   │   │   ├── vox_10h.yaml
│   │   │   │   │   ├── vox_10m.yaml
│   │   │   │   │   ├── vox_1h.yaml
│   │   │   │   │   └── vox_960h.yaml
│   │   │   │   └── pretraining/
│   │   │   │       ├── wav2vec2_base_librispeech.yaml
│   │   │   │       ├── wav2vec2_large_librivox.yaml
│   │   │   │       ├── wav2vec2_large_librivox_tpu-pod.yaml
│   │   │   │       └── wav2vec2_large_librivox_tpu.yaml
│   │   │   ├── libri_labels.py
│   │   │   ├── scripts/
│   │   │   │   └── binarize_manifest.sh
│   │   │   ├── unsupervised/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config/
│   │   │   │   │   ├── finetuning/
│   │   │   │   │   │   └── w2v_finetune.yaml
│   │   │   │   │   ├── gan/
│   │   │   │   │   │   └── w2vu.yaml
│   │   │   │   │   ├── generate/
│   │   │   │   │   │   └── viterbi.yaml
│   │   │   │   │   ├── timit_matched/
│   │   │   │   │   │   ├── test.uid
│   │   │   │   │   │   ├── train.uid
│   │   │   │   │   │   ├── train_text.uid
│   │   │   │   │   │   └── valid.uid
│   │   │   │   │   └── timit_unmatched/
│   │   │   │   │       ├── test.uid
│   │   │   │   │       ├── train.uid
│   │   │   │   │       ├── train_text.uid
│   │   │   │   │       └── valid.uid
│   │   │   │   ├── data/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── extracted_features_dataset.py
│   │   │   │   │   └── random_input_dataset.py
│   │   │   │   ├── kaldi_self_train/
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── st/
│   │   │   │   │       ├── cmd.sh
│   │   │   │   │       ├── decode_phone.sh
│   │   │   │   │       ├── decode_word_step1.sh
│   │   │   │   │       ├── decode_word_step2.sh
│   │   │   │   │       ├── local/
│   │   │   │   │       │   ├── copy_aligned_text.py
│   │   │   │   │       │   ├── decode.sh
│   │   │   │   │       │   ├── prepare_data_from_w2v.py
│   │   │   │   │       │   ├── prepare_lang.sh
│   │   │   │   │       │   ├── prepare_lang_word.sh
│   │   │   │   │       │   ├── prepare_lm.sh
│   │   │   │   │       │   ├── score.sh
│   │   │   │   │       │   ├── show_wer.sh
│   │   │   │   │       │   ├── train_subset_lgbeam.sh
│   │   │   │   │       │   ├── unsup_select.py
│   │   │   │   │       │   ├── unsup_select_decode.sh
│   │   │   │   │       │   └── unsup_select_decode_word.sh
│   │   │   │   │       ├── path.sh
│   │   │   │   │       ├── steps
│   │   │   │   │       ├── steps_gan/
│   │   │   │   │       │   ├── train_deltas.sh
│   │   │   │   │       │   ├── train_lda_mllt.sh
│   │   │   │   │       │   └── train_sat.sh
│   │   │   │   │       ├── train.sh
│   │   │   │   │       └── utils
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wav2vec_u.py
│   │   │   │   ├── scripts/
│   │   │   │   │   ├── apply_pca.py
│   │   │   │   │   ├── copy_labels.py
│   │   │   │   │   ├── filter_lexicon.py
│   │   │   │   │   ├── filter_tsv.py
│   │   │   │   │   ├── g2p_wrd_to_phn.py
│   │   │   │   │   ├── ltr_to_wrd.py
│   │   │   │   │   ├── mean_pool.py
│   │   │   │   │   ├── merge_clusters.py
│   │   │   │   │   ├── normalize_and_filter_text.py
│   │   │   │   │   ├── normalize_text.py
│   │   │   │   │   ├── pca.py
│   │   │   │   │   ├── phonemize_with_sil.py
│   │   │   │   │   ├── prepare_audio.sh
│   │   │   │   │   ├── prepare_text.sh
│   │   │   │   │   ├── prepare_timit.sh
│   │   │   │   │   ├── remove_silence.py
│   │   │   │   │   ├── vads.py
│   │   │   │   │   ├── wav2vec_apply_cluster_faiss.py
│   │   │   │   │   ├── wav2vec_cluster_faiss.py
│   │   │   │   │   ├── wav2vec_extract_features.py
│   │   │   │   │   ├── wer.py
│   │   │   │   │   └── wrd_to_ltr.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── unpaired_audio_text.py
│   │   │   │   └── w2vu_generate.py
│   │   │   ├── vq-wav2vec_featurize.py
│   │   │   ├── wav2vec_featurize.py
│   │   │   └── wav2vec_manifest.py
│   │   ├── wmt19/
│   │   │   └── README.md
│   │   ├── wmt20/
│   │   │   └── README.md
│   │   └── xlmr/
│   │       └── README.md
│   ├── fairseq/
│   │   ├── __init__.py
│   │   ├── benchmark/
│   │   │   ├── __init__.py
│   │   │   ├── dummy_dataset.py
│   │   │   ├── dummy_lm.py
│   │   │   ├── dummy_masked_lm.py
│   │   │   ├── dummy_model.py
│   │   │   └── dummy_mt.py
│   │   ├── binarizer.py
│   │   ├── checkpoint_utils.py
│   │   ├── clib/
│   │   │   ├── cuda/
│   │   │   │   ├── ngram_repeat_block_cuda.cpp
│   │   │   │   └── ngram_repeat_block_cuda_kernel.cu
│   │   │   ├── libbase/
│   │   │   │   └── balanced_assignment.cpp
│   │   │   ├── libbleu/
│   │   │   │   ├── libbleu.cpp
│   │   │   │   └── module.cpp
│   │   │   ├── libnat/
│   │   │   │   └── edit_dist.cpp
│   │   │   └── libnat_cuda/
│   │   │       ├── binding.cpp
│   │   │       ├── edit_dist.cu
│   │   │       └── edit_dist.h
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   ├── config.yaml
│   │   │   └── model/
│   │   │       ├── transformer_lm/
│   │   │       │   ├── transformer_lm_baevski_gbw.yaml
│   │   │       │   ├── transformer_lm_baevski_wiki103.yaml
│   │   │       │   ├── transformer_lm_big.yaml
│   │   │       │   ├── transformer_lm_gbw.yaml
│   │   │       │   ├── transformer_lm_gpt.yaml
│   │   │       │   ├── transformer_lm_gpt2_big.yaml
│   │   │       │   ├── transformer_lm_gpt2_medium.yaml
│   │   │       │   ├── transformer_lm_gpt2_small.yaml
│   │   │       │   └── transformer_lm_wiki103.yaml
│   │   │       ├── wav2vec/
│   │   │       │   └── vq_wav2vec_gumbel.yaml
│   │   │       └── wav2vec2/
│   │   │           ├── wav2vec2_base.yaml
│   │   │           └── wav2vec2_large.yaml
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   ├── adaptive_loss.py
│   │   │   ├── composite_loss.py
│   │   │   ├── cross_entropy.py
│   │   │   ├── ctc.py
│   │   │   ├── fairseq_criterion.py
│   │   │   ├── fastspeech2_loss.py
│   │   │   ├── hubert_criterion.py
│   │   │   ├── label_smoothed_cross_entropy.py
│   │   │   ├── label_smoothed_cross_entropy_latency_augmented.py
│   │   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   │   ├── legacy_masked_lm.py
│   │   │   ├── masked_lm.py
│   │   │   ├── model_criterion.py
│   │   │   ├── nat_loss.py
│   │   │   ├── sentence_prediction.py
│   │   │   ├── sentence_ranking.py
│   │   │   ├── tacotron2_loss.py
│   │   │   └── wav2vec_criterion.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── add_target_dataset.py
│   │   │   ├── append_token_dataset.py
│   │   │   ├── audio/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_utils.py
│   │   │   │   ├── data_cfg.py
│   │   │   │   ├── feature_transforms/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── global_cmvn.py
│   │   │   │   │   ├── specaugment.py
│   │   │   │   │   └── utterance_cmvn.py
│   │   │   │   ├── frm_text_to_speech_dataset.py
│   │   │   │   ├── hubert_dataset.py
│   │   │   │   ├── multi_modality_dataset.py
│   │   │   │   ├── raw_audio_dataset.py
│   │   │   │   ├── speech_to_text_dataset.py
│   │   │   │   ├── speech_to_text_joint_dataset.py
│   │   │   │   └── text_to_speech_dataset.py
│   │   │   ├── backtranslation_dataset.py
│   │   │   ├── base_wrapper_dataset.py
│   │   │   ├── bucket_pad_length_dataset.py
│   │   │   ├── colorize_dataset.py
│   │   │   ├── concat_dataset.py
│   │   │   ├── concat_sentences_dataset.py
│   │   │   ├── data_utils.py
│   │   │   ├── data_utils_fast.cpp
│   │   │   ├── data_utils_fast.pyx
│   │   │   ├── denoising_dataset.py
│   │   │   ├── dictionary.py
│   │   │   ├── encoders/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── byte_bpe.py
│   │   │   │   ├── byte_utils.py
│   │   │   │   ├── bytes.py
│   │   │   │   ├── characters.py
│   │   │   │   ├── fastbpe.py
│   │   │   │   ├── gpt2_bpe.py
│   │   │   │   ├── gpt2_bpe_utils.py
│   │   │   │   ├── hf_bert_bpe.py
│   │   │   │   ├── hf_byte_bpe.py
│   │   │   │   ├── moses_tokenizer.py
│   │   │   │   ├── nltk_tokenizer.py
│   │   │   │   ├── sentencepiece_bpe.py
│   │   │   │   ├── space_tokenizer.py
│   │   │   │   ├── subword_nmt_bpe.py
│   │   │   │   └── utils.py
│   │   │   ├── fairseq_dataset.py
│   │   │   ├── fasta_dataset.py
│   │   │   ├── huffman/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── huffman_coder.py
│   │   │   │   └── huffman_mmap_indexed_dataset.py
│   │   │   ├── id_dataset.py
│   │   │   ├── indexed_dataset.py
│   │   │   ├── iterators.py
│   │   │   ├── language_pair_dataset.py
│   │   │   ├── legacy/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── block_pair_dataset.py
│   │   │   │   ├── masked_lm_dataset.py
│   │   │   │   └── masked_lm_dictionary.py
│   │   │   ├── list_dataset.py
│   │   │   ├── lm_context_window_dataset.py
│   │   │   ├── lru_cache_dataset.py
│   │   │   ├── mask_tokens_dataset.py
│   │   │   ├── monolingual_dataset.py
│   │   │   ├── multi_corpus_dataset.py
│   │   │   ├── multi_corpus_sampled_dataset.py
│   │   │   ├── multilingual/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── multilingual_data_manager.py
│   │   │   │   ├── multilingual_utils.py
│   │   │   │   ├── sampled_multi_dataset.py
│   │   │   │   ├── sampled_multi_epoch_dataset.py
│   │   │   │   └── sampling_method.py
│   │   │   ├── nested_dictionary_dataset.py
│   │   │   ├── noising.py
│   │   │   ├── num_samples_dataset.py
│   │   │   ├── numel_dataset.py
│   │   │   ├── offset_tokens_dataset.py
│   │   │   ├── pad_dataset.py
│   │   │   ├── plasma_utils.py
│   │   │   ├── prepend_dataset.py
│   │   │   ├── prepend_token_dataset.py
│   │   │   ├── raw_label_dataset.py
│   │   │   ├── replace_dataset.py
│   │   │   ├── resampling_dataset.py
│   │   │   ├── roll_dataset.py
│   │   │   ├── round_robin_zip_datasets.py
│   │   │   ├── shorten_dataset.py
│   │   │   ├── sort_dataset.py
│   │   │   ├── squad/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── basic_tokenizer.py
│   │   │   │   ├── squad_extractor.py
│   │   │   │   └── squad_metrics.py
│   │   │   ├── strip_token_dataset.py
│   │   │   ├── subsample_dataset.py
│   │   │   ├── text_compressor.py
│   │   │   ├── token_block_dataset.py
│   │   │   ├── token_block_utils_fast.cpp
│   │   │   ├── token_block_utils_fast.pyx
│   │   │   ├── transform_eos_dataset.py
│   │   │   └── transform_eos_lang_pair_dataset.py
│   │   ├── dataclass/
│   │   │   ├── __init__.py
│   │   │   ├── configs.py
│   │   │   ├── constants.py
│   │   │   ├── initialize.py
│   │   │   └── utils.py
│   │   ├── distributed/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_timeout_wrapper.py
│   │   │   ├── fully_sharded_data_parallel.py
│   │   │   ├── legacy_distributed_data_parallel.py
│   │   │   ├── module_proxy_wrapper.py
│   │   │   ├── tpu_distributed_data_parallel.py
│   │   │   └── utils.py
│   │   ├── file_chunker_utils.py
│   │   ├── file_io.py
│   │   ├── file_utils.py
│   │   ├── hub_utils.py
│   │   ├── incremental_decoding_utils.py
│   │   ├── iterative_refinement_generator.py
│   │   ├── logging/
│   │   │   ├── __init__.py
│   │   │   ├── meters.py
│   │   │   ├── metrics.py
│   │   │   └── progress_bar.py
│   │   ├── model_parallel/
│   │   │   ├── __init__.py
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── vocab_parallel_cross_entropy.py
│   │   │   ├── megatron_trainer.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── pipeline_parallel_transformer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── layers.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── roberta/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── transformer.py
│   │   │   │   └── transformer_lm.py
│   │   │   └── modules/
│   │   │       ├── __init__.py
│   │   │       ├── multihead_attention.py
│   │   │       └── transformer_layer.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── bart/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hub_interface.py
│   │   │   │   └── model.py
│   │   │   ├── composite_encoder.py
│   │   │   ├── distributed_fairseq_model.py
│   │   │   ├── ema/
│   │   │   │   ├── __init__.py
│   │   │   │   └── ema.py
│   │   │   ├── fairseq_decoder.py
│   │   │   ├── fairseq_encoder.py
│   │   │   ├── fairseq_incremental_decoder.py
│   │   │   ├── fairseq_model.py
│   │   │   ├── fconv.py
│   │   │   ├── fconv_lm.py
│   │   │   ├── fconv_self_att.py
│   │   │   ├── hubert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hubert.py
│   │   │   │   └── hubert_asr.py
│   │   │   ├── huggingface/
│   │   │   │   ├── __init__.py
│   │   │   │   └── hf_gpt2.py
│   │   │   ├── lightconv.py
│   │   │   ├── lightconv_lm.py
│   │   │   ├── lstm.py
│   │   │   ├── lstm_lm.py
│   │   │   ├── masked_lm.py
│   │   │   ├── model_utils.py
│   │   │   ├── multilingual_transformer.py
│   │   │   ├── nat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cmlm_transformer.py
│   │   │   │   ├── fairseq_nat_model.py
│   │   │   │   ├── insertion_transformer.py
│   │   │   │   ├── iterative_nonautoregressive_transformer.py
│   │   │   │   ├── levenshtein_transformer.py
│   │   │   │   ├── levenshtein_utils.py
│   │   │   │   ├── nat_crf_transformer.py
│   │   │   │   ├── nonautoregressive_ensembles.py
│   │   │   │   └── nonautoregressive_transformer.py
│   │   │   ├── roberta/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── alignment_utils.py
│   │   │   │   ├── enc_dec.py
│   │   │   │   ├── hub_interface.py
│   │   │   │   ├── model.py
│   │   │   │   ├── model_camembert.py
│   │   │   │   ├── model_gottbert.py
│   │   │   │   └── model_xlmr.py
│   │   │   ├── speech_to_text/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── berard.py
│   │   │   │   ├── convtransformer.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── augmented_memory_attention.py
│   │   │   │   │   └── emformer.py
│   │   │   │   ├── s2t_transformer.py
│   │   │   │   ├── utils.py
│   │   │   │   └── xm_transformer.py
│   │   │   ├── text_to_speech/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fastspeech2.py
│   │   │   │   ├── hifigan.py
│   │   │   │   ├── tacotron2.py
│   │   │   │   ├── tts_transformer.py
│   │   │   │   └── vocoder.py
│   │   │   ├── transformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transformer_base.py
│   │   │   │   ├── transformer_config.py
│   │   │   │   ├── transformer_decoder.py
│   │   │   │   ├── transformer_encoder.py
│   │   │   │   └── transformer_legacy.py
│   │   │   ├── transformer_align.py
│   │   │   ├── transformer_from_pretrained_xlm.py
│   │   │   ├── transformer_lm.py
│   │   │   └── wav2vec/
│   │   │       ├── __init__.py
│   │   │       ├── wav2vec.py
│   │   │       ├── wav2vec2.py
│   │   │       └── wav2vec2_asr.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── adaptive_input.py
│   │   │   ├── adaptive_softmax.py
│   │   │   ├── base_layer.py
│   │   │   ├── beamable_mm.py
│   │   │   ├── character_token_embedder.py
│   │   │   ├── checkpoint_activations.py
│   │   │   ├── conv_tbc.py
│   │   │   ├── cross_entropy.py
│   │   │   ├── cuda_utils.cu
│   │   │   ├── downsampled_multihead_attention.py
│   │   │   ├── dynamic_convolution.py
│   │   │   ├── dynamic_crf_layer.py
│   │   │   ├── dynamicconv_layer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cuda_function_gen.py
│   │   │   │   ├── dynamicconv_cuda.cpp
│   │   │   │   ├── dynamicconv_cuda.cuh
│   │   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   │   ├── dynamicconv_layer.py
│   │   │   │   ├── dynamiconv_cpu.cpp
│   │   │   │   └── setup.py
│   │   │   ├── fairseq_dropout.py
│   │   │   ├── fp32_group_norm.py
│   │   │   ├── gelu.py
│   │   │   ├── grad_multiply.py
│   │   │   ├── gumbel_vector_quantizer.py
│   │   │   ├── kmeans_attention.py
│   │   │   ├── kmeans_vector_quantizer.py
│   │   │   ├── layer_drop.py
│   │   │   ├── layer_norm.py
│   │   │   ├── learned_positional_embedding.py
│   │   │   ├── lightconv_layer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cuda_function_gen.py
│   │   │   │   ├── lightconv_cuda.cpp
│   │   │   │   ├── lightconv_cuda.cuh
│   │   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   │   ├── lightconv_layer.py
│   │   │   │   └── setup.py
│   │   │   ├── lightweight_convolution.py
│   │   │   ├── linearized_convolution.py
│   │   │   ├── location_attention.py
│   │   │   ├── lora.py
│   │   │   ├── lstm_cell_with_zoneout.py
│   │   │   ├── multihead_attention.py
│   │   │   ├── positional_embedding.py
│   │   │   ├── quant_noise.py
│   │   │   ├── quantization/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── pq/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── em.py
│   │   │   │   │   ├── modules/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── qconv.py
│   │   │   │   │   │   ├── qemb.py
│   │   │   │   │   │   └── qlinear.py
│   │   │   │   │   ├── pq.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── quantization_options.py
│   │   │   │   └── scalar/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── modules/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── qact.py
│   │   │   │       │   ├── qconv.py
│   │   │   │       │   ├── qemb.py
│   │   │   │       │   └── qlinear.py
│   │   │   │       ├── ops.py
│   │   │   │       └── utils.py
│   │   │   ├── same_pad.py
│   │   │   ├── scalar_bias.py
│   │   │   ├── sinusoidal_positional_embedding.py
│   │   │   ├── sparse_multihead_attention.py
│   │   │   ├── sparse_transformer_sentence_encoder.py
│   │   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   │   ├── transformer_layer.py
│   │   │   ├── transformer_sentence_encoder.py
│   │   │   ├── transformer_sentence_encoder_layer.py
│   │   │   ├── transpose_last.py
│   │   │   ├── unfold.py
│   │   │   └── vggblock.py
│   │   ├── nan_detector.py
│   │   ├── ngram_repeat_block.py
│   │   ├── optim/
│   │   │   ├── __init__.py
│   │   │   ├── adadelta.py
│   │   │   ├── adafactor.py
│   │   │   ├── adagrad.py
│   │   │   ├── adam.py
│   │   │   ├── adamax.py
│   │   │   ├── amp_optimizer.py
│   │   │   ├── bmuf.py
│   │   │   ├── composite.py
│   │   │   ├── cpu_adam.py
│   │   │   ├── dynamic_loss_scaler.py
│   │   │   ├── fairseq_optimizer.py
│   │   │   ├── fp16_optimizer.py
│   │   │   ├── fused_adam.py
│   │   │   ├── fused_lamb.py
│   │   │   ├── lr_scheduler/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cosine_lr_scheduler.py
│   │   │   │   ├── fairseq_lr_scheduler.py
│   │   │   │   ├── fixed_schedule.py
│   │   │   │   ├── inverse_square_root_schedule.py
│   │   │   │   ├── manual_lr_scheduler.py
│   │   │   │   ├── pass_through.py
│   │   │   │   ├── polynomial_decay_schedule.py
│   │   │   │   ├── reduce_lr_on_plateau.py
│   │   │   │   ├── step_lr_scheduler.py
│   │   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   │   └── triangular_lr_scheduler.py
│   │   │   ├── nag.py
│   │   │   ├── sgd.py
│   │   │   └── shard.py
│   │   ├── options.py
│   │   ├── pdb.py
│   │   ├── quantization_utils.py
│   │   ├── registry.py
│   │   ├── scoring/
│   │   │   ├── __init__.py
│   │   │   ├── bleu.py
│   │   │   ├── chrf.py
│   │   │   ├── tokenizer.py
│   │   │   └── wer.py
│   │   ├── search.py
│   │   ├── sequence_generator.py
│   │   ├── sequence_scorer.py
│   │   ├── speech_generator.py
│   │   ├── tasks/
│   │   │   ├── __init__.py
│   │   │   ├── audio_finetuning.py
│   │   │   ├── audio_pretraining.py
│   │   │   ├── cross_lingual_lm.py
│   │   │   ├── denoising.py
│   │   │   ├── fairseq_task.py
│   │   │   ├── frm_text_to_speech.py
│   │   │   ├── hubert_pretraining.py
│   │   │   ├── language_modeling.py
│   │   │   ├── legacy_masked_lm.py
│   │   │   ├── masked_lm.py
│   │   │   ├── multilingual_denoising.py
│   │   │   ├── multilingual_masked_lm.py
│   │   │   ├── multilingual_translation.py
│   │   │   ├── online_backtranslation.py
│   │   │   ├── semisupervised_translation.py
│   │   │   ├── sentence_prediction.py
│   │   │   ├── sentence_ranking.py
│   │   │   ├── simultaneous_translation.py
│   │   │   ├── speech_to_text.py
│   │   │   ├── text_to_speech.py
│   │   │   ├── translation.py
│   │   │   ├── translation_from_pretrained_bart.py
│   │   │   ├── translation_from_pretrained_xlm.py
│   │   │   ├── translation_lev.py
│   │   │   └── translation_multi_simple_epoch.py
│   │   ├── token_generation_constraints.py
│   │   ├── tokenizer.py
│   │   ├── trainer.py
│   │   ├── utils.py
│   │   ├── version.py
│   │   └── version.txt
│   ├── fairseq_cli/
│   │   ├── __init__.py
│   │   ├── eval_lm.py
│   │   ├── generate.py
│   │   ├── hydra_train.py
│   │   ├── interactive.py
│   │   ├── preprocess.py
│   │   ├── score.py
│   │   ├── train.py
│   │   └── validate.py
│   ├── hubconf.py
│   ├── pyproject.toml
│   ├── scripts/
│   │   ├── __init__.py
│   │   ├── average_checkpoints.py
│   │   ├── build_sym_alignment.py
│   │   ├── compare_namespaces.py
│   │   ├── compound_split_bleu.sh
│   │   ├── constraints/
│   │   │   ├── extract.py
│   │   │   └── validate.py
│   │   ├── convert_dictionary.lua
│   │   ├── convert_model.lua
│   │   ├── count_docs.py
│   │   ├── read_binarized.py
│   │   ├── rm_pt.py
│   │   ├── sacrebleu.sh
│   │   ├── shard_docs.py
│   │   ├── split_train_valid_docs.py
│   │   ├── spm_decode.py
│   │   ├── spm_encode.py
│   │   ├── spm_train.py
│   │   └── test_fsdp.sh
│   ├── setup.py
│   ├── tests/
│   │   ├── __init__.py
│   │   ├── distributed/
│   │   │   ├── __init__.py
│   │   │   ├── test_bmuf.py
│   │   │   ├── test_distributed_timeout_wrapper.py
│   │   │   ├── test_module_proxy_wrapper.py
│   │   │   ├── test_utils.py
│   │   │   └── utils.py
│   │   ├── gpu/
│   │   │   ├── __init__.py
│   │   │   ├── test_binaries_gpu.py
│   │   │   ├── test_ema_gpu.py
│   │   │   └── transformer_quantization_config.yaml
│   │   ├── speech_recognition/
│   │   │   ├── __init__.py
│   │   │   ├── asr_test_base.py
│   │   │   ├── test_collaters.py
│   │   │   ├── test_cross_entropy.py
│   │   │   ├── test_data_utils.py
│   │   │   └── test_vggtransformer.py
│   │   ├── test_activation_checkpointing.py
│   │   ├── test_amp_optimizer.py
│   │   ├── test_average_checkpoints.py
│   │   ├── test_backtranslation_dataset.py
│   │   ├── test_binaries.py
│   │   ├── test_character_token_embedder.py
│   │   ├── test_checkpoint_utils.py
│   │   ├── test_concat_dataset.py
│   │   ├── test_constraints.py
│   │   ├── test_convtbc.py
│   │   ├── test_data_utils.py
│   │   ├── test_dataclass_utils.py
│   │   ├── test_dataset.py
│   │   ├── test_dictionary.py
│   │   ├── test_ema.py
│   │   ├── test_export.py
│   │   ├── test_file_chunker_utils.py
│   │   ├── test_file_io.py
│   │   ├── test_fp16_optimizer.py
│   │   ├── test_huffman.py
│   │   ├── test_inference_dropout.py
│   │   ├── test_iopath.py
│   │   ├── test_iterators.py
│   │   ├── test_label_smoothing.py
│   │   ├── test_lm_context_window.py
│   │   ├── test_lstm_jitable.py
│   │   ├── test_memory_efficient_fp16.py
│   │   ├── test_metrics.py
│   │   ├── test_multi_corpus_dataset.py
│   │   ├── test_multi_corpus_sampled_dataset.py
│   │   ├── test_multihead_attention.py
│   │   ├── test_noising.py
│   │   ├── test_online_backtranslation.py
│   │   ├── test_plasma_utils.py
│   │   ├── test_reproducibility.py
│   │   ├── test_resampling_dataset.py
│   │   ├── test_roberta.py
│   │   ├── test_sequence_generator.py
│   │   ├── test_sequence_scorer.py
│   │   ├── test_sparse_multihead_attention.py
│   │   ├── test_token_block_dataset.py
│   │   ├── test_train.py
│   │   ├── test_transformer.py
│   │   ├── test_utils.py
│   │   ├── test_valid_subset_checks.py
│   │   └── utils.py
│   └── train.py
├── glan/
│   └── README.md
├── infoxlm/
│   ├── README.md
│   ├── fairseq/
│   │   ├── .gitignore
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── docs/
│   │   │   ├── Makefile
│   │   │   ├── _static/
│   │   │   │   └── theme_overrides.css
│   │   │   ├── command_line_tools.rst
│   │   │   ├── conf.py
│   │   │   ├── criterions.rst
│   │   │   ├── data.rst
│   │   │   ├── docutils.conf
│   │   │   ├── getting_started.rst
│   │   │   ├── index.rst
│   │   │   ├── lr_scheduler.rst
│   │   │   ├── make.bat
│   │   │   ├── models.rst
│   │   │   ├── modules.rst
│   │   │   ├── optim.rst
│   │   │   ├── overview.rst
│   │   │   ├── requirements.txt
│   │   │   ├── tasks.rst
│   │   │   ├── tutorial_classifying_names.rst
│   │   │   └── tutorial_simple_lstm.rst
│   │   ├── eval_lm.py
│   │   ├── examples/
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── backtranslation/
│   │   │   │   └── README.md
│   │   │   ├── bart/
│   │   │   │   ├── README.cnn.md
│   │   │   │   ├── README.glue.md
│   │   │   │   └── README.md
│   │   │   ├── camembert/
│   │   │   │   └── README.md
│   │   │   ├── conv_seq2seq/
│   │   │   │   └── README.md
│   │   │   ├── cross_lingual_language_model/
│   │   │   │   └── README.md
│   │   │   ├── joint_alignment_translation/
│   │   │   │   ├── README.md
│   │   │   │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
│   │   │   ├── language_model/
│   │   │   │   ├── README.md
│   │   │   │   ├── conv_lm/
│   │   │   │   │   └── README.md
│   │   │   │   ├── prepare-wikitext-103.sh
│   │   │   │   └── transformer_lm/
│   │   │   │       └── README.md
│   │   │   ├── layerdrop/
│   │   │   │   └── README.md
│   │   │   ├── noisychannel/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── rerank.py
│   │   │   │   ├── rerank_generate.py
│   │   │   │   ├── rerank_options.py
│   │   │   │   ├── rerank_score_bw.py
│   │   │   │   ├── rerank_score_lm.py
│   │   │   │   ├── rerank_tune.py
│   │   │   │   └── rerank_utils.py
│   │   │   ├── nonautoregressive_translation/
│   │   │   │   ├── README.md
│   │   │   │   └── scripts.md
│   │   │   ├── pay_less_attention_paper/
│   │   │   │   └── README.md
│   │   │   ├── roberta/
│   │   │   │   ├── README.custom_classification.md
│   │   │   │   ├── README.glue.md
│   │   │   │   ├── README.md
│   │   │   │   ├── README.pretraining.md
│   │   │   │   ├── README.race.md
│   │   │   │   ├── commonsense_qa/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── commonsense_qa_task.py
│   │   │   │   │   └── download_cqa_data.sh
│   │   │   │   ├── multiprocessing_bpe_encoder.py
│   │   │   │   ├── preprocess_GLUE_tasks.sh
│   │   │   │   ├── preprocess_RACE.py
│   │   │   │   ├── preprocess_RACE.sh
│   │   │   │   └── wsc/
│   │   │   │       ├── README.md
│   │   │   │       ├── __init__.py
│   │   │   │       ├── wsc_criterion.py
│   │   │   │       ├── wsc_task.py
│   │   │   │       └── wsc_utils.py
│   │   │   ├── scaling_nmt/
│   │   │   │   └── README.md
│   │   │   ├── speech_recognition/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── ASG_loss.py
│   │   │   │   │   ├── CTC_loss.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── cross_entropy_acc.py
│   │   │   │   ├── data/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── asr_dataset.py
│   │   │   │   │   ├── collaters.py
│   │   │   │   │   ├── data_utils.py
│   │   │   │   │   └── replabels.py
│   │   │   │   ├── datasets/
│   │   │   │   │   ├── asr_prep_json.py
│   │   │   │   │   └── prepare-librispeech.sh
│   │   │   │   ├── infer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── vggtransformer.py
│   │   │   │   │   └── w2l_conv_glu_enc.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── speech_recognition.py
│   │   │   │   ├── utils/
│   │   │   │   │   └── wer_utils.py
│   │   │   │   └── w2l_decoder.py
│   │   │   ├── stories/
│   │   │   │   └── README.md
│   │   │   ├── translation/
│   │   │   │   ├── README.md
│   │   │   │   ├── prepare-iwslt14.sh
│   │   │   │   ├── prepare-iwslt17-multilingual.sh
│   │   │   │   ├── prepare-wmt14en2de.sh
│   │   │   │   └── prepare-wmt14en2fr.sh
│   │   │   ├── translation_moe/
│   │   │   │   ├── README.md
│   │   │   │   └── score.py
│   │   │   ├── wav2vec/
│   │   │   │   └── README.md
│   │   │   ├── wmt19/
│   │   │   │   └── README.md
│   │   │   └── xlmr/
│   │   │       └── README.md
│   │   ├── fairseq/
│   │   │   ├── __init__.py
│   │   │   ├── binarizer.py
│   │   │   ├── bleu.py
│   │   │   ├── checkpoint_utils.py
│   │   │   ├── clib/
│   │   │   │   ├── libbleu/
│   │   │   │   │   ├── libbleu.cpp
│   │   │   │   │   └── module.cpp
│   │   │   │   └── libnat/
│   │   │   │       └── edit_dist.cpp
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_loss.py
│   │   │   │   ├── binary_cross_entropy.py
│   │   │   │   ├── composite_loss.py
│   │   │   │   ├── cross_entropy.py
│   │   │   │   ├── fairseq_criterion.py
│   │   │   │   ├── label_smoothed_cross_entropy.py
│   │   │   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── nat_loss.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   └── sentence_ranking.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── append_token_dataset.py
│   │   │   │   ├── audio/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── raw_audio_dataset.py
│   │   │   │   ├── backtranslation_dataset.py
│   │   │   │   ├── base_wrapper_dataset.py
│   │   │   │   ├── colorize_dataset.py
│   │   │   │   ├── concat_dataset.py
│   │   │   │   ├── concat_sentences_dataset.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── data_utils_fast.pyx
│   │   │   │   ├── denoising_dataset.py
│   │   │   │   ├── dictionary.py
│   │   │   │   ├── encoders/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fastbpe.py
│   │   │   │   │   ├── gpt2_bpe.py
│   │   │   │   │   ├── gpt2_bpe_utils.py
│   │   │   │   │   ├── hf_bert_bpe.py
│   │   │   │   │   ├── moses_tokenizer.py
│   │   │   │   │   ├── nltk_tokenizer.py
│   │   │   │   │   ├── sentencepiece_bpe.py
│   │   │   │   │   ├── space_tokenizer.py
│   │   │   │   │   ├── subword_nmt_bpe.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── fairseq_dataset.py
│   │   │   │   ├── id_dataset.py
│   │   │   │   ├── indexed_dataset.py
│   │   │   │   ├── iterators.py
│   │   │   │   ├── language_pair_dataset.py
│   │   │   │   ├── legacy/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── block_pair_dataset.py
│   │   │   │   │   ├── masked_lm_dataset.py
│   │   │   │   │   └── masked_lm_dictionary.py
│   │   │   │   ├── list_dataset.py
│   │   │   │   ├── lm_context_window_dataset.py
│   │   │   │   ├── lru_cache_dataset.py
│   │   │   │   ├── mask_tokens_dataset.py
│   │   │   │   ├── monolingual_dataset.py
│   │   │   │   ├── multi_corpus_sampled_dataset.py
│   │   │   │   ├── nested_dictionary_dataset.py
│   │   │   │   ├── noising.py
│   │   │   │   ├── num_samples_dataset.py
│   │   │   │   ├── numel_dataset.py
│   │   │   │   ├── offset_tokens_dataset.py
│   │   │   │   ├── pad_dataset.py
│   │   │   │   ├── plasma_utils.py
│   │   │   │   ├── prepend_dataset.py
│   │   │   │   ├── prepend_token_dataset.py
│   │   │   │   ├── raw_label_dataset.py
│   │   │   │   ├── replace_dataset.py
│   │   │   │   ├── resampling_dataset.py
│   │   │   │   ├── roll_dataset.py
│   │   │   │   ├── round_robin_zip_datasets.py
│   │   │   │   ├── sharded_dataset.py
│   │   │   │   ├── sort_dataset.py
│   │   │   │   ├── strip_token_dataset.py
│   │   │   │   ├── subsample_dataset.py
│   │   │   │   ├── token_block_dataset.py
│   │   │   │   ├── token_block_utils_fast.pyx
│   │   │   │   ├── transform_eos_dataset.py
│   │   │   │   ├── transform_eos_lang_pair_dataset.py
│   │   │   │   └── truncate_dataset.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── file_utils.py
│   │   │   ├── hub_utils.py
│   │   │   ├── iterative_refinement_generator.py
│   │   │   ├── legacy_distributed_data_parallel.py
│   │   │   ├── meters.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bart/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── cmlm_transformer.py
│   │   │   │   ├── composite_encoder.py
│   │   │   │   ├── distributed_fairseq_model.py
│   │   │   │   ├── fairseq_decoder.py
│   │   │   │   ├── fairseq_encoder.py
│   │   │   │   ├── fairseq_incremental_decoder.py
│   │   │   │   ├── fairseq_model.py
│   │   │   │   ├── fconv.py
│   │   │   │   ├── fconv_lm.py
│   │   │   │   ├── fconv_self_att.py
│   │   │   │   ├── insertion_transformer.py
│   │   │   │   ├── iterative_nonautoregressive_transformer.py
│   │   │   │   ├── levenshtein_transformer.py
│   │   │   │   ├── lightconv.py
│   │   │   │   ├── lightconv_lm.py
│   │   │   │   ├── lstm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── model_utils.py
│   │   │   │   ├── multilingual_transformer.py
│   │   │   │   ├── nonautoregressive_ensembles.py
│   │   │   │   ├── nonautoregressive_transformer.py
│   │   │   │   ├── roberta/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alignment_utils.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── transformer.py
│   │   │   │   ├── transformer_from_pretrained_xlm.py
│   │   │   │   ├── transformer_lm.py
│   │   │   │   └── wav2vec.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_input.py
│   │   │   │   ├── adaptive_softmax.py
│   │   │   │   ├── beamable_mm.py
│   │   │   │   ├── character_token_embedder.py
│   │   │   │   ├── conv_tbc.py
│   │   │   │   ├── cuda_utils.cu
│   │   │   │   ├── downsampled_multihead_attention.py
│   │   │   │   ├── dynamic_convolution.py
│   │   │   │   ├── dynamicconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── dynamicconv_cuda.cpp
│   │   │   │   │   ├── dynamicconv_cuda.cuh
│   │   │   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   │   │   ├── dynamicconv_layer.py
│   │   │   │   │   ├── dynamiconv_cpu.cpp
│   │   │   │   │   └── setup.py
│   │   │   │   ├── gelu.py
│   │   │   │   ├── grad_multiply.py
│   │   │   │   ├── highway.py
│   │   │   │   ├── layer_norm.py
│   │   │   │   ├── learned_positional_embedding.py
│   │   │   │   ├── lightconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── lightconv_cuda.cpp
│   │   │   │   │   ├── lightconv_cuda.cuh
│   │   │   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   │   │   ├── lightconv_layer.py
│   │   │   │   │   └── setup.py
│   │   │   │   ├── lightweight_convolution.py
│   │   │   │   ├── linearized_convolution.py
│   │   │   │   ├── logsumexp_moe.py
│   │   │   │   ├── mean_pool_gating_network.py
│   │   │   │   ├── multihead_attention.py
│   │   │   │   ├── positional_embedding.py
│   │   │   │   ├── scalar_bias.py
│   │   │   │   ├── sinusoidal_positional_embedding.py
│   │   │   │   ├── sparse_multihead_attention.py
│   │   │   │   ├── sparse_transformer_sentence_encoder.py
│   │   │   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   │   │   ├── transformer_layer.py
│   │   │   │   ├── transformer_sentence_encoder.py
│   │   │   │   ├── transformer_sentence_encoder_layer.py
│   │   │   │   ├── unfold.py
│   │   │   │   └── vggblock.py
│   │   │   ├── optim/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adadelta.py
│   │   │   │   ├── adafactor.py
│   │   │   │   ├── adagrad.py
│   │   │   │   ├── adam.py
│   │   │   │   ├── adamax.py
│   │   │   │   ├── bmuf.py
│   │   │   │   ├── fairseq_optimizer.py
│   │   │   │   ├── fp16_optimizer.py
│   │   │   │   ├── lr_scheduler/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cosine_lr_scheduler.py
│   │   │   │   │   ├── fairseq_lr_scheduler.py
│   │   │   │   │   ├── fixed_schedule.py
│   │   │   │   │   ├── inverse_square_root_schedule.py
│   │   │   │   │   ├── polynomial_decay_schedule.py
│   │   │   │   │   ├── reduce_lr_on_plateau.py
│   │   │   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   │   │   └── triangular_lr_scheduler.py
│   │   │   │   ├── nag.py
│   │   │   │   └── sgd.py
│   │   │   ├── options.py
│   │   │   ├── pdb.py
│   │   │   ├── progress_bar.py
│   │   │   ├── registry.py
│   │   │   ├── search.py
│   │   │   ├── sequence_generator.py
│   │   │   ├── sequence_scorer.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_pretraining.py
│   │   │   │   ├── cross_lingual_lm.py
│   │   │   │   ├── denoising.py
│   │   │   │   ├── fairseq_task.py
│   │   │   │   ├── language_modeling.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── multilingual_masked_lm.py
│   │   │   │   ├── multilingual_translation.py
│   │   │   │   ├── semisupervised_translation.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   ├── sentence_ranking.py
│   │   │   │   ├── translation.py
│   │   │   │   ├── translation_from_pretrained_xlm.py
│   │   │   │   ├── translation_lev.py
│   │   │   │   └── translation_moe.py
│   │   │   ├── tokenizer.py
│   │   │   ├── trainer.py
│   │   │   └── utils.py
│   │   ├── fairseq_cli/
│   │   │   └── __init__.py
│   │   ├── generate.py
│   │   ├── hubconf.py
│   │   ├── interactive.py
│   │   ├── preprocess.py
│   │   ├── score.py
│   │   ├── scripts/
│   │   │   ├── __init__.py
│   │   │   ├── average_checkpoints.py
│   │   │   ├── build_sym_alignment.py
│   │   │   ├── compare_namespaces.py
│   │   │   ├── compound_split_bleu.sh
│   │   │   ├── convert_dictionary.lua
│   │   │   ├── convert_model.lua
│   │   │   ├── count_docs.py
│   │   │   ├── read_binarized.py
│   │   │   ├── rm_pt.py
│   │   │   ├── sacrebleu_pregen.sh
│   │   │   ├── shard_docs.py
│   │   │   ├── split_train_valid_docs.py
│   │   │   ├── spm_decode.py
│   │   │   ├── spm_encode.py
│   │   │   ├── spm_train.py
│   │   │   ├── wav2vec_featurize.py
│   │   │   └── wav2vec_manifest.py
│   │   ├── setup.py
│   │   ├── tests/
│   │   │   ├── __init__.py
│   │   │   ├── speech_recognition/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── asr_test_base.py
│   │   │   │   ├── test_collaters.py
│   │   │   │   ├── test_cross_entropy.py
│   │   │   │   └── test_vggtransformer.py
│   │   │   ├── test_average_checkpoints.py
│   │   │   ├── test_backtranslation_dataset.py
│   │   │   ├── test_binaries.py
│   │   │   ├── test_bmuf.py
│   │   │   ├── test_character_token_embedder.py
│   │   │   ├── test_concat_dataset.py
│   │   │   ├── test_convtbc.py
│   │   │   ├── test_dictionary.py
│   │   │   ├── test_iterators.py
│   │   │   ├── test_label_smoothing.py
│   │   │   ├── test_memory_efficient_fp16.py
│   │   │   ├── test_multi_corpus_sampled_dataset.py
│   │   │   ├── test_multihead_attention.py
│   │   │   ├── test_noising.py
│   │   │   ├── test_reproducibility.py
│   │   │   ├── test_resampling_dataset.py
│   │   │   ├── test_sequence_generator.py
│   │   │   ├── test_sequence_scorer.py
│   │   │   ├── test_sparse_multihead_attention.py
│   │   │   ├── test_token_block_dataset.py
│   │   │   ├── test_train.py
│   │   │   ├── test_utils.py
│   │   │   └── utils.py
│   │   ├── train.py
│   │   └── validate.py
│   ├── src-infoxlm/
│   │   ├── infoxlm/
│   │   │   ├── __init__.py
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── xlco.py
│   │   │   │   └── xlm_align.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dict_dataset.py
│   │   │   │   ├── mlm_utils.py
│   │   │   │   ├── offset_dataset.py
│   │   │   │   ├── tlm_dataset.py
│   │   │   │   ├── xlco_dataset.py
│   │   │   │   └── xlm_align.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── infoxlm.py
│   │   │   │   ├── roberta.py
│   │   │   │   └── xlm_align.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── infoxlm.py
│   │   │   │   ├── mlm.py
│   │   │   │   ├── tlm.py
│   │   │   │   └── xlm_align.py
│   │   │   └── utils.py
│   │   ├── setup.py
│   │   └── train.py
│   └── tools/
│       ├── para2bin.py
│       ├── para2bin4xlco.py
│       └── txt2bin.py
├── kosmos-1/
│   └── README.md
├── kosmos-2/
│   ├── README.md
│   ├── data/
│   │   ├── dict.txt
│   │   ├── generate_config.py
│   │   ├── prepare_grit.py
│   │   ├── sentencepiece.bpe.model
│   │   └── visualize_grit.py
│   ├── demo/
│   │   ├── decode_string.py
│   │   ├── draw_box.py
│   │   └── gradio_app.py
│   ├── docs/
│   │   └── install.md
│   ├── evaluation/
│   │   ├── caption_obj_few_shot.py
│   │   ├── caption_obj_qa.py
│   │   ├── flickr_entities/
│   │   │   ├── README.md
│   │   │   ├── cook_data.py
│   │   │   ├── decode_string.py
│   │   │   └── flickr_entities_evaluate.py
│   │   ├── grd-zeroshot-flickr.sh
│   │   ├── grd-zeroshot-refcoco.sh
│   │   ├── refcoco/
│   │   │   ├── README.md
│   │   │   ├── box_ops.py
│   │   │   ├── cook_data.py
│   │   │   ├── decode_string.py
│   │   │   └── refexp_evaluate.py
│   │   ├── seed-bench/
│   │   │   ├── README.md
│   │   │   ├── cook_image_data.py
│   │   │   └── eval_ppl.py
│   │   └── zeroshot-seed-bench.sh
│   ├── fairseq/
│   │   ├── .circleci/
│   │   │   └── config.yml
│   │   ├── .github/
│   │   │   ├── ISSUE_TEMPLATE/
│   │   │   │   ├── bug_report.md
│   │   │   │   ├── documentation.md
│   │   │   │   ├── feature_request.md
│   │   │   │   └── how-to-question.md
│   │   │   ├── ISSUE_TEMPLATE.md
│   │   │   ├── PULL_REQUEST_TEMPLATE.md
│   │   │   ├── stale.yml
│   │   │   └── workflows/
│   │   │       ├── build.yml
│   │   │       └── build_wheels.yml
│   │   ├── .gitignore
│   │   ├── .gitmodules
│   │   ├── .isort.cfg
│   │   ├── .pre-commit-config.yaml
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── docs/
│   │   │   ├── Makefile
│   │   │   ├── _static/
│   │   │   │   └── theme_overrides.css
│   │   │   ├── command_line_tools.rst
│   │   │   ├── conf.py
│   │   │   ├── criterions.rst
│   │   │   ├── data.rst
│   │   │   ├── docutils.conf
│   │   │   ├── getting_started.rst
│   │   │   ├── hydra_integration.md
│   │   │   ├── index.rst
│   │   │   ├── lr_scheduler.rst
│   │   │   ├── make.bat
│   │   │   ├── models.rst
│   │   │   ├── modules.rst
│   │   │   ├── optim.rst
│   │   │   ├── overview.rst
│   │   │   ├── requirements.txt
│   │   │   ├── tasks.rst
│   │   │   ├── tutorial_classifying_names.rst
│   │   │   └── tutorial_simple_lstm.rst
│   │   ├── examples/
│   │   │   ├── .gitignore
│   │   │   ├── MMPT/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── CONFIG.md
│   │   │   │   ├── DATASET.md
│   │   │   │   ├── README.md
│   │   │   │   ├── endtask.md
│   │   │   │   ├── locallaunch.py
│   │   │   │   ├── mmpt/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── datasets/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fairseqmmdataset.py
│   │   │   │   │   │   └── mmdataset.py
│   │   │   │   │   ├── evaluators/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── evaluator.py
│   │   │   │   │   │   ├── metric.py
│   │   │   │   │   │   └── predictor.py
│   │   │   │   │   ├── losses/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fairseqmmloss.py
│   │   │   │   │   │   ├── loss.py
│   │   │   │   │   │   └── nce.py
│   │   │   │   │   ├── models/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fairseqmmmodel.py
│   │   │   │   │   │   ├── mmfusion.py
│   │   │   │   │   │   ├── mmfusionnlg.py
│   │   │   │   │   │   └── transformermodel.py
│   │   │   │   │   ├── modules/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── mm.py
│   │   │   │   │   │   ├── retri.py
│   │   │   │   │   │   └── vectorpool.py
│   │   │   │   │   ├── processors/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── dedupprocessor.py
│   │   │   │   │   │   ├── dsprocessor.py
│   │   │   │   │   │   ├── how2processor.py
│   │   │   │   │   │   ├── how2retriprocessor.py
│   │   │   │   │   │   ├── models/
│   │   │   │   │   │   │   └── s3dg.py
│   │   │   │   │   │   └── processor.py
│   │   │   │   │   ├── tasks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fairseqmmtask.py
│   │   │   │   │   │   ├── milncetask.py
│   │   │   │   │   │   ├── retritask.py
│   │   │   │   │   │   ├── task.py
│   │   │   │   │   │   └── vlmtask.py
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── load_config.py
│   │   │   │   │       └── shardedtensor.py
│   │   │   │   ├── mmpt_cli/
│   │   │   │   │   ├── localjob.py
│   │   │   │   │   └── predict.py
│   │   │   │   ├── pretraining.md
│   │   │   │   ├── projects/
│   │   │   │   │   ├── mfmmlm.yaml
│   │   │   │   │   ├── mtm/
│   │   │   │   │   │   ├── mmfusionmtm.yaml
│   │   │   │   │   │   ├── vlm/
│   │   │   │   │   │   │   ├── coin.yaml
│   │   │   │   │   │   │   ├── crosstask.yaml
│   │   │   │   │   │   │   ├── how2.yaml
│   │   │   │   │   │   │   ├── test_coin.yaml
│   │   │   │   │   │   │   ├── test_crosstask.yaml
│   │   │   │   │   │   │   ├── test_crosstask_zs.yaml
│   │   │   │   │   │   │   ├── test_vtt.yaml
│   │   │   │   │   │   │   ├── test_vttqa.yaml
│   │   │   │   │   │   │   ├── test_youcook.yaml
│   │   │   │   │   │   │   ├── test_youcookcap.yaml
│   │   │   │   │   │   │   ├── vtt.yaml
│   │   │   │   │   │   │   ├── vttqa.yaml
│   │   │   │   │   │   │   ├── youcook.yaml
│   │   │   │   │   │   │   └── youcookcap.yaml
│   │   │   │   │   │   └── vlm.yaml
│   │   │   │   │   ├── retri/
│   │   │   │   │   │   ├── videoclip/
│   │   │   │   │   │   │   ├── coin_videoclip.yaml
│   │   │   │   │   │   │   ├── crosstask_videoclip.yaml
│   │   │   │   │   │   │   ├── how2.yaml
│   │   │   │   │   │   │   ├── test_coin_videoclip.yaml
│   │   │   │   │   │   │   ├── test_coin_zs.yaml
│   │   │   │   │   │   │   ├── test_crosstask_videoclip.yaml
│   │   │   │   │   │   │   ├── test_crosstask_zs_videoclip.yaml
│   │   │   │   │   │   │   ├── test_didemo_zs.yaml
│   │   │   │   │   │   │   ├── test_vtt_videoclip.yaml
│   │   │   │   │   │   │   ├── test_vtt_zs.yaml
│   │   │   │   │   │   │   ├── test_vttqa_videoclip.yaml
│   │   │   │   │   │   │   ├── test_vttqa_zs.yaml
│   │   │   │   │   │   │   ├── test_youcook_videoclip.yaml
│   │   │   │   │   │   │   ├── test_youcook_zs.yaml
│   │   │   │   │   │   │   ├── vtt_videoclip.yaml
│   │   │   │   │   │   │   ├── vttqa_videoclip.yaml
│   │   │   │   │   │   │   └── youcook_videoclip.yaml
│   │   │   │   │   │   ├── videoclip.yaml
│   │   │   │   │   │   └── videoretri.yaml
│   │   │   │   │   └── task/
│   │   │   │   │       ├── coin.yaml
│   │   │   │   │       ├── coin_videoclip.yaml
│   │   │   │   │       ├── crosstask.yaml
│   │   │   │   │       ├── crosstask_videoclip.yaml
│   │   │   │   │       ├── default.yaml
│   │   │   │   │       ├── ft.yaml
│   │   │   │   │       ├── how2.yaml
│   │   │   │   │       ├── test.yaml
│   │   │   │   │       ├── test_coin.yaml
│   │   │   │   │       ├── test_coin_videoclip.yaml
│   │   │   │   │       ├── test_coin_zs.yaml
│   │   │   │   │       ├── test_crosstask.yaml
│   │   │   │   │       ├── test_crosstask_videoclip.yaml
│   │   │   │   │       ├── test_crosstask_zs.yaml
│   │   │   │   │       ├── test_crosstask_zs_videoclip.yaml
│   │   │   │   │       ├── test_didemo_zs.yaml
│   │   │   │   │       ├── test_vtt.yaml
│   │   │   │   │       ├── test_vtt_videoclip.yaml
│   │   │   │   │       ├── test_vtt_zs.yaml
│   │   │   │   │       ├── test_vttqa.yaml
│   │   │   │   │       ├── test_vttqa_videoclip.yaml
│   │   │   │   │       ├── test_vttqa_zs.yaml
│   │   │   │   │       ├── test_youcook.yaml
│   │   │   │   │       ├── test_youcook_videoclip.yaml
│   │   │   │   │       ├── test_youcook_zs.yaml
│   │   │   │   │       ├── test_youcookcap.yaml
│   │   │   │   │       ├── vtt.yaml
│   │   │   │   │       ├── vtt_videoclip.yaml
│   │   │   │   │       ├── vttqa.yaml
│   │   │   │   │       ├── vttqa_videoclip.yaml
│   │   │   │   │       ├── youcook.yaml
│   │   │   │   │       ├── youcook_videoclip.yaml
│   │   │   │   │       └── youcookcap.yaml
│   │   │   │   ├── scripts/
│   │   │   │   │   ├── text_token_extractor/
│   │   │   │   │   │   ├── configs/
│   │   │   │   │   │   │   └── bert-base-uncased.yaml
│   │   │   │   │   │   └── pretokenization.py
│   │   │   │   │   └── video_feature_extractor/
│   │   │   │   │       ├── extract.py
│   │   │   │   │       ├── how2/
│   │   │   │   │       │   └── s3d.sh
│   │   │   │   │       ├── model.py
│   │   │   │   │       ├── pathbuilder.py
│   │   │   │   │       ├── preprocessing.py
│   │   │   │   │       ├── random_sequence_shuffler.py
│   │   │   │   │       ├── shard_feature.py
│   │   │   │   │       └── videoreader.py
│   │   │   │   └── setup.py
│   │   │   ├── __init__.py
│   │   │   ├── adaptive_span/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adagrad_with_grad_clip.py
│   │   │   │   ├── adaptive_span_attention.py
│   │   │   │   ├── adaptive_span_loss.py
│   │   │   │   ├── adaptive_span_model.py
│   │   │   │   ├── adaptive_span_model_wrapper.py
│   │   │   │   └── truncated_bptt_lm_task.py
│   │   │   ├── attention_head_selection/
│   │   │   │   ├── README.md
│   │   │   │   └── src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── data/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── speech_to_text_dataset_with_domain.py
│   │   │   │       ├── loss/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── attention_head_selection.py
│   │   │   │       ├── models/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── head_selection_s2t_transformer.py
│   │   │   │       │   └── head_selection_transformer.py
│   │   │   │       ├── modules/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── attn_head_selector.py
│   │   │   │       │   ├── head_selection_transformer_layer.py
│   │   │   │       │   ├── multihead_attention_selection.py
│   │   │   │       │   └── multihead_functional.py
│   │   │   │       └── speech_to_text_head_selection.py
│   │   │   ├── backtranslation/
│   │   │   │   ├── README.md
│   │   │   │   ├── deduplicate_lines.py
│   │   │   │   ├── extract_bt_data.py
│   │   │   │   ├── prepare-de-monolingual.sh
│   │   │   │   ├── prepare-wmt18en2de.sh
│   │   │   │   ├── sacrebleu.sh
│   │   │   │   └── tokenized_bleu.sh
│   │   │   ├── bart/
│   │   │   │   ├── README.glue.md
│   │   │   │   ├── README.md
│   │   │   │   ├── README.summarization.md
│   │   │   │   └── summarize.py
│   │   │   ├── byte_level_bpe/
│   │   │   │   ├── README.md
│   │   │   │   ├── get_bitext.py
│   │   │   │   ├── get_data.sh
│   │   │   │   └── gru_transformer.py
│   │   │   ├── camembert/
│   │   │   │   └── README.md
│   │   │   ├── constrained_decoding/
│   │   │   │   ├── README.md
│   │   │   │   ├── normalize.py
│   │   │   │   └── tok.py
│   │   │   ├── conv_seq2seq/
│   │   │   │   └── README.md
│   │   │   ├── criss/
│   │   │   │   ├── README.md
│   │   │   │   ├── download_and_preprocess_flores_test.sh
│   │   │   │   ├── download_and_preprocess_tatoeba.sh
│   │   │   │   ├── mining/
│   │   │   │   │   ├── mine.py
│   │   │   │   │   └── mine_example.sh
│   │   │   │   ├── save_encoder.py
│   │   │   │   ├── sentence_retrieval/
│   │   │   │   │   ├── encoder_analysis.py
│   │   │   │   │   └── sentence_retrieval_tatoeba.sh
│   │   │   │   └── unsupervised_mt/
│   │   │   │       └── eval.sh
│   │   │   ├── cross_lingual_language_model/
│   │   │   │   └── README.md
│   │   │   ├── discriminative_reranking_nmt/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config/
│   │   │   │   │   └── deen.yaml
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── discriminative_reranking_criterion.py
│   │   │   │   ├── drnmt_rerank.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── discriminative_reranking_model.py
│   │   │   │   ├── scripts/
│   │   │   │   │   └── prep_data.py
│   │   │   │   └── tasks/
│   │   │   │       ├── __init__.py
│   │   │   │       └── discriminative_reranking_task.py
│   │   │   ├── fast_noisy_channel/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── noisy_channel_beam_search.py
│   │   │   │   ├── noisy_channel_sequence_generator.py
│   │   │   │   └── noisy_channel_translation.py
│   │   │   ├── flores101/
│   │   │   │   └── README.md
│   │   │   ├── fully_sharded_data_parallel/
│   │   │   │   └── README.md
│   │   │   ├── gottbert/
│   │   │   │   └── README.md
│   │   │   ├── hubert/
│   │   │   │   ├── README.md
│   │   │   │   ├── config/
│   │   │   │   │   ├── decode/
│   │   │   │   │   │   ├── ax_sweep/
│   │   │   │   │   │   │   ├── ngram.yaml
│   │   │   │   │   │   │   └── transformer.yaml
│   │   │   │   │   │   ├── infer_fsqlm.yaml
│   │   │   │   │   │   ├── infer_kenlm.yaml
│   │   │   │   │   │   ├── infer_viterbi.yaml
│   │   │   │   │   │   └── run/
│   │   │   │   │   │       ├── submitit_slurm.yaml
│   │   │   │   │   │       └── submitit_slurm_8gpu.yaml
│   │   │   │   │   ├── finetune/
│   │   │   │   │   │   ├── base_10h.yaml
│   │   │   │   │   │   ├── ckpt/
│   │   │   │   │   │   │   └── it1.yaml
│   │   │   │   │   │   ├── lm/
│   │   │   │   │   │   │   └── ls_4gram.yaml
│   │   │   │   │   │   └── run/
│   │   │   │   │   │       └── submitit_reg.yaml
│   │   │   │   │   └── pretrain/
│   │   │   │   │       ├── data/
│   │   │   │   │       │   ├── iter1.yaml
│   │   │   │   │       │   └── iter2.yaml
│   │   │   │   │       ├── hubert_base_librispeech.yaml
│   │   │   │   │       ├── hubert_large_librivox.yaml
│   │   │   │   │       ├── hubert_xlarge_librivox.yaml
│   │   │   │   │       └── run/
│   │   │   │   │           └── submitit_reg.yaml
│   │   │   │   ├── measure_teacher_quality.py
│   │   │   │   ├── simple_kmeans/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── dump_hubert_feature.py
│   │   │   │   │   ├── dump_hubert_feature_s2t.py
│   │   │   │   │   ├── dump_km_label.py
│   │   │   │   │   ├── dump_mfcc_feature.py
│   │   │   │   │   ├── dump_w2v2_feature.py
│   │   │   │   │   ├── feature_utils.py
│   │   │   │   │   └── learn_kmeans.py
│   │   │   │   └── update_ckpt.py
│   │   │   ├── joint_alignment_translation/
│   │   │   │   ├── README.md
│   │   │   │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
│   │   │   ├── language_model/
│   │   │   │   ├── README.adaptive_inputs.md
│   │   │   │   ├── README.conv.md
│   │   │   │   ├── README.md
│   │   │   │   └── prepare-wikitext-103.sh
│   │   │   ├── laser/
│   │   │   │   ├── README.md
│   │   │   │   └── laser_src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── laser_lstm.py
│   │   │   │       ├── laser_task.py
│   │   │   │       ├── laser_transformer.py
│   │   │   │       └── multitask_data_utils.py
│   │   │   ├── latent_depth/
│   │   │   │   ├── README.md
│   │   │   │   └── latent_depth_src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── loss/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── latent_depth.py
│   │   │   │       ├── models/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── latent_multilingual_transformer.py
│   │   │   │       │   └── latent_transformer.py
│   │   │   │       ├── modules/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── latent_layers.py
│   │   │   │       └── multilingual_translation_latent_depth.py
│   │   │   ├── layerdrop/
│   │   │   │   └── README.md
│   │   │   ├── linformer/
│   │   │   │   ├── README.md
│   │   │   │   └── linformer_src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── models/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── linformer_roberta.py
│   │   │   │       └── modules/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── linformer_sentence_encoder.py
│   │   │   │           ├── linformer_sentence_encoder_layer.py
│   │   │   │           └── multihead_linear_attention.py
│   │   │   ├── m2m_100/
│   │   │   │   ├── README.md
│   │   │   │   ├── install_dependecies.sh
│   │   │   │   ├── process_data/
│   │   │   │   │   ├── clean_histogram.py
│   │   │   │   │   ├── dedup_data.py
│   │   │   │   │   └── remove_too_much_punc.py
│   │   │   │   ├── tok.sh
│   │   │   │   └── tokenizers/
│   │   │   │       ├── README.md
│   │   │   │       ├── seg_ja.sh
│   │   │   │       ├── seg_ko.sh
│   │   │   │       ├── thirdparty/
│   │   │   │       │   └── .gitignore
│   │   │   │       ├── tokenize_indic.py
│   │   │   │       ├── tokenize_thai.py
│   │   │   │       ├── tokenize_zh.py
│   │   │   │       └── tokenizer_ar.sh
│   │   │   ├── mbart/
│   │   │   │   └── README.md
│   │   │   ├── megatron_11b/
│   │   │   │   ├── README.md
│   │   │   │   └── detok.py
│   │   │   ├── moe_lm/
│   │   │   │   ├── README.md
│   │   │   │   ├── data_card.md
│   │   │   │   └── model_card.md
│   │   │   ├── multilingual/
│   │   │   │   ├── ML50_langs.txt
│   │   │   │   ├── README.md
│   │   │   │   ├── data_scripts/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── binarize.py
│   │   │   │   │   ├── check_iswlt_test_data.py
│   │   │   │   │   ├── check_self_overlaps.py
│   │   │   │   │   ├── check_valid_test_overlaps.py
│   │   │   │   │   ├── dedup_all.py
│   │   │   │   │   ├── download_ML50_v1.sh
│   │   │   │   │   ├── download_af_xh.sh
│   │   │   │   │   ├── download_flores_data.sh
│   │   │   │   │   ├── download_iitb.sh
│   │   │   │   │   ├── download_iwslt_and_extract.sh
│   │   │   │   │   ├── download_lotus.sh
│   │   │   │   │   ├── download_ted_and_extract.py
│   │   │   │   │   ├── download_wat19_my.sh
│   │   │   │   │   ├── download_wmt19_and_before.py
│   │   │   │   │   ├── download_wmt20.sh
│   │   │   │   │   ├── preprocess_ML50_v1.sh
│   │   │   │   │   ├── remove_valid_test_in_train.py
│   │   │   │   │   ├── requirement.txt
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── dedup.py
│   │   │   │   │       ├── fasttext_multi_filter.py
│   │   │   │   │       └── strip_sgm.sh
│   │   │   │   ├── finetune_multilingual_model.sh
│   │   │   │   ├── multilingual_fairseq_gen.sh
│   │   │   │   └── train_multilingual_model.sh
│   │   │   ├── noisychannel/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── rerank.py
│   │   │   │   ├── rerank_generate.py
│   │   │   │   ├── rerank_options.py
│   │   │   │   ├── rerank_score_bw.py
│   │   │   │   ├── rerank_score_lm.py
│   │   │   │   ├── rerank_tune.py
│   │   │   │   └── rerank_utils.py
│   │   │   ├── nonautoregressive_translation/
│   │   │   │   ├── README.md
│   │   │   │   └── scripts.md
│   │   │   ├── normformer/
│   │   │   │   ├── README.md
│   │   │   │   └── train_lm.sh
│   │   │   ├── operators/
│   │   │   │   ├── alignment_train_cpu.cpp
│   │   │   │   ├── alignment_train_cuda.cpp
│   │   │   │   ├── alignment_train_cuda.h
│   │   │   │   ├── alignment_train_kernel.cu
│   │   │   │   └── utils.h
│   │   │   ├── paraphraser/
│   │   │   │   ├── README.md
│   │   │   │   └── paraphrase.py
│   │   │   ├── pay_less_attention_paper/
│   │   │   │   └── README.md
│   │   │   ├── pointer_generator/
│   │   │   │   ├── README.md
│   │   │   │   ├── README.xsum.md
│   │   │   │   ├── pointer_generator_src/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── transformer_pg.py
│   │   │   │   ├── postprocess.py
│   │   │   │   └── preprocess.py
│   │   │   ├── quant_noise/
│   │   │   │   ├── README.md
│   │   │   │   └── transformer_quantization_config.yaml
│   │   │   ├── roberta/
│   │   │   │   ├── README.custom_classification.md
│   │   │   │   ├── README.glue.md
│   │   │   │   ├── README.md
│   │   │   │   ├── README.pretraining.md
│   │   │   │   ├── README.race.md
│   │   │   │   ├── commonsense_qa/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── commonsense_qa_task.py
│   │   │   │   │   └── download_cqa_data.sh
│   │   │   │   ├── config/
│   │   │   │   │   ├── finetuning/
│   │   │   │   │   │   ├── cola.yaml
│   │   │   │   │   │   ├── mnli.yaml
│   │   │   │   │   │   ├── mrpc.yaml
│   │   │   │   │   │   ├── qnli.yaml
│   │   │   │   │   │   ├── qqp.yaml
│   │   │   │   │   │   ├── rte.yaml
│   │   │   │   │   │   ├── sst_2.yaml
│   │   │   │   │   │   └── sts_b.yaml
│   │   │   │   │   └── pretraining/
│   │   │   │   │       └── base.yaml
│   │   │   │   ├── multiprocessing_bpe_encoder.py
│   │   │   │   ├── preprocess_GLUE_tasks.sh
│   │   │   │   ├── preprocess_RACE.py
│   │   │   │   ├── preprocess_RACE.sh
│   │   │   │   └── wsc/
│   │   │   │       ├── README.md
│   │   │   │       ├── __init__.py
│   │   │   │       ├── wsc_criterion.py
│   │   │   │       ├── wsc_task.py
│   │   │   │       └── wsc_utils.py
│   │   │   ├── rxf/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   └── rxf_src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── label_smoothed_cross_entropy_r3f.py
│   │   │   │       └── sentence_prediction_r3f.py
│   │   │   ├── scaling_nmt/
│   │   │   │   └── README.md
│   │   │   ├── shuffled_word_order/
│   │   │   │   ├── README.finetuning.md
│   │   │   │   └── README.md
│   │   │   ├── simultaneous_translation/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── docs/
│   │   │   │   │   ├── ende-mma.md
│   │   │   │   │   └── enja-waitk.md
│   │   │   │   ├── eval/
│   │   │   │   │   └── agents/
│   │   │   │   │       └── simul_t2t_enja.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── convtransformer_simul_trans.py
│   │   │   │   │   └── transformer_monotonic_attention.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── fixed_pre_decision.py
│   │   │   │   │   ├── monotonic_multihead_attention.py
│   │   │   │   │   └── monotonic_transformer_layer.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── functions.py
│   │   │   │       ├── monotonic_attention.py
│   │   │   │       └── p_choose_strategy.py
│   │   │   ├── speech_recognition/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── ASG_loss.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── cross_entropy_acc.py
│   │   │   │   ├── data/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── asr_dataset.py
│   │   │   │   │   ├── collaters.py
│   │   │   │   │   ├── data_utils.py
│   │   │   │   │   └── replabels.py
│   │   │   │   ├── datasets/
│   │   │   │   │   ├── asr_prep_json.py
│   │   │   │   │   └── prepare-librispeech.sh
│   │   │   │   ├── infer.py
│   │   │   │   ├── kaldi/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── add-self-loop-simple.cc
│   │   │   │   │   ├── config/
│   │   │   │   │   │   └── kaldi_initializer.yaml
│   │   │   │   │   ├── kaldi_decoder.py
│   │   │   │   │   └── kaldi_initializer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── vggtransformer.py
│   │   │   │   │   └── w2l_conv_glu_enc.py
│   │   │   │   ├── new/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── conf/
│   │   │   │   │   │   ├── hydra/
│   │   │   │   │   │   │   └── sweeper/
│   │   │   │   │   │   │       └── ax.yaml
│   │   │   │   │   │   └── infer.yaml
│   │   │   │   │   ├── decoders/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── base_decoder.py
│   │   │   │   │   │   ├── decoder.py
│   │   │   │   │   │   ├── decoder_config.py
│   │   │   │   │   │   ├── flashlight_decoder.py
│   │   │   │   │   │   └── viterbi_decoder.py
│   │   │   │   │   └── infer.py
│   │   │   │   ├── tasks/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── speech_recognition.py
│   │   │   │   ├── utils/
│   │   │   │   │   └── wer_utils.py
│   │   │   │   └── w2l_decoder.py
│   │   │   ├── speech_synthesis/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── docs/
│   │   │   │   │   ├── common_voice_example.md
│   │   │   │   │   ├── ljspeech_example.md
│   │   │   │   │   └── vctk_example.md
│   │   │   │   ├── evaluation/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── eval_asr.py
│   │   │   │   │   ├── eval_f0.py
│   │   │   │   │   ├── eval_sp.py
│   │   │   │   │   └── get_eval_manifest.py
│   │   │   │   ├── generate_waveform.py
│   │   │   │   ├── preprocessing/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── denoise_and_vad_audio.py
│   │   │   │   │   ├── denoiser/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── demucs.py
│   │   │   │   │   │   ├── pretrained.py
│   │   │   │   │   │   ├── resample.py
│   │   │   │   │   │   └── utils.py
│   │   │   │   │   ├── get_common_voice_audio_manifest.py
│   │   │   │   │   ├── get_feature_manifest.py
│   │   │   │   │   ├── get_ljspeech_audio_manifest.py
│   │   │   │   │   ├── get_speaker_embedding.py
│   │   │   │   │   ├── get_vctk_audio_manifest.py
│   │   │   │   │   ├── speaker_embedder/
│   │   │   │   │   │   └── __init__.py
│   │   │   │   │   └── vad/
│   │   │   │   │       └── __init__.py
│   │   │   │   └── utils.py
│   │   │   ├── speech_text_joint_to_text/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configs/
│   │   │   │   │   └── mustc_noise.list
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── text_guide_cross_entropy_acc.py
│   │   │   │   ├── docs/
│   │   │   │   │   ├── ende-mustc.md
│   │   │   │   │   └── iwslt2021.md
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── s2t_dualinputtransformer.py
│   │   │   │   │   └── s2t_dualinputxmtransformer.py
│   │   │   │   ├── scripts/
│   │   │   │   │   └── g2p_encode.py
│   │   │   │   └── tasks/
│   │   │   │       ├── __init__.py
│   │   │   │       └── speech_text_joint.py
│   │   │   ├── speech_to_speech/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── benchmarking/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── configs/
│   │   │   │   │   │   ├── 2StageS2ST.yaml
│   │   │   │   │   │   ├── 3StageS2ST.yaml
│   │   │   │   │   │   ├── DirectS2U.yaml
│   │   │   │   │   │   └── S2T.yaml
│   │   │   │   │   ├── core.py
│   │   │   │   │   ├── data_utils.py
│   │   │   │   │   └── get_metrics.py
│   │   │   │   ├── generate_waveform_from_code.py
│   │   │   │   └── preprocessing/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── data_utils.py
│   │   │   │       ├── prep_s2spect_data.py
│   │   │   │       └── prep_s2ut_data.py
│   │   │   ├── speech_to_text/
│   │   │   │   ├── README.md
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── docs/
│   │   │   │   │   ├── covost_example.md
│   │   │   │   │   ├── librispeech_example.md
│   │   │   │   │   ├── mtedx_example.md
│   │   │   │   │   ├── mustc_example.md
│   │   │   │   │   └── simulst_mustc_example.md
│   │   │   │   ├── prep_covost_data.py
│   │   │   │   ├── prep_librispeech_data.py
│   │   │   │   ├── prep_mtedx_data.py
│   │   │   │   ├── prep_mustc_data.py
│   │   │   │   ├── seg_mustc_data.py
│   │   │   │   └── simultaneous_translation/
│   │   │   │       └── agents/
│   │   │   │           └── fairseq_simul_st_agent.py
│   │   │   ├── stories/
│   │   │   │   └── README.md
│   │   │   ├── textless_nlp/
│   │   │   │   ├── gslm/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── metrics/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── abx_metrics/
│   │   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   │   └── dump_abx_feats.py
│   │   │   │   │   │   └── asr_metrics/
│   │   │   │   │   │       ├── README.md
│   │   │   │   │   │       ├── continuation_eval.py
│   │   │   │   │   │       ├── misc/
│   │   │   │   │   │       │   ├── bleu_utils.py
│   │   │   │   │   │       │   ├── cut_as.py
│   │   │   │   │   │       │   └── dict.ltr.txt
│   │   │   │   │   │       ├── ppx.py
│   │   │   │   │   │       └── self_auto_bleu.py
│   │   │   │   │   ├── speech2unit/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── clustering/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── cluster_kmeans.py
│   │   │   │   │   │   │   ├── dump_feats.py
│   │   │   │   │   │   │   ├── quantize_with_kmeans.py
│   │   │   │   │   │   │   └── utils.py
│   │   │   │   │   │   └── pretrained/
│   │   │   │   │   │       ├── cpc_feature_reader.py
│   │   │   │   │   │       ├── hubert_feature_reader.py
│   │   │   │   │   │       ├── logmel_feature_reader.py
│   │   │   │   │   │       ├── utils.py
│   │   │   │   │   │       └── w2v2_feature_reader.py
│   │   │   │   │   ├── tools/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   └── resynthesize_speech.py
│   │   │   │   │   ├── ulm/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   └── sample.py
│   │   │   │   │   └── unit2speech/
│   │   │   │   │       ├── README.md
│   │   │   │   │       ├── convert_to_16k.py
│   │   │   │   │       ├── glow.py
│   │   │   │   │       ├── multiproc.py
│   │   │   │   │       ├── synthesize_audio_from_units.py
│   │   │   │   │       ├── tacotron2/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   ├── audio_processing.py
│   │   │   │   │       │   ├── cleaners.py
│   │   │   │   │       │   ├── cmudict.py
│   │   │   │   │       │   ├── layers.py
│   │   │   │   │       │   ├── model.py
│   │   │   │   │       │   ├── numbers.py
│   │   │   │   │       │   ├── stft.py
│   │   │   │   │       │   ├── symbols.py
│   │   │   │   │       │   ├── text.py
│   │   │   │   │       │   ├── utils.py
│   │   │   │   │       │   └── waveglow_denoiser.py
│   │   │   │   │       ├── tts_data.py
│   │   │   │   │       └── utils.py
│   │   │   │   └── speech-resynth/
│   │   │   │       └── README.md
│   │   │   ├── translation/
│   │   │   │   ├── README.md
│   │   │   │   ├── prepare-iwslt14.sh
│   │   │   │   ├── prepare-iwslt17-multilingual.sh
│   │   │   │   ├── prepare-wmt14en2de.sh
│   │   │   │   └── prepare-wmt14en2fr.sh
│   │   │   ├── translation_moe/
│   │   │   │   ├── README.md
│   │   │   │   ├── score.py
│   │   │   │   └── translation_moe_src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── logsumexp_moe.py
│   │   │   │       ├── mean_pool_gating_network.py
│   │   │   │       └── translation_moe.py
│   │   │   ├── truncated_bptt/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transformer_xl_model.py
│   │   │   │   └── truncated_bptt_lm_task.py
│   │   │   ├── unsupervised_quality_estimation/
│   │   │   │   ├── README.md
│   │   │   │   ├── aggregate_scores.py
│   │   │   │   ├── meteor.py
│   │   │   │   └── repeat_lines.py
│   │   │   ├── wav2vec/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config/
│   │   │   │   │   ├── finetuning/
│   │   │   │   │   │   ├── base_100h.yaml
│   │   │   │   │   │   ├── base_10h.yaml
│   │   │   │   │   │   ├── base_10m.yaml
│   │   │   │   │   │   ├── base_1h.yaml
│   │   │   │   │   │   ├── base_960h.yaml
│   │   │   │   │   │   ├── vox_100h.yaml
│   │   │   │   │   │   ├── vox_10h.yaml
│   │   │   │   │   │   ├── vox_10m.yaml
│   │   │   │   │   │   ├── vox_1h.yaml
│   │   │   │   │   │   └── vox_960h.yaml
│   │   │   │   │   └── pretraining/
│   │   │   │   │       ├── wav2vec2_base_librispeech.yaml
│   │   │   │   │       ├── wav2vec2_large_librivox.yaml
│   │   │   │   │       ├── wav2vec2_large_librivox_tpu-pod.yaml
│   │   │   │   │       └── wav2vec2_large_librivox_tpu.yaml
│   │   │   │   ├── libri_labels.py
│   │   │   │   ├── scripts/
│   │   │   │   │   └── binarize_manifest.sh
│   │   │   │   ├── unsupervised/
│   │   │   │   │   ├── README.md
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── config/
│   │   │   │   │   │   ├── finetuning/
│   │   │   │   │   │   │   └── w2v_finetune.yaml
│   │   │   │   │   │   ├── gan/
│   │   │   │   │   │   │   └── w2vu.yaml
│   │   │   │   │   │   ├── generate/
│   │   │   │   │   │   │   └── viterbi.yaml
│   │   │   │   │   │   ├── timit_matched/
│   │   │   │   │   │   │   ├── test.uid
│   │   │   │   │   │   │   ├── train.uid
│   │   │   │   │   │   │   ├── train_text.uid
│   │   │   │   │   │   │   └── valid.uid
│   │   │   │   │   │   └── timit_unmatched/
│   │   │   │   │   │       ├── test.uid
│   │   │   │   │   │       ├── train.uid
│   │   │   │   │   │       ├── train_text.uid
│   │   │   │   │   │       └── valid.uid
│   │   │   │   │   ├── data/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── extracted_features_dataset.py
│   │   │   │   │   │   └── random_input_dataset.py
│   │   │   │   │   ├── kaldi_self_train/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   └── st/
│   │   │   │   │   │       ├── cmd.sh
│   │   │   │   │   │       ├── decode_phone.sh
│   │   │   │   │   │       ├── decode_word_step1.sh
│   │   │   │   │   │       ├── decode_word_step2.sh
│   │   │   │   │   │       ├── local/
│   │   │   │   │   │       │   ├── copy_aligned_text.py
│   │   │   │   │   │       │   ├── decode.sh
│   │   │   │   │   │       │   ├── prepare_data_from_w2v.py
│   │   │   │   │   │       │   ├── prepare_lang.sh
│   │   │   │   │   │       │   ├── prepare_lang_word.sh
│   │   │   │   │   │       │   ├── prepare_lm.sh
│   │   │   │   │   │       │   ├── score.sh
│   │   │   │   │   │       │   ├── show_wer.sh
│   │   │   │   │   │       │   ├── train_subset_lgbeam.sh
│   │   │   │   │   │       │   ├── unsup_select.py
│   │   │   │   │   │       │   ├── unsup_select_decode.sh
│   │   │   │   │   │       │   └── unsup_select_decode_word.sh
│   │   │   │   │   │       ├── path.sh
│   │   │   │   │   │       ├── steps
│   │   │   │   │   │       ├── steps_gan/
│   │   │   │   │   │       │   ├── train_deltas.sh
│   │   │   │   │   │       │   ├── train_lda_mllt.sh
│   │   │   │   │   │       │   └── train_sat.sh
│   │   │   │   │   │       ├── train.sh
│   │   │   │   │   │       └── utils
│   │   │   │   │   ├── models/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── wav2vec_u.py
│   │   │   │   │   ├── scripts/
│   │   │   │   │   │   ├── apply_pca.py
│   │   │   │   │   │   ├── copy_labels.py
│   │   │   │   │   │   ├── filter_lexicon.py
│   │   │   │   │   │   ├── filter_tsv.py
│   │   │   │   │   │   ├── g2p_wrd_to_phn.py
│   │   │   │   │   │   ├── ltr_to_wrd.py
│   │   │   │   │   │   ├── mean_pool.py
│   │   │   │   │   │   ├── merge_clusters.py
│   │   │   │   │   │   ├── normalize_and_filter_text.py
│   │   │   │   │   │   ├── normalize_text.py
│   │   │   │   │   │   ├── pca.py
│   │   │   │   │   │   ├── phonemize_with_sil.py
│   │   │   │   │   │   ├── prepare_audio.sh
│   │   │   │   │   │   ├── prepare_text.sh
│   │   │   │   │   │   ├── prepare_timit.sh
│   │   │   │   │   │   ├── remove_silence.py
│   │   │   │   │   │   ├── vads.py
│   │   │   │   │   │   ├── wav2vec_apply_cluster_faiss.py
│   │   │   │   │   │   ├── wav2vec_cluster_faiss.py
│   │   │   │   │   │   ├── wav2vec_extract_features.py
│   │   │   │   │   │   ├── wer.py
│   │   │   │   │   │   └── wrd_to_ltr.py
│   │   │   │   │   ├── tasks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── unpaired_audio_text.py
│   │   │   │   │   └── w2vu_generate.py
│   │   │   │   ├── vq-wav2vec_featurize.py
│   │   │   │   ├── wav2vec_featurize.py
│   │   │   │   ├── wav2vec_manifest.py
│   │   │   │   └── xlsr/
│   │   │   │       ├── README.md
│   │   │   │       └── config/
│   │   │   │           └── finetune.yaml
│   │   │   ├── wmt19/
│   │   │   │   └── README.md
│   │   │   ├── wmt20/
│   │   │   │   └── README.md
│   │   │   ├── wmt21/
│   │   │   │   ├── README.md
│   │   │   │   ├── eval.sh
│   │   │   │   └── scripts/
│   │   │   │       ├── normalize-punctuation.perl
│   │   │   │       └── replace-unicode-punctuation.perl
│   │   │   ├── xglm/
│   │   │   │   ├── README.md
│   │   │   │   └── model_card.md
│   │   │   └── xlmr/
│   │   │       └── README.md
│   │   ├── fairseq/
│   │   │   ├── __init__.py
│   │   │   ├── benchmark/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dummy_dataset.py
│   │   │   │   ├── dummy_lm.py
│   │   │   │   ├── dummy_masked_lm.py
│   │   │   │   ├── dummy_model.py
│   │   │   │   └── dummy_mt.py
│   │   │   ├── binarizer.py
│   │   │   ├── checkpoint_utils.py
│   │   │   ├── clib/
│   │   │   │   ├── cuda/
│   │   │   │   │   ├── ngram_repeat_block_cuda.cpp
│   │   │   │   │   └── ngram_repeat_block_cuda_kernel.cu
│   │   │   │   ├── libbase/
│   │   │   │   │   └── balanced_assignment.cpp
│   │   │   │   ├── libbleu/
│   │   │   │   │   ├── libbleu.cpp
│   │   │   │   │   └── module.cpp
│   │   │   │   ├── libnat/
│   │   │   │   │   └── edit_dist.cpp
│   │   │   │   └── libnat_cuda/
│   │   │   │       ├── binding.cpp
│   │   │   │       ├── edit_dist.cu
│   │   │   │       └── edit_dist.h
│   │   │   ├── config/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.yaml
│   │   │   │   └── model/
│   │   │   │       ├── transformer_lm/
│   │   │   │       │   ├── transformer_lm_baevski_gbw.yaml
│   │   │   │       │   ├── transformer_lm_baevski_wiki103.yaml
│   │   │   │       │   ├── transformer_lm_big.yaml
│   │   │   │       │   ├── transformer_lm_gbw.yaml
│   │   │   │       │   ├── transformer_lm_gpt.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_big.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_medium.yaml
│   │   │   │       │   ├── transformer_lm_gpt2_small.yaml
│   │   │   │       │   └── transformer_lm_wiki103.yaml
│   │   │   │       ├── wav2vec/
│   │   │   │       │   └── vq_wav2vec_gumbel.yaml
│   │   │   │       └── wav2vec2/
│   │   │   │           ├── wav2vec2_base.yaml
│   │   │   │           └── wav2vec2_large.yaml
│   │   │   ├── criterions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_loss.py
│   │   │   │   ├── composite_loss.py
│   │   │   │   ├── cross_entropy.py
│   │   │   │   ├── ctc.py
│   │   │   │   ├── fairseq_criterion.py
│   │   │   │   ├── fastspeech2_loss.py
│   │   │   │   ├── hubert_criterion.py
│   │   │   │   ├── label_smoothed_cross_entropy.py
│   │   │   │   ├── label_smoothed_cross_entropy_latency_augmented.py
│   │   │   │   ├── label_smoothed_cross_entropy_with_alignment.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── model_criterion.py
│   │   │   │   ├── nat_loss.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   ├── sentence_ranking.py
│   │   │   │   ├── speech_to_speech_criterion.py
│   │   │   │   ├── tacotron2_loss.py
│   │   │   │   └── wav2vec_criterion.py
│   │   │   ├── data/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── add_target_dataset.py
│   │   │   │   ├── append_token_dataset.py
│   │   │   │   ├── audio/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── audio_utils.py
│   │   │   │   │   ├── data_cfg.py
│   │   │   │   │   ├── feature_transforms/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── delta_deltas.py
│   │   │   │   │   │   ├── global_cmvn.py
│   │   │   │   │   │   ├── specaugment.py
│   │   │   │   │   │   └── utterance_cmvn.py
│   │   │   │   │   ├── frm_text_to_speech_dataset.py
│   │   │   │   │   ├── hubert_dataset.py
│   │   │   │   │   ├── multi_modality_dataset.py
│   │   │   │   │   ├── raw_audio_dataset.py
│   │   │   │   │   ├── speech_to_speech_dataset.py
│   │   │   │   │   ├── speech_to_text_dataset.py
│   │   │   │   │   ├── speech_to_text_joint_dataset.py
│   │   │   │   │   └── text_to_speech_dataset.py
│   │   │   │   ├── backtranslation_dataset.py
│   │   │   │   ├── base_wrapper_dataset.py
│   │   │   │   ├── bucket_pad_length_dataset.py
│   │   │   │   ├── colorize_dataset.py
│   │   │   │   ├── concat_dataset.py
│   │   │   │   ├── concat_sentences_dataset.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── data_utils_fast.pyx
│   │   │   │   ├── denoising_dataset.py
│   │   │   │   ├── dictionary.py
│   │   │   │   ├── encoders/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── byte_bpe.py
│   │   │   │   │   ├── byte_utils.py
│   │   │   │   │   ├── bytes.py
│   │   │   │   │   ├── characters.py
│   │   │   │   │   ├── fastbpe.py
│   │   │   │   │   ├── gpt2_bpe.py
│   │   │   │   │   ├── gpt2_bpe_utils.py
│   │   │   │   │   ├── hf_bert_bpe.py
│   │   │   │   │   ├── hf_byte_bpe.py
│   │   │   │   │   ├── moses_tokenizer.py
│   │   │   │   │   ├── nltk_tokenizer.py
│   │   │   │   │   ├── sentencepiece_bpe.py
│   │   │   │   │   ├── space_tokenizer.py
│   │   │   │   │   ├── subword_nmt_bpe.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── fairseq_dataset.py
│   │   │   │   ├── fasta_dataset.py
│   │   │   │   ├── huffman/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huffman_coder.py
│   │   │   │   │   └── huffman_mmap_indexed_dataset.py
│   │   │   │   ├── id_dataset.py
│   │   │   │   ├── indexed_dataset.py
│   │   │   │   ├── iterators.py
│   │   │   │   ├── language_pair_dataset.py
│   │   │   │   ├── legacy/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── block_pair_dataset.py
│   │   │   │   │   ├── masked_lm_dataset.py
│   │   │   │   │   └── masked_lm_dictionary.py
│   │   │   │   ├── list_dataset.py
│   │   │   │   ├── lm_context_window_dataset.py
│   │   │   │   ├── lru_cache_dataset.py
│   │   │   │   ├── mask_tokens_dataset.py
│   │   │   │   ├── monolingual_dataset.py
│   │   │   │   ├── multi_corpus_dataset.py
│   │   │   │   ├── multi_corpus_sampled_dataset.py
│   │   │   │   ├── multilingual/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── multilingual_data_manager.py
│   │   │   │   │   ├── multilingual_utils.py
│   │   │   │   │   ├── sampled_multi_dataset.py
│   │   │   │   │   ├── sampled_multi_epoch_dataset.py
│   │   │   │   │   └── sampling_method.py
│   │   │   │   ├── nested_dictionary_dataset.py
│   │   │   │   ├── noising.py
│   │   │   │   ├── num_samples_dataset.py
│   │   │   │   ├── numel_dataset.py
│   │   │   │   ├── offset_tokens_dataset.py
│   │   │   │   ├── pad_dataset.py
│   │   │   │   ├── plasma_utils.py
│   │   │   │   ├── prepend_dataset.py
│   │   │   │   ├── prepend_token_dataset.py
│   │   │   │   ├── raw_label_dataset.py
│   │   │   │   ├── replace_dataset.py
│   │   │   │   ├── resampling_dataset.py
│   │   │   │   ├── roll_dataset.py
│   │   │   │   ├── round_robin_zip_datasets.py
│   │   │   │   ├── shorten_dataset.py
│   │   │   │   ├── sort_dataset.py
│   │   │   │   ├── strip_token_dataset.py
│   │   │   │   ├── subsample_dataset.py
│   │   │   │   ├── text_compressor.py
│   │   │   │   ├── token_block_dataset.py
│   │   │   │   ├── token_block_utils_fast.pyx
│   │   │   │   ├── transform_eos_concat_langpair_dataset.py
│   │   │   │   ├── transform_eos_dataset.py
│   │   │   │   └── transform_eos_lang_pair_dataset.py
│   │   │   ├── dataclass/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configs.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── initialize.py
│   │   │   │   └── utils.py
│   │   │   ├── distributed/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── distributed_timeout_wrapper.py
│   │   │   │   ├── fully_sharded_data_parallel.py
│   │   │   │   ├── legacy_distributed_data_parallel.py
│   │   │   │   ├── module_proxy_wrapper.py
│   │   │   │   ├── tpu_distributed_data_parallel.py
│   │   │   │   └── utils.py
│   │   │   ├── ds_trainer.py
│   │   │   ├── file_chunker_utils.py
│   │   │   ├── file_io.py
│   │   │   ├── file_utils.py
│   │   │   ├── hub_utils.py
│   │   │   ├── incremental_decoding_utils.py
│   │   │   ├── iterative_refinement_generator.py
│   │   │   ├── logging/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── meters.py
│   │   │   │   ├── metrics.py
│   │   │   │   └── progress_bar.py
│   │   │   ├── model_parallel/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── criterions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── vocab_parallel_cross_entropy.py
│   │   │   │   ├── megatron_trainer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── pipeline_parallel_transformer/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── layers.py
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── roberta/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── model.py
│   │   │   │   │   ├── transformer.py
│   │   │   │   │   └── transformer_lm.py
│   │   │   │   └── modules/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── multihead_attention.py
│   │   │   │       └── transformer_layer.py
│   │   │   ├── models/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bart/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── composite_encoder.py
│   │   │   │   ├── distributed_fairseq_model.py
│   │   │   │   ├── ema/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── ema.py
│   │   │   │   ├── fairseq_decoder.py
│   │   │   │   ├── fairseq_encoder.py
│   │   │   │   ├── fairseq_incremental_decoder.py
│   │   │   │   ├── fairseq_model.py
│   │   │   │   ├── fconv.py
│   │   │   │   ├── fconv_lm.py
│   │   │   │   ├── fconv_self_att.py
│   │   │   │   ├── hubert/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── hubert.py
│   │   │   │   │   └── hubert_asr.py
│   │   │   │   ├── huggingface/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── hf_gpt2.py
│   │   │   │   ├── lightconv.py
│   │   │   │   ├── lightconv_lm.py
│   │   │   │   ├── lstm.py
│   │   │   │   ├── lstm_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── model_utils.py
│   │   │   │   ├── multilingual_transformer.py
│   │   │   │   ├── nat/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cmlm_transformer.py
│   │   │   │   │   ├── fairseq_nat_model.py
│   │   │   │   │   ├── insertion_transformer.py
│   │   │   │   │   ├── iterative_nonautoregressive_transformer.py
│   │   │   │   │   ├── levenshtein_transformer.py
│   │   │   │   │   ├── levenshtein_utils.py
│   │   │   │   │   ├── nat_crf_transformer.py
│   │   │   │   │   ├── nonautoregressive_ensembles.py
│   │   │   │   │   └── nonautoregressive_transformer.py
│   │   │   │   ├── roberta/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alignment_utils.py
│   │   │   │   │   ├── enc_dec.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   ├── model.py
│   │   │   │   │   ├── model_camembert.py
│   │   │   │   │   ├── model_gottbert.py
│   │   │   │   │   └── model_xlmr.py
│   │   │   │   ├── speech_to_speech/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── modules.py
│   │   │   │   │   └── s2s_transformer.py
│   │   │   │   ├── speech_to_text/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── berard.py
│   │   │   │   │   ├── convtransformer.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   ├── modules/
│   │   │   │   │   │   ├── augmented_memory_attention.py
│   │   │   │   │   │   └── emformer.py
│   │   │   │   │   ├── s2t_conformer.py
│   │   │   │   │   ├── s2t_transformer.py
│   │   │   │   │   ├── utils.py
│   │   │   │   │   └── xm_transformer.py
│   │   │   │   ├── text_to_speech/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── codehifigan.py
│   │   │   │   │   ├── fastspeech2.py
│   │   │   │   │   ├── hifigan.py
│   │   │   │   │   ├── hub_interface.py
│   │   │   │   │   ├── tacotron2.py
│   │   │   │   │   ├── tts_transformer.py
│   │   │   │   │   └── vocoder.py
│   │   │   │   ├── transformer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── transformer_base.py
│   │   │   │   │   ├── transformer_config.py
│   │   │   │   │   ├── transformer_decoder.py
│   │   │   │   │   ├── transformer_encoder.py
│   │   │   │   │   └── transformer_legacy.py
│   │   │   │   ├── transformer_align.py
│   │   │   │   ├── transformer_from_pretrained_xlm.py
│   │   │   │   ├── transformer_lm.py
│   │   │   │   └── wav2vec/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── utils.py
│   │   │   │       ├── wav2vec.py
│   │   │   │       ├── wav2vec2.py
│   │   │   │       └── wav2vec2_asr.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive_input.py
│   │   │   │   ├── adaptive_softmax.py
│   │   │   │   ├── base_layer.py
│   │   │   │   ├── beamable_mm.py
│   │   │   │   ├── character_token_embedder.py
│   │   │   │   ├── checkpoint_activations.py
│   │   │   │   ├── conformer_layer.py
│   │   │   │   ├── conv_tbc.py
│   │   │   │   ├── cross_entropy.py
│   │   │   │   ├── cuda_utils.cu
│   │   │   │   ├── downsampled_multihead_attention.py
│   │   │   │   ├── dynamic_convolution.py
│   │   │   │   ├── dynamic_crf_layer.py
│   │   │   │   ├── dynamicconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── dynamicconv_cuda.cpp
│   │   │   │   │   ├── dynamicconv_cuda.cuh
│   │   │   │   │   ├── dynamicconv_cuda_kernel.cu
│   │   │   │   │   ├── dynamicconv_layer.py
│   │   │   │   │   ├── dynamiconv_cpu.cpp
│   │   │   │   │   └── setup.py
│   │   │   │   ├── espnet_multihead_attention.py
│   │   │   │   ├── fairseq_dropout.py
│   │   │   │   ├── fp32_batch_norm.py
│   │   │   │   ├── fp32_group_norm.py
│   │   │   │   ├── fp32_instance_norm.py
│   │   │   │   ├── gelu.py
│   │   │   │   ├── grad_multiply.py
│   │   │   │   ├── gumbel_vector_quantizer.py
│   │   │   │   ├── kmeans_attention.py
│   │   │   │   ├── kmeans_vector_quantizer.py
│   │   │   │   ├── layer_drop.py
│   │   │   │   ├── layer_norm.py
│   │   │   │   ├── learned_positional_embedding.py
│   │   │   │   ├── lightconv_layer/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cuda_function_gen.py
│   │   │   │   │   ├── lightconv_cuda.cpp
│   │   │   │   │   ├── lightconv_cuda.cuh
│   │   │   │   │   ├── lightconv_cuda_kernel.cu
│   │   │   │   │   ├── lightconv_layer.py
│   │   │   │   │   └── setup.py
│   │   │   │   ├── lightweight_convolution.py
│   │   │   │   ├── linearized_convolution.py
│   │   │   │   ├── location_attention.py
│   │   │   │   ├── lstm_cell_with_zoneout.py
│   │   │   │   ├── multihead_attention.py
│   │   │   │   ├── positional_embedding.py
│   │   │   │   ├── positional_encoding.py
│   │   │   │   ├── quant_noise.py
│   │   │   │   ├── quantization/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── pq/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── em.py
│   │   │   │   │   │   ├── modules/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── qconv.py
│   │   │   │   │   │   │   ├── qemb.py
│   │   │   │   │   │   │   └── qlinear.py
│   │   │   │   │   │   ├── pq.py
│   │   │   │   │   │   └── utils.py
│   │   │   │   │   ├── quantization_options.py
│   │   │   │   │   └── scalar/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── modules/
│   │   │   │   │       │   ├── __init__.py
│   │   │   │   │       │   ├── qact.py
│   │   │   │   │       │   ├── qconv.py
│   │   │   │   │       │   ├── qemb.py
│   │   │   │   │       │   └── qlinear.py
│   │   │   │   │       ├── ops.py
│   │   │   │   │       └── utils.py
│   │   │   │   ├── rotary_positional_embedding.py
│   │   │   │   ├── same_pad.py
│   │   │   │   ├── scalar_bias.py
│   │   │   │   ├── sinusoidal_positional_embedding.py
│   │   │   │   ├── sparse_multihead_attention.py
│   │   │   │   ├── sparse_transformer_sentence_encoder.py
│   │   │   │   ├── sparse_transformer_sentence_encoder_layer.py
│   │   │   │   ├── transformer_layer.py
│   │   │   │   ├── transformer_sentence_encoder.py
│   │   │   │   ├── transformer_sentence_encoder_layer.py
│   │   │   │   ├── transpose_last.py
│   │   │   │   ├── unfold.py
│   │   │   │   └── vggblock.py
│   │   │   ├── nan_detector.py
│   │   │   ├── ngram_repeat_block.py
│   │   │   ├── optim/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adadelta.py
│   │   │   │   ├── adafactor.py
│   │   │   │   ├── adagrad.py
│   │   │   │   ├── adam.py
│   │   │   │   ├── adamax.py
│   │   │   │   ├── amp_optimizer.py
│   │   │   │   ├── bmuf.py
│   │   │   │   ├── composite.py
│   │   │   │   ├── cpu_adam.py
│   │   │   │   ├── dynamic_loss_scaler.py
│   │   │   │   ├── fairseq_optimizer.py
│   │   │   │   ├── fp16_optimizer.py
│   │   │   │   ├── fused_adam.py
│   │   │   │   ├── fused_lamb.py
│   │   │   │   ├── lr_scheduler/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cosine_lr_scheduler.py
│   │   │   │   │   ├── fairseq_lr_scheduler.py
│   │   │   │   │   ├── fixed_schedule.py
│   │   │   │   │   ├── inverse_square_root_schedule.py
│   │   │   │   │   ├── manual_lr_scheduler.py
│   │   │   │   │   ├── pass_through.py
│   │   │   │   │   ├── polynomial_decay_schedule.py
│   │   │   │   │   ├── reduce_lr_on_plateau.py
│   │   │   │   │   ├── step_lr_scheduler.py
│   │   │   │   │   ├── tri_stage_lr_scheduler.py
│   │   │   │   │   └── triangular_lr_scheduler.py
│   │   │   │   ├── nag.py
│   │   │   │   ├── sgd.py
│   │   │   │   └── shard.py
│   │   │   ├── options.py
│   │   │   ├── pdb.py
│   │   │   ├── quantization_utils.py
│   │   │   ├── registry.py
│   │   │   ├── scoring/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bleu.py
│   │   │   │   ├── chrf.py
│   │   │   │   ├── meteor.py
│   │   │   │   ├── tokenizer.py
│   │   │   │   └── wer.py
│   │   │   ├── search.py
│   │   │   ├── sequence_generator.py
│   │   │   ├── sequence_scorer.py
│   │   │   ├── speech_generator.py
│   │   │   ├── tasks/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_finetuning.py
│   │   │   │   ├── audio_pretraining.py
│   │   │   │   ├── cross_lingual_lm.py
│   │   │   │   ├── denoising.py
│   │   │   │   ├── fairseq_task.py
│   │   │   │   ├── frm_text_to_speech.py
│   │   │   │   ├── hubert_pretraining.py
│   │   │   │   ├── language_modeling.py
│   │   │   │   ├── legacy_masked_lm.py
│   │   │   │   ├── masked_lm.py
│   │   │   │   ├── multilingual_denoising.py
│   │   │   │   ├── multilingual_language_modeling.py
│   │   │   │   ├── multilingual_masked_lm.py
│   │   │   │   ├── multilingual_translation.py
│   │   │   │   ├── online_backtranslation.py
│   │   │   │   ├── semisupervised_translation.py
│   │   │   │   ├── sentence_prediction.py
│   │   │   │   ├── sentence_ranking.py
│   │   │   │   ├── simultaneous_translation.py
│   │   │   │   ├── speech_to_speech.py
│   │   │   │   ├── speech_to_text.py
│   │   │   │   ├── text_to_speech.py
│   │   │   │   ├── translation.py
│   │   │   │   ├── translation_from_pretrained_bart.py
│   │   │   │   ├── translation_from_pretrained_xlm.py
│   │   │   │   ├── translation_lev.py
│   │   │   │   └── translation_multi_simple_epoch.py
│   │   │   ├── token_generation_constraints.py
│   │   │   ├── tokenizer.py
│   │   │   ├── trainer.py
│   │   │   ├── utils.py
│   │   │   └── version.txt
│   │   ├── fairseq_cli/
│   │   │   ├── __init__.py
│   │   │   ├── eval_lm.py
│   │   │   ├── generate.py
│   │   │   ├── hydra_train.py
│   │   │   ├── interactive.py
│   │   │   ├── preprocess.py
│   │   │   ├── score.py
│   │   │   ├── train.py
│   │   │   └── validate.py
│   │   ├── hubconf.py
│   │   ├── pyproject.toml
│   │   ├── scripts/
│   │   │   ├── __init__.py
│   │   │   ├── average_checkpoints.py
│   │   │   ├── build_sym_alignment.py
│   │   │   ├── compare_namespaces.py
│   │   │   ├── compound_split_bleu.sh
│   │   │   ├── constraints/
│   │   │   │   ├── extract.py
│   │   │   │   └── validate.py
│   │   │   ├── convert_dictionary.lua
│   │   │   ├── convert_model.lua
│   │   │   ├── count_docs.py
│   │   │   ├── read_binarized.py
│   │   │   ├── rm_pt.py
│   │   │   ├── sacrebleu.sh
│   │   │   ├── shard_docs.py
│   │   │   ├── split_train_valid_docs.py
│   │   │   ├── spm_decode.py
│   │   │   ├── spm_encode.py
│   │   │   ├── spm_train.py
│   │   │   └── test_fsdp.sh
│   │   ├── setup.cfg
│   │   ├── setup.py
│   │   └── train.py
│   ├── generate.py
│   ├── infinibatch/
│   │   ├── .gitattributes
│   │   ├── .github/
│   │   │   └── workflows/
│   │   │       ├── gh-pages.yml
│   │   │       └── unit_tests.yml
│   │   ├── .gitignore
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── SECURITY.md
│   │   ├── docs/
│   │   │   └── config.mako
│   │   ├── infinibatch/
│   │   │   ├── __init__.py
│   │   │   ├── datasets.py
│   │   │   └── iterators.py
│   │   ├── pyproject.toml
│   │   ├── requirements.txt
│   │   ├── setup.py
│   │   └── test/
│   │       ├── test_datasets.py
│   │       ├── test_doctests.py
│   │       └── test_iterators.py
│   ├── interactive.py
│   ├── open_clip/
│   │   ├── .github/
│   │   │   └── workflows/
│   │   │       ├── ci.yml
│   │   │       └── python-publish.yml
│   │   ├── .gitignore
│   │   ├── CITATION.cff
│   │   ├── HISTORY.md
│   │   ├── LICENSE
│   │   ├── MANIFEST.in
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── docs/
│   │   │   ├── Interacting_with_open_clip.ipynb
│   │   │   └── clip_conceptual_captions.md
│   │   ├── requirements-test.txt
│   │   ├── requirements-training.txt
│   │   ├── requirements.txt
│   │   ├── setup.py
│   │   └── src/
│   │       ├── data/
│   │       │   └── gather_cc.py
│   │       ├── open_clip/
│   │       │   ├── __init__.py
│   │       │   ├── factory.py
│   │       │   ├── loss.py
│   │       │   ├── model.py
│   │       │   ├── model_configs/
│   │       │   │   ├── RN101-quickgelu.json
│   │       │   │   ├── RN101.json
│   │       │   │   ├── RN50-quickgelu.json
│   │       │   │   ├── RN50.json
│   │       │   │   ├── RN50x16.json
│   │       │   │   ├── RN50x4.json
│   │       │   │   ├── ViT-B-16-plus-240.json
│   │       │   │   ├── ViT-B-16-plus.json
│   │       │   │   ├── ViT-B-16.json
│   │       │   │   ├── ViT-B-32-plus-256.json
│   │       │   │   ├── ViT-B-32-quickgelu.json
│   │       │   │   ├── ViT-B-32.json
│   │       │   │   ├── ViT-H-14.json
│   │       │   │   ├── ViT-H-16.json
│   │       │   │   ├── ViT-L-14-280.json
│   │       │   │   ├── ViT-L-14-336.json
│   │       │   │   ├── ViT-L-14.json
│   │       │   │   ├── ViT-L-16-320.json
│   │       │   │   ├── ViT-L-16.json
│   │       │   │   ├── ViT-g-14.json
│   │       │   │   ├── timm-efficientnetv2_rw_s.json
│   │       │   │   ├── timm-resnet50d.json
│   │       │   │   ├── timm-resnetaa50d.json
│   │       │   │   ├── timm-resnetblur50.json
│   │       │   │   ├── timm-swin_base_patch4_window7_224.json
│   │       │   │   ├── timm-vit_base_patch16_224.json
│   │       │   │   ├── timm-vit_base_patch32_224.json
│   │       │   │   └── timm-vit_small_patch16_224.json
│   │       │   ├── openai.py
│   │       │   ├── pretrained.py
│   │       │   ├── timm_model.py
│   │       │   ├── tokenizer.py
│   │       │   ├── transform.py
│   │       │   ├── utils.py
│   │       │   └── version.py
│   │       └── training/
│   │           ├── .gitignore
│   │           ├── __init__.py
│   │           ├── data.py
│   │           ├── distributed.py
│   │           ├── imagenet_zeroshot_data.py
│   │           ├── logger.py
│   │           ├── main.py
│   │           ├── params.py
│   │           ├── scheduler.py
│   │           ├── train.py
│   │           └── zero_shot.py
│   ├── preprocess.py
│   ├── requirements.txt
│   ├── run_gradio.sh
│   ├── torchscale/
│   │   ├── .gitignore
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── SECURITY.md
│   │   ├── SUPPORT.md
│   │   ├── examples/
│   │   │   ├── __init__.py
│   │   │   └── fairseq/
│   │   │       ├── README.md
│   │   │       ├── __init__.py
│   │   │       ├── generate.py
│   │   │       ├── interactive.py
│   │   │       ├── laion-token-base.sh
│   │   │       ├── laion-wild-token-base.sh
│   │   │       ├── models/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── bert.py
│   │   │       │   ├── language_modeling.py
│   │   │       │   └── machine_translation.py
│   │   │       ├── tasks/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── data/
│   │   │       │   │   ├── __init__.py
│   │   │       │   │   ├── basic_loader.py
│   │   │       │   │   ├── laion_loader.py
│   │   │       │   │   ├── laion_loader_test.py
│   │   │       │   │   ├── lm_loader.py
│   │   │       │   │   ├── mlm_loader.py
│   │   │       │   │   ├── spm_lm_loader.py
│   │   │       │   │   ├── utils.py
│   │   │       │   │   ├── wild_loader.py
│   │   │       │   │   ├── wild_loader_test.py
│   │   │       │   │   └── wild_loader_test_2.py
│   │   │       │   ├── gpt_base.py
│   │   │       │   ├── pretraining.py
│   │   │       │   └── vl_gpt_base.py
│   │   │       ├── train.py
│   │   │       ├── utils/
│   │   │       │   ├── __init__.py
│   │   │       │   └── sparse_clip.py
│   │   │       └── wild-token-base.sh
│   │   ├── setup.py
│   │   └── torchscale/
│   │       ├── __init__.py
│   │       ├── architecture/
│   │       │   ├── __init__.py
│   │       │   ├── config.py
│   │       │   ├── decoder.py
│   │       │   ├── encoder.py
│   │       │   ├── encoder_decoder.py
│   │       │   └── utils.py
│   │       ├── component/
│   │       │   ├── __init__.py
│   │       │   ├── droppath.py
│   │       │   ├── embedding.py
│   │       │   ├── feedforward_network.py
│   │       │   ├── multihead_attention.py
│   │       │   ├── multiway_network.py
│   │       │   ├── relative_position_bias.py
│   │       │   ├── sope_relative_position.py
│   │       │   └── xmoe/
│   │       │       ├── __init__.py
│   │       │       ├── moe_layer.py
│   │       │       └── routing.py
│   │       └── model/
│   │           ├── BEiT3.py
│   │           └── __init__.py
│   ├── train.py
│   ├── train.sh
│   ├── unilm/
│   │   ├── __init__.py
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   └── unigpt.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── basic_loader.py
│   │   │   ├── lm_loader.py
│   │   │   ├── spm_lm_loader.py
│   │   │   ├── utils.py
│   │   │   └── vl/
│   │   │       ├── Interleaved_loader.py
│   │   │       ├── laion2b_loader.py
│   │   │       ├── laion2b_obj_loader.py
│   │   │       ├── obj_utils.py
│   │   │       ├── vl_base_loader.py
│   │   │       └── vl_loader.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── connector.py
│   │   │   ├── gpt.py
│   │   │   ├── gpt_eval.py
│   │   │   ├── unigpt.py
│   │   │   └── vl/
│   │   │       ├── __init__.py
│   │   │       ├── clip.py
│   │   │       ├── openai.py
│   │   │       └── vlm_generator.py
│   │   └── tasks/
│   │       ├── __init__.py
│   │       ├── generation_obj.py
│   │       ├── gpt_base.py
│   │       └── gpt_interleaved_laion_obj.py
│   ├── validate.py
│   └── vl_setup_xl.sh
├── kosmos-2.5/
│   ├── CASES.md
│   ├── CODE_OF_CONDUCT.md
│   ├── LICENSE
│   ├── README.md
│   ├── SECURITY.md
│   ├── SUPPORT.md
│   ├── __init.py
│   ├── dict.txt
│   ├── draw_bbox.py
│   ├── inference.py
│   ├── kosmos2_5/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── connector.py
│   │   │   ├── gpt.py
│   │   │   └── unigpt.py
│   │   └── tasks/
│   │       ├── __init__.py
│   │       └── generation.py
│   └── requirements.txt
├── layoutlm/
│   ├── README.md
│   └── deprecated/
│       ├── .flake8
│       ├── .gitignore
│       ├── .isort.cfg
│       ├── .pre-commit-config.yaml
│       ├── README.md
│       ├── examples/
│       │   ├── classification/
│       │   │   └── run_classification.py
│       │   └── seq_labeling/
│       │       ├── preprocess.py
│       │       ├── preprocess.sh
│       │       └── run_seq_labeling.py
│       ├── layoutlm/
│       │   ├── __init__.py
│       │   ├── data/
│       │   │   ├── __init__.py
│       │   │   ├── funsd.py
│       │   │   └── rvl_cdip.py
│       │   └── modeling/
│       │       ├── __init__.py
│       │       └── layoutlm.py
│       ├── mypy.ini
│       └── setup.py
├── layoutlmft/
│   ├── .gitignore
│   ├── Makefile
│   ├── README.md
│   ├── examples/
│   │   ├── run_funsd.py
│   │   ├── run_xfun_re.py
│   │   └── run_xfun_ser.py
│   ├── layoutlmft/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── data_args.py
│   │   │   ├── data_collator.py
│   │   │   ├── datasets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── funsd.py
│   │   │   │   └── xfun.py
│   │   │   └── utils.py
│   │   ├── evaluation.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── layoutlm/
│   │   │   │   └── __init__.py
│   │   │   ├── layoutlmv2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_layoutlmv2.py
│   │   │   │   ├── detectron2_config.py
│   │   │   │   ├── modeling_layoutlmv2.py
│   │   │   │   ├── tokenization_layoutlmv2.py
│   │   │   │   └── tokenization_layoutlmv2_fast.py
│   │   │   ├── layoutxlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_layoutxlm.py
│   │   │   │   ├── modeling_layoutxlm.py
│   │   │   │   ├── tokenization_layoutxlm.py
│   │   │   │   └── tokenization_layoutxlm_fast.py
│   │   │   └── model_args.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   └── decoders/
│   │   │       ├── __init__.py
│   │   │       └── re.py
│   │   ├── trainers/
│   │   │   ├── __init__.py
│   │   │   ├── funsd_trainer.py
│   │   │   └── xfun_trainer.py
│   │   └── utils.py
│   ├── pyproject.toml
│   ├── requirements.txt
│   ├── setup.cfg
│   └── setup.py
├── layoutlmv2/
│   └── README.md
├── layoutlmv3/
│   ├── .gitignore
│   ├── README.md
│   ├── examples/
│   │   ├── object_detection/
│   │   │   ├── adaptive_binarize.py
│   │   │   ├── cascade_layoutlmv3.yaml
│   │   │   ├── convert_to_coco_format.py
│   │   │   ├── ditod/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── backbone.py
│   │   │   │   ├── beit.py
│   │   │   │   ├── config.py
│   │   │   │   ├── dataset_mapper.py
│   │   │   │   ├── deit.py
│   │   │   │   ├── icdar_evaluation.py
│   │   │   │   ├── mycheckpointer.py
│   │   │   │   ├── mytrainer.py
│   │   │   │   ├── rcnn_vl.py
│   │   │   │   └── table_evaluation/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── data_structure.py
│   │   │   │       └── evaluate.py
│   │   │   └── train_net.py
│   │   ├── run_funsd_cord.py
│   │   └── run_xfund.py
│   ├── layoutlmft/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── cord.py
│   │   │   ├── data_collator.py
│   │   │   ├── funsd.py
│   │   │   ├── image_utils.py
│   │   │   └── xfund.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── layoutlmv3/
│   │           ├── __init__.py
│   │           ├── configuration_layoutlmv3.py
│   │           ├── modeling_layoutlmv3.py
│   │           ├── tokenization_layoutlmv3.py
│   │           └── tokenization_layoutlmv3_fast.py
│   ├── requirements.txt
│   ├── setup.cfg
│   └── setup.py
├── layoutreader/
│   ├── README.md
│   ├── decode_seq2seq.py
│   ├── run_seq2seq.py
│   ├── s2s_ft/
│   │   ├── config.py
│   │   ├── configuration_minilm.py
│   │   ├── configuration_unilm.py
│   │   ├── convert_state_dict.py
│   │   ├── modeling.py
│   │   ├── modeling_decoding.py
│   │   ├── s2s_loader.py
│   │   ├── tokenization_minilm.py
│   │   ├── tokenization_unilm.py
│   │   └── utils.py
│   └── setup.py
├── layoutxlm/
│   └── README.md
├── longnet/
│   └── README.md
├── longvit/
│   └── README.md
├── markuplm/
│   ├── README.md
│   ├── examples/
│   │   └── fine_tuning/
│   │       ├── run_swde/
│   │       │   ├── constants.py
│   │       │   ├── eval_utils.py
│   │       │   ├── pack_data.py
│   │       │   ├── prepare_data.py
│   │       │   ├── run.py
│   │       │   └── utils.py
│   │       └── run_websrc/
│   │           ├── dataset_generation.py
│   │           ├── draft.py
│   │           ├── run.py
│   │           ├── utils.py
│   │           └── utils_evaluate.py
│   ├── markuplmft/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   └── tag_utils.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── markuplm/
│   │           ├── __init__.py
│   │           ├── configuration_markuplm.py
│   │           ├── modeling_markuplm.py
│   │           ├── tokenization_markuplm.py
│   │           └── tokenization_markuplm_fast.py
│   ├── requirements.txt
│   └── setup.py
├── mathscale/
│   ├── MWPBench/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── data/
│   │   │   ├── fresh_gaokao_math_2023.json
│   │   │   ├── full_test.json
│   │   │   └── full_train.json
│   │   ├── eval_openai/
│   │   │   └── driver.py
│   │   ├── eval_vllm/
│   │   │   ├── driver.py
│   │   │   └── util.py
│   │   ├── requirements.txt
│   │   └── scripts/
│   │       ├── eval_openai.alpaca_force_ans.sh
│   │       ├── eval_openai.freshgaokao.alpaca_force_ans.sh
│   │       ├── eval_vllm.alpaca.4gpus.sh
│   │       └── eval_vllm.freshgaokao.alpaca.4gpus.sh
│   └── README.md
├── metalm/
│   └── README.md
├── minilm/
│   ├── README.md
│   └── examples/
│       └── run_xnli.py
├── retnet/
│   └── README.md
├── s2s-ft/
│   ├── .gitignore
│   ├── README.md
│   ├── decode_seq2seq.py
│   ├── evaluations/
│   │   ├── bs_pyrouge.py
│   │   ├── eval_for_cnndm.py
│   │   ├── eval_for_gigaword.py
│   │   └── eval_for_xsum.py
│   ├── gen_seq_from_trace.py
│   ├── run_seq2seq.py
│   ├── s2s_ft/
│   │   ├── config.py
│   │   ├── configuration_minilm.py
│   │   ├── configuration_unilm.py
│   │   ├── convert_state_dict.py
│   │   ├── modeling.py
│   │   ├── modeling_decoding.py
│   │   ├── s2s_loader.py
│   │   ├── tokenization_minilm.py
│   │   ├── tokenization_unilm.py
│   │   └── utils.py
│   └── setup.py
├── simlm/
│   ├── README.md
│   ├── ds_config.json
│   ├── misc/
│   │   ├── compute_metrics_marco.py
│   │   ├── dpr/
│   │   │   ├── evaluate_dpr_retrieval.py
│   │   │   ├── format_and_evaluate.py
│   │   │   └── mine_hard_negatives.py
│   │   ├── marco_pred_to_cases.py
│   │   └── prepare_msmarco_data.py
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── download_msmarco_data.sh
│   │   ├── dpr/
│   │   │   ├── encode_wiki.sh
│   │   │   ├── eval_dpr.sh
│   │   │   ├── nq_gen_kd_teacher_scores.sh
│   │   │   ├── rerank_nq.sh
│   │   │   ├── search_dpr.sh
│   │   │   ├── train_nq_biencoder.sh
│   │   │   ├── train_nq_kd.sh
│   │   │   └── train_nq_reranker.sh
│   │   ├── encode_marco.sh
│   │   ├── gen_kd_teacher_scores.sh
│   │   ├── rerank_marco.sh
│   │   ├── search_marco.sh
│   │   ├── train_biencoder_marco.sh
│   │   ├── train_kd_biencoder.sh
│   │   ├── train_reranker_marco.sh
│   │   └── train_rlm.sh
│   └── src/
│       ├── __init__.py
│       ├── collators/
│       │   ├── __init__.py
│       │   ├── biencoder_collator.py
│       │   ├── collator_utils.py
│       │   ├── cross_encoder_collator.py
│       │   └── rlm_collator.py
│       ├── config.py
│       ├── data_utils.py
│       ├── inference/
│       │   ├── __init__.py
│       │   ├── encode_main.py
│       │   ├── gen_teacher_scores.py
│       │   ├── rerank_main.py
│       │   └── search_main.py
│       ├── loaders/
│       │   ├── __init__.py
│       │   ├── biencoder_dataloader.py
│       │   ├── cross_encoder_dataloader.py
│       │   ├── loader_utils.py
│       │   └── rlm_dataloader.py
│       ├── logger_config.py
│       ├── metrics.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── biencoder_model.py
│       │   ├── cross_encoder_model.py
│       │   └── rlm.py
│       ├── train_biencoder.py
│       ├── train_cross_encoder.py
│       ├── train_rlm.py
│       ├── trainers/
│       │   ├── __init__.py
│       │   ├── biencoder_trainer.py
│       │   ├── reranker_trainer.py
│       │   └── rlm_trainer.py
│       └── utils.py
├── speechlm/
│   ├── README.md
│   ├── SpeechLM.py
│   ├── dataset/
│   │   ├── CommonVoice/
│   │   │   └── v4/
│   │   │       └── en/
│   │   │           └── en-de/
│   │   │               ├── config_base_ende.yaml
│   │   │               ├── config_large_ende.yaml
│   │   │               ├── dev-sample100_st_en_de_local.tsv
│   │   │               ├── spm_char_st_en_de.model
│   │   │               ├── spm_char_st_en_de.txt
│   │   │               └── spm_char_st_en_de.vocab
│   │   ├── LibriLM/
│   │   │   ├── hidden_unit/
│   │   │   │   └── bin-idx/
│   │   │   │       ├── config.yaml
│   │   │   │       └── dict.km.txt
│   │   │   └── phone_unit/
│   │   │       └── bin-idx/
│   │   │           ├── config.yaml
│   │   │           ├── dict.ltr.txt
│   │   │           └── dict.phn.txt
│   │   └── LibriSpeech/
│   │       ├── asr/
│   │       │   ├── train_sample100.ltr
│   │       │   └── train_sample100.tsv
│   │       ├── fast_phone2unit/
│   │       │   ├── config.yaml
│   │       │   ├── config_generate.yaml
│   │       │   ├── dict.km.txt
│   │       │   ├── genset_examples.tsv
│   │       │   └── train_exmples.tsv
│   │       ├── hidden_unit/
│   │       │   ├── dict.km.txt
│   │       │   ├── train_sample100.km
│   │       │   └── train_sample100.tsv
│   │       └── phone_unit/
│   │           ├── dict.phn.txt
│   │           ├── train_sample100.phn
│   │           └── train_sample100.tsv
│   ├── modules.py
│   ├── speechlm/
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── decode/
│   │   │   │   ├── infer_fsqlm.yaml
│   │   │   │   ├── infer_kenlm.yaml
│   │   │   │   └── infer_viterbi.yaml
│   │   │   ├── finetune/
│   │   │   │   ├── speechlm_base_100h.yaml
│   │   │   │   └── speechlm_large_960h.yaml
│   │   │   └── pretrain/
│   │   │       ├── speechlm_base_librispeech.yaml
│   │   │       ├── speechlm_large_librilight.yaml
│   │   │       └── speechlmp_base_cfg.pt
│   │   ├── criterions/
│   │   │   ├── __init__.py
│   │   │   ├── fasttext2unit_loss.py
│   │   │   └── speechlm_criterion.py
│   │   ├── data/
│   │   │   ├── concat_dataset.py
│   │   │   ├── hubert_dataset.py
│   │   │   ├── language_trible_dataset.py
│   │   │   ├── load_langpair_dataset.py
│   │   │   ├── multimodal_corpus_dataset.py
│   │   │   └── text_to_unit_dataset.py
│   │   ├── data_process/
│   │   │   ├── covost2/
│   │   │   │   ├── mp3_to_wav.py
│   │   │   │   └── prepare_covost_data.py
│   │   │   ├── filter_paireddata_by_len.py
│   │   │   ├── get_t2u_manifest.py
│   │   │   ├── get_t2u_manifest_textonly.py
│   │   │   ├── phoneize_with_sil.py
│   │   │   ├── phoneme_tokenizer/
│   │   │   │   ├── ltr2kaldi_phn_sil025.py
│   │   │   │   ├── mean5_and_std25_sil14_spn32.dict
│   │   │   │   └── repeat_withou_insert_sil_less_4375.py
│   │   │   ├── prepare_covost2_enxx.sh
│   │   │   ├── prepare_phn2ltr_librilm.sh
│   │   │   ├── txt2idx.sh
│   │   │   └── wrd2ltr.py
│   │   ├── generate_unit.py
│   │   ├── infer.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── fasttext2unit.py
│   │   │   ├── speechlm.py
│   │   │   ├── speechlm_ctcasr.py
│   │   │   └── speechlm_st.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── learned_positional_embedding.py
│   │   │   ├── multihead_attention.py
│   │   │   ├── relative_pos_enc.py
│   │   │   ├── transformer_decoder.py
│   │   │   ├── transformer_encoder.py
│   │   │   ├── transformer_layer.py
│   │   │   └── w2v_encoder.py
│   │   ├── scripts/
│   │   │   ├── pretrain_speechlm/
│   │   │   │   ├── base_speechlmh.sh
│   │   │   │   ├── base_speechlmp.sh
│   │   │   │   └── large_speechlmp.sh
│   │   │   ├── tokenizer_fastT2U/
│   │   │   │   ├── generate.sh
│   │   │   │   ├── infer.sh
│   │   │   │   └── train_s_5e-4.sh
│   │   │   ├── tune_speechlm_asr/
│   │   │   │   ├── finetune_base_ctc.sh
│   │   │   │   ├── finetune_large_ctc.sh
│   │   │   │   ├── inference_ctc.sh
│   │   │   │   ├── inference_ctc_kenlm.sh
│   │   │   │   ├── inference_ctc_large.sh
│   │   │   │   └── inference_ctc_large_fsqlm.sh
│   │   │   └── tune_speechlm_st/
│   │   │       ├── ft_base_covost_enxx.sh
│   │   │       ├── ft_large_covost_enxx.sh
│   │   │       ├── inference_base.sh
│   │   │       └── inference_large.sh
│   │   ├── tasks/
│   │   │   ├── fast_text_to_unit.py
│   │   │   └── joint_sc2t_pretrain.py
│   │   └── unit_generator.py
│   └── speechlm_README.md
├── speecht5/
│   ├── README.md
│   ├── scripts/
│   │   └── generate_speech.py
│   └── speecht5/
│       ├── __init__.py
│       ├── criterions/
│       │   ├── __init__.py
│       │   ├── speech_pretrain_criterion.py
│       │   ├── speech_to_text_loss.py
│       │   ├── speecht5_criterion.py
│       │   ├── text_pretrain_criterion.py
│       │   └── text_to_speech_loss.py
│       ├── data/
│       │   ├── __init__.py
│       │   ├── multitask_dataset.py
│       │   ├── speech_dataset.py
│       │   ├── speech_to_speech_dataset.py
│       │   ├── speech_to_text_dataset.py
│       │   ├── text_dataset.py
│       │   └── text_to_speech_dataset.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── modules/
│       │   │   ├── __init__.py
│       │   │   ├── decoder.py
│       │   │   ├── encoder.py
│       │   │   ├── multihead_attention.py
│       │   │   ├── speaker_decoder_postnet.py
│       │   │   ├── speech_decoder_postnet.py
│       │   │   ├── speech_decoder_prenet.py
│       │   │   ├── speech_encoder_postnet.py
│       │   │   ├── speech_encoder_prenet.py
│       │   │   ├── text_decoder_postnet.py
│       │   │   ├── text_decoder_prenet.py
│       │   │   ├── text_encoder_prenet.py
│       │   │   └── transformer_layer.py
│       │   ├── speecht5.py
│       │   └── t5_transformer_lm.py
│       ├── sequence_generator.py
│       └── tasks/
│           ├── __init__.py
│           └── speecht5.py
├── storage/
│   ├── unilm-base-cased-config.json
│   ├── unilm-base-cased-vocab.txt
│   ├── unilm-large-cased-config.json
│   └── unilm-large-cased-vocab.txt
├── textdiffuser/
│   ├── README.md
│   ├── assets/
│   │   ├── examples/
│   │   │   ├── text-inpainting/
│   │   │   │   ├── case1.txt
│   │   │   │   ├── case2.txt
│   │   │   │   ├── case3.txt
│   │   │   │   └── case4.txt
│   │   │   ├── text-to-image/
│   │   │   │   ├── case1.txt
│   │   │   │   ├── case2.txt
│   │   │   │   └── case3.txt
│   │   │   └── text-to-image-with-template/
│   │   │       ├── case1.txt
│   │   │       ├── case2.txt
│   │   │       └── case3.txt
│   │   ├── files/
│   │   │   ├── modeling_utils.py
│   │   │   ├── scheduling_ddpm.py
│   │   │   └── unet_2d_condition.py
│   │   └── font/
│   │       └── .gitkeep
│   ├── data/
│   │   ├── mario-laion-example/
│   │   │   ├── 06269/
│   │   │   │   ├── 062690093/
│   │   │   │   │   ├── caption.txt
│   │   │   │   │   ├── charseg.npy
│   │   │   │   │   ├── info.json
│   │   │   │   │   └── ocr.txt
│   │   │   │   ├── 062692210/
│   │   │   │   │   ├── caption.txt
│   │   │   │   │   ├── charseg.npy
│   │   │   │   │   ├── info.json
│   │   │   │   │   └── ocr.txt
│   │   │   │   └── 062692530/
│   │   │   │       ├── caption.txt
│   │   │   │       ├── charseg.npy
│   │   │   │       ├── info.json
│   │   │   │       └── ocr.txt
│   │   │   └── 27197/
│   │   │       ├── 271975131/
│   │   │       │   ├── caption.txt
│   │   │       │   ├── charseg.npy
│   │   │       │   ├── info.json
│   │   │       │   └── ocr.txt
│   │   │       ├── 271975251/
│   │   │       │   ├── caption.txt
│   │   │       │   ├── charseg.npy
│   │   │       │   ├── info.json
│   │   │       │   └── ocr.txt
│   │   │       └── 271978467/
│   │   │           ├── caption.txt
│   │   │           ├── charseg.npy
│   │   │           ├── info.json
│   │   │           └── ocr.txt
│   │   ├── mario-laion-unzip.py
│   │   └── visualize_charseg.ipynb
│   ├── eval/
│   │   ├── MARIOEval_evaluate.py
│   │   ├── MARIOEval_generate.py
│   │   ├── README.md
│   │   ├── clipscore.py
│   │   ├── evaluate.sh
│   │   ├── fid_score.py
│   │   ├── generate.sh
│   │   ├── inception.py
│   │   ├── ocr_eval.py
│   │   └── requirements.txt
│   ├── evaluate.py
│   ├── gradio_app.py
│   ├── inference.py
│   ├── model/
│   │   ├── layout_generator.py
│   │   ├── layout_transformer.py
│   │   └── text_segmenter/
│   │       ├── unet.py
│   │       └── unet_parts.py
│   ├── requirements.txt
│   ├── text-inpainting.sh
│   ├── text-to-image-with-template.sh
│   ├── text-to-image.sh
│   ├── textdiffuser-ckpt/
│   │   └── .gitkeep
│   ├── train.py
│   ├── train.sh
│   └── util.py
├── textdiffuser-2/
│   ├── README.md
│   ├── assets/
│   │   ├── attention_processor.py
│   │   └── reference_requirements.txt
│   ├── cog.yaml
│   ├── data/
│   │   ├── check_layout_planner_data.py
│   │   └── layout_planner_data_5k.json
│   ├── demo_textdiffuser2_inpainting_full.py
│   ├── demo_textdiffuser2_t2i_full.py
│   ├── extensions/
│   │   ├── angle_template_file.txt
│   │   ├── inference_textdiffuser2_t2i_full_angle.py
│   │   ├── inference_textdiffuser2_t2i_full_angle.sh
│   │   ├── inference_textdiffuser2_t2i_full_quadrilateral.py
│   │   ├── inference_textdiffuser2_t2i_full_quadrilateral.sh
│   │   ├── quadrilateral_template_file.txt
│   │   ├── train_textdiffuser2_t2i_full_angle.py
│   │   ├── train_textdiffuser2_t2i_full_angle.sh
│   │   ├── train_textdiffuser2_t2i_full_quadrilateral.py
│   │   └── train_textdiffuser2_t2i_full_quadrilateral.sh
│   ├── inference_textdiffuser2_t2i_full.py
│   ├── inference_textdiffuser2_t2i_full.sh
│   ├── inference_textdiffuser2_t2i_lora.py
│   ├── inference_textdiffuser2_t2i_lora.sh
│   ├── predict.py
│   ├── requirements.txt
│   ├── train_layout_planner.sh
│   ├── train_textdiffuser2_inpainting_full.py
│   ├── train_textdiffuser2_inpainting_full.sh
│   ├── train_textdiffuser2_t2i_full.py
│   ├── train_textdiffuser2_t2i_full.sh
│   ├── train_textdiffuser2_t2i_lora.py
│   └── train_textdiffuser2_t2i_lora.sh
├── trocr/
│   ├── README.md
│   ├── __init__.py
│   ├── augmentation/
│   │   ├── __init__.py
│   │   ├── blur.py
│   │   ├── camera.py
│   │   ├── geometry.py
│   │   ├── noise.py
│   │   ├── ops.py
│   │   ├── pattern.py
│   │   ├── process.py
│   │   ├── test.py
│   │   ├── warp.py
│   │   └── weather.py
│   ├── bpe.py
│   ├── convert_to_SROIE_format.py
│   ├── data.py
│   ├── data_aug.py
│   ├── deit.py
│   ├── generator.py
│   ├── pic_inference.ipynb
│   ├── pic_inference.py
│   ├── requirements.txt
│   ├── scoring.py
│   ├── task.py
│   ├── trocr_models.py
│   ├── unilm3-cased.model
│   ├── unilm_models.py
│   └── vit_models.py
├── unilm/
│   └── README.md
├── unilm-v1/
│   ├── README.md
│   └── src/
│       ├── biunilm/
│       │   ├── __init__.py
│       │   ├── decode_seq2seq.py
│       │   ├── gen_seq_from_trace.py
│       │   ├── loader_utils.py
│       │   ├── run_seq2seq.py
│       │   └── seq2seq_loader.py
│       ├── cnndm/
│       │   ├── __init__.py
│       │   ├── bs_pyrouge.py
│       │   └── eval.py
│       ├── gigaword/
│       │   ├── __init__.py
│       │   ├── bs_pyrouge.py
│       │   └── eval.py
│       ├── nn/
│       │   ├── __init__.py
│       │   └── data_parallel.py
│       ├── pytorch_pretrained_bert/
│       │   ├── __init__.py
│       │   ├── __main__.py
│       │   ├── file_utils.py
│       │   ├── loss.py
│       │   ├── modeling.py
│       │   ├── optimization.py
│       │   ├── optimization_fp16.py
│       │   └── tokenization.py
│       ├── qg/
│       │   ├── eval.py
│       │   └── eval_on_unilm_tokenized_ref.py
│       └── setup.py
├── unimim/
│   └── README.md
├── valle/
│   └── README.md
├── vl-beit/
│   └── README.md
├── vlmo/
│   ├── DATA.md
│   ├── README.md
│   ├── requirements.txt
│   ├── run.py
│   ├── setup.py
│   └── vlmo/
│       ├── __init__.py
│       ├── config.py
│       ├── datamodules/
│       │   ├── __init__.py
│       │   ├── coco_caption_karpathy_datamodule.py
│       │   ├── conceptual_caption_datamodule.py
│       │   ├── datamodule_base.py
│       │   ├── f30k_caption_karpathy_datamodule.py
│       │   ├── multitask_datamodule.py
│       │   ├── nlvr2_datamodule.py
│       │   ├── sbu_datamodule.py
│       │   ├── vg_caption_datamodule.py
│       │   ├── vqav2_datamodule.py
│       │   └── wikibk_datamodule.py
│       ├── datasets/
│       │   ├── __init__.py
│       │   ├── base_dataset.py
│       │   ├── coco_caption_karpathy_dataset.py
│       │   ├── conceptual_caption_dataset.py
│       │   ├── f30k_caption_karpathy_dataset.py
│       │   ├── nlvr2_dataset.py
│       │   ├── sbu_caption_dataset.py
│       │   ├── vg_caption_dataset.py
│       │   ├── vqav2_dataset.py
│       │   └── wikibk_dataset.py
│       ├── gadgets/
│       │   ├── __init__.py
│       │   └── my_metrics.py
│       ├── modules/
│       │   ├── __init__.py
│       │   ├── dist_utils.py
│       │   ├── heads.py
│       │   ├── multiway_transformer.py
│       │   ├── objectives.py
│       │   ├── vlmo_module.py
│       │   └── vlmo_utils.py
│       ├── transforms/
│       │   ├── __init__.py
│       │   ├── pixelbert.py
│       │   ├── randaug.py
│       │   ├── randaugment.py
│       │   ├── square_transform.py
│       │   └── utils.py
│       └── utils/
│           ├── glossary.py
│           ├── write_coco_karpathy.py
│           ├── write_conceptual_caption.py
│           ├── write_f30k_karpathy.py
│           ├── write_nlvr2.py
│           ├── write_sbu.py
│           ├── write_vg.py
│           ├── write_vqa.py
│           └── write_wikibk.py
├── wavlm/
│   ├── README.md
│   ├── WavLM.py
│   └── modules.py
├── xdoc/
│   ├── README.md
│   └── fine_tuning/
│       ├── README.md
│       ├── funsd/
│       │   ├── layoutlmft/
│       │   │   ├── __init__.py
│       │   │   ├── data/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── data_args.py
│       │   │   │   ├── data_collator.py
│       │   │   │   ├── datasets/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── funsd.py
│       │   │   │   │   └── xfun.py
│       │   │   │   └── utils.py
│       │   │   ├── evaluation.py
│       │   │   ├── models/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── layoutlm/
│       │   │   │   │   └── __init__.py
│       │   │   │   ├── layoutlmv2/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── configuration_layoutlmv2.py
│       │   │   │   │   ├── detectron2_config.py
│       │   │   │   │   ├── modeling_layoutlmv2.py
│       │   │   │   │   ├── tokenization_layoutlmv2.py
│       │   │   │   │   └── tokenization_layoutlmv2_fast.py
│       │   │   │   ├── layoutxlm/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── configuration_layoutxlm.py
│       │   │   │   │   ├── modeling_layoutxlm.py
│       │   │   │   │   ├── tokenization_layoutxlm.py
│       │   │   │   │   └── tokenization_layoutxlm_fast.py
│       │   │   │   └── model_args.py
│       │   │   ├── modules/
│       │   │   │   ├── __init__.py
│       │   │   │   └── decoders/
│       │   │   │       ├── __init__.py
│       │   │   │       └── re.py
│       │   │   ├── trainers/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── funsd_trainer.py
│       │   │   │   └── xfun_trainer.py
│       │   │   └── utils.py
│       │   ├── model.py
│       │   ├── requirements.txt
│       │   ├── run_funsd.py
│       │   └── run_funsd.sh
│       ├── squad/
│       │   ├── requirements.txt
│       │   ├── run_squad.py
│       │   ├── run_squad_v1.sh
│       │   ├── run_squad_v2.sh
│       │   ├── trainer_qa.py
│       │   ├── trainer_seq2seq_qa.py
│       │   └── utils_qa.py
│       └── websrc/
│           ├── args.py
│           ├── cache/
│           │   └── .gitkeep
│           ├── model.py
│           ├── requirements.txt
│           ├── run_websrc.py
│           ├── trainer.py
│           ├── util.py
│           ├── web_tag_utils.py
│           └── websrc.py
├── xlmt/
│   └── README.md
├── xmoe/
│   └── README.md
└── xtune/
    ├── README.md
    ├── scripts/
    │   ├── cross-lingual-transfer/
    │   │   ├── train_mlqa.sh
    │   │   ├── train_panx.sh
    │   │   ├── train_pawsx.sh
    │   │   ├── train_tydiqa.sh
    │   │   ├── train_udpos.sh
    │   │   ├── train_xnli.sh
    │   │   └── train_xquad.sh
    │   ├── download_data.sh
    │   ├── download_model.sh
    │   ├── preprocess_panx.sh
    │   ├── preprocess_udpos.sh
    │   ├── train.sh
    │   └── translate-train-all/
    │       ├── train_mlqa.sh
    │       ├── train_panx.sh
    │       ├── train_pawsx.sh
    │       ├── train_tydiqa.sh
    │       ├── train_udpos.sh
    │       ├── train_xnli.sh
    │       └── train_xquad.sh
    ├── setup.py
    ├── src/
    │   ├── pequod/
    │   │   ├── __init__.py
    │   │   ├── data/
    │   │   │   ├── __init__.py
    │   │   │   ├── dataloader.py
    │   │   │   ├── sampler.py
    │   │   │   ├── utils_squad.py
    │   │   │   ├── utils_squad_evaluate.py
    │   │   │   ├── wili.py
    │   │   │   ├── xdoc.py
    │   │   │   ├── xqa.py
    │   │   │   └── xretrieval.py
    │   │   ├── eval/
    │   │   │   ├── __init__.py
    │   │   │   ├── bretrieval.py
    │   │   │   ├── evaluator.py
    │   │   │   ├── utils_retrieve.py
    │   │   │   └── xretrieval.py
    │   │   ├── io.py
    │   │   ├── model/
    │   │   │   ├── __init__.py
    │   │   │   └── roberta.py
    │   │   ├── optim/
    │   │   │   ├── __init__.py
    │   │   │   ├── la.py
    │   │   │   └── la0.py
    │   │   ├── text/
    │   │   │   ├── __init__.py
    │   │   │   └── tokenization_sentencepiece.py
    │   │   ├── tools/
    │   │   │   ├── __init__.py
    │   │   │   └── convert.py
    │   │   └── training/
    │   │       ├── __init__.py
    │   │       ├── trainer.py
    │   │       └── xtrainer.py
    │   ├── run_cls.py
    │   ├── run_qa.py
    │   ├── run_tag.py
    │   ├── tools/
    │   │   ├── __init__.py
    │   │   ├── check_many2many_alignment.py
    │   │   ├── dump_hf_state_dict.py
    │   │   ├── get_eval_results.py
    │   │   ├── sample_xnli.py
    │   │   └── xnli_sampling_statistics.py
    │   ├── transformers/
    │   │   ├── __init__.py
    │   │   ├── activations.py
    │   │   ├── commands/
    │   │   │   ├── __init__.py
    │   │   │   ├── convert.py
    │   │   │   ├── download.py
    │   │   │   ├── env.py
    │   │   │   ├── run.py
    │   │   │   ├── serving.py
    │   │   │   ├── train.py
    │   │   │   └── user.py
    │   │   ├── configuration_albert.py
    │   │   ├── configuration_auto.py
    │   │   ├── configuration_bart.py
    │   │   ├── configuration_bert.py
    │   │   ├── configuration_camembert.py
    │   │   ├── configuration_ctrl.py
    │   │   ├── configuration_distilbert.py
    │   │   ├── configuration_flaubert.py
    │   │   ├── configuration_gpt2.py
    │   │   ├── configuration_mmbt.py
    │   │   ├── configuration_openai.py
    │   │   ├── configuration_roberta.py
    │   │   ├── configuration_t5.py
    │   │   ├── configuration_transfo_xl.py
    │   │   ├── configuration_utils.py
    │   │   ├── configuration_xlm.py
    │   │   ├── configuration_xlm_roberta.py
    │   │   ├── configuration_xlnet.py
    │   │   ├── convert_albert_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
    │   │   ├── convert_bert_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_bert_pytorch_checkpoint_to_original_tf.py
    │   │   ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_openai_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_pytorch_checkpoint_to_tf2.py
    │   │   ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
    │   │   ├── convert_t5_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
    │   │   ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
    │   │   ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
    │   │   ├── data/
    │   │   │   ├── __init__.py
    │   │   │   ├── metrics/
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── evaluate_mlqa.py
    │   │   │   │   ├── evaluate_squad.py
    │   │   │   │   ├── mlqa_evaluation_v1.py
    │   │   │   │   └── squad_metrics.py
    │   │   │   └── processors/
    │   │   │       ├── __init__.py
    │   │   │       ├── glue.py
    │   │   │       ├── squad.py
    │   │   │       ├── utils.py
    │   │   │       ├── xglue.py
    │   │   │       ├── xnli.py
    │   │   │       └── xtreme.py
    │   │   ├── file_utils.py
    │   │   ├── hf_api.py
    │   │   ├── modelcard.py
    │   │   ├── modeling_albert.py
    │   │   ├── modeling_auto.py
    │   │   ├── modeling_bart.py
    │   │   ├── modeling_bert.py
    │   │   ├── modeling_camembert.py
    │   │   ├── modeling_ctrl.py
    │   │   ├── modeling_distilbert.py
    │   │   ├── modeling_encoder_decoder.py
    │   │   ├── modeling_flaubert.py
    │   │   ├── modeling_gpt2.py
    │   │   ├── modeling_mmbt.py
    │   │   ├── modeling_openai.py
    │   │   ├── modeling_roberta.py
    │   │   ├── modeling_t5.py
    │   │   ├── modeling_tf_albert.py
    │   │   ├── modeling_tf_auto.py
    │   │   ├── modeling_tf_bert.py
    │   │   ├── modeling_tf_camembert.py
    │   │   ├── modeling_tf_ctrl.py
    │   │   ├── modeling_tf_distilbert.py
    │   │   ├── modeling_tf_gpt2.py
    │   │   ├── modeling_tf_openai.py
    │   │   ├── modeling_tf_pytorch_utils.py
    │   │   ├── modeling_tf_roberta.py
    │   │   ├── modeling_tf_t5.py
    │   │   ├── modeling_tf_transfo_xl.py
    │   │   ├── modeling_tf_transfo_xl_utilities.py
    │   │   ├── modeling_tf_utils.py
    │   │   ├── modeling_tf_xlm.py
    │   │   ├── modeling_tf_xlm_roberta.py
    │   │   ├── modeling_tf_xlnet.py
    │   │   ├── modeling_transfo_xl.py
    │   │   ├── modeling_transfo_xl_utilities.py
    │   │   ├── modeling_utils.py
    │   │   ├── modeling_xlm.py
    │   │   ├── modeling_xlm_roberta.py
    │   │   ├── modeling_xlnet.py
    │   │   ├── optimization.py
    │   │   ├── optimization_tf.py
    │   │   ├── pipelines.py
    │   │   ├── tokenization_albert.py
    │   │   ├── tokenization_auto.py
    │   │   ├── tokenization_bart.py
    │   │   ├── tokenization_bert.py
    │   │   ├── tokenization_bert_japanese.py
    │   │   ├── tokenization_camembert.py
    │   │   ├── tokenization_ctrl.py
    │   │   ├── tokenization_distilbert.py
    │   │   ├── tokenization_flaubert.py
    │   │   ├── tokenization_gpt2.py
    │   │   ├── tokenization_openai.py
    │   │   ├── tokenization_roberta.py
    │   │   ├── tokenization_t5.py
    │   │   ├── tokenization_transfo_xl.py
    │   │   ├── tokenization_utils.py
    │   │   ├── tokenization_xlm.py
    │   │   ├── tokenization_xlm_roberta.py
    │   │   ├── tokenization_xlnet.py
    │   │   └── utils_encoder_decoder.py
    │   ├── ud-conversion-tools/
    │   │   └── conllu_to_conll.py
    │   └── utils_tag.py
    ├── transformers-cli
    └── utils_preprocess.py