gitextract_3q2i3t76/

├── .gitignore
├── LICENSE
├── README.md
├── benchmark/
│   ├── batch_size_table.md
│   ├── flexgen/
│   │   └── bench_scan_175b.sh
│   ├── flexllmgen/
│   │   ├── README.md
│   │   ├── bench_175b_1x4.sh
│   │   ├── bench_175b_4x1.sh
│   │   ├── bench_30b_1x4.sh
│   │   ├── bench_30b_4x1.sh
│   │   ├── bench_6.7b_1x4.sh
│   │   ├── bench_6.7b_4x1.sh
│   │   ├── bench_dist_multi_node.sh
│   │   ├── bench_dist_single_node.sh
│   │   └── bench_suite.py
│   ├── hf_ds/
│   │   ├── README.md
│   │   ├── bench_all_1x4.sh
│   │   ├── bench_ds_175b_4x1.sh
│   │   ├── bench_ds_30b_1x4.sh
│   │   ├── bench_ds_30b_4x1.sh
│   │   ├── bench_ds_6.7b_1x4.sh
│   │   ├── bench_ds_6.7b_2x1.sh
│   │   ├── bench_ds_6.7b_4x1.sh
│   │   ├── bench_hf.py
│   │   ├── hf_opt.py
│   │   └── hostfile
│   ├── petals/
│   │   ├── README.md
│   │   └── run_opt_requests.py
│   └── third_party/
│       ├── DeepSpeed/
│       │   ├── .clang-format
│       │   ├── .github/
│       │   │   ├── ISSUE_TEMPLATE/
│       │   │   │   ├── compression_bug_report.md
│       │   │   │   ├── feature_request.md
│       │   │   │   ├── inference_bug_report.md
│       │   │   │   └── training_bug_report.md
│       │   │   └── workflows/
│       │   │       ├── amd.yml
│       │   │       ├── formatting.yml
│       │   │       ├── nv-accelerate-v100.yml
│       │   │       ├── nv-inference.yml
│       │   │       ├── nv-lightning-v100.yml
│       │   │       ├── nv-mii.yml
│       │   │       ├── nv-nightly.yml
│       │   │       ├── nv-torch-latest-v100.yml
│       │   │       ├── nv-torch-nightly-v100.yml
│       │   │       ├── nv-torch18-p40.yml
│       │   │       ├── nv-torch18-v100.yml
│       │   │       ├── nv-transformers-v100.yml
│       │   │       ├── pre-compile-ops.yml
│       │   │       └── python.yml
│       │   ├── .gitignore
│       │   ├── .pre-commit-config.yaml
│       │   ├── .pylintrc
│       │   ├── .readthedocs.yml
│       │   ├── .style.yapf
│       │   ├── CODEOWNERS
│       │   ├── CODE_OF_CONDUCT.md
│       │   ├── CONTRIBUTING.md
│       │   ├── LICENSE
│       │   ├── MANIFEST.in
│       │   ├── MANIFEST_win.in
│       │   ├── README.md
│       │   ├── SECURITY.md
│       │   ├── azure/
│       │   │   └── README.md
│       │   ├── benchmarks/
│       │   │   ├── __init__.py
│       │   │   ├── communication/
│       │   │   │   ├── README.md
│       │   │   │   ├── __init__.py
│       │   │   │   ├── all_gather.py
│       │   │   │   ├── all_reduce.py
│       │   │   │   ├── all_to_all.py
│       │   │   │   ├── broadcast.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── pt2pt.py
│       │   │   │   ├── run_all.py
│       │   │   │   └── utils.py
│       │   │   └── inference/
│       │   │       ├── bert-bench.py
│       │   │       ├── collect_results.py
│       │   │       ├── gpt-bench.py
│       │   │       ├── requirements.txt
│       │   │       ├── run_model.sh
│       │   │       └── sweep.sh
│       │   ├── bin/
│       │   │   ├── ds
│       │   │   ├── ds_bench
│       │   │   ├── ds_elastic
│       │   │   ├── ds_report
│       │   │   └── ds_ssh
│       │   ├── build_win.bat
│       │   ├── csrc/
│       │   │   ├── adagrad/
│       │   │   │   └── cpu_adagrad.cpp
│       │   │   ├── adam/
│       │   │   │   ├── cpu_adam.cpp
│       │   │   │   ├── fused_adam_frontend.cpp
│       │   │   │   ├── multi_tensor_adam.cu
│       │   │   │   └── multi_tensor_apply.cuh
│       │   │   ├── aio/
│       │   │   │   ├── common/
│       │   │   │   │   ├── deepspeed_aio_common.cpp
│       │   │   │   │   ├── deepspeed_aio_common.h
│       │   │   │   │   ├── deepspeed_aio_types.cpp
│       │   │   │   │   ├── deepspeed_aio_types.h
│       │   │   │   │   ├── deepspeed_aio_utils.cpp
│       │   │   │   │   └── deepspeed_aio_utils.h
│       │   │   │   ├── py_lib/
│       │   │   │   │   ├── deepspeed_aio_thread.cpp
│       │   │   │   │   ├── deepspeed_aio_thread.h
│       │   │   │   │   ├── deepspeed_py_aio.cpp
│       │   │   │   │   ├── deepspeed_py_aio.h
│       │   │   │   │   ├── deepspeed_py_aio_handle.cpp
│       │   │   │   │   ├── deepspeed_py_aio_handle.h
│       │   │   │   │   ├── deepspeed_py_copy.cpp
│       │   │   │   │   ├── deepspeed_py_copy.h
│       │   │   │   │   └── py_ds_aio.cpp
│       │   │   │   └── py_test/
│       │   │   │       ├── aio_bench_generate_param.py
│       │   │   │       ├── aio_bench_perf_sweep.py
│       │   │   │       ├── ds_aio_basic.py
│       │   │   │       ├── ds_aio_handle.py
│       │   │   │       ├── parse_aio_stats.py
│       │   │   │       ├── perf_sweep_utils.py
│       │   │   │       ├── run_read_sweep.sh
│       │   │   │       ├── run_write_sweep.sh
│       │   │   │       ├── single_process_config.json
│       │   │   │       ├── test_ds_aio.py
│       │   │   │       ├── test_ds_aio_utils.py
│       │   │   │       └── validate_async_io.py
│       │   │   ├── common/
│       │   │   │   └── custom_cuda_kernel.cu
│       │   │   ├── includes/
│       │   │   │   ├── StopWatch.h
│       │   │   │   ├── Timer.h
│       │   │   │   ├── compat.h
│       │   │   │   ├── context.h
│       │   │   │   ├── conversion_utils.h
│       │   │   │   ├── cpu_adagrad.h
│       │   │   │   ├── cpu_adam.h
│       │   │   │   ├── cublas_wrappers.h
│       │   │   │   ├── custom_cuda_layers.h
│       │   │   │   ├── dequantization_utils.h
│       │   │   │   ├── dropout.h
│       │   │   │   ├── ds_kernel_utils.h
│       │   │   │   ├── ds_transformer_cuda.h
│       │   │   │   ├── feed_forward.h
│       │   │   │   ├── gelu.h
│       │   │   │   ├── gemm_test.h
│       │   │   │   ├── general_kernels.h
│       │   │   │   ├── memory_access_utils.h
│       │   │   │   ├── normalize_layer.h
│       │   │   │   ├── quantization.h
│       │   │   │   ├── quantization_utils.h
│       │   │   │   ├── quantizer.h
│       │   │   │   ├── reduction_utils.h
│       │   │   │   ├── simd.h
│       │   │   │   ├── softmax.h
│       │   │   │   ├── strided_batch_gemm.h
│       │   │   │   └── type_shim.h
│       │   │   ├── lamb/
│       │   │   │   ├── fused_lamb_cuda.cpp
│       │   │   │   └── fused_lamb_cuda_kernel.cu
│       │   │   ├── quantization/
│       │   │   │   ├── dequantize.cu
│       │   │   │   ├── fake_quantizer.cu
│       │   │   │   ├── pt_binding.cpp
│       │   │   │   └── quantize.cu
│       │   │   ├── sparse_attention/
│       │   │   │   └── utils.cpp
│       │   │   ├── spatial/
│       │   │   │   ├── csrc/
│       │   │   │   │   ├── opt_bias_add.cu
│       │   │   │   │   └── pt_binding.cpp
│       │   │   │   └── includes/
│       │   │   │       └── spatial_cuda_layers.h
│       │   │   ├── transformer/
│       │   │   │   ├── cublas_wrappers.cu
│       │   │   │   ├── dropout_kernels.cu
│       │   │   │   ├── ds_transformer_cuda.cpp
│       │   │   │   ├── gelu_kernels.cu
│       │   │   │   ├── general_kernels.cu
│       │   │   │   ├── inference/
│       │   │   │   │   ├── csrc/
│       │   │   │   │   │   ├── apply_rotary_pos_emb.cu
│       │   │   │   │   │   ├── dequantize.cu
│       │   │   │   │   │   ├── gelu.cu
│       │   │   │   │   │   ├── layer_norm.cu
│       │   │   │   │   │   ├── pt_binding.cpp
│       │   │   │   │   │   ├── relu.cu
│       │   │   │   │   │   ├── softmax.cu
│       │   │   │   │   │   └── transform.cu
│       │   │   │   │   └── includes/
│       │   │   │   │       ├── inference_context.h
│       │   │   │   │       ├── inference_cublas_wrappers.h
│       │   │   │   │       └── inference_cuda_layers.h
│       │   │   │   ├── normalize_kernels.cu
│       │   │   │   ├── softmax_kernels.cu
│       │   │   │   └── transform_kernels.cu
│       │   │   └── utils/
│       │   │       └── flatten_unflatten.cpp
│       │   ├── deepspeed/
│       │   │   ├── __init__.py
│       │   │   ├── accelerator/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── abstract_accelerator.py
│       │   │   │   ├── cuda_accelerator.py
│       │   │   │   └── real_accelerator.py
│       │   │   ├── autotuning/
│       │   │   │   ├── .gitignore
│       │   │   │   ├── README.md
│       │   │   │   ├── __init__.py
│       │   │   │   ├── autotuner.py
│       │   │   │   ├── config.py
│       │   │   │   ├── config_templates/
│       │   │   │   │   ├── template_zero0.json
│       │   │   │   │   ├── template_zero1.json
│       │   │   │   │   ├── template_zero2.json
│       │   │   │   │   └── template_zero3.json
│       │   │   │   ├── constants.py
│       │   │   │   ├── scheduler.py
│       │   │   │   ├── tuner/
│       │   │   │   │   ├── README.md
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── base_tuner.py
│       │   │   │   │   ├── cost_model.py
│       │   │   │   │   ├── index_based_tuner.py
│       │   │   │   │   ├── model_based_tuner.py
│       │   │   │   │   └── utils.py
│       │   │   │   └── utils.py
│       │   │   ├── checkpoint/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── deepspeed_checkpoint.py
│       │   │   │   ├── reshape_3d_utils.py
│       │   │   │   ├── reshape_meg_2d.py
│       │   │   │   ├── reshape_utils.py
│       │   │   │   ├── universal_checkpoint.py
│       │   │   │   ├── utils.py
│       │   │   │   └── zero_checkpoint.py
│       │   │   ├── comm/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── backend.py
│       │   │   │   ├── comm.py
│       │   │   │   ├── config.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── torch.py
│       │   │   │   └── utils.py
│       │   │   ├── compression/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── basic_layer.py
│       │   │   │   ├── compress.py
│       │   │   │   ├── config.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── helper.py
│       │   │   │   ├── scheduler.py
│       │   │   │   └── utils.py
│       │   │   ├── constants.py
│       │   │   ├── elasticity/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── config.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── elastic_agent.py
│       │   │   │   ├── elasticity.py
│       │   │   │   └── utils.py
│       │   │   ├── env_report.py
│       │   │   ├── git_version_info.py
│       │   │   ├── inference/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── config.py
│       │   │   │   └── engine.py
│       │   │   ├── launcher/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── launch.py
│       │   │   │   ├── multinode_runner.py
│       │   │   │   └── runner.py
│       │   │   ├── model_implementations/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── diffusers/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── unet.py
│       │   │   │   │   └── vae.py
│       │   │   │   └── transformers/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── clip_encoder.py
│       │   │   │       └── ds_transformer.py
│       │   │   ├── module_inject/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── inject.py
│       │   │   │   ├── layers.py
│       │   │   │   ├── load_checkpoint.py
│       │   │   │   ├── module_quantize.py
│       │   │   │   ├── replace_module.py
│       │   │   │   └── replace_policy.py
│       │   │   ├── moe/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── experts.py
│       │   │   │   ├── layer.py
│       │   │   │   ├── mappings.py
│       │   │   │   ├── sharded_moe.py
│       │   │   │   └── utils.py
│       │   │   ├── monitor/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── config.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── csv_monitor.py
│       │   │   │   ├── monitor.py
│       │   │   │   ├── tensorboard.py
│       │   │   │   ├── utils.py
│       │   │   │   └── wandb.py
│       │   │   ├── nebula/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── config.py
│       │   │   │   └── constants.py
│       │   │   ├── ops/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── adagrad/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── cpu_adagrad.py
│       │   │   │   ├── adam/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── cpu_adam.py
│       │   │   │   │   ├── fused_adam.py
│       │   │   │   │   └── multi_tensor_apply.py
│       │   │   │   ├── aio/
│       │   │   │   │   └── __init__.py
│       │   │   │   ├── lamb/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── fused_lamb.py
│       │   │   │   ├── quantizer/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── quantizer.py
│       │   │   │   ├── sparse_attention/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── bert_sparse_self_attention.py
│       │   │   │   │   ├── matmul.py
│       │   │   │   │   ├── softmax.py
│       │   │   │   │   ├── sparse_attention_utils.py
│       │   │   │   │   ├── sparse_self_attention.py
│       │   │   │   │   ├── sparsity_config.py
│       │   │   │   │   └── trsrc/
│       │   │   │   │       ├── __init__.py
│       │   │   │   │       ├── matmul.tr
│       │   │   │   │       ├── softmax_bwd.tr
│       │   │   │   │       └── softmax_fwd.tr
│       │   │   │   └── transformer/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── inference/
│       │   │   │       │   ├── __init__.py
│       │   │   │       │   ├── bias_add.py
│       │   │   │       │   ├── config.py
│       │   │   │       │   ├── diffusers_2d_transformer.py
│       │   │   │       │   ├── diffusers_attention.py
│       │   │   │       │   ├── diffusers_transformer_block.py
│       │   │   │       │   ├── ds_attention.py
│       │   │   │       │   ├── ds_mlp.py
│       │   │   │       │   ├── moe_inference.py
│       │   │   │       │   └── triton_ops.py
│       │   │   │       └── transformer.py
│       │   │   ├── pipe/
│       │   │   │   └── __init__.py
│       │   │   ├── profiling/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── config.py
│       │   │   │   ├── constants.py
│       │   │   │   └── flops_profiler/
│       │   │   │       ├── README.md
│       │   │   │       ├── __init__.py
│       │   │   │       └── profiler.py
│       │   │   ├── runtime/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── activation_checkpointing/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── checkpointing.py
│       │   │   │   │   └── config.py
│       │   │   │   ├── bf16_optimizer.py
│       │   │   │   ├── checkpoint_engine/
│       │   │   │   │   ├── README.md
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── checkpoint_engine.py
│       │   │   │   │   ├── nebula_checkpoint_engine.py
│       │   │   │   │   └── torch_checkpoint_engine.py
│       │   │   │   ├── comm/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── coalesced_collectives.py
│       │   │   │   │   ├── mpi.py
│       │   │   │   │   └── nccl.py
│       │   │   │   ├── compression/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── cupy.py
│       │   │   │   ├── config.py
│       │   │   │   ├── config_utils.py
│       │   │   │   ├── constants.py
│       │   │   │   ├── data_pipeline/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   └── curriculum_scheduler.py
│       │   │   │   ├── dataloader.py
│       │   │   │   ├── eigenvalue.py
│       │   │   │   ├── engine.py
│       │   │   │   ├── fp16/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── fused_optimizer.py
│       │   │   │   │   ├── loss_scaler.py
│       │   │   │   │   ├── onebit/
│       │   │   │   │   │   ├── __init__.py
│       │   │   │   │   │   ├── adam.py
│       │   │   │   │   │   ├── lamb.py
│       │   │   │   │   │   └── zoadam.py
│       │   │   │   │   └── unfused_optimizer.py
│       │   │   │   ├── lr_schedules.py
│       │   │   │   ├── pipe/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── engine.py
│       │   │   │   │   ├── module.py
│       │   │   │   │   ├── p2p.py
│       │   │   │   │   ├── schedule.py
│       │   │   │   │   └── topology.py
│       │   │   │   ├── progressive_layer_drop.py
│       │   │   │   ├── quantize.py
│       │   │   │   ├── sparse_tensor.py
│       │   │   │   ├── state_dict_factory.py
│       │   │   │   ├── swap_tensor/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── aio_config.py
│       │   │   │   │   ├── async_swapper.py
│       │   │   │   │   ├── constants.py
│       │   │   │   │   ├── optimizer_utils.py
│       │   │   │   │   ├── partitioned_optimizer_swapper.py
│       │   │   │   │   ├── partitioned_param_swapper.py
│       │   │   │   │   ├── pipelined_optimizer_swapper.py
│       │   │   │   │   └── utils.py
│       │   │   │   ├── utils.py
│       │   │   │   ├── weight_quantizer.py
│       │   │   │   └── zero/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── config.py
│       │   │   │       ├── contiguous_memory_allocator.py
│       │   │   │       ├── linear.py
│       │   │   │       ├── offload_config.py
│       │   │   │       ├── parameter_offload.py
│       │   │   │       ├── partition_parameters.py
│       │   │   │       ├── partitioned_param_coordinator.py
│       │   │   │       ├── stage3.py
│       │   │   │       ├── stage_1_and_2.py
│       │   │   │       ├── test.py
│       │   │   │       ├── tiling.py
│       │   │   │       └── utils.py
│       │   │   └── utils/
│       │   │       ├── __init__.py
│       │   │       ├── comms_logging.py
│       │   │       ├── debug.py
│       │   │       ├── exceptions.py
│       │   │       ├── groups.py
│       │   │       ├── init_on_device.py
│       │   │       ├── logging.py
│       │   │       ├── mixed_precision_linkage.py
│       │   │       ├── nvtx.py
│       │   │       ├── tensor_fragment.py
│       │   │       ├── timer.py
│       │   │       ├── types.py
│       │   │       └── zero_to_fp32.py
│       │   ├── docker/
│       │   │   ├── Dockerfile
│       │   │   └── Dockerfile.rocm
│       │   ├── docs/
│       │   │   ├── 404.html
│       │   │   ├── CNAME
│       │   │   ├── Gemfile
│       │   │   ├── README.md
│       │   │   ├── _config.yml
│       │   │   ├── _data/
│       │   │   │   └── navigation.yml
│       │   │   ├── _includes/
│       │   │   │   ├── analytics.html
│       │   │   │   ├── archive-single.html
│       │   │   │   ├── author-profile-custom-links.html
│       │   │   │   ├── author-profile.html
│       │   │   │   ├── breadcrumbs.html
│       │   │   │   ├── browser-upgrade.html
│       │   │   │   ├── category-list.html
│       │   │   │   ├── comment.html
│       │   │   │   ├── comments.html
│       │   │   │   ├── documents-collection.html
│       │   │   │   ├── feature_row
│       │   │   │   ├── figure
│       │   │   │   ├── footer.html
│       │   │   │   ├── gallery
│       │   │   │   ├── group-by-array
│       │   │   │   ├── head.html
│       │   │   │   ├── masthead.html
│       │   │   │   ├── nav_list
│       │   │   │   ├── page__date.html
│       │   │   │   ├── page__hero.html
│       │   │   │   ├── page__hero_video.html
│       │   │   │   ├── page__meta.html
│       │   │   │   ├── page__taxonomy.html
│       │   │   │   ├── paginator.html
│       │   │   │   ├── post_pagination.html
│       │   │   │   ├── posts-category.html
│       │   │   │   ├── posts-tag.html
│       │   │   │   ├── scripts.html
│       │   │   │   ├── seo.html
│       │   │   │   ├── sidebar.html
│       │   │   │   ├── skip-links.html
│       │   │   │   ├── social-share.html
│       │   │   │   ├── tag-list.html
│       │   │   │   ├── toc
│       │   │   │   ├── toc.html
│       │   │   │   └── video
│       │   │   ├── _layouts/
│       │   │   │   └── single-full.html
│       │   │   ├── _pages/
│       │   │   │   ├── compression.md
│       │   │   │   ├── config-json.md
│       │   │   │   ├── inference.md
│       │   │   │   ├── posts-landing.md
│       │   │   │   ├── posts_list_landing.md
│       │   │   │   ├── training.md
│       │   │   │   └── tutorials-landing.md
│       │   │   ├── _posts/
│       │   │   │   ├── 2020-02-13-release.md
│       │   │   │   ├── 2020-02-13-turing-nlg.md
│       │   │   │   ├── 2020-03-17-reduce-scatter.md
│       │   │   │   ├── 2020-05-19-bert-record.md
│       │   │   │   ├── 2020-05-19-press-release.md
│       │   │   │   ├── 2020-05-19-zero-stage2.md
│       │   │   │   ├── 2020-05-28-fastest-bert-training.md
│       │   │   │   ├── 2020-07-24-deepspeed-webinar.md
│       │   │   │   ├── 2020-08-07-webinar-on-demand.md
│       │   │   │   ├── 2020-09-08-sparse-attention-news.md
│       │   │   │   ├── 2020-09-09-ZeRO-Offload.md
│       │   │   │   ├── 2020-09-09-onebit-adam-blog-post.md
│       │   │   │   ├── 2020-09-09-onebit-adam-news.md
│       │   │   │   ├── 2020-09-09-pipeline-parallelism.md
│       │   │   │   ├── 2020-09-09-sparse-attention.md
│       │   │   │   ├── 2020-10-28-progressive-layer-dropping-news.md
│       │   │   │   ├── 2021-03-08-zero3-offload.md
│       │   │   │   ├── 2021-05-05-MoQ.md
│       │   │   │   ├── 2021-05-05-inference-kernel-optimization.md
│       │   │   │   ├── 2021-05-14-inference-release.md
│       │   │   │   ├── 2021-08-18-deepspeed-moe.md
│       │   │   │   ├── 2021-11-15-autotuning.md
│       │   │   │   ├── 2021-12-09-deepspeed-moe-nlg.md
│       │   │   │   ├── 2022-01-19-moe-inference.md
│       │   │   │   ├── 2022-03-21-amd-support.md
│       │   │   │   ├── 2022-07-26-deepspeed-azure.md
│       │   │   │   ├── 2022-09-10-zero-inference.md
│       │   │   │   └── 2022-10-11-mii.md
│       │   │   ├── _sass/
│       │   │   │   ├── button-group.scss
│       │   │   │   ├── minimal-mistakes/
│       │   │   │   │   ├── _archive.scss
│       │   │   │   │   ├── _navigation.scss
│       │   │   │   │   ├── _page.scss
│       │   │   │   │   ├── _sidebar.scss
│       │   │   │   │   ├── _variables.scss
│       │   │   │   │   └── skins/
│       │   │   │   │       └── _air.scss
│       │   │   │   └── minimal-mistakes.scss
│       │   │   ├── _tutorials/
│       │   │   │   ├── MoQ-tutorial.md
│       │   │   │   ├── advanced-install.md
│       │   │   │   ├── autotuning.md
│       │   │   │   ├── azure.md
│       │   │   │   ├── bert-finetuning.md
│       │   │   │   ├── bert-pretraining.md
│       │   │   │   ├── cifar-10.md
│       │   │   │   ├── comms-logging.md
│       │   │   │   ├── curriculum-learning.md
│       │   │   │   ├── flops-profiler.md
│       │   │   │   ├── gan.md
│       │   │   │   ├── getting-started.md
│       │   │   │   ├── inference-tutorial.md
│       │   │   │   ├── large-models-w-deepspeed.md
│       │   │   │   ├── lrrt.md
│       │   │   │   ├── megatron.md
│       │   │   │   ├── mixture-of-experts-inference.md
│       │   │   │   ├── mixture-of-experts-nlg.md
│       │   │   │   ├── mixture-of-experts.md
│       │   │   │   ├── model-compression.md
│       │   │   │   ├── monitor.md
│       │   │   │   ├── one-cycle.md
│       │   │   │   ├── onebit-adam.md
│       │   │   │   ├── onebit-lamb.md
│       │   │   │   ├── pipeline.md
│       │   │   │   ├── progressive_layer_dropping.md
│       │   │   │   ├── pytorch-profiler.md
│       │   │   │   ├── sparse-attention.md
│       │   │   │   ├── transformer_kernel.md
│       │   │   │   ├── zero-offload.md
│       │   │   │   ├── zero-one-adam.md
│       │   │   │   └── zero.md
│       │   │   ├── assets/
│       │   │   │   └── css/
│       │   │   │       └── main.scss
│       │   │   ├── code-docs/
│       │   │   │   ├── Makefile
│       │   │   │   ├── build-api-docs.sh
│       │   │   │   └── source/
│       │   │   │       ├── activation-checkpointing.rst
│       │   │   │       ├── autotuning.rst
│       │   │   │       ├── conf.py
│       │   │   │       ├── flops-profiler.rst
│       │   │   │       ├── index.rst
│       │   │   │       ├── inference-engine.rst
│       │   │   │       ├── inference-init.rst
│       │   │   │       ├── initialize.rst
│       │   │   │       ├── kernel.rst
│       │   │   │       ├── memory.rst
│       │   │   │       ├── model-checkpointing.rst
│       │   │   │       ├── moe.rst
│       │   │   │       ├── optimizers.rst
│       │   │   │       ├── pipeline.rst
│       │   │   │       ├── schedulers.rst
│       │   │   │       ├── training.rst
│       │   │   │       └── zero3.rst
│       │   │   ├── contributing.md
│       │   │   └── index.md
│       │   ├── examples/
│       │   │   └── README.md
│       │   ├── install.sh
│       │   ├── op_builder/
│       │   │   ├── __init__.py
│       │   │   ├── all_ops.py
│       │   │   ├── async_io.py
│       │   │   ├── builder.py
│       │   │   ├── builder_names.py
│       │   │   ├── cpu_adagrad.py
│       │   │   ├── cpu_adam.py
│       │   │   ├── fused_adam.py
│       │   │   ├── fused_lamb.py
│       │   │   ├── quantizer.py
│       │   │   ├── sparse_attn.py
│       │   │   ├── spatial_inference.py
│       │   │   ├── stochastic_transformer.py
│       │   │   ├── transformer.py
│       │   │   ├── transformer_inference.py
│       │   │   └── utils.py
│       │   ├── release/
│       │   │   ├── bump_patch_version.py
│       │   │   └── release.sh
│       │   ├── requirements/
│       │   │   ├── requirements-1bit-mpi.txt
│       │   │   ├── requirements-autotuning-ml.txt
│       │   │   ├── requirements-autotuning.txt
│       │   │   ├── requirements-dev.txt
│       │   │   ├── requirements-inf.txt
│       │   │   ├── requirements-readthedocs.txt
│       │   │   ├── requirements-sd.txt
│       │   │   ├── requirements-sparse_attn.txt
│       │   │   └── requirements.txt
│       │   ├── scripts/
│       │   │   └── check-torchdist.py
│       │   ├── setup.cfg
│       │   ├── setup.py
│       │   ├── tests/
│       │   │   ├── benchmarks/
│       │   │   │   ├── flatten_bench.py
│       │   │   │   └── unflatten_bench.py
│       │   │   ├── conftest.py
│       │   │   ├── lightning/
│       │   │   │   └── test_simple.py
│       │   │   ├── model/
│       │   │   │   ├── BingBertSquad/
│       │   │   │   │   ├── BingBertSquad_run_func_test.py
│       │   │   │   │   ├── BingBertSquad_test_common.py
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── deepspeed_bsz24_fp16_config.json
│       │   │   │   │   ├── deepspeed_bsz24_fp16_eigenvalue_quantize_config.json
│       │   │   │   │   ├── deepspeed_bsz24_fp16_zero2_config.json
│       │   │   │   │   ├── deepspeed_bsz24_fp32_config.json
│       │   │   │   │   ├── run_BingBertSquad.sh
│       │   │   │   │   ├── run_BingBertSquad_sanity.sh
│       │   │   │   │   ├── run_tests.sh
│       │   │   │   │   └── test_e2e_squad.py
│       │   │   │   ├── Megatron_GPT2/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── ds_config_func_bs4_zero1.json
│       │   │   │   │   ├── ds_config_func_bs4_zero2.json
│       │   │   │   │   ├── ds_config_func_bs4_zero2_offload.json
│       │   │   │   │   ├── ds_config_func_bs8_no_zero.json
│       │   │   │   │   ├── ds_config_func_bs8_zero0_gas3.json
│       │   │   │   │   ├── ds_config_func_bs8_zero1.json
│       │   │   │   │   ├── ds_config_func_bs8_zero2.json
│       │   │   │   │   ├── ds_config_func_bs8_zero2_gas3.json
│       │   │   │   │   ├── ds_config_func_bs8_zero2_offload.json
│       │   │   │   │   ├── ds_config_func_scheduler.json
│       │   │   │   │   ├── ds_config_perf_bs16.json
│       │   │   │   │   ├── ds_config_perf_bs32.json
│       │   │   │   │   ├── ds_config_perf_bs8.json
│       │   │   │   │   ├── ds_gpt2_test.sh
│       │   │   │   │   ├── run_checkpoint_test.py
│       │   │   │   │   ├── run_func_test.py
│       │   │   │   │   ├── run_perf_baseline.py
│       │   │   │   │   ├── run_perf_test.py
│       │   │   │   │   └── test_common.py
│       │   │   │   └── run_sanity_check.py
│       │   │   ├── onebit/
│       │   │   │   ├── test_mpi_backend.py
│       │   │   │   ├── test_mpi_perf.py
│       │   │   │   ├── test_nccl_backend.py
│       │   │   │   └── test_nccl_perf.py
│       │   │   ├── perf/
│       │   │   │   ├── adam_test.py
│       │   │   │   └── adam_test1.py
│       │   │   ├── pytest.ini
│       │   │   ├── small_model_debugging/
│       │   │   │   ├── stage3_test.py
│       │   │   │   ├── test.py
│       │   │   │   └── test_model.py
│       │   │   └── unit/
│       │   │       ├── __init__.py
│       │   │       ├── alexnet_model.py
│       │   │       ├── autotuning/
│       │   │       │   └── test_autotuning.py
│       │   │       ├── checkpoint/
│       │   │       │   ├── common.py
│       │   │       │   ├── test_latest_checkpoint.py
│       │   │       │   ├── test_lr_scheduler.py
│       │   │       │   ├── test_moe_checkpoint.py
│       │   │       │   ├── test_other_optimizer.py
│       │   │       │   ├── test_pipeline.py
│       │   │       │   ├── test_reshape_checkpoint.py
│       │   │       │   ├── test_sparse.py
│       │   │       │   ├── test_tag_validation.py
│       │   │       │   └── test_zero_optimizer.py
│       │   │       ├── comm/
│       │   │       │   └── test_dist.py
│       │   │       ├── common.py
│       │   │       ├── compression/
│       │   │       │   └── test_compression.py
│       │   │       ├── ds_batch_config.json
│       │   │       ├── elasticity/
│       │   │       │   └── test_elastic.py
│       │   │       ├── gpt2-merges.txt
│       │   │       ├── gpt2-vocab.json
│       │   │       ├── inference/
│       │   │       │   ├── test_checkpoint_sharding.py
│       │   │       │   ├── test_inference.py
│       │   │       │   ├── test_inference_config.py
│       │   │       │   └── test_model_profiling.py
│       │   │       ├── launcher/
│       │   │       │   ├── test_ds_arguments.py
│       │   │       │   ├── test_multinode_runner.py
│       │   │       │   └── test_run.py
│       │   │       ├── megatron_model.py
│       │   │       ├── model_parallelism/
│       │   │       │   ├── test_configurable_parallel_mp.py
│       │   │       │   └── test_configurable_parallel_pp.py
│       │   │       ├── modeling.py
│       │   │       ├── modelingpreln.py
│       │   │       ├── moe/
│       │   │       │   ├── test_moe.py
│       │   │       │   └── test_moe_tp.py
│       │   │       ├── monitor/
│       │   │       │   └── test_monitor.py
│       │   │       ├── multi_output_model.py
│       │   │       ├── ops/
│       │   │       │   ├── adagrad/
│       │   │       │   │   └── test_cpu_adagrad.py
│       │   │       │   ├── adam/
│       │   │       │   │   ├── test_adamw.py
│       │   │       │   │   └── test_cpu_adam.py
│       │   │       │   ├── aio/
│       │   │       │   │   └── test_aio.py
│       │   │       │   ├── cuda/
│       │   │       │   │   ├── test_cuda_backward.py
│       │   │       │   │   └── test_cuda_forward.py
│       │   │       │   ├── quantizer/
│       │   │       │   │   ├── test_dequantize.py
│       │   │       │   │   ├── test_fake_quantization.py
│       │   │       │   │   └── test_quantize.py
│       │   │       │   ├── sparse_attention/
│       │   │       │   │   └── test_sparse_attention.py
│       │   │       │   ├── spatial/
│       │   │       │   │   └── test_nhwc_bias_add.py
│       │   │       │   └── transformer/
│       │   │       │       └── inference/
│       │   │       │           ├── test_bias_add.py
│       │   │       │           ├── test_bias_geglu.py
│       │   │       │           ├── test_bias_gelu.py
│       │   │       │           ├── test_bias_relu.py
│       │   │       │           ├── test_layer_norm.py
│       │   │       │           ├── test_moe_res_matmult.py
│       │   │       │           └── test_residual_add.py
│       │   │       ├── pipe/
│       │   │       │   └── test_pipe_module.py
│       │   │       ├── profiling/
│       │   │       │   └── flops_profiler/
│       │   │       │       └── test_flops_profiler.py
│       │   │       ├── runtime/
│       │   │       │   ├── activation_checkpointing/
│       │   │       │   │   └── test_activation_checkpointing.py
│       │   │       │   ├── comm/
│       │   │       │   │   └── test_coalesced_collectives.py
│       │   │       │   ├── half_precision/
│       │   │       │   │   ├── onebit/
│       │   │       │   │   │   └── test_onebit.py
│       │   │       │   │   ├── test_bf16.py
│       │   │       │   │   ├── test_dynamic_loss_scale.py
│       │   │       │   │   └── test_fp16.py
│       │   │       │   ├── pipe/
│       │   │       │   │   ├── test_pipe.py
│       │   │       │   │   ├── test_pipe_schedule.py
│       │   │       │   │   └── test_topology.py
│       │   │       │   ├── sparse_tensor/
│       │   │       │   │   ├── test_averaging_sparse_gradients.py
│       │   │       │   │   ├── test_csr.py
│       │   │       │   │   └── test_sparse_grads.py
│       │   │       │   ├── test_autocast.py
│       │   │       │   ├── test_curriculum_learning.py
│       │   │       │   ├── test_data.py
│       │   │       │   ├── test_ds_config_dict.py
│       │   │       │   ├── test_ds_config_model.py
│       │   │       │   ├── test_ds_initialize.py
│       │   │       │   ├── test_lr_schedulers.py
│       │   │       │   ├── test_multi_output_model.py
│       │   │       │   ├── test_pld.py
│       │   │       │   ├── test_runtime_utils.py
│       │   │       │   ├── utils/
│       │   │       │   │   └── test_partition.py
│       │   │       │   └── zero/
│       │   │       │       ├── test_ignore_unused_parameters.py
│       │   │       │       ├── test_zero.py
│       │   │       │       ├── test_zero_config.py
│       │   │       │       ├── test_zero_context.py
│       │   │       │       └── test_zero_tiled.py
│       │   │       ├── simple_model.py
│       │   │       ├── util.py
│       │   │       └── utils/
│       │   │           ├── test_get_optim_files.py
│       │   │           ├── test_groups.py
│       │   │           └── test_init_on_device.py
│       │   └── version.txt
│       ├── README.md
│       ├── pagecache-mangagement/
│       │   ├── .svn/
│       │   │   ├── all-wcprops
│       │   │   └── entries
│       │   ├── README.md
│       │   ├── branches/
│       │   │   └── .svn/
│       │   │       ├── all-wcprops
│       │   │       └── entries
│       │   ├── tags/
│       │   │   └── .svn/
│       │   │       ├── all-wcprops
│       │   │       └── entries
│       │   └── trunk/
│       │       ├── .svn/
│       │       │   ├── all-wcprops
│       │       │   ├── entries
│       │       │   ├── prop-base/
│       │       │   │   ├── benchmar_plain.svn-base
│       │       │   │   ├── benchmar_prepare.svn-base
│       │       │   │   ├── benchmar_qemu.svn-base
│       │       │   │   ├── benchmar_qemu2.svn-base
│       │       │   │   ├── benchmar_squashfs.svn-base
│       │       │   │   ├── benchmar_test.sh.svn-base
│       │       │   │   ├── fadv.sh.svn-base
│       │       │   │   ├── fadv_command.sh.svn-base
│       │       │   │   ├── pagecache-management-fadv.sh.svn-base
│       │       │   │   ├── pagecache-management-ignore-reads.sh.svn-base
│       │       │   │   ├── pagecache-management-lazy200.sh.svn-base
│       │       │   │   ├── pagecache-management-lazy200ir.sh.svn-base
│       │       │   │   ├── pagecache-management-null.sh.svn-base
│       │       │   │   └── pagecache-management.sh.svn-base
│       │       │   └── text-base/
│       │       │       ├── Makefile.svn-base
│       │       │       ├── benchmar_plain.svn-base
│       │       │       ├── benchmar_prepare.svn-base
│       │       │       ├── benchmar_qemu.svn-base
│       │       │       ├── benchmar_qemu2.svn-base
│       │       │       ├── benchmar_squashfs.svn-base
│       │       │       ├── benchmar_test.sh.svn-base
│       │       │       ├── fadv.c.svn-base
│       │       │       ├── fadv.sh.svn-base
│       │       │       ├── fadv_command.sh.svn-base
│       │       │       ├── pagecache-management-fadv.sh.svn-base
│       │       │       ├── pagecache-management-ignore-reads.sh.svn-base
│       │       │       ├── pagecache-management-lazy200.sh.svn-base
│       │       │       ├── pagecache-management-lazy200ir.sh.svn-base
│       │       │       ├── pagecache-management-null.sh.svn-base
│       │       │       ├── pagecache-management.c.svn-base
│       │       │       ├── pagecache-management.sh.svn-base
│       │       │       ├── pagecache-management.txt.svn-base
│       │       │       ├── sfr.c.svn-base
│       │       │       ├── sync_file_range.h.svn-base
│       │       │       └── test.c.svn-base
│       │       ├── benchmar_plain
│       │       ├── benchmar_prepare
│       │       ├── benchmar_qemu
│       │       ├── benchmar_qemu2
│       │       ├── benchmar_squashfs
│       │       ├── benchmar_test.sh
│       │       ├── fadv.c
│       │       ├── fadv.sh
│       │       ├── fadv_command.sh
│       │       ├── pagecache-management-fadv.sh
│       │       ├── pagecache-management-ignore-reads.sh
│       │       ├── pagecache-management-lazy200.sh
│       │       ├── pagecache-management-lazy200ir.sh
│       │       ├── pagecache-management-null.sh
│       │       ├── pagecache-management.c
│       │       ├── pagecache-management.sh
│       │       ├── pagecache-management.txt
│       │       ├── sfr.c
│       │       ├── sync_file_range.h
│       │       └── test.c
│       └── transformers/
│           ├── .circleci/
│           │   ├── TROUBLESHOOT.md
│           │   ├── config.yml
│           │   └── create_circleci_config.py
│           ├── .coveragerc
│           ├── .gitattributes
│           ├── .github/
│           │   ├── ISSUE_TEMPLATE/
│           │   │   ├── bug-report.yml
│           │   │   ├── config.yml
│           │   │   ├── feature-request.yml
│           │   │   ├── migration.yml
│           │   │   └── new-model-addition.yml
│           │   ├── PULL_REQUEST_TEMPLATE.md
│           │   ├── conda/
│           │   │   ├── build.sh
│           │   │   └── meta.yaml
│           │   └── workflows/
│           │       ├── TROUBLESHOOT.md
│           │       ├── add-model-like.yml
│           │       ├── build-docker-images.yml
│           │       ├── build-past-ci-docker-images.yml
│           │       ├── build_documentation.yml
│           │       ├── build_pr_documentation.yml
│           │       ├── check_runner_status.yml
│           │       ├── delete_doc_comment.yml
│           │       ├── doctests.yml
│           │       ├── model-templates.yml
│           │       ├── release-conda.yml
│           │       ├── self-nightly-scheduled.yml
│           │       ├── self-past-caller.yml
│           │       ├── self-past.yml
│           │       ├── self-push-caller.yml
│           │       ├── self-push.yml
│           │       ├── self-scheduled.yml
│           │       ├── stale.yml
│           │       └── update_metdata.yml
│           ├── .gitignore
│           ├── CITATION.cff
│           ├── CODE_OF_CONDUCT.md
│           ├── CONTRIBUTING.md
│           ├── ISSUES.md
│           ├── LICENSE
│           ├── MANIFEST.in
│           ├── Makefile
│           ├── README.md
│           ├── README_es.md
│           ├── README_ko.md
│           ├── README_zh-hans.md
│           ├── README_zh-hant.md
│           ├── conftest.py
│           ├── docker/
│           │   ├── transformers-all-latest-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-cpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-doc-builder/
│           │   │   └── Dockerfile
│           │   ├── transformers-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-past-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-pytorch-cpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-pytorch-deepspeed-latest-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-pytorch-deepspeed-nightly-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-pytorch-gpu/
│           │   │   └── Dockerfile
│           │   ├── transformers-pytorch-tpu/
│           │   │   ├── Dockerfile
│           │   │   ├── bert-base-cased.jsonnet
│           │   │   ├── dataset.yaml
│           │   │   └── docker-entrypoint.sh
│           │   ├── transformers-tensorflow-cpu/
│           │   │   └── Dockerfile
│           │   └── transformers-tensorflow-gpu/
│           │       └── Dockerfile
│           ├── docs/
│           │   ├── README.md
│           │   ├── TRANSLATING.md
│           │   └── source/
│           │       ├── _config.py
│           │       ├── de/
│           │       │   ├── _config.py
│           │       │   ├── _toctree.yml
│           │       │   ├── accelerate.mdx
│           │       │   ├── autoclass_tutorial.mdx
│           │       │   ├── index.mdx
│           │       │   ├── installation.mdx
│           │       │   ├── model_sharing.mdx
│           │       │   ├── pipeline_tutorial.mdx
│           │       │   ├── preprocessing.mdx
│           │       │   ├── quicktour.mdx
│           │       │   └── training.mdx
│           │       ├── en/
│           │       │   ├── _config.py
│           │       │   ├── _toctree.yml
│           │       │   ├── accelerate.mdx
│           │       │   ├── add_new_model.mdx
│           │       │   ├── add_new_pipeline.mdx
│           │       │   ├── add_tensorflow_model.mdx
│           │       │   ├── autoclass_tutorial.mdx
│           │       │   ├── benchmarks.mdx
│           │       │   ├── bertology.mdx
│           │       │   ├── big_models.mdx
│           │       │   ├── community.mdx
│           │       │   ├── converting_tensorflow_models.mdx
│           │       │   ├── create_a_model.mdx
│           │       │   ├── custom_models.mdx
│           │       │   ├── debugging.mdx
│           │       │   ├── fast_tokenizers.mdx
│           │       │   ├── glossary.mdx
│           │       │   ├── hpo_train.mdx
│           │       │   ├── index.mdx
│           │       │   ├── installation.mdx
│           │       │   ├── internal/
│           │       │   │   ├── file_utils.mdx
│           │       │   │   ├── generation_utils.mdx
│           │       │   │   ├── image_processing_utils.mdx
│           │       │   │   ├── modeling_utils.mdx
│           │       │   │   ├── pipelines_utils.mdx
│           │       │   │   ├── tokenization_utils.mdx
│           │       │   │   └── trainer_utils.mdx
│           │       │   ├── main_classes/
│           │       │   │   ├── callback.mdx
│           │       │   │   ├── configuration.mdx
│           │       │   │   ├── data_collator.mdx
│           │       │   │   ├── deepspeed.mdx
│           │       │   │   ├── feature_extractor.mdx
│           │       │   │   ├── keras_callbacks.mdx
│           │       │   │   ├── logging.mdx
│           │       │   │   ├── model.mdx
│           │       │   │   ├── onnx.mdx
│           │       │   │   ├── optimizer_schedules.mdx
│           │       │   │   ├── output.mdx
│           │       │   │   ├── pipelines.mdx
│           │       │   │   ├── processors.mdx
│           │       │   │   ├── text_generation.mdx
│           │       │   │   ├── tokenizer.mdx
│           │       │   │   └── trainer.mdx
│           │       │   ├── migration.mdx
│           │       │   ├── model_doc/
│           │       │   │   ├── albert.mdx
│           │       │   │   ├── auto.mdx
│           │       │   │   ├── bart.mdx
│           │       │   │   ├── barthez.mdx
│           │       │   │   ├── bartpho.mdx
│           │       │   │   ├── beit.mdx
│           │       │   │   ├── bert-generation.mdx
│           │       │   │   ├── bert-japanese.mdx
│           │       │   │   ├── bert.mdx
│           │       │   │   ├── bertweet.mdx
│           │       │   │   ├── big_bird.mdx
│           │       │   │   ├── bigbird_pegasus.mdx
│           │       │   │   ├── blenderbot-small.mdx
│           │       │   │   ├── blenderbot.mdx
│           │       │   │   ├── bloom.mdx
│           │       │   │   ├── bort.mdx
│           │       │   │   ├── byt5.mdx
│           │       │   │   ├── camembert.mdx
│           │       │   │   ├── canine.mdx
│           │       │   │   ├── clip.mdx
│           │       │   │   ├── codegen.mdx
│           │       │   │   ├── conditional_detr.mdx
│           │       │   │   ├── convbert.mdx
│           │       │   │   ├── convnext.mdx
│           │       │   │   ├── cpm.mdx
│           │       │   │   ├── ctrl.mdx
│           │       │   │   ├── cvt.mdx
│           │       │   │   ├── data2vec.mdx
│           │       │   │   ├── deberta-v2.mdx
│           │       │   │   ├── deberta.mdx
│           │       │   │   ├── decision_transformer.mdx
│           │       │   │   ├── deformable_detr.mdx
│           │       │   │   ├── deit.mdx
│           │       │   │   ├── detr.mdx
│           │       │   │   ├── dialogpt.mdx
│           │       │   │   ├── distilbert.mdx
│           │       │   │   ├── dit.mdx
│           │       │   │   ├── donut.mdx
│           │       │   │   ├── dpr.mdx
│           │       │   │   ├── dpt.mdx
│           │       │   │   ├── electra.mdx
│           │       │   │   ├── encoder-decoder.mdx
│           │       │   │   ├── ernie.mdx
│           │       │   │   ├── esm.mdx
│           │       │   │   ├── flan-t5.mdx
│           │       │   │   ├── flaubert.mdx
│           │       │   │   ├── flava.mdx
│           │       │   │   ├── fnet.mdx
│           │       │   │   ├── fsmt.mdx
│           │       │   │   ├── funnel.mdx
│           │       │   │   ├── glpn.mdx
│           │       │   │   ├── gpt2.mdx
│           │       │   │   ├── gpt_neo.mdx
│           │       │   │   ├── gpt_neox.mdx
│           │       │   │   ├── gpt_neox_japanese.mdx
│           │       │   │   ├── gptj.mdx
│           │       │   │   ├── groupvit.mdx
│           │       │   │   ├── herbert.mdx
│           │       │   │   ├── hubert.mdx
│           │       │   │   ├── ibert.mdx
│           │       │   │   ├── imagegpt.mdx
│           │       │   │   ├── layoutlm.mdx
│           │       │   │   ├── layoutlmv2.mdx
│           │       │   │   ├── layoutlmv3.mdx
│           │       │   │   ├── layoutxlm.mdx
│           │       │   │   ├── led.mdx
│           │       │   │   ├── levit.mdx
│           │       │   │   ├── lilt.mdx
│           │       │   │   ├── longformer.mdx
│           │       │   │   ├── longt5.mdx
│           │       │   │   ├── luke.mdx
│           │       │   │   ├── lxmert.mdx
│           │       │   │   ├── m2m_100.mdx
│           │       │   │   ├── marian.mdx
│           │       │   │   ├── markuplm.mdx
│           │       │   │   ├── maskformer.mdx
│           │       │   │   ├── mbart.mdx
│           │       │   │   ├── mctct.mdx
│           │       │   │   ├── megatron-bert.mdx
│           │       │   │   ├── megatron_gpt2.mdx
│           │       │   │   ├── mluke.mdx
│           │       │   │   ├── mobilebert.mdx
│           │       │   │   ├── mobilevit.mdx
│           │       │   │   ├── mpnet.mdx
│           │       │   │   ├── mt5.mdx
│           │       │   │   ├── mvp.mdx
│           │       │   │   ├── nezha.mdx
│           │       │   │   ├── nllb.mdx
│           │       │   │   ├── nystromformer.mdx
│           │       │   │   ├── openai-gpt.mdx
│           │       │   │   ├── opt.mdx
│           │       │   │   ├── owlvit.mdx
│           │       │   │   ├── pegasus.mdx
│           │       │   │   ├── pegasus_x.mdx
│           │       │   │   ├── perceiver.mdx
│           │       │   │   ├── phobert.mdx
│           │       │   │   ├── plbart.mdx
│           │       │   │   ├── poolformer.mdx
│           │       │   │   ├── prophetnet.mdx
│           │       │   │   ├── qdqbert.mdx
│           │       │   │   ├── rag.mdx
│           │       │   │   ├── realm.mdx
│           │       │   │   ├── reformer.mdx
│           │       │   │   ├── regnet.mdx
│           │       │   │   ├── rembert.mdx
│           │       │   │   ├── resnet.mdx
│           │       │   │   ├── retribert.mdx
│           │       │   │   ├── roberta.mdx
│           │       │   │   ├── roformer.mdx
│           │       │   │   ├── segformer.mdx
│           │       │   │   ├── sew-d.mdx
│           │       │   │   ├── sew.mdx
│           │       │   │   ├── speech-encoder-decoder.mdx
│           │       │   │   ├── speech_to_text.mdx
│           │       │   │   ├── speech_to_text_2.mdx
│           │       │   │   ├── splinter.mdx
│           │       │   │   ├── squeezebert.mdx
│           │       │   │   ├── swin.mdx
│           │       │   │   ├── swinv2.mdx
│           │       │   │   ├── t5.mdx
│           │       │   │   ├── t5v1.1.mdx
│           │       │   │   ├── table-transformer.mdx
│           │       │   │   ├── tapas.mdx
│           │       │   │   ├── tapex.mdx
│           │       │   │   ├── time_series_transformer.mdx
│           │       │   │   ├── trajectory_transformer.mdx
│           │       │   │   ├── transfo-xl.mdx
│           │       │   │   ├── trocr.mdx
│           │       │   │   ├── ul2.mdx
│           │       │   │   ├── unispeech-sat.mdx
│           │       │   │   ├── unispeech.mdx
│           │       │   │   ├── van.mdx
│           │       │   │   ├── videomae.mdx
│           │       │   │   ├── vilt.mdx
│           │       │   │   ├── vision-encoder-decoder.mdx
│           │       │   │   ├── vision-text-dual-encoder.mdx
│           │       │   │   ├── visual_bert.mdx
│           │       │   │   ├── vit.mdx
│           │       │   │   ├── vit_mae.mdx
│           │       │   │   ├── vit_msn.mdx
│           │       │   │   ├── wav2vec2-conformer.mdx
│           │       │   │   ├── wav2vec2.mdx
│           │       │   │   ├── wav2vec2_phoneme.mdx
│           │       │   │   ├── wavlm.mdx
│           │       │   │   ├── whisper.mdx
│           │       │   │   ├── xclip.mdx
│           │       │   │   ├── xglm.mdx
│           │       │   │   ├── xlm-prophetnet.mdx
│           │       │   │   ├── xlm-roberta-xl.mdx
│           │       │   │   ├── xlm-roberta.mdx
│           │       │   │   ├── xlm.mdx
│           │       │   │   ├── xlnet.mdx
│           │       │   │   ├── xls_r.mdx
│           │       │   │   ├── xlsr_wav2vec2.mdx
│           │       │   │   ├── yolos.mdx
│           │       │   │   └── yoso.mdx
│           │       │   ├── model_sharing.mdx
│           │       │   ├── model_summary.mdx
│           │       │   ├── multilingual.mdx
│           │       │   ├── pad_truncation.mdx
│           │       │   ├── perf_hardware.mdx
│           │       │   ├── perf_infer_cpu.mdx
│           │       │   ├── perf_infer_gpu_many.mdx
│           │       │   ├── perf_infer_gpu_one.mdx
│           │       │   ├── perf_infer_special.mdx
│           │       │   ├── perf_train_cpu.mdx
│           │       │   ├── perf_train_cpu_many.mdx
│           │       │   ├── perf_train_gpu_many.mdx
│           │       │   ├── perf_train_gpu_one.mdx
│           │       │   ├── perf_train_special.mdx
│           │       │   ├── perf_train_tpu.mdx
│           │       │   ├── performance.mdx
│           │       │   ├── perplexity.mdx
│           │       │   ├── philosophy.mdx
│           │       │   ├── pipeline_tutorial.mdx
│           │       │   ├── pr_checks.mdx
│           │       │   ├── preprocessing.mdx
│           │       │   ├── quicktour.mdx
│           │       │   ├── run_scripts.mdx
│           │       │   ├── sagemaker.mdx
│           │       │   ├── serialization.mdx
│           │       │   ├── task_summary.mdx
│           │       │   ├── tasks/
│           │       │   │   ├── asr.mdx
│           │       │   │   ├── audio_classification.mdx
│           │       │   │   ├── image_classification.mdx
│           │       │   │   ├── language_modeling.mdx
│           │       │   │   ├── multiple_choice.mdx
│           │       │   │   ├── question_answering.mdx
│           │       │   │   ├── semantic_segmentation.mdx
│           │       │   │   ├── sequence_classification.mdx
│           │       │   │   ├── summarization.mdx
│           │       │   │   ├── token_classification.mdx
│           │       │   │   └── translation.mdx
│           │       │   ├── testing.mdx
│           │       │   ├── tokenizer_summary.mdx
│           │       │   ├── torchscript.mdx
│           │       │   ├── training.mdx
│           │       │   └── troubleshooting.mdx
│           │       ├── es/
│           │       │   ├── _config.py
│           │       │   ├── _toctree.yml
│           │       │   ├── accelerate.mdx
│           │       │   ├── autoclass_tutorial.mdx
│           │       │   ├── bertology.mdx
│           │       │   ├── converting_tensorflow_models.mdx
│           │       │   ├── create_a_model.mdx
│           │       │   ├── custom_models.mdx
│           │       │   ├── fast_tokenizers.mdx
│           │       │   ├── index.mdx
│           │       │   ├── installation.mdx
│           │       │   ├── model_sharing.mdx
│           │       │   ├── multilingual.mdx
│           │       │   ├── philosophy.mdx
│           │       │   ├── pipeline_tutorial.mdx
│           │       │   ├── preprocessing.mdx
│           │       │   ├── quicktour.mdx
│           │       │   ├── run_scripts.mdx
│           │       │   ├── sagemaker.mdx
│           │       │   ├── tasks/
│           │       │   │   ├── image_classification.mdx
│           │       │   │   ├── language_modeling.mdx
│           │       │   │   ├── multiple_choice.mdx
│           │       │   │   ├── question_answering.mdx
│           │       │   │   └── summarization.mdx
│           │       │   └── training.mdx
│           │       ├── it/
│           │       │   ├── _config.py
│           │       │   ├── _toctree.yml
│           │       │   ├── accelerate.mdx
│           │       │   ├── add_new_model.mdx
│           │       │   ├── add_new_pipeline.mdx
│           │       │   ├── autoclass_tutorial.mdx
│           │       │   ├── converting_tensorflow_models.mdx
│           │       │   ├── create_a_model.mdx
│           │       │   ├── custom_models.mdx
│           │       │   ├── debugging.mdx
│           │       │   ├── index.mdx
│           │       │   ├── installation.mdx
│           │       │   ├── model_sharing.mdx
│           │       │   ├── multilingual.mdx
│           │       │   ├── perf_hardware.mdx
│           │       │   ├── pipeline_tutorial.mdx
│           │       │   ├── preprocessing.mdx
│           │       │   ├── quicktour.mdx
│           │       │   ├── run_scripts.mdx
│           │       │   ├── serialization.mdx
│           │       │   └── training.mdx
│           │       └── pt/
│           │           ├── _config.py
│           │           ├── _toctree.yml
│           │           ├── accelerate.mdx
│           │           ├── converting_tensorflow_models.mdx
│           │           ├── create_a_model.mdx
│           │           ├── custom_models.mdx
│           │           ├── fast_tokenizers.mdx
│           │           ├── index.mdx
│           │           ├── installation.mdx
│           │           ├── multilingual.mdx
│           │           ├── pipeline_tutorial.mdx
│           │           ├── quicktour.mdx
│           │           ├── run_scripts.mdx
│           │           ├── serialization.mdx
│           │           ├── tasks/
│           │           │   ├── sequence_classification.mdx
│           │           │   └── token_classification.mdx
│           │           └── training.mdx
│           ├── examples/
│           │   ├── README.md
│           │   ├── flax/
│           │   │   ├── README.md
│           │   │   ├── _tests_requirements.txt
│           │   │   ├── conftest.py
│           │   │   ├── image-captioning/
│           │   │   │   ├── README.md
│           │   │   │   ├── create_model_from_encoder_decoder_models.py
│           │   │   │   └── run_image_captioning_flax.py
│           │   │   ├── language-modeling/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_bart_dlm_flax.py
│           │   │   │   ├── run_clm_flax.py
│           │   │   │   ├── run_mlm_flax.py
│           │   │   │   ├── run_t5_mlm_flax.py
│           │   │   │   └── t5_tokenizer_model.py
│           │   │   ├── question-answering/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_qa.py
│           │   │   │   └── utils_qa.py
│           │   │   ├── summarization/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_summarization_flax.py
│           │   │   ├── test_flax_examples.py
│           │   │   ├── text-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_flax_glue.py
│           │   │   ├── token-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_flax_ner.py
│           │   │   └── vision/
│           │   │       ├── README.md
│           │   │       ├── requirements.txt
│           │   │       └── run_image_classification.py
│           │   ├── legacy/
│           │   │   ├── README.md
│           │   │   ├── multiple_choice/
│           │   │   │   ├── run_multiple_choice.py
│           │   │   │   └── utils_multiple_choice.py
│           │   │   ├── pytorch-lightning/
│           │   │   │   ├── lightning_base.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_glue.py
│           │   │   │   ├── run_glue.sh
│           │   │   │   ├── run_ner.py
│           │   │   │   ├── run_ner.sh
│           │   │   │   └── run_pos.sh
│           │   │   ├── question-answering/
│           │   │   │   ├── README.md
│           │   │   │   ├── run_squad.py
│           │   │   │   └── run_squad_trainer.py
│           │   │   ├── run_camembert.py
│           │   │   ├── run_chinese_ref.py
│           │   │   ├── run_language_modeling.py
│           │   │   ├── run_openai_gpt.py
│           │   │   ├── run_swag.py
│           │   │   ├── run_transfo_xl.py
│           │   │   ├── seq2seq/
│           │   │   │   ├── README.md
│           │   │   │   ├── __init__.py
│           │   │   │   ├── convert_model_to_fp16.py
│           │   │   │   ├── download_wmt.py
│           │   │   │   ├── finetune.sh
│           │   │   │   ├── finetune_tpu.sh
│           │   │   │   ├── finetune_trainer.py
│           │   │   │   ├── minify_dataset.py
│           │   │   │   ├── old_test_calculate_rouge.py
│           │   │   │   ├── old_test_datasets.py
│           │   │   │   ├── old_test_fsmt_bleu_score.py
│           │   │   │   ├── old_test_seq2seq_examples.py
│           │   │   │   ├── old_test_seq2seq_examples_multi_gpu.py
│           │   │   │   ├── old_test_tatoeba_conversion.py
│           │   │   │   ├── pack_dataset.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── romanian_postprocessing.md
│           │   │   │   ├── rouge_cli.py
│           │   │   │   ├── run_distributed_eval.py
│           │   │   │   ├── run_eval.py
│           │   │   │   ├── run_eval_search.py
│           │   │   │   ├── save_len_file.py
│           │   │   │   ├── save_randomly_initialized_model.py
│           │   │   │   ├── sentence_splitter.py
│           │   │   │   ├── seq2seq_trainer.py
│           │   │   │   ├── seq2seq_training_args.py
│           │   │   │   ├── test_data/
│           │   │   │   │   ├── fsmt/
│           │   │   │   │   │   ├── build-eval-data.py
│           │   │   │   │   │   └── fsmt_val_data.json
│           │   │   │   │   └── wmt_en_ro/
│           │   │   │   │       ├── test.source
│           │   │   │   │       ├── test.target
│           │   │   │   │       ├── train.len
│           │   │   │   │       ├── train.source
│           │   │   │   │       ├── train.target
│           │   │   │   │       ├── val.len
│           │   │   │   │       ├── val.source
│           │   │   │   │       └── val.target
│           │   │   │   ├── train_distil_marian_enro.sh
│           │   │   │   ├── train_distil_marian_enro_tpu.sh
│           │   │   │   ├── train_distilbart_cnn.sh
│           │   │   │   ├── train_mbart_cc25_enro.sh
│           │   │   │   ├── utils.py
│           │   │   │   └── xla_spawn.py
│           │   │   ├── text-classification/
│           │   │   │   └── run_tf_text_classification.py
│           │   │   └── token-classification/
│           │   │       ├── README.md
│           │   │       ├── run.sh
│           │   │       ├── run_chunk.sh
│           │   │       ├── run_ner.py
│           │   │       ├── run_pos.sh
│           │   │       ├── run_tf_ner.py
│           │   │       ├── scripts/
│           │   │       │   └── preprocess.py
│           │   │       ├── tasks.py
│           │   │       └── utils_ner.py
│           │   ├── pytorch/
│           │   │   ├── README.md
│           │   │   ├── _tests_requirements.txt
│           │   │   ├── audio-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_audio_classification.py
│           │   │   ├── benchmarking/
│           │   │   │   ├── README.md
│           │   │   │   ├── plot_csv_file.py
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_benchmark.py
│           │   │   ├── conftest.py
│           │   │   ├── contrastive-image-text/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_clip.py
│           │   │   ├── image-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_image_classification.py
│           │   │   │   └── run_image_classification_no_trainer.py
│           │   │   ├── image-pretraining/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_mae.py
│           │   │   │   └── run_mim.py
│           │   │   ├── language-modeling/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_clm.py
│           │   │   │   ├── run_clm_no_trainer.py
│           │   │   │   ├── run_mlm.py
│           │   │   │   ├── run_mlm_no_trainer.py
│           │   │   │   └── run_plm.py
│           │   │   ├── multiple-choice/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_no_trainer.sh
│           │   │   │   ├── run_swag.py
│           │   │   │   └── run_swag_no_trainer.py
│           │   │   ├── question-answering/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_qa.py
│           │   │   │   ├── run_qa_beam_search.py
│           │   │   │   ├── run_qa_beam_search_no_trainer.py
│           │   │   │   ├── run_qa_no_trainer.py
│           │   │   │   ├── run_seq2seq_qa.py
│           │   │   │   ├── trainer_qa.py
│           │   │   │   ├── trainer_seq2seq_qa.py
│           │   │   │   └── utils_qa.py
│           │   │   ├── semantic-segmentation/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_semantic_segmentation.py
│           │   │   │   └── run_semantic_segmentation_no_trainer.py
│           │   │   ├── speech-pretraining/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_wav2vec2_pretraining_no_trainer.py
│           │   │   ├── speech-recognition/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_speech_recognition_ctc.py
│           │   │   │   └── run_speech_recognition_seq2seq.py
│           │   │   ├── summarization/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_summarization.py
│           │   │   │   └── run_summarization_no_trainer.py
│           │   │   ├── test_accelerate_examples.py
│           │   │   ├── test_pytorch_examples.py
│           │   │   ├── test_xla_examples.py
│           │   │   ├── text-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_glue.py
│           │   │   │   ├── run_glue_no_trainer.py
│           │   │   │   └── run_xnli.py
│           │   │   ├── text-generation/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_generation.py
│           │   │   │   └── run_generation_contrastive_search.py
│           │   │   ├── token-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run.sh
│           │   │   │   ├── run_ner.py
│           │   │   │   ├── run_ner_no_trainer.py
│           │   │   │   └── run_no_trainer.sh
│           │   │   ├── translation/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_translation.py
│           │   │   │   └── run_translation_no_trainer.py
│           │   │   └── xla_spawn.py
│           │   ├── research_projects/
│           │   │   ├── README.md
│           │   │   ├── adversarial/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_hans.py
│           │   │   │   └── utils_hans.py
│           │   │   ├── bert-loses-patience/
│           │   │   │   ├── README.md
│           │   │   │   ├── pabee/
│           │   │   │   │   ├── __init__.py
│           │   │   │   │   ├── modeling_pabee_albert.py
│           │   │   │   │   └── modeling_pabee_bert.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_glue_with_pabee.py
│           │   │   │   └── test_run_glue_with_pabee.py
│           │   │   ├── bertabs/
│           │   │   │   ├── README.md
│           │   │   │   ├── __init__.py
│           │   │   │   ├── configuration_bertabs.py
│           │   │   │   ├── convert_bertabs_original_pytorch_checkpoint.py
│           │   │   │   ├── modeling_bertabs.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_summarization.py
│           │   │   │   ├── test_utils_summarization.py
│           │   │   │   └── utils_summarization.py
│           │   │   ├── bertology/
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_bertology.py
│           │   │   │   └── run_prune_gpt.py
│           │   │   ├── codeparrot/
│           │   │   │   ├── README.md
│           │   │   │   ├── examples/
│           │   │   │   │   ├── README.md
│           │   │   │   │   ├── requirements.txt
│           │   │   │   │   └── train_complexity_predictor.py
│           │   │   │   ├── requirements.txt
│           │   │   │   └── scripts/
│           │   │   │       ├── arguments.py
│           │   │   │       ├── bpe_training.py
│           │   │   │       ├── codeparrot_training.py
│           │   │   │       ├── human_eval.py
│           │   │   │       ├── initialize_model.py
│           │   │   │       ├── minhash_deduplication.py
│           │   │   │       ├── preprocessing.py
│           │   │   │       ├── pretokenizing.py
│           │   │   │       ├── tests/
│           │   │   │       │   ├── __init__.py
│           │   │   │       │   └── test_deduplicate.py
│           │   │   │       └── validation_loss.py
│           │   │   ├── decision_transformer/
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_decision_transformer.py
│           │   │   ├── deebert/
│           │   │   │   ├── README.md
│           │   │   │   ├── entropy_eval.sh
│           │   │   │   ├── eval_deebert.sh
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_glue_deebert.py
│           │   │   │   ├── src/
│           │   │   │   │   ├── __init__.py
│           │   │   │   │   ├── modeling_highway_bert.py
│           │   │   │   │   └── modeling_highway_roberta.py
│           │   │   │   ├── test_glue_deebert.py
│           │   │   │   └── train_deebert.sh
│           │   │   ├── distillation/
│           │   │   │   ├── README.md
│           │   │   │   ├── distiller.py
│           │   │   │   ├── grouped_batch_sampler.py
│           │   │   │   ├── lm_seqs_dataset.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_squad_w_distillation.py
│           │   │   │   ├── scripts/
│           │   │   │   │   ├── binarized_data.py
│           │   │   │   │   ├── extract.py
│           │   │   │   │   ├── extract_distilbert.py
│           │   │   │   │   └── token_counts.py
│           │   │   │   ├── train.py
│           │   │   │   ├── training_configs/
│           │   │   │   │   ├── distilbert-base-cased.json
│           │   │   │   │   ├── distilbert-base-multilingual-cased.json
│           │   │   │   │   ├── distilbert-base-uncased.json
│           │   │   │   │   ├── distilgpt2.json
│           │   │   │   │   └── distilroberta-base.json
│           │   │   │   └── utils.py
│           │   │   ├── fsner/
│           │   │   │   ├── README.md
│           │   │   │   ├── pyproject.toml
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── setup.py
│           │   │   │   └── src/
│           │   │   │       └── fsner/
│           │   │   │           ├── __init__.py
│           │   │   │           ├── model.py
│           │   │   │           └── tokenizer_utils.py
│           │   │   ├── information-gain-filtration/
│           │   │   │   ├── README.md
│           │   │   │   ├── igf/
│           │   │   │   │   ├── __init__.py
│           │   │   │   │   └── igf.py
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_clm_igf.py
│           │   │   ├── jax-projects/
│           │   │   │   ├── HOW_TO_PROPOSE_PROJECT.md
│           │   │   │   ├── README.md
│           │   │   │   ├── big_bird/
│           │   │   │   │   ├── README.md
│           │   │   │   │   ├── bigbird_flax.py
│           │   │   │   │   ├── evaluate.py
│           │   │   │   │   ├── prepare_natural_questions.py
│           │   │   │   │   ├── requirements.txt
│           │   │   │   │   ├── sweep_flax.yaml
│           │   │   │   │   └── train.py
│           │   │   │   ├── dataset-streaming/
│           │   │   │   │   ├── README.md
│           │   │   │   │   └── run_mlm_flax_stream.py
│           │   │   │   ├── hybrid_clip/
│           │   │   │   │   ├── README.md
│           │   │   │   │   ├── configuration_hybrid_clip.py
│           │   │   │   │   ├── modeling_hybrid_clip.py
│           │   │   │   │   ├── requirements.txt
│           │   │   │   │   └── run_hybrid_clip.py
│           │   │   │   ├── model_parallel/
│           │   │   │   │   ├── README.md
│           │   │   │   │   ├── partitions.py
│           │   │   │   │   └── run_clm_mp.py
│           │   │   │   └── wav2vec2/
│           │   │   │       ├── README.md
│           │   │   │       └── run_wav2vec2_pretrain_flax.py
│           │   │   ├── layoutlmv3/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_funsd_cord.py
│           │   │   ├── longform-qa/
│           │   │   │   ├── README.md
│           │   │   │   ├── eli5_app.py
│           │   │   │   ├── eli5_utils.py
│           │   │   │   └── requirements.txt
│           │   │   ├── luke/
│           │   │   │   ├── README.md
│           │   │   │   ├── luke_utils.py
│           │   │   │   └── run_luke_ner_no_trainer.py
│           │   │   ├── lxmert/
│           │   │   │   ├── README.md
│           │   │   │   ├── demo.ipynb
│           │   │   │   ├── extracting_data.py
│           │   │   │   ├── modeling_frcnn.py
│           │   │   │   ├── processing_image.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── utils.py
│           │   │   │   └── visualizing_image.py
│           │   │   ├── mlm_wwm/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_chinese_ref.py
│           │   │   │   └── run_mlm_wwm.py
│           │   │   ├── mm-imdb/
│           │   │   │   ├── README.md
│           │   │   │   ├── run_mmimdb.py
│           │   │   │   └── utils_mmimdb.py
│           │   │   ├── movement-pruning/
│           │   │   │   ├── README.md
│           │   │   │   ├── Saving_PruneBERT.ipynb
│           │   │   │   ├── bertarize.py
│           │   │   │   ├── counts_parameters.py
│           │   │   │   ├── emmental/
│           │   │   │   │   ├── __init__.py
│           │   │   │   │   ├── configuration_bert_masked.py
│           │   │   │   │   ├── modeling_bert_masked.py
│           │   │   │   │   └── modules/
│           │   │   │   │       ├── __init__.py
│           │   │   │   │       ├── binarizer.py
│           │   │   │   │       └── masked_nn.py
│           │   │   │   ├── masked_run_glue.py
│           │   │   │   ├── masked_run_squad.py
│           │   │   │   └── requirements.txt
│           │   │   ├── onnx/
│           │   │   │   └── summarization/
│           │   │   │       ├── README.md
│           │   │   │       ├── bart_onnx/
│           │   │   │       │   ├── generation_onnx.py
│           │   │   │       │   └── reduce_onnx_size.py
│           │   │   │       ├── requirements.txt
│           │   │   │       └── run_onnx_exporter.py
│           │   │   ├── performer/
│           │   │   │   ├── README.md
│           │   │   │   ├── full_script.sh
│           │   │   │   ├── modeling_flax_performer.py
│           │   │   │   ├── modeling_flax_performer_utils.py
│           │   │   │   ├── run_mlm_performer.py
│           │   │   │   └── sanity_script.sh
│           │   │   ├── pplm/
│           │   │   │   ├── README.md
│           │   │   │   ├── pplm_classification_head.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_pplm.py
│           │   │   │   └── run_pplm_discrim_train.py
│           │   │   ├── quantization-qdqbert/
│           │   │   │   ├── Dockerfile
│           │   │   │   ├── README.md
│           │   │   │   ├── evaluate-hf-trt-qa.py
│           │   │   │   ├── ort-infer-benchmark.py
│           │   │   │   ├── quant_trainer.py
│           │   │   │   ├── run_quant_qa.py
│           │   │   │   ├── trainer_quant_qa.py
│           │   │   │   └── utils_qa.py
│           │   │   ├── rag/
│           │   │   │   ├── README.md
│           │   │   │   ├── __init__.py
│           │   │   │   ├── _test_finetune_rag.py
│           │   │   │   ├── callbacks_rag.py
│           │   │   │   ├── consolidate_rag_checkpoint.py
│           │   │   │   ├── distributed_pytorch_retriever.py
│           │   │   │   ├── distributed_ray_retriever.py
│           │   │   │   ├── eval_rag.py
│           │   │   │   ├── finetune_rag.py
│           │   │   │   ├── finetune_rag.sh
│           │   │   │   ├── finetune_rag_ray.sh
│           │   │   │   ├── lightning_base.py
│           │   │   │   ├── parse_dpr_relevance_data.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── test_data/
│           │   │   │   │   └── my_knowledge_dataset.csv
│           │   │   │   ├── test_distributed_retriever.py
│           │   │   │   ├── use_own_knowledge_dataset.py
│           │   │   │   └── utils_rag.py
│           │   │   ├── rag-end2end-retriever/
│           │   │   │   ├── README.md
│           │   │   │   ├── callbacks_rag.py
│           │   │   │   ├── distributed_ray_retriever.py
│           │   │   │   ├── eval_rag.py
│           │   │   │   ├── finetune_rag.py
│           │   │   │   ├── finetune_rag_ray_end2end.sh
│           │   │   │   ├── kb_encode_utils.py
│           │   │   │   ├── lightning_base.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── test_run/
│           │   │   │   │   ├── dummy-kb/
│           │   │   │   │   │   └── my_knowledge_dataset.csv
│           │   │   │   │   ├── dummy-train-data/
│           │   │   │   │   │   ├── train.source
│           │   │   │   │   │   ├── train.target
│           │   │   │   │   │   ├── val.source
│           │   │   │   │   │   └── val.target
│           │   │   │   │   ├── test_finetune.sh
│           │   │   │   │   └── test_rag_new_features.sh
│           │   │   │   ├── use_own_knowledge_dataset.py
│           │   │   │   └── utils_rag.py
│           │   │   ├── robust-speech-event/
│           │   │   │   ├── README.md
│           │   │   │   ├── eval.py
│           │   │   │   ├── run_speech_recognition_ctc_bnb.py
│           │   │   │   └── run_speech_recognition_ctc_streaming.py
│           │   │   ├── self-training-text-classification/
│           │   │   │   ├── README.md
│           │   │   │   ├── finetuning.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run.sh
│           │   │   │   └── selftraining.py
│           │   │   ├── seq2seq-distillation/
│           │   │   │   ├── README.md
│           │   │   │   ├── _test_bash_script.py
│           │   │   │   ├── _test_make_student.py
│           │   │   │   ├── _test_seq2seq_examples.py
│           │   │   │   ├── _test_seq2seq_examples_multi_gpu.py
│           │   │   │   ├── callbacks.py
│           │   │   │   ├── convert_pl_checkpoint_to_hf.py
│           │   │   │   ├── distil_marian_enro_teacher.sh
│           │   │   │   ├── distil_marian_no_teacher.sh
│           │   │   │   ├── distillation.py
│           │   │   │   ├── dynamic_bs_example.sh
│           │   │   │   ├── finetune.py
│           │   │   │   ├── finetune.sh
│           │   │   │   ├── finetune_bart_tiny.sh
│           │   │   │   ├── finetune_pegasus_xsum.sh
│           │   │   │   ├── finetune_t5.sh
│           │   │   │   ├── lightning_base.py
│           │   │   │   ├── make_student.py
│           │   │   │   ├── precomputed_pseudo_labels.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_eval.py
│           │   │   │   ├── sentence_splitter.py
│           │   │   │   ├── train_distilbart_cnn.sh
│           │   │   │   ├── train_distilbart_xsum.sh
│           │   │   │   ├── train_mbart_cc25_enro.sh
│           │   │   │   └── utils.py
│           │   │   ├── tapex/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_tabfact_with_tapex.py
│           │   │   │   ├── run_wikisql_with_tapex.py
│           │   │   │   ├── run_wikitablequestions_with_tapex.py
│           │   │   │   └── wikisql_utils.py
│           │   │   ├── visual_bert/
│           │   │   │   ├── README.md
│           │   │   │   ├── demo.ipynb
│           │   │   │   ├── extracting_data.py
│           │   │   │   ├── modeling_frcnn.py
│           │   │   │   ├── processing_image.py
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── utils.py
│           │   │   │   └── visualizing_image.py
│           │   │   ├── wav2vec2/
│           │   │   │   ├── FINE_TUNE_XLSR_WAV2VEC2.md
│           │   │   │   ├── README.md
│           │   │   │   ├── alignment.py
│           │   │   │   ├── ds_config_wav2vec2_zero2.json
│           │   │   │   ├── ds_config_wav2vec2_zero3.json
│           │   │   │   ├── finetune_base_100.sh
│           │   │   │   ├── finetune_base_timit_asr.sh
│           │   │   │   ├── finetune_large_lv60_100.sh
│           │   │   │   ├── finetune_large_lv60_timit_asr.sh
│           │   │   │   ├── finetune_large_xlsr_53_arabic_speech_corpus.sh
│           │   │   │   ├── finetune_wav2vec2_xlsr_turkish.sh
│           │   │   │   ├── requirements.txt
│           │   │   │   ├── run_alignment.sh
│           │   │   │   ├── run_asr.py
│           │   │   │   ├── run_common_voice.py
│           │   │   │   ├── run_pretrain.py
│           │   │   │   ├── test_wav2vec2_deepspeed.py
│           │   │   │   └── vocab/
│           │   │   │       └── buckwalter.json
│           │   │   ├── xtreme-s/
│           │   │   │   ├── README.md
│           │   │   │   ├── requirements.txt
│           │   │   │   └── run_xtreme_s.py
│           │   │   └── zero-shot-distillation/
│           │   │       ├── README.md
│           │   │       └── distill_classifier.py
│           │   └── tensorflow/
│           │       ├── README.md
│           │       ├── _tests_requirements.txt
│           │       ├── benchmarking/
│           │       │   ├── README.md
│           │       │   ├── plot_csv_file.py
│           │       │   ├── requirements.txt
│           │       │   └── run_benchmark_tf.py
│           │       ├── language-modeling/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   ├── run_clm.py
│           │       │   └── run_mlm.py
│           │       ├── multiple-choice/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   └── run_swag.py
│           │       ├── question-answering/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   ├── run_qa.py
│           │       │   └── utils_qa.py
│           │       ├── summarization/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   └── run_summarization.py
│           │       ├── test_tensorflow_examples.py
│           │       ├── text-classification/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   ├── run_glue.py
│           │       │   └── run_text_classification.py
│           │       ├── token-classification/
│           │       │   ├── README.md
│           │       │   ├── requirements.txt
│           │       │   └── run_ner.py
│           │       └── translation/
│           │           ├── README.md
│           │           ├── requirements.txt
│           │           └── run_translation.py
│           ├── hubconf.py
│           ├── model_cards/
│           │   └── README.md
│           ├── notebooks/
│           │   └── README.md
│           ├── pyproject.toml
│           ├── scripts/
│           │   ├── benchmark/
│           │   │   └── trainer-benchmark.py
│           │   ├── check_tokenizers.py
│           │   ├── distributed/
│           │   │   └── torch-distributed-gpu-test.py
│           │   ├── fsmt/
│           │   │   ├── convert-allenai-wmt16.sh
│           │   │   ├── convert-allenai-wmt19.sh
│           │   │   ├── convert-facebook-wmt19.sh
│           │   │   ├── eval-allenai-wmt16.sh
│           │   │   ├── eval-allenai-wmt19.sh
│           │   │   ├── eval-facebook-wmt19.sh
│           │   │   ├── fsmt-make-super-tiny-model.py
│           │   │   ├── fsmt-make-tiny-model.py
│           │   │   ├── gen-card-allenai-wmt16.py
│           │   │   ├── gen-card-allenai-wmt19.py
│           │   │   ├── gen-card-facebook-wmt19.py
│           │   │   ├── s3-move.sh
│           │   │   └── tests-to-run.sh
│           │   ├── pegasus/
│           │   │   └── build_test_sample_spm_no_bos.py
│           │   ├── stale.py
│           │   └── tatoeba/
│           │       ├── README.md
│           │       └── upload_models.sh
│           ├── setup.cfg
│           ├── setup.py
│           ├── src/
│           │   └── transformers/
│           │       ├── __init__.py
│           │       ├── activations.py
│           │       ├── activations_tf.py
│           │       ├── benchmark/
│           │       │   ├── __init__.py
│           │       │   ├── benchmark.py
│           │       │   ├── benchmark_args.py
│           │       │   ├── benchmark_args_tf.py
│           │       │   ├── benchmark_args_utils.py
│           │       │   ├── benchmark_tf.py
│           │       │   └── benchmark_utils.py
│           │       ├── commands/
│           │       │   ├── __init__.py
│           │       │   ├── add_new_model.py
│           │       │   ├── add_new_model_like.py
│           │       │   ├── convert.py
│           │       │   ├── download.py
│           │       │   ├── env.py
│           │       │   ├── lfs.py
│           │       │   ├── pt_to_tf.py
│           │       │   ├── run.py
│           │       │   ├── serving.py
│           │       │   ├── train.py
│           │       │   ├── transformers_cli.py
│           │       │   └── user.py
│           │       ├── configuration_utils.py
│           │       ├── convert_graph_to_onnx.py
│           │       ├── convert_pytorch_checkpoint_to_tf2.py
│           │       ├── convert_slow_tokenizer.py
│           │       ├── convert_slow_tokenizers_checkpoints_to_fast.py
│           │       ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py
│           │       ├── data/
│           │       │   ├── __init__.py
│           │       │   ├── data_collator.py
│           │       │   ├── datasets/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── glue.py
│           │       │   │   ├── language_modeling.py
│           │       │   │   └── squad.py
│           │       │   ├── metrics/
│           │       │   │   ├── __init__.py
│           │       │   │   └── squad_metrics.py
│           │       │   ├── processors/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── glue.py
│           │       │   │   ├── squad.py
│           │       │   │   ├── utils.py
│           │       │   │   └── xnli.py
│           │       │   └── test_generation_utils.py
│           │       ├── debug_utils.py
│           │       ├── deepspeed.py
│           │       ├── dependency_versions_check.py
│           │       ├── dependency_versions_table.py
│           │       ├── dynamic_module_utils.py
│           │       ├── feature_extraction_sequence_utils.py
│           │       ├── feature_extraction_utils.py
│           │       ├── file_utils.py
│           │       ├── generation_beam_constraints.py
│           │       ├── generation_beam_search.py
│           │       ├── generation_flax_logits_process.py
│           │       ├── generation_flax_utils.py
│           │       ├── generation_logits_process.py
│           │       ├── generation_stopping_criteria.py
│           │       ├── generation_tf_logits_process.py
│           │       ├── generation_tf_utils.py
│           │       ├── generation_utils.py
│           │       ├── hf_argparser.py
│           │       ├── image_processing_utils.py
│           │       ├── image_transforms.py
│           │       ├── image_utils.py
│           │       ├── integrations.py
│           │       ├── keras_callbacks.py
│           │       ├── modelcard.py
│           │       ├── modeling_flax_outputs.py
│           │       ├── modeling_flax_pytorch_utils.py
│           │       ├── modeling_flax_utils.py
│           │       ├── modeling_outputs.py
│           │       ├── modeling_tf_outputs.py
│           │       ├── modeling_tf_pytorch_utils.py
│           │       ├── modeling_tf_utils.py
│           │       ├── modeling_utils.py
│           │       ├── models/
│           │       │   ├── __init__.py
│           │       │   ├── albert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_albert.py
│           │       │   │   ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_albert.py
│           │       │   │   ├── modeling_flax_albert.py
│           │       │   │   ├── modeling_tf_albert.py
│           │       │   │   ├── tokenization_albert.py
│           │       │   │   └── tokenization_albert_fast.py
│           │       │   ├── auto/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── auto_factory.py
│           │       │   │   ├── configuration_auto.py
│           │       │   │   ├── feature_extraction_auto.py
│           │       │   │   ├── modeling_auto.py
│           │       │   │   ├── modeling_flax_auto.py
│           │       │   │   ├── modeling_tf_auto.py
│           │       │   │   ├── processing_auto.py
│           │       │   │   └── tokenization_auto.py
│           │       │   ├── bart/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_bart.py
│           │       │   │   ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_bart.py
│           │       │   │   ├── modeling_flax_bart.py
│           │       │   │   ├── modeling_tf_bart.py
│           │       │   │   ├── tokenization_bart.py
│           │       │   │   └── tokenization_bart_fast.py
│           │       │   ├── barthez/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── tokenization_barthez.py
│           │       │   │   └── tokenization_barthez_fast.py
│           │       │   ├── bartpho/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_bartpho.py
│           │       │   ├── beit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_beit.py
│           │       │   │   ├── convert_beit_unilm_to_pytorch.py
│           │       │   │   ├── feature_extraction_beit.py
│           │       │   │   ├── modeling_beit.py
│           │       │   │   └── modeling_flax_beit.py
│           │       │   ├── bert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_bert.py
│           │       │   │   ├── convert_bert_original_tf2_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_bert_pytorch_checkpoint_to_original_tf.py
│           │       │   │   ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_bert.py
│           │       │   │   ├── modeling_flax_bert.py
│           │       │   │   ├── modeling_tf_bert.py
│           │       │   │   ├── tokenization_bert.py
│           │       │   │   ├── tokenization_bert_fast.py
│           │       │   │   └── tokenization_bert_tf.py
│           │       │   ├── bert_generation/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_bert_generation.py
│           │       │   │   ├── modeling_bert_generation.py
│           │       │   │   └── tokenization_bert_generation.py
│           │       │   ├── bert_japanese/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_bert_japanese.py
│           │       │   ├── bertweet/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_bertweet.py
│           │       │   ├── big_bird/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_big_bird.py
│           │       │   │   ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_big_bird.py
│           │       │   │   ├── modeling_flax_big_bird.py
│           │       │   │   ├── tokenization_big_bird.py
│           │       │   │   └── tokenization_big_bird_fast.py
│           │       │   ├── bigbird_pegasus/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_bigbird_pegasus.py
│           │       │   │   ├── convert_bigbird_pegasus_tf_to_pytorch.py
│           │       │   │   └── modeling_bigbird_pegasus.py
│           │       │   ├── blenderbot/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_blenderbot.py
│           │       │   │   ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_blenderbot.py
│           │       │   │   ├── modeling_flax_blenderbot.py
│           │       │   │   ├── modeling_tf_blenderbot.py
│           │       │   │   ├── tokenization_blenderbot.py
│           │       │   │   └── tokenization_blenderbot_fast.py
│           │       │   ├── blenderbot_small/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_blenderbot_small.py
│           │       │   │   ├── modeling_blenderbot_small.py
│           │       │   │   ├── modeling_flax_blenderbot_small.py
│           │       │   │   ├── modeling_tf_blenderbot_small.py
│           │       │   │   ├── tokenization_blenderbot_small.py
│           │       │   │   └── tokenization_blenderbot_small_fast.py
│           │       │   ├── bloom/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_bloom.py
│           │       │   │   ├── convert_bloom_original_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_bloom.py
│           │       │   │   └── tokenization_bloom_fast.py
│           │       │   ├── bort/
│           │       │   │   ├── __init__.py
│           │       │   │   └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py
│           │       │   ├── byt5/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── convert_byt5_original_tf_checkpoint_to_pytorch.py
│           │       │   │   └── tokenization_byt5.py
│           │       │   ├── camembert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_camembert.py
│           │       │   │   ├── modeling_camembert.py
│           │       │   │   ├── modeling_tf_camembert.py
│           │       │   │   ├── tokenization_camembert.py
│           │       │   │   └── tokenization_camembert_fast.py
│           │       │   ├── canine/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_canine.py
│           │       │   │   ├── convert_canine_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_canine.py
│           │       │   │   └── tokenization_canine.py
│           │       │   ├── clip/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_clip.py
│           │       │   │   ├── convert_clip_original_pytorch_to_hf.py
│           │       │   │   ├── feature_extraction_clip.py
│           │       │   │   ├── modeling_clip.py
│           │       │   │   ├── modeling_flax_clip.py
│           │       │   │   ├── modeling_tf_clip.py
│           │       │   │   ├── processing_clip.py
│           │       │   │   ├── tokenization_clip.py
│           │       │   │   └── tokenization_clip_fast.py
│           │       │   ├── codegen/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_codegen.py
│           │       │   │   ├── modeling_codegen.py
│           │       │   │   ├── tokenization_codegen.py
│           │       │   │   └── tokenization_codegen_fast.py
│           │       │   ├── conditional_detr/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_conditional_detr.py
│           │       │   │   ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── feature_extraction_conditional_detr.py
│           │       │   │   └── modeling_conditional_detr.py
│           │       │   ├── convbert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_convbert.py
│           │       │   │   ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py
│           │       │   │   ├── modeling_convbert.py
│           │       │   │   ├── modeling_tf_convbert.py
│           │       │   │   ├── tokenization_convbert.py
│           │       │   │   └── tokenization_convbert_fast.py
│           │       │   ├── convnext/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_convnext.py
│           │       │   │   ├── convert_convnext_to_pytorch.py
│           │       │   │   ├── feature_extraction_convnext.py
│           │       │   │   ├── modeling_convnext.py
│           │       │   │   └── modeling_tf_convnext.py
│           │       │   ├── cpm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── tokenization_cpm.py
│           │       │   │   └── tokenization_cpm_fast.py
│           │       │   ├── ctrl/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_ctrl.py
│           │       │   │   ├── modeling_ctrl.py
│           │       │   │   ├── modeling_tf_ctrl.py
│           │       │   │   └── tokenization_ctrl.py
│           │       │   ├── cvt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_cvt.py
│           │       │   │   ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_cvt.py
│           │       │   │   └── modeling_tf_cvt.py
│           │       │   ├── data2vec/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_data2vec_audio.py
│           │       │   │   ├── configuration_data2vec_text.py
│           │       │   │   ├── configuration_data2vec_vision.py
│           │       │   │   ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_data2vec_audio.py
│           │       │   │   ├── modeling_data2vec_text.py
│           │       │   │   ├── modeling_data2vec_vision.py
│           │       │   │   └── modeling_tf_data2vec_vision.py
│           │       │   ├── deberta/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_deberta.py
│           │       │   │   ├── modeling_deberta.py
│           │       │   │   ├── modeling_tf_deberta.py
│           │       │   │   ├── tokenization_deberta.py
│           │       │   │   └── tokenization_deberta_fast.py
│           │       │   ├── deberta_v2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_deberta_v2.py
│           │       │   │   ├── modeling_deberta_v2.py
│           │       │   │   ├── modeling_tf_deberta_v2.py
│           │       │   │   ├── tokenization_deberta_v2.py
│           │       │   │   └── tokenization_deberta_v2_fast.py
│           │       │   ├── decision_transformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_decision_transformer.py
│           │       │   │   └── modeling_decision_transformer.py
│           │       │   ├── deformable_detr/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_deformable_detr.py
│           │       │   │   ├── convert_deformable_detr_to_pytorch.py
│           │       │   │   ├── custom_kernel/
│           │       │   │   │   ├── cpu/
│           │       │   │   │   │   ├── ms_deform_attn_cpu.cpp
│           │       │   │   │   │   └── ms_deform_attn_cpu.h
│           │       │   │   │   ├── cuda/
│           │       │   │   │   │   ├── ms_deform_attn_cuda.cu
│           │       │   │   │   │   ├── ms_deform_attn_cuda.cuh
│           │       │   │   │   │   ├── ms_deform_attn_cuda.h
│           │       │   │   │   │   └── ms_deform_im2col_cuda.cuh
│           │       │   │   │   ├── ms_deform_attn.h
│           │       │   │   │   └── vision.cpp
│           │       │   │   ├── feature_extraction_deformable_detr.py
│           │       │   │   ├── load_custom.py
│           │       │   │   └── modeling_deformable_detr.py
│           │       │   ├── deit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_deit.py
│           │       │   │   ├── convert_deit_timm_to_pytorch.py
│           │       │   │   ├── feature_extraction_deit.py
│           │       │   │   ├── modeling_deit.py
│           │       │   │   └── modeling_tf_deit.py
│           │       │   ├── detr/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_detr.py
│           │       │   │   ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── feature_extraction_detr.py
│           │       │   │   └── modeling_detr.py
│           │       │   ├── dialogpt/
│           │       │   │   ├── __init__.py
│           │       │   │   └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│           │       │   ├── distilbert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_distilbert.py
│           │       │   │   ├── modeling_distilbert.py
│           │       │   │   ├── modeling_flax_distilbert.py
│           │       │   │   ├── modeling_tf_distilbert.py
│           │       │   │   ├── tokenization_distilbert.py
│           │       │   │   └── tokenization_distilbert_fast.py
│           │       │   ├── dit/
│           │       │   │   ├── __init__.py
│           │       │   │   └── convert_dit_unilm_to_pytorch.py
│           │       │   ├── donut/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_donut_swin.py
│           │       │   │   ├── convert_donut_to_pytorch.py
│           │       │   │   ├── feature_extraction_donut.py
│           │       │   │   ├── modeling_donut_swin.py
│           │       │   │   └── processing_donut.py
│           │       │   ├── dpr/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_dpr.py
│           │       │   │   ├── convert_dpr_original_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_dpr.py
│           │       │   │   ├── modeling_tf_dpr.py
│           │       │   │   ├── tokenization_dpr.py
│           │       │   │   └── tokenization_dpr_fast.py
│           │       │   ├── dpt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_dpt.py
│           │       │   │   ├── convert_dpt_to_pytorch.py
│           │       │   │   ├── feature_extraction_dpt.py
│           │       │   │   └── modeling_dpt.py
│           │       │   ├── electra/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_electra.py
│           │       │   │   ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_electra.py
│           │       │   │   ├── modeling_flax_electra.py
│           │       │   │   ├── modeling_tf_electra.py
│           │       │   │   ├── tokenization_electra.py
│           │       │   │   └── tokenization_electra_fast.py
│           │       │   ├── encoder_decoder/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_encoder_decoder.py
│           │       │   │   ├── modeling_encoder_decoder.py
│           │       │   │   ├── modeling_flax_encoder_decoder.py
│           │       │   │   └── modeling_tf_encoder_decoder.py
│           │       │   ├── ernie/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_ernie.py
│           │       │   │   └── modeling_ernie.py
│           │       │   ├── esm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_esm.py
│           │       │   │   ├── convert_esm.py
│           │       │   │   ├── modeling_esm.py
│           │       │   │   ├── modeling_esmfold.py
│           │       │   │   ├── modeling_tf_esm.py
│           │       │   │   ├── openfold_utils/
│           │       │   │   │   ├── __init__.py
│           │       │   │   │   ├── chunk_utils.py
│           │       │   │   │   ├── data_transforms.py
│           │       │   │   │   ├── feats.py
│           │       │   │   │   ├── loss.py
│           │       │   │   │   ├── protein.py
│           │       │   │   │   ├── residue_constants.py
│           │       │   │   │   ├── rigid_utils.py
│           │       │   │   │   └── tensor_utils.py
│           │       │   │   └── tokenization_esm.py
│           │       │   ├── flaubert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_flaubert.py
│           │       │   │   ├── modeling_flaubert.py
│           │       │   │   ├── modeling_tf_flaubert.py
│           │       │   │   └── tokenization_flaubert.py
│           │       │   ├── flava/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_flava.py
│           │       │   │   ├── convert_dalle_to_flava_codebook.py
│           │       │   │   ├── convert_flava_original_pytorch_to_hf.py
│           │       │   │   ├── feature_extraction_flava.py
│           │       │   │   ├── modeling_flava.py
│           │       │   │   └── processing_flava.py
│           │       │   ├── fnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_fnet.py
│           │       │   │   ├── convert_fnet_original_flax_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_fnet.py
│           │       │   │   ├── tokenization_fnet.py
│           │       │   │   └── tokenization_fnet_fast.py
│           │       │   ├── fsmt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_fsmt.py
│           │       │   │   ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_fsmt.py
│           │       │   │   └── tokenization_fsmt.py
│           │       │   ├── funnel/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_funnel.py
│           │       │   │   ├── convert_funnel_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_funnel.py
│           │       │   │   ├── modeling_tf_funnel.py
│           │       │   │   ├── tokenization_funnel.py
│           │       │   │   └── tokenization_funnel_fast.py
│           │       │   ├── glpn/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_glpn.py
│           │       │   │   ├── convert_glpn_to_pytorch.py
│           │       │   │   ├── feature_extraction_glpn.py
│           │       │   │   ├── image_processing_glpn.py
│           │       │   │   └── modeling_glpn.py
│           │       │   ├── gpt2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_gpt2.py
│           │       │   │   ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_flax_gpt2.py
│           │       │   │   ├── modeling_gpt2.py
│           │       │   │   ├── modeling_tf_gpt2.py
│           │       │   │   ├── tokenization_gpt2.py
│           │       │   │   └── tokenization_gpt2_fast.py
│           │       │   ├── gpt_neo/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_gpt_neo.py
│           │       │   │   ├── convert_gpt_neo_mesh_tf_to_pytorch.py
│           │       │   │   ├── modeling_flax_gpt_neo.py
│           │       │   │   └── modeling_gpt_neo.py
│           │       │   ├── gpt_neox/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_gpt_neox.py
│           │       │   │   ├── modeling_gpt_neox.py
│           │       │   │   └── tokenization_gpt_neox_fast.py
│           │       │   ├── gpt_neox_japanese/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_gpt_neox_japanese.py
│           │       │   │   ├── modeling_gpt_neox_japanese.py
│           │       │   │   └── tokenization_gpt_neox_japanese.py
│           │       │   ├── gptj/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_gptj.py
│           │       │   │   ├── modeling_flax_gptj.py
│           │       │   │   ├── modeling_gptj.py
│           │       │   │   └── modeling_tf_gptj.py
│           │       │   ├── groupvit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_groupvit.py
│           │       │   │   ├── convert_groupvit_nvlab_to_hf.py
│           │       │   │   ├── modeling_groupvit.py
│           │       │   │   └── modeling_tf_groupvit.py
│           │       │   ├── herbert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── tokenization_herbert.py
│           │       │   │   └── tokenization_herbert_fast.py
│           │       │   ├── hubert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_hubert.py
│           │       │   │   ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_hubert.py
│           │       │   │   └── modeling_tf_hubert.py
│           │       │   ├── ibert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_ibert.py
│           │       │   │   ├── modeling_ibert.py
│           │       │   │   └── quant_modules.py
│           │       │   ├── imagegpt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_imagegpt.py
│           │       │   │   ├── convert_imagegpt_original_tf2_to_pytorch.py
│           │       │   │   ├── feature_extraction_imagegpt.py
│           │       │   │   └── modeling_imagegpt.py
│           │       │   ├── layoutlm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_layoutlm.py
│           │       │   │   ├── modeling_layoutlm.py
│           │       │   │   ├── modeling_tf_layoutlm.py
│           │       │   │   ├── tokenization_layoutlm.py
│           │       │   │   └── tokenization_layoutlm_fast.py
│           │       │   ├── layoutlmv2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_layoutlmv2.py
│           │       │   │   ├── feature_extraction_layoutlmv2.py
│           │       │   │   ├── modeling_layoutlmv2.py
│           │       │   │   ├── processing_layoutlmv2.py
│           │       │   │   ├── tokenization_layoutlmv2.py
│           │       │   │   └── tokenization_layoutlmv2_fast.py
│           │       │   ├── layoutlmv3/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_layoutlmv3.py
│           │       │   │   ├── feature_extraction_layoutlmv3.py
│           │       │   │   ├── modeling_layoutlmv3.py
│           │       │   │   ├── modeling_tf_layoutlmv3.py
│           │       │   │   ├── processing_layoutlmv3.py
│           │       │   │   ├── tokenization_layoutlmv3.py
│           │       │   │   └── tokenization_layoutlmv3_fast.py
│           │       │   ├── layoutxlm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── processing_layoutxlm.py
│           │       │   │   ├── tokenization_layoutxlm.py
│           │       │   │   └── tokenization_layoutxlm_fast.py
│           │       │   ├── led/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_led.py
│           │       │   │   ├── modeling_led.py
│           │       │   │   ├── modeling_tf_led.py
│           │       │   │   ├── tokenization_led.py
│           │       │   │   └── tokenization_led_fast.py
│           │       │   ├── levit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_levit.py
│           │       │   │   ├── convert_levit_timm_to_pytorch.py
│           │       │   │   ├── feature_extraction_levit.py
│           │       │   │   └── modeling_levit.py
│           │       │   ├── lilt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_lilt.py
│           │       │   │   └── modeling_lilt.py
│           │       │   ├── longformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_longformer.py
│           │       │   │   ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│           │       │   │   ├── modeling_longformer.py
│           │       │   │   ├── modeling_tf_longformer.py
│           │       │   │   ├── tokenization_longformer.py
│           │       │   │   └── tokenization_longformer_fast.py
│           │       │   ├── longt5/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_longt5.py
│           │       │   │   ├── convert_longt5x_checkpoint_to_flax.py
│           │       │   │   ├── modeling_flax_longt5.py
│           │       │   │   └── modeling_longt5.py
│           │       │   ├── luke/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_luke.py
│           │       │   │   ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_luke.py
│           │       │   │   └── tokenization_luke.py
│           │       │   ├── lxmert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_lxmert.py
│           │       │   │   ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_lxmert.py
│           │       │   │   ├── modeling_tf_lxmert.py
│           │       │   │   ├── tokenization_lxmert.py
│           │       │   │   └── tokenization_lxmert_fast.py
│           │       │   ├── m2m_100/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_m2m_100.py
│           │       │   │   ├── convert_m2m100_original_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_m2m_100.py
│           │       │   │   └── tokenization_m2m_100.py
│           │       │   ├── marian/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_marian.py
│           │       │   │   ├── convert_marian_tatoeba_to_pytorch.py
│           │       │   │   ├── convert_marian_to_pytorch.py
│           │       │   │   ├── modeling_flax_marian.py
│           │       │   │   ├── modeling_marian.py
│           │       │   │   ├── modeling_tf_marian.py
│           │       │   │   └── tokenization_marian.py
│           │       │   ├── markuplm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_markuplm.py
│           │       │   │   ├── feature_extraction_markuplm.py
│           │       │   │   ├── modeling_markuplm.py
│           │       │   │   ├── processing_markuplm.py
│           │       │   │   ├── tokenization_markuplm.py
│           │       │   │   └── tokenization_markuplm_fast.py
│           │       │   ├── maskformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_maskformer.py
│           │       │   │   ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── feature_extraction_maskformer.py
│           │       │   │   └── modeling_maskformer.py
│           │       │   ├── mbart/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mbart.py
│           │       │   │   ├── convert_mbart_original_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_flax_mbart.py
│           │       │   │   ├── modeling_mbart.py
│           │       │   │   ├── modeling_tf_mbart.py
│           │       │   │   ├── tokenization_mbart.py
│           │       │   │   └── tokenization_mbart_fast.py
│           │       │   ├── mbart50/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── tokenization_mbart50.py
│           │       │   │   └── tokenization_mbart50_fast.py
│           │       │   ├── mctct/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mctct.py
│           │       │   │   ├── feature_extraction_mctct.py
│           │       │   │   ├── modeling_mctct.py
│           │       │   │   └── processing_mctct.py
│           │       │   ├── megatron_bert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_megatron_bert.py
│           │       │   │   ├── convert_megatron_bert_checkpoint.py
│           │       │   │   └── modeling_megatron_bert.py
│           │       │   ├── megatron_gpt2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── checkpoint_reshaping_and_interoperability.py
│           │       │   │   └── convert_megatron_gpt2_checkpoint.py
│           │       │   ├── mluke/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── tokenization_mluke.py
│           │       │   ├── mmbt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mmbt.py
│           │       │   │   └── modeling_mmbt.py
│           │       │   ├── mobilebert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mobilebert.py
│           │       │   │   ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_mobilebert.py
│           │       │   │   ├── modeling_tf_mobilebert.py
│           │       │   │   ├── tokenization_mobilebert.py
│           │       │   │   └── tokenization_mobilebert_fast.py
│           │       │   ├── mobilevit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mobilevit.py
│           │       │   │   ├── convert_mlcvnets_to_pytorch.py
│           │       │   │   ├── feature_extraction_mobilevit.py
│           │       │   │   ├── modeling_mobilevit.py
│           │       │   │   └── modeling_tf_mobilevit.py
│           │       │   ├── mpnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mpnet.py
│           │       │   │   ├── modeling_mpnet.py
│           │       │   │   ├── modeling_tf_mpnet.py
│           │       │   │   ├── tokenization_mpnet.py
│           │       │   │   └── tokenization_mpnet_fast.py
│           │       │   ├── mt5/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mt5.py
│           │       │   │   ├── modeling_flax_mt5.py
│           │       │   │   ├── modeling_mt5.py
│           │       │   │   └── modeling_tf_mt5.py
│           │       │   ├── mvp/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_mvp.py
│           │       │   │   ├── modeling_mvp.py
│           │       │   │   ├── tokenization_mvp.py
│           │       │   │   └── tokenization_mvp_fast.py
│           │       │   ├── nezha/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_nezha.py
│           │       │   │   └── modeling_nezha.py
│           │       │   ├── nllb/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── tokenization_nllb.py
│           │       │   │   └── tokenization_nllb_fast.py
│           │       │   ├── nystromformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_nystromformer.py
│           │       │   │   ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_nystromformer.py
│           │       │   ├── openai/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_openai.py
│           │       │   │   ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_openai.py
│           │       │   │   ├── modeling_tf_openai.py
│           │       │   │   ├── tokenization_openai.py
│           │       │   │   └── tokenization_openai_fast.py
│           │       │   ├── opt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_opt.py
│           │       │   │   ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_flax_opt.py
│           │       │   │   ├── modeling_opt.py
│           │       │   │   └── modeling_tf_opt.py
│           │       │   ├── owlvit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_owlvit.py
│           │       │   │   ├── convert_owlvit_original_flax_to_hf.py
│           │       │   │   ├── feature_extraction_owlvit.py
│           │       │   │   ├── modeling_owlvit.py
│           │       │   │   └── processing_owlvit.py
│           │       │   ├── pegasus/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_pegasus.py
│           │       │   │   ├── convert_pegasus_tf_to_pytorch.py
│           │       │   │   ├── modeling_flax_pegasus.py
│           │       │   │   ├── modeling_pegasus.py
│           │       │   │   ├── modeling_tf_pegasus.py
│           │       │   │   ├── tokenization_pegasus.py
│           │       │   │   └── tokenization_pegasus_fast.py
│           │       │   ├── pegasus_x/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_pegasus_x.py
│           │       │   │   └── modeling_pegasus_x.py
│           │       │   ├── perceiver/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_perceiver.py
│           │       │   │   ├── convert_perceiver_haiku_to_pytorch.py
│           │       │   │   ├── feature_extraction_perceiver.py
│           │       │   │   ├── modeling_perceiver.py
│           │       │   │   └── tokenization_perceiver.py
│           │       │   ├── phobert/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_phobert.py
│           │       │   ├── plbart/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_plbart.py
│           │       │   │   ├── convert_plbart_original_checkpoint_to_torch.py
│           │       │   │   ├── modeling_plbart.py
│           │       │   │   └── tokenization_plbart.py
│           │       │   ├── poolformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_poolformer.py
│           │       │   │   ├── convert_poolformer_original_to_pytorch.py
│           │       │   │   ├── feature_extraction_poolformer.py
│           │       │   │   └── modeling_poolformer.py
│           │       │   ├── prophetnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_prophetnet.py
│           │       │   │   ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_prophetnet.py
│           │       │   │   └── tokenization_prophetnet.py
│           │       │   ├── qdqbert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_qdqbert.py
│           │       │   │   └── modeling_qdqbert.py
│           │       │   ├── rag/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_rag.py
│           │       │   │   ├── modeling_rag.py
│           │       │   │   ├── modeling_tf_rag.py
│           │       │   │   ├── retrieval_rag.py
│           │       │   │   └── tokenization_rag.py
│           │       │   ├── realm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_realm.py
│           │       │   │   ├── modeling_realm.py
│           │       │   │   ├── retrieval_realm.py
│           │       │   │   ├── tokenization_realm.py
│           │       │   │   └── tokenization_realm_fast.py
│           │       │   ├── reformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_reformer.py
│           │       │   │   ├── convert_reformer_trax_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_reformer.py
│           │       │   │   ├── tokenization_reformer.py
│           │       │   │   └── tokenization_reformer_fast.py
│           │       │   ├── regnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_regnet.py
│           │       │   │   ├── convert_regnet_seer_10b_to_pytorch.py
│           │       │   │   ├── convert_regnet_to_pytorch.py
│           │       │   │   ├── modeling_regnet.py
│           │       │   │   └── modeling_tf_regnet.py
│           │       │   ├── rembert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_rembert.py
│           │       │   │   ├── convert_rembert_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_rembert.py
│           │       │   │   ├── modeling_tf_rembert.py
│           │       │   │   ├── tokenization_rembert.py
│           │       │   │   └── tokenization_rembert_fast.py
│           │       │   ├── resnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_resnet.py
│           │       │   │   ├── convert_resnet_to_pytorch.py
│           │       │   │   ├── modeling_resnet.py
│           │       │   │   └── modeling_tf_resnet.py
│           │       │   ├── retribert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_retribert.py
│           │       │   │   ├── modeling_retribert.py
│           │       │   │   ├── tokenization_retribert.py
│           │       │   │   └── tokenization_retribert_fast.py
│           │       │   ├── roberta/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_roberta.py
│           │       │   │   ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_flax_roberta.py
│           │       │   │   ├── modeling_roberta.py
│           │       │   │   ├── modeling_tf_roberta.py
│           │       │   │   ├── tokenization_roberta.py
│           │       │   │   └── tokenization_roberta_fast.py
│           │       │   ├── roformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_roformer.py
│           │       │   │   ├── convert_roformer_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_flax_roformer.py
│           │       │   │   ├── modeling_roformer.py
│           │       │   │   ├── modeling_tf_roformer.py
│           │       │   │   ├── tokenization_roformer.py
│           │       │   │   ├── tokenization_roformer_fast.py
│           │       │   │   └── tokenization_utils.py
│           │       │   ├── segformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_segformer.py
│           │       │   │   ├── convert_segformer_original_to_pytorch.py
│           │       │   │   ├── feature_extraction_segformer.py
│           │       │   │   ├── modeling_segformer.py
│           │       │   │   └── modeling_tf_segformer.py
│           │       │   ├── sew/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_sew.py
│           │       │   │   ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_sew.py
│           │       │   ├── sew_d/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_sew_d.py
│           │       │   │   ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_sew_d.py
│           │       │   ├── speech_encoder_decoder/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_speech_encoder_decoder.py
│           │       │   │   ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
│           │       │   │   ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
│           │       │   │   ├── modeling_flax_speech_encoder_decoder.py
│           │       │   │   └── modeling_speech_encoder_decoder.py
│           │       │   ├── speech_to_text/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_speech_to_text.py
│           │       │   │   ├── convert_s2t_fairseq_to_tfms.py
│           │       │   │   ├── feature_extraction_speech_to_text.py
│           │       │   │   ├── modeling_speech_to_text.py
│           │       │   │   ├── modeling_tf_speech_to_text.py
│           │       │   │   ├── processing_speech_to_text.py
│           │       │   │   └── tokenization_speech_to_text.py
│           │       │   ├── speech_to_text_2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_speech_to_text_2.py
│           │       │   │   ├── modeling_speech_to_text_2.py
│           │       │   │   ├── processing_speech_to_text_2.py
│           │       │   │   └── tokenization_speech_to_text_2.py
│           │       │   ├── splinter/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_splinter.py
│           │       │   │   ├── modeling_splinter.py
│           │       │   │   ├── tokenization_splinter.py
│           │       │   │   └── tokenization_splinter_fast.py
│           │       │   ├── squeezebert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_squeezebert.py
│           │       │   │   ├── modeling_squeezebert.py
│           │       │   │   ├── tokenization_squeezebert.py
│           │       │   │   └── tokenization_squeezebert_fast.py
│           │       │   ├── swin/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_swin.py
│           │       │   │   ├── convert_swin_timm_to_pytorch.py
│           │       │   │   ├── modeling_swin.py
│           │       │   │   └── modeling_tf_swin.py
│           │       │   ├── swinv2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_swinv2.py
│           │       │   │   ├── convert_swinv2_timm_to_pytorch.py
│           │       │   │   └── modeling_swinv2.py
│           │       │   ├── t5/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_t5.py
│           │       │   │   ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_t5x_checkpoint_to_flax.py
│           │       │   │   ├── download_from_gcp.sh
│           │       │   │   ├── modeling_flax_t5.py
│           │       │   │   ├── modeling_t5.py
│           │       │   │   ├── modeling_tf_t5.py
│           │       │   │   ├── tokenization_t5.py
│           │       │   │   └── tokenization_t5_fast.py
│           │       │   ├── table_transformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_table_transformer.py
│           │       │   │   ├── convert_table_transformer_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_table_transformer.py
│           │       │   ├── tapas/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_tapas.py
│           │       │   │   ├── convert_tapas_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_tapas.py
│           │       │   │   ├── modeling_tf_tapas.py
│           │       │   │   └── tokenization_tapas.py
│           │       │   ├── tapex/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_tapex.py
│           │       │   ├── time_series_transformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_time_series_transformer.py
│           │       │   │   └── modeling_time_series_transformer.py
│           │       │   ├── trajectory_transformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_trajectory_transformer.py
│           │       │   │   ├── convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_trajectory_transformer.py
│           │       │   ├── transfo_xl/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_transfo_xl.py
│           │       │   │   ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_tf_transfo_xl.py
│           │       │   │   ├── modeling_tf_transfo_xl_utilities.py
│           │       │   │   ├── modeling_transfo_xl.py
│           │       │   │   ├── modeling_transfo_xl_utilities.py
│           │       │   │   └── tokenization_transfo_xl.py
│           │       │   ├── trocr/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_trocr.py
│           │       │   │   ├── convert_trocr_unilm_to_pytorch.py
│           │       │   │   ├── modeling_trocr.py
│           │       │   │   └── processing_trocr.py
│           │       │   ├── unispeech/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_unispeech.py
│           │       │   │   ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_unispeech.py
│           │       │   ├── unispeech_sat/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_unispeech_sat.py
│           │       │   │   ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_unispeech_sat.py
│           │       │   ├── van/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_van.py
│           │       │   │   ├── convert_van_to_pytorch.py
│           │       │   │   └── modeling_van.py
│           │       │   ├── videomae/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_videomae.py
│           │       │   │   ├── convert_videomae_to_pytorch.py
│           │       │   │   ├── feature_extraction_videomae.py
│           │       │   │   └── modeling_videomae.py
│           │       │   ├── vilt/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vilt.py
│           │       │   │   ├── convert_vilt_original_to_pytorch.py
│           │       │   │   ├── feature_extraction_vilt.py
│           │       │   │   ├── modeling_vilt.py
│           │       │   │   └── processing_vilt.py
│           │       │   ├── vision_encoder_decoder/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vision_encoder_decoder.py
│           │       │   │   ├── modeling_flax_vision_encoder_decoder.py
│           │       │   │   ├── modeling_tf_vision_encoder_decoder.py
│           │       │   │   └── modeling_vision_encoder_decoder.py
│           │       │   ├── vision_text_dual_encoder/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vision_text_dual_encoder.py
│           │       │   │   ├── modeling_flax_vision_text_dual_encoder.py
│           │       │   │   ├── modeling_vision_text_dual_encoder.py
│           │       │   │   └── processing_vision_text_dual_encoder.py
│           │       │   ├── visual_bert/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_visual_bert.py
│           │       │   │   ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_visual_bert.py
│           │       │   ├── vit/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vit.py
│           │       │   │   ├── convert_dino_to_pytorch.py
│           │       │   │   ├── convert_vit_timm_to_pytorch.py
│           │       │   │   ├── feature_extraction_vit.py
│           │       │   │   ├── modeling_flax_vit.py
│           │       │   │   ├── modeling_tf_vit.py
│           │       │   │   └── modeling_vit.py
│           │       │   ├── vit_mae/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vit_mae.py
│           │       │   │   ├── convert_vit_mae_to_pytorch.py
│           │       │   │   ├── modeling_tf_vit_mae.py
│           │       │   │   └── modeling_vit_mae.py
│           │       │   ├── vit_msn/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_vit_msn.py
│           │       │   │   ├── convert_msn_to_pytorch.py
│           │       │   │   └── modeling_vit_msn.py
│           │       │   ├── wav2vec2/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_wav2vec2.py
│           │       │   │   ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py
│           │       │   │   ├── feature_extraction_wav2vec2.py
│           │       │   │   ├── modeling_flax_wav2vec2.py
│           │       │   │   ├── modeling_tf_wav2vec2.py
│           │       │   │   ├── modeling_wav2vec2.py
│           │       │   │   ├── processing_wav2vec2.py
│           │       │   │   └── tokenization_wav2vec2.py
│           │       │   ├── wav2vec2_conformer/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_wav2vec2_conformer.py
│           │       │   │   ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_wav2vec2_conformer.py
│           │       │   ├── wav2vec2_phoneme/
│           │       │   │   ├── __init__.py
│           │       │   │   └── tokenization_wav2vec2_phoneme.py
│           │       │   ├── wav2vec2_with_lm/
│           │       │   │   ├── __init__.py
│           │       │   │   └── processing_wav2vec2_with_lm.py
│           │       │   ├── wavlm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_wavlm.py
│           │       │   │   ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_wavlm.py
│           │       │   ├── whisper/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_whisper.py
│           │       │   │   ├── english_normalizer.py
│           │       │   │   ├── feature_extraction_whisper.py
│           │       │   │   ├── modeling_tf_whisper.py
│           │       │   │   ├── modeling_whisper.py
│           │       │   │   ├── processing_whisper.py
│           │       │   │   └── tokenization_whisper.py
│           │       │   ├── x_clip/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_x_clip.py
│           │       │   │   ├── convert_x_clip_original_pytorch_to_hf.py
│           │       │   │   ├── modeling_x_clip.py
│           │       │   │   └── processing_x_clip.py
│           │       │   ├── xglm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xglm.py
│           │       │   │   ├── convert_xglm_original_ckpt_to_trfms.py
│           │       │   │   ├── modeling_flax_xglm.py
│           │       │   │   ├── modeling_tf_xglm.py
│           │       │   │   ├── modeling_xglm.py
│           │       │   │   ├── tokenization_xglm.py
│           │       │   │   └── tokenization_xglm_fast.py
│           │       │   ├── xlm/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xlm.py
│           │       │   │   ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_tf_xlm.py
│           │       │   │   ├── modeling_xlm.py
│           │       │   │   └── tokenization_xlm.py
│           │       │   ├── xlm_prophetnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xlm_prophetnet.py
│           │       │   │   ├── modeling_xlm_prophetnet.py
│           │       │   │   └── tokenization_xlm_prophetnet.py
│           │       │   ├── xlm_roberta/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xlm_roberta.py
│           │       │   │   ├── modeling_flax_xlm_roberta.py
│           │       │   │   ├── modeling_tf_xlm_roberta.py
│           │       │   │   ├── modeling_xlm_roberta.py
│           │       │   │   ├── tokenization_xlm_roberta.py
│           │       │   │   └── tokenization_xlm_roberta_fast.py
│           │       │   ├── xlm_roberta_xl/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xlm_roberta_xl.py
│           │       │   │   ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py
│           │       │   │   └── modeling_xlm_roberta_xl.py
│           │       │   ├── xlnet/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_xlnet.py
│           │       │   │   ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│           │       │   │   ├── modeling_tf_xlnet.py
│           │       │   │   ├── modeling_xlnet.py
│           │       │   │   ├── tokenization_xlnet.py
│           │       │   │   └── tokenization_xlnet_fast.py
│           │       │   ├── yolos/
│           │       │   │   ├── __init__.py
│           │       │   │   ├── configuration_yolos.py
│           │       │   │   ├── convert_yolos_to_pytorch.py
│           │       │   │   ├── feature_extraction_yolos.py
│           │       │   │   └── modeling_yolos.py
│           │       │   └── yoso/
│           │       │       ├── __init__.py
│           │       │       ├── common.h
│           │       │       ├── common_cuda.h
│           │       │       ├── common_cuda_device.h
│           │       │       ├── configuration_yoso.py
│           │       │       ├── convert_yoso_pytorch_to_pytorch.py
│           │       │       ├── fast_lsh_cumulation.cu
│           │       │       ├── fast_lsh_cumulation.h
│           │       │       ├── fast_lsh_cumulation_cuda.cu
│           │       │       ├── fast_lsh_cumulation_cuda.h
│           │       │       ├── fast_lsh_cumulation_torch.cpp
│           │       │       └── modeling_yoso.py
│           │       ├── onnx/
│           │       │   ├── __init__.py
│           │       │   ├── __main__.py
│           │       │   ├── config.py
│           │       │   ├── convert.py
│           │       │   ├── features.py
│           │       │   └── utils.py
│           │       ├── optimization.py
│           │       ├── optimization_tf.py
│           │       ├── pipelines/
│           │       │   ├── __init__.py
│           │       │   ├── audio_classification.py
│           │       │   ├── audio_utils.py
│           │       │   ├── automatic_speech_recognition.py
│           │       │   ├── base.py
│           │       │   ├── conversational.py
│           │       │   ├── depth_estimation.py
│           │       │   ├── document_question_answering.py
│           │       │   ├── feature_extraction.py
│           │       │   ├── fill_mask.py
│           │       │   ├── image_classification.py
│           │       │   ├── image_segmentation.py
│           │       │   ├── image_to_text.py
│           │       │   ├── object_detection.py
│           │       │   ├── pt_utils.py
│           │       │   ├── question_answering.py
│           │       │   ├── table_question_answering.py
│           │       │   ├── text2text_generation.py
│           │       │   ├── text_classification.py
│           │       │   ├── text_generation.py
│           │       │   ├── token_classification.py
│           │       │   ├── visual_question_answering.py
│           │       │   ├── zero_shot_classification.py
│           │       │   ├── zero_shot_image_classification.py
│           │       │   └── zero_shot_object_detection.py
│           │       ├── processing_utils.py
│           │       ├── pytorch_utils.py
│           │       ├── sagemaker/
│           │       │   ├── __init__.py
│           │       │   ├── trainer_sm.py
│           │       │   └── training_args_sm.py
│           │       ├── testing_utils.py
│           │       ├── tf_utils.py
│           │       ├── tokenization_utils.py
│           │       ├── tokenization_utils_base.py
│           │       ├── tokenization_utils_fast.py
│           │       ├── trainer.py
│           │       ├── trainer_callback.py
│           │       ├── trainer_pt_utils.py
│           │       ├── trainer_seq2seq.py
│           │       ├── trainer_tf.py
│           │       ├── trainer_utils.py
│           │       ├── training_args.py
│           │       ├── training_args_seq2seq.py
│           │       ├── training_args_tf.py
│           │       └── utils/
│           │           ├── __init__.py
│           │           ├── bitsandbytes.py
│           │           ├── constants.py
│           │           ├── doc.py
│           │           ├── dummy_detectron2_objects.py
│           │           ├── dummy_flax_objects.py
│           │           ├── dummy_pt_objects.py
│           │           ├── dummy_scatter_objects.py
│           │           ├── dummy_sentencepiece_and_speech_objects.py
│           │           ├── dummy_sentencepiece_and_tokenizers_objects.py
│           │           ├── dummy_sentencepiece_objects.py
│           │           ├── dummy_speech_objects.py
│           │           ├── dummy_tensorflow_text_objects.py
│           │           ├── dummy_tf_objects.py
│           │           ├── dummy_timm_and_vision_objects.py
│           │           ├── dummy_tokenizers_objects.py
│           │           ├── dummy_vision_objects.py
│           │           ├── fx.py
│           │           ├── generic.py
│           │           ├── hp_naming.py
│           │           ├── hub.py
│           │           ├── import_utils.py
│           │           ├── logging.py
│           │           ├── model_parallel_utils.py
│           │           ├── notebook.py
│           │           ├── sentencepiece_model_pb2.py
│           │           └── versions.py
│           ├── templates/
│           │   ├── adding_a_missing_tokenization_test/
│           │   │   ├── README.md
│           │   │   ├── cookiecutter-template-{{cookiecutter.modelname}}/
│           │   │   │   └── test_tokenization_{{cookiecutter.lowercase_modelname}}.py
│           │   │   └── cookiecutter.json
│           │   ├── adding_a_new_example_script/
│           │   │   ├── README.md
│           │   │   ├── cookiecutter.json
│           │   │   └── {{cookiecutter.directory_name}}/
│           │   │       └── run_{{cookiecutter.example_shortcut}}.py
│           │   └── adding_a_new_model/
│           │       ├── ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md
│           │       ├── README.md
│           │       ├── cookiecutter-template-{{cookiecutter.modelname}}/
│           │       │   ├── __init__.py
│           │       │   ├── configuration.json
│           │       │   ├── configuration_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── modeling_flax_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── modeling_tf_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── modeling_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── test_modeling_flax_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── test_modeling_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── to_replace_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── tokenization_fast_{{cookiecutter.lowercase_modelname}}.py
│           │       │   ├── tokenization_{{cookiecutter.lowercase_modelname}}.py
│           │       │   └── {{cookiecutter.lowercase_modelname}}.mdx
│           │       ├── cookiecutter.json
│           │       ├── open_model_proposals/
│           │       │   ├── ADD_BIG_BIRD.md
│           │       │   └── README.md
│           │       └── tests/
│           │           ├── encoder-bert-tokenizer.json
│           │           ├── flax-encoder-bert-tokenizer.json
│           │           ├── flax-seq-2-seq-bart-tokenizer.json
│           │           ├── pt-encoder-bert-tokenizer.json
│           │           ├── pt-seq-2-seq-bart-tokenizer.json
│           │           ├── standalone.json
│           │           ├── tf-encoder-bert-tokenizer.json
│           │           └── tf-seq-2-seq-bart-tokenizer.json
│           ├── tests/
│           │   ├── __init__.py
│           │   ├── benchmark/
│           │   │   ├── __init__.py
│           │   │   ├── test_benchmark.py
│           │   │   └── test_benchmark_tf.py
│           │   ├── deepspeed/
│           │   │   ├── ds_config_zero2.json
│           │   │   ├── ds_config_zero3.json
│           │   │   ├── test_deepspeed.py
│           │   │   ├── test_model_zoo.py
│           │   │   └── vit_feature_extractor.json
│           │   ├── extended/
│           │   │   └── test_trainer_ext.py
│           │   ├── fixtures/
│           │   │   ├── add_distilbert_like_config.json
│           │   │   ├── dummy-config.json
│           │   │   ├── dummy_feature_extractor_config.json
│           │   │   ├── empty.txt
│           │   │   ├── input.txt
│           │   │   ├── merges.txt
│           │   │   ├── preprocessor_config.json
│           │   │   ├── sample_text.txt
│           │   │   ├── sample_text_no_unicode.txt
│           │   │   ├── spiece.model
│           │   │   ├── test_entity_vocab.json
│           │   │   ├── test_sentencepiece.model
│           │   │   ├── test_sentencepiece_bpe.model
│           │   │   ├── test_sentencepiece_no_bos.model
│           │   │   ├── test_sentencepiece_with_bytefallback.model
│           │   │   ├── tests_samples/
│           │   │   │   ├── .gitignore
│           │   │   │   ├── COCO/
│           │   │   │   │   ├── coco_annotations.txt
│           │   │   │   │   └── coco_panoptic_annotations.txt
│           │   │   │   ├── GermEval/
│           │   │   │   │   ├── dev.txt
│           │   │   │   │   ├── labels.txt
│           │   │   │   │   └── train.txt
│           │   │   │   ├── MRPC/
│           │   │   │   │   ├── dev.csv
│           │   │   │   │   ├── dev.tsv
│           │   │   │   │   ├── train.csv
│           │   │   │   │   └── train.tsv
│           │   │   │   ├── SQUAD/
│           │   │   │   │   └── sample.json
│           │   │   │   ├── STS-B/
│           │   │   │   │   ├── dev.tsv
│           │   │   │   │   └── train.tsv
│           │   │   │   ├── conll/
│           │   │   │   │   └── sample.json
│           │   │   │   ├── swag/
│           │   │   │   │   └── sample.json
│           │   │   │   ├── wiki_text/
│           │   │   │   │   └── wiki_00
│           │   │   │   ├── wmt16/
│           │   │   │   │   └── sample.json
│           │   │   │   ├── wmt_en_ro/
│           │   │   │   │   ├── test.json
│           │   │   │   │   ├── train.json
│           │   │   │   │   └── val.json
│           │   │   │   └── xsum/
│           │   │   │       └── sample.json
│           │   │   ├── vocab.json
│           │   │   └── vocab.txt
│           │   ├── generation/
│           │   │   ├── __init__.py
│           │   │   ├── test_generation_beam_constraints.py
│           │   │   ├── test_generation_beam_search.py
│           │   │   ├── test_generation_flax_logits_process.py
│           │   │   ├── test_generation_flax_utils.py
│           │   │   ├── test_generation_logits_process.py
│           │   │   ├── test_generation_stopping_criteria.py
│           │   │   ├── test_generation_tf_logits_process.py
│           │   │   ├── test_generation_tf_utils.py
│           │   │   └── test_generation_utils.py
│           │   ├── mixed_int8/
│           │   │   ├── README.md
│           │   │   ├── __init__.py
│           │   │   └── test_mixed_int8.py
│           │   ├── models/
│           │   │   ├── __init__.py
│           │   │   ├── albert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_albert.py
│           │   │   │   ├── test_modeling_flax_albert.py
│           │   │   │   ├── test_modeling_tf_albert.py
│           │   │   │   └── test_tokenization_albert.py
│           │   │   ├── auto/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_configuration_auto.py
│           │   │   │   ├── test_feature_extraction_auto.py
│           │   │   │   ├── test_modeling_auto.py
│           │   │   │   ├── test_modeling_flax_auto.py
│           │   │   │   ├── test_modeling_tf_auto.py
│           │   │   │   ├── test_modeling_tf_pytorch.py
│           │   │   │   ├── test_processor_auto.py
│           │   │   │   └── test_tokenization_auto.py
│           │   │   ├── bart/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_bart.py
│           │   │   │   ├── test_modeling_flax_bart.py
│           │   │   │   ├── test_modeling_tf_bart.py
│           │   │   │   └── test_tokenization_bart.py
│           │   │   ├── barthez/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_barthez.py
│           │   │   ├── bartpho/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_bartpho.py
│           │   │   ├── beit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_beit.py
│           │   │   │   ├── test_modeling_beit.py
│           │   │   │   └── test_modeling_flax_beit.py
│           │   │   ├── bert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_bert.py
│           │   │   │   ├── test_modeling_flax_bert.py
│           │   │   │   ├── test_modeling_tf_bert.py
│           │   │   │   ├── test_tokenization_bert.py
│           │   │   │   └── test_tokenization_bert_tf.py
│           │   │   ├── bert_generation/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_bert_generation.py
│           │   │   │   └── test_tokenization_bert_generation.py
│           │   │   ├── bert_japanese/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_bert_japanese.py
│           │   │   ├── bertweet/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_bertweet.py
│           │   │   ├── big_bird/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_big_bird.py
│           │   │   │   ├── test_modeling_flax_big_bird.py
│           │   │   │   └── test_tokenization_big_bird.py
│           │   │   ├── bigbird_pegasus/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_bigbird_pegasus.py
│           │   │   ├── blenderbot/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_blenderbot.py
│           │   │   │   ├── test_modeling_flax_blenderbot.py
│           │   │   │   ├── test_modeling_tf_blenderbot.py
│           │   │   │   └── test_tokenization_blenderbot.py
│           │   │   ├── blenderbot_small/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_blenderbot_small.py
│           │   │   │   ├── test_modeling_flax_blenderbot_small.py
│           │   │   │   ├── test_modeling_tf_blenderbot_small.py
│           │   │   │   └── test_tokenization_blenderbot_small.py
│           │   │   ├── bloom/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_bloom.py
│           │   │   │   └── test_tokenization_bloom.py
│           │   │   ├── bort/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_bort.py
│           │   │   │   └── test_modeling_tf_bort.py
│           │   │   ├── byt5/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_byt5.py
│           │   │   ├── camembert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_camembert.py
│           │   │   │   ├── test_modeling_tf_camembert.py
│           │   │   │   └── test_tokenization_camembert.py
│           │   │   ├── canine/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_canine.py
│           │   │   │   └── test_tokenization_canine.py
│           │   │   ├── clip/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_clip.py
│           │   │   │   ├── test_modeling_clip.py
│           │   │   │   ├── test_modeling_flax_clip.py
│           │   │   │   ├── test_modeling_tf_clip.py
│           │   │   │   ├── test_processor_clip.py
│           │   │   │   └── test_tokenization_clip.py
│           │   │   ├── codegen/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_codegen.py
│           │   │   │   └── test_tokenization_codegen.py
│           │   │   ├── conditional_detr/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_conditional_detr.py
│           │   │   │   └── test_modeling_conditional_detr.py
│           │   │   ├── convbert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_convbert.py
│           │   │   │   └── test_modeling_tf_convbert.py
│           │   │   ├── convnext/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_convnext.py
│           │   │   │   ├── test_modeling_convnext.py
│           │   │   │   └── test_modeling_tf_convnext.py
│           │   │   ├── cpm/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_cpm.py
│           │   │   ├── ctrl/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_ctrl.py
│           │   │   │   ├── test_modeling_tf_ctrl.py
│           │   │   │   └── test_tokenization_ctrl.py
│           │   │   ├── cvt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_cvt.py
│           │   │   │   └── test_modeling_tf_cvt.py
│           │   │   ├── data2vec/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_data2vec_audio.py
│           │   │   │   ├── test_modeling_data2vec_text.py
│           │   │   │   ├── test_modeling_data2vec_vision.py
│           │   │   │   └── test_modeling_tf_data2vec_vision.py
│           │   │   ├── deberta/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_deberta.py
│           │   │   │   ├── test_modeling_tf_deberta.py
│           │   │   │   └── test_tokenization_deberta.py
│           │   │   ├── deberta_v2/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_deberta_v2.py
│           │   │   │   ├── test_modeling_tf_deberta_v2.py
│           │   │   │   └── test_tokenization_deberta_v2.py
│           │   │   ├── decision_transformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_decision_transformer.py
│           │   │   ├── deformable_detr/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_deformable_detr.py
│           │   │   │   └── test_modeling_deformable_detr.py
│           │   │   ├── deit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_deit.py
│           │   │   │   ├── test_modeling_deit.py
│           │   │   │   └── test_modeling_tf_deit.py
│           │   │   ├── detr/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_detr.py
│           │   │   │   └── test_modeling_detr.py
│           │   │   ├── distilbert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_distilbert.py
│           │   │   │   ├── test_modeling_flax_distilbert.py
│           │   │   │   ├── test_modeling_tf_distilbert.py
│           │   │   │   └── test_tokenization_distilbert.py
│           │   │   ├── dit/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_dit.py
│           │   │   ├── donut/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_donut.py
│           │   │   │   └── test_modeling_donut_swin.py
│           │   │   ├── dpr/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_dpr.py
│           │   │   │   ├── test_modeling_tf_dpr.py
│           │   │   │   └── test_tokenization_dpr.py
│           │   │   ├── dpt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_dpt.py
│           │   │   │   └── test_modeling_dpt.py
│           │   │   ├── electra/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_electra.py
│           │   │   │   ├── test_modeling_flax_electra.py
│           │   │   │   └── test_modeling_tf_electra.py
│           │   │   ├── encoder_decoder/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_encoder_decoder.py
│           │   │   │   ├── test_modeling_flax_encoder_decoder.py
│           │   │   │   └── test_modeling_tf_encoder_decoder.py
│           │   │   ├── ernie/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_ernie.py
│           │   │   ├── esm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_esm.py
│           │   │   │   ├── test_modeling_esmfold.py
│           │   │   │   ├── test_modeling_tf_esm.py
│           │   │   │   └── test_tokenization_esm.py
│           │   │   ├── flaubert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flaubert.py
│           │   │   │   └── test_modeling_tf_flaubert.py
│           │   │   ├── flava/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_flava.py
│           │   │   │   ├── test_modeling_flava.py
│           │   │   │   └── test_processor_flava.py
│           │   │   ├── fnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_fnet.py
│           │   │   │   └── test_tokenization_fnet.py
│           │   │   ├── fsmt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_fsmt.py
│           │   │   │   └── test_tokenization_fsmt.py
│           │   │   ├── funnel/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_funnel.py
│           │   │   │   ├── test_modeling_tf_funnel.py
│           │   │   │   └── test_tokenization_funnel.py
│           │   │   ├── glpn/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_glpn.py
│           │   │   │   └── test_modeling_glpn.py
│           │   │   ├── gpt2/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_gpt2.py
│           │   │   │   ├── test_modeling_gpt2.py
│           │   │   │   ├── test_modeling_tf_gpt2.py
│           │   │   │   └── test_tokenization_gpt2.py
│           │   │   ├── gpt_neo/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_gpt_neo.py
│           │   │   │   └── test_modeling_gpt_neo.py
│           │   │   ├── gpt_neox/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_gpt_neox.py
│           │   │   ├── gpt_neox_japanese/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_gpt_neox_japanese.py
│           │   │   │   └── test_tokenization_gpt_neox_japanese.py
│           │   │   ├── gptj/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_gptj.py
│           │   │   │   ├── test_modeling_gptj.py
│           │   │   │   └── test_modeling_tf_gptj.py
│           │   │   ├── groupvit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_groupvit.py
│           │   │   │   └── test_modeling_tf_groupvit.py
│           │   │   ├── herbert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_herbert.py
│           │   │   ├── hubert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_hubert.py
│           │   │   │   └── test_modeling_tf_hubert.py
│           │   │   ├── ibert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_ibert.py
│           │   │   ├── imagegpt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_imagegpt.py
│           │   │   │   └── test_modeling_imagegpt.py
│           │   │   ├── layoutlm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_layoutlm.py
│           │   │   │   ├── test_modeling_tf_layoutlm.py
│           │   │   │   └── test_tokenization_layoutlm.py
│           │   │   ├── layoutlmv2/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_layoutlmv2.py
│           │   │   │   ├── test_modeling_layoutlmv2.py
│           │   │   │   ├── test_processor_layoutlmv2.py
│           │   │   │   └── test_tokenization_layoutlmv2.py
│           │   │   ├── layoutlmv3/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_layoutlmv3.py
│           │   │   │   ├── test_modeling_layoutlmv3.py
│           │   │   │   ├── test_modeling_tf_layoutlmv3.py
│           │   │   │   ├── test_processor_layoutlmv3.py
│           │   │   │   └── test_tokenization_layoutlmv3.py
│           │   │   ├── layoutxlm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_processor_layoutxlm.py
│           │   │   │   └── test_tokenization_layoutxlm.py
│           │   │   ├── led/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_led.py
│           │   │   │   └── test_modeling_tf_led.py
│           │   │   ├── levit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_levit.py
│           │   │   │   └── test_modeling_levit.py
│           │   │   ├── lilt/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_lilt.py
│           │   │   ├── longformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_longformer.py
│           │   │   │   ├── test_modeling_tf_longformer.py
│           │   │   │   └── test_tokenization_longformer.py
│           │   │   ├── longt5/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_longt5.py
│           │   │   │   └── test_modeling_longt5.py
│           │   │   ├── luke/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_luke.py
│           │   │   │   └── test_tokenization_luke.py
│           │   │   ├── lxmert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_lxmert.py
│           │   │   │   ├── test_modeling_tf_lxmert.py
│           │   │   │   └── test_tokenization_lxmert.py
│           │   │   ├── m2m_100/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_m2m_100.py
│           │   │   │   └── test_tokenization_m2m_100.py
│           │   │   ├── marian/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_marian.py
│           │   │   │   ├── test_modeling_marian.py
│           │   │   │   ├── test_modeling_tf_marian.py
│           │   │   │   └── test_tokenization_marian.py
│           │   │   ├── markuplm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_markuplm.py
│           │   │   │   ├── test_modeling_markuplm.py
│           │   │   │   ├── test_processor_markuplm.py
│           │   │   │   └── test_tokenization_markuplm.py
│           │   │   ├── maskformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_maskformer.py
│           │   │   │   └── test_modeling_maskformer.py
│           │   │   ├── mbart/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_mbart.py
│           │   │   │   ├── test_modeling_mbart.py
│           │   │   │   ├── test_modeling_tf_mbart.py
│           │   │   │   └── test_tokenization_mbart.py
│           │   │   ├── mbart50/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_mbart50.py
│           │   │   ├── mctct/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_mctct.py
│           │   │   │   ├── test_modeling_mctct.py
│           │   │   │   └── test_processor_mctct.py
│           │   │   ├── megatron_bert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_megatron_bert.py
│           │   │   ├── megatron_gpt2/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_megatron_gpt2.py
│           │   │   ├── mluke/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_mluke.py
│           │   │   ├── mobilebert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_mobilebert.py
│           │   │   │   ├── test_modeling_tf_mobilebert.py
│           │   │   │   └── test_tokenization_mobilebert.py
│           │   │   ├── mobilevit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_mobilevit.py
│           │   │   │   ├── test_modeling_mobilevit.py
│           │   │   │   └── test_modeling_tf_mobilevit.py
│           │   │   ├── mpnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_mpnet.py
│           │   │   │   ├── test_modeling_tf_mpnet.py
│           │   │   │   └── test_tokenization_mpnet.py
│           │   │   ├── mt5/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_mt5.py
│           │   │   │   ├── test_modeling_mt5.py
│           │   │   │   └── test_modeling_tf_mt5.py
│           │   │   ├── mvp/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_mvp.py
│           │   │   │   └── test_tokenization_mvp.py
│           │   │   ├── nezha/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_nezha.py
│           │   │   ├── nllb/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_nllb.py
│           │   │   ├── nystromformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_nystromformer.py
│           │   │   ├── openai/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_openai.py
│           │   │   │   ├── test_modeling_tf_openai.py
│           │   │   │   └── test_tokenization_openai.py
│           │   │   ├── opt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_opt.py
│           │   │   │   ├── test_modeling_opt.py
│           │   │   │   └── test_modeling_tf_opt.py
│           │   │   ├── owlvit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_owlvit.py
│           │   │   │   ├── test_modeling_owlvit.py
│           │   │   │   └── test_processor_owlvit.py
│           │   │   ├── pegasus/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_pegasus.py
│           │   │   │   ├── test_modeling_pegasus.py
│           │   │   │   ├── test_modeling_tf_pegasus.py
│           │   │   │   └── test_tokenization_pegasus.py
│           │   │   ├── pegasus_x/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_pegasus_x.py
│           │   │   ├── perceiver/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_perceiver.py
│           │   │   │   └── test_tokenization_perceiver.py
│           │   │   ├── phobert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_phobert.py
│           │   │   ├── plbart/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_plbart.py
│           │   │   │   └── test_tokenization_plbart.py
│           │   │   ├── poolformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_poolformer.py
│           │   │   │   └── test_modeling_poolformer.py
│           │   │   ├── prophetnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_prophetnet.py
│           │   │   │   └── test_tokenization_prophetnet.py
│           │   │   ├── qdqbert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_qdqbert.py
│           │   │   ├── rag/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_rag.py
│           │   │   │   ├── test_modeling_tf_rag.py
│           │   │   │   ├── test_retrieval_rag.py
│           │   │   │   └── test_tokenization_rag.py
│           │   │   ├── realm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_realm.py
│           │   │   │   ├── test_retrieval_realm.py
│           │   │   │   └── test_tokenization_realm.py
│           │   │   ├── reformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_reformer.py
│           │   │   │   └── test_tokenization_reformer.py
│           │   │   ├── regnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_regnet.py
│           │   │   │   └── test_modeling_tf_regnet.py
│           │   │   ├── rembert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_rembert.py
│           │   │   │   └── test_modeling_tf_rembert.py
│           │   │   ├── resnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_resnet.py
│           │   │   │   └── test_modeling_tf_resnet.py
│           │   │   ├── retribert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_retribert.py
│           │   │   ├── roberta/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_roberta.py
│           │   │   │   ├── test_modeling_roberta.py
│           │   │   │   ├── test_modeling_tf_roberta.py
│           │   │   │   └── test_tokenization_roberta.py
│           │   │   ├── roformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_roformer.py
│           │   │   │   ├── test_modeling_roformer.py
│           │   │   │   ├── test_modeling_tf_roformer.py
│           │   │   │   └── test_tokenization_roformer.py
│           │   │   ├── segformer/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_segformer.py
│           │   │   │   ├── test_modeling_segformer.py
│           │   │   │   └── test_modeling_tf_segformer.py
│           │   │   ├── sew/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_sew.py
│           │   │   ├── sew_d/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_sew_d.py
│           │   │   ├── speech_encoder_decoder/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_speech_encoder_decoder.py
│           │   │   │   └── test_modeling_speech_encoder_decoder.py
│           │   │   ├── speech_to_text/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_speech_to_text.py
│           │   │   │   ├── test_modeling_speech_to_text.py
│           │   │   │   ├── test_modeling_tf_speech_to_text.py
│           │   │   │   ├── test_processor_speech_to_text.py
│           │   │   │   └── test_tokenization_speech_to_text.py
│           │   │   ├── speech_to_text_2/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_speech_to_text_2.py
│           │   │   │   └── test_tokenization_speech_to_text_2.py
│           │   │   ├── splinter/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_splinter.py
│           │   │   ├── squeezebert/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_squeezebert.py
│           │   │   │   └── test_tokenization_squeezebert.py
│           │   │   ├── swin/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_swin.py
│           │   │   │   └── test_modeling_tf_swin.py
│           │   │   ├── swinv2/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_swinv2.py
│           │   │   ├── t5/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_t5.py
│           │   │   │   ├── test_modeling_t5.py
│           │   │   │   ├── test_modeling_tf_t5.py
│           │   │   │   └── test_tokenization_t5.py
│           │   │   ├── table_transformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_table_transformer.py
│           │   │   ├── tapas/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_tapas.py
│           │   │   │   ├── test_modeling_tf_tapas.py
│           │   │   │   └── test_tokenization_tapas.py
│           │   │   ├── tapex/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_tapex.py
│           │   │   ├── time_series_transformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_time_series_transformer.py
│           │   │   ├── trajectory_transformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_trajectory_transformer.py
│           │   │   ├── transfo_xl/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_tf_transfo_xl.py
│           │   │   │   ├── test_modeling_transfo_xl.py
│           │   │   │   └── test_tokenization_transfo_xl.py
│           │   │   ├── trocr/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_trocr.py
│           │   │   ├── unispeech/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_unispeech.py
│           │   │   ├── unispeech_sat/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_unispeech_sat.py
│           │   │   ├── van/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_van.py
│           │   │   ├── videomae/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_videomae.py
│           │   │   │   └── test_modeling_videomae.py
│           │   │   ├── vilt/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_vilt.py
│           │   │   │   └── test_modeling_vilt.py
│           │   │   ├── vision_encoder_decoder/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_vision_encoder_decoder.py
│           │   │   │   ├── test_modeling_tf_vision_encoder_decoder.py
│           │   │   │   └── test_modeling_vision_encoder_decoder.py
│           │   │   ├── vision_text_dual_encoder/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_vision_text_dual_encoder.py
│           │   │   │   ├── test_modeling_vision_text_dual_encoder.py
│           │   │   │   └── test_processor_vision_text_dual_encoder.py
│           │   │   ├── visual_bert/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_visual_bert.py
│           │   │   ├── vit/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_vit.py
│           │   │   │   ├── test_modeling_flax_vit.py
│           │   │   │   ├── test_modeling_tf_vit.py
│           │   │   │   └── test_modeling_vit.py
│           │   │   ├── vit_mae/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_tf_vit_mae.py
│           │   │   │   └── test_modeling_vit_mae.py
│           │   │   ├── vit_msn/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_vit_msn.py
│           │   │   ├── wav2vec2/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_wav2vec2.py
│           │   │   │   ├── test_modeling_flax_wav2vec2.py
│           │   │   │   ├── test_modeling_tf_wav2vec2.py
│           │   │   │   ├── test_modeling_wav2vec2.py
│           │   │   │   ├── test_processor_wav2vec2.py
│           │   │   │   └── test_tokenization_wav2vec2.py
│           │   │   ├── wav2vec2_conformer/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_wav2vec2_conformer.py
│           │   │   ├── wav2vec2_phoneme/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_tokenization_wav2vec2_phoneme.py
│           │   │   ├── wav2vec2_with_lm/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_processor_wav2vec2_with_lm.py
│           │   │   ├── wavlm/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_wavlm.py
│           │   │   ├── whisper/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_whisper.py
│           │   │   │   ├── test_modeling_tf_whisper.py
│           │   │   │   ├── test_modeling_whisper.py
│           │   │   │   ├── test_processor_whisper.py
│           │   │   │   └── test_tokenization_whisper.py
│           │   │   ├── x_clip/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_x_clip.py
│           │   │   ├── xglm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_xglm.py
│           │   │   │   ├── test_modeling_tf_xglm.py
│           │   │   │   ├── test_modeling_xglm.py
│           │   │   │   └── test_tokenization_xglm.py
│           │   │   ├── xlm/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_tf_xlm.py
│           │   │   │   ├── test_modeling_xlm.py
│           │   │   │   └── test_tokenization_xlm.py
│           │   │   ├── xlm_prophetnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_xlm_prophetnet.py
│           │   │   │   └── test_tokenization_xlm_prophetnet.py
│           │   │   ├── xlm_roberta/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_flax_xlm_roberta.py
│           │   │   │   ├── test_modeling_tf_xlm_roberta.py
│           │   │   │   ├── test_modeling_xlm_roberta.py
│           │   │   │   └── test_tokenization_xlm_roberta.py
│           │   │   ├── xlm_roberta_xl/
│           │   │   │   ├── __init__.py
│           │   │   │   └── test_modeling_xlm_roberta_xl.py
│           │   │   ├── xlnet/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_modeling_tf_xlnet.py
│           │   │   │   ├── test_modeling_xlnet.py
│           │   │   │   └── test_tokenization_xlnet.py
│           │   │   ├── yolos/
│           │   │   │   ├── __init__.py
│           │   │   │   ├── test_feature_extraction_yolos.py
│           │   │   │   └── test_modeling_yolos.py
│           │   │   └── yoso/
│           │   │       ├── __init__.py
│           │   │       └── test_modeling_yoso.py
│           │   ├── onnx/
│           │   │   ├── __init__.py
│           │   │   ├── test_features.py
│           │   │   ├── test_onnx.py
│           │   │   └── test_onnx_v2.py
│           │   ├── optimization/
│           │   │   ├── __init__.py
│           │   │   ├── test_optimization.py
│           │   │   └── test_optimization_tf.py
│           │   ├── pipelines/
│           │   │   ├── __init__.py
│           │   │   ├── test_pipelines_audio_classification.py
│           │   │   ├── test_pipelines_automatic_speech_recognition.py
│           │   │   ├── test_pipelines_common.py
│           │   │   ├── test_pipelines_conversational.py
│           │   │   ├── test_pipelines_depth_estimation.py
│           │   │   ├── test_pipelines_document_question_answering.py
│           │   │   ├── test_pipelines_feature_extraction.py
│           │   │   ├── test_pipelines_fill_mask.py
│           │   │   ├── test_pipelines_image_classification.py
│           │   │   ├── test_pipelines_image_segmentation.py
│           │   │   ├── test_pipelines_image_to_text.py
│           │   │   ├── test_pipelines_object_detection.py
│           │   │   ├── test_pipelines_question_answering.py
│           │   │   ├── test_pipelines_summarization.py
│           │   │   ├── test_pipelines_table_question_answering.py
│           │   │   ├── test_pipelines_text2text_generation.py
│           │   │   ├── test_pipelines_text_classification.py
│           │   │   ├── test_pipelines_text_generation.py
│           │   │   ├── test_pipelines_token_classification.py
│           │   │   ├── test_pipelines_translation.py
│           │   │   ├── test_pipelines_visual_question_answering.py
│           │   │   ├── test_pipelines_zero_shot.py
│           │   │   ├── test_pipelines_zero_shot_image_classification.py
│           │   │   └── test_pipelines_zero_shot_object_detection.py
│           │   ├── repo_utils/
│           │   │   ├── test_check_copies.py
│           │   │   ├── test_check_dummies.py
│           │   │   └── test_tests_fetcher.py
│           │   ├── sagemaker/
│           │   │   ├── README.md
│           │   │   ├── __init__.py
│           │   │   ├── conftest.py
│           │   │   ├── scripts/
│           │   │   │   ├── pytorch/
│           │   │   │   │   ├── requirements.txt
│           │   │   │   │   ├── run_ddp.py
│           │   │   │   │   └── run_glue_model_parallelism.py
│           │   │   │   └── tensorflow/
│           │   │   │       ├── requirements.txt
│           │   │   │       ├── run_tf.py
│           │   │   │       └── run_tf_dist.py
│           │   │   ├── test_multi_node_data_parallel.py
│           │   │   ├── test_multi_node_model_parallel.py
│           │   │   └── test_single_node_gpu.py
│           │   ├── test_configuration_common.py
│           │   ├── test_feature_extraction_common.py
│           │   ├── test_image_transforms.py
│           │   ├── test_modeling_common.py
│           │   ├── test_modeling_flax_common.py
│           │   ├── test_modeling_tf_common.py
│           │   ├── test_sequence_feature_extraction_common.py
│           │   ├── test_tokenization_common.py
│           │   ├── tokenization/
│           │   │   ├── __init__.py
│           │   │   ├── test_tokenization_fast.py
│           │   │   └── test_tokenization_utils.py
│           │   ├── trainer/
│           │   │   ├── __init__.py
│           │   │   ├── test_data_collator.py
│           │   │   ├── test_trainer.py
│           │   │   ├── test_trainer_callback.py
│           │   │   ├── test_trainer_distributed.py
│           │   │   ├── test_trainer_seq2seq.py
│           │   │   ├── test_trainer_tpu.py
│           │   │   └── test_trainer_utils.py
│           │   └── utils/
│           │       ├── __init__.py
│           │       ├── test_activations.py
│           │       ├── test_activations_tf.py
│           │       ├── test_add_new_model_like.py
│           │       ├── test_cli.py
│           │       ├── test_convert_slow_tokenizer.py
│           │       ├── test_doc_samples.py
│           │       ├── test_file_utils.py
│           │       ├── test_generic.py
│           │       ├── test_hf_argparser.py
│           │       ├── test_hub_utils.py
│           │       ├── test_image_utils.py
│           │       ├── test_logging.py
│           │       ├── test_model_card.py
│           │       ├── test_model_output.py
│           │       ├── test_modeling_tf_core.py
│           │       ├── test_offline.py
│           │       ├── test_skip_decorators.py
│           │       └── test_versions_utils.py
│           └── utils/
│               ├── check_config_docstrings.py
│               ├── check_copies.py
│               ├── check_doc_toc.py
│               ├── check_dummies.py
│               ├── check_inits.py
│               ├── check_repo.py
│               ├── check_self_hosted_runner.py
│               ├── check_table.py
│               ├── check_tf_ops.py
│               ├── create_dummy_models.py
│               ├── custom_init_isort.py
│               ├── documentation_tests.txt
│               ├── download_glue_data.py
│               ├── get_ci_error_statistics.py
│               ├── get_github_job_time.py
│               ├── get_modified_files.py
│               ├── notification_service.py
│               ├── notification_service_doc_tests.py
│               ├── past_ci_versions.py
│               ├── prepare_for_doc_test.py
│               ├── print_env.py
│               ├── release.py
│               ├── sort_auto_mappings.py
│               ├── test_module/
│               │   ├── __init__.py
│               │   ├── custom_configuration.py
│               │   ├── custom_feature_extraction.py
│               │   ├── custom_modeling.py
│               │   ├── custom_pipeline.py
│               │   ├── custom_processing.py
│               │   ├── custom_tokenization.py
│               │   └── custom_tokenization_fast.py
│               ├── tests_fetcher.py
│               ├── tf_ops/
│               │   └── onnx.json
│               └── update_metadata.py
├── docs/
│   ├── disk_commands.txt
│   ├── gcp_setup.md
│   └── paper.md
├── experimental/
│   ├── cost_model.py
│   └── fit_cost_model.py
├── flexllmgen/
│   ├── __init__.py
│   ├── apps/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── completion.py
│   │   ├── data_wrangle/
│   │   │   ├── README.md
│   │   │   ├── data_wrangle_run.py
│   │   │   ├── install.sh
│   │   │   ├── test_batch_query_all_opt175b.sh
│   │   │   ├── test_batch_query_all_opt30b.sh
│   │   │   ├── test_batch_query_all_opt6.7b.sh
│   │   │   ├── test_batch_query_case.sh
│   │   │   ├── test_single_query_all_opt6.7b.sh
│   │   │   ├── test_single_query_case.sh
│   │   │   └── utils/
│   │   │       ├── constants.py
│   │   │       ├── data_utils.py
│   │   │       ├── prompt_utils.py
│   │   │       └── utils.py
│   │   ├── helm_fast_test.py
│   │   ├── helm_passed_30b.sh
│   │   └── helm_run.py
│   ├── compression.py
│   ├── dist_flex_opt.py
│   ├── dist_utils.py
│   ├── flex_opt.py
│   ├── opt_config.py
│   ├── profile_bandwidth.py
│   ├── profile_matmul.py
│   ├── pytorch_backend.py
│   ├── timer.py
│   └── utils.py
├── pyproject.toml
└── scripts/
    ├── mount_nvme_aws.sh
    ├── mount_nvme_gcp.sh
    ├── step_2_consolidate_992_shards_to_singleton.py
    ├── step_3_convert_to_numpy_weights.py
    ├── upload_pypi.sh
    └── utils.py