gitextract_jggdbnd_/

├── .ai/
│   ├── AGENTS.md
│   └── skills/
│       └── add-or-fix-type-checking/
│           └── SKILL.md
├── .circleci/
│   ├── TROUBLESHOOT.md
│   ├── config.yml
│   ├── create_circleci_config.py
│   └── parse_test_outputs.py
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.yml
│   │   ├── config.yml
│   │   ├── feature-request.yml
│   │   ├── i18n.md
│   │   ├── migration.yml
│   │   └── new-model-addition.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── conda/
│   │   ├── build.sh
│   │   └── meta.yaml
│   ├── copilot-instructions.md
│   ├── scripts/
│   │   ├── assign_reviewers.py
│   │   └── codeowners_for_review_action
│   └── workflows/
│       ├── TROUBLESHOOT.md
│       ├── add-model-like.yml
│       ├── anti-slop.yml
│       ├── assign-reviewers.yml
│       ├── benchmark.yml
│       ├── benchmark_v2.yml
│       ├── benchmark_v2_a10_caller.yml
│       ├── benchmark_v2_mi325_caller.yml
│       ├── build-ci-docker-images.yml
│       ├── build-docker-images.yml
│       ├── build-nightly-ci-docker-images.yml
│       ├── build-past-ci-docker-images.yml
│       ├── build_documentation.yml
│       ├── build_pr_documentation.yml
│       ├── check-workflow-permissions.yml
│       ├── check_failed_tests.yml
│       ├── check_tiny_models.yml
│       ├── circleci-failure-summary-comment.yml
│       ├── codeql.yml
│       ├── collated-reports.yml
│       ├── doctest_job.yml
│       ├── doctests.yml
│       ├── extras-smoke-test.yml
│       ├── get-pr-info.yml
│       ├── get-pr-number.yml
│       ├── model_jobs.yml
│       ├── model_jobs_intel_gaudi.yml
│       ├── new_model_pr_merged_notification.yml
│       ├── pr-repo-consistency-bot.yml
│       ├── pr_build_doc_with_comment.yml
│       ├── pr_slow_ci_suggestion.yml
│       ├── push-important-models.yml
│       ├── release-conda.yml
│       ├── release.yml
│       ├── self-comment-ci.yml
│       ├── self-nightly-caller.yml
│       ├── self-nightly-past-ci-caller.yml
│       ├── self-past-caller.yml
│       ├── self-scheduled-amd-caller.yml
│       ├── self-scheduled-amd-mi250-caller.yml
│       ├── self-scheduled-amd-mi325-caller.yml
│       ├── self-scheduled-amd-mi355-caller.yml
│       ├── self-scheduled-caller.yml
│       ├── self-scheduled-flash-attn-caller.yml
│       ├── self-scheduled-intel-gaudi.yml
│       ├── self-scheduled-intel-gaudi3-caller.yml
│       ├── self-scheduled.yml
│       ├── slack-report.yml
│       ├── ssh-runner.yml
│       ├── stale.yml
│       ├── trl-ci-bot.yml
│       ├── trufflehog.yml
│       ├── update_metdata.yml
│       └── upload_pr_documentation.yml
├── .gitignore
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── ISSUES.md
├── LICENSE
├── MIGRATION_GUIDE_V5.md
├── Makefile
├── README.md
├── SECURITY.md
├── awesome-transformers.md
├── benchmark/
│   ├── .gitignore
│   ├── README.md
│   ├── __init__.py
│   ├── benches/
│   │   └── llama.py
│   ├── benchmark.py
│   ├── benchmarks_entrypoint.py
│   ├── config/
│   │   └── generation.yaml
│   ├── default.yml
│   ├── grafana_dashboard.json
│   ├── grafana_datasource.yaml
│   ├── optimum_benchmark_wrapper.py
│   ├── requirements.txt
│   └── utils/
│       └── init_db.sql
├── benchmark_v2/
│   ├── .gitignore
│   ├── README.md
│   ├── benchmark_scripts/
│   │   └── continuous_batching_overall.py
│   ├── framework/
│   │   ├── benchmark_config.py
│   │   ├── benchmark_runner.py
│   │   ├── data_classes.py
│   │   └── hardware_metrics.py
│   ├── requirements.txt
│   └── run_benchmarks.py
├── conftest.py
├── docker/
│   ├── README.md
│   ├── consistency.dockerfile
│   ├── custom-tokenizers.dockerfile
│   ├── examples-torch.dockerfile
│   ├── exotic-models.dockerfile
│   ├── pipeline-torch.dockerfile
│   ├── quality.dockerfile
│   ├── torch-light.dockerfile
│   ├── transformers-all-latest-gpu/
│   │   └── Dockerfile
│   ├── transformers-doc-builder/
│   │   └── Dockerfile
│   ├── transformers-gpu/
│   │   └── Dockerfile
│   ├── transformers-intel-cpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-amd-gpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-deepspeed-amd-gpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-deepspeed-latest-gpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-deepspeed-nightly-gpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-gpu/
│   │   └── Dockerfile
│   ├── transformers-pytorch-tpu/
│   │   ├── Dockerfile
│   │   ├── bert-base-cased.jsonnet
│   │   ├── dataset.yaml
│   │   └── docker-entrypoint.sh
│   ├── transformers-pytorch-xpu/
│   │   └── Dockerfile
│   └── transformers-quantization-latest-gpu/
│       └── Dockerfile
├── docs/
│   ├── README.md
│   ├── TRANSLATING.md
│   └── source/
│       ├── _config.py
│       ├── ar/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── attention.md
│       │   ├── autoclass_tutorial.md
│       │   ├── bertology.md
│       │   ├── chat_templating.md
│       │   ├── community.md
│       │   ├── conversations.md
│       │   ├── create_a_model.md
│       │   ├── custom_models.md
│       │   ├── fast_tokenizers.md
│       │   ├── gguf.md
│       │   ├── glossary.md
│       │   ├── how_to_hack_models.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── llm_tutorial.md
│       │   ├── llm_tutorial_optimization.md
│       │   ├── model_memory_anatomy.md
│       │   ├── model_sharing.md
│       │   ├── model_summary.md
│       │   ├── modular_transformers.md
│       │   ├── multilingual.md
│       │   ├── notebooks.md
│       │   ├── pad_truncation.md
│       │   ├── peft.md
│       │   ├── perplexity.md
│       │   ├── philosophy.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pipeline_webserver.md
│       │   ├── preprocessing.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── sagemaker.md
│       │   ├── serialization.md
│       │   ├── task_summary.md
│       │   ├── tasks/
│       │   │   ├── language_modeling.md
│       │   │   ├── masked_language_modeling.md
│       │   │   ├── multiple_choice.md
│       │   │   ├── question_answering.md
│       │   │   ├── sequence_classification.md
│       │   │   ├── summarization.md
│       │   │   ├── token_classification.md
│       │   │   └── translation.md
│       │   ├── tasks_explained.md
│       │   ├── tiktoken.md
│       │   ├── tokenizer_summary.md
│       │   ├── trainer.md
│       │   ├── training.md
│       │   └── troubleshooting.md
│       ├── de/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── add_new_model.md
│       │   ├── add_new_pipeline.md
│       │   ├── autoclass_tutorial.md
│       │   ├── contributing.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── llm_tutorial.md
│       │   ├── model_sharing.md
│       │   ├── peft.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pr_checks.md
│       │   ├── preprocessing.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── testing.md
│       │   └── training.md
│       ├── en/
│       │   ├── _config.py
│       │   ├── _redirects.yml
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── accelerator_selection.md
│       │   ├── add_new_model.md
│       │   ├── add_new_pipeline.md
│       │   ├── assisted_decoding.md
│       │   ├── attention_interface.md
│       │   ├── auto_docstring.md
│       │   ├── backbones.md
│       │   ├── cache_explanation.md
│       │   ├── chat_content_patterns.md
│       │   ├── chat_extras.md
│       │   ├── chat_response_parsing.md
│       │   ├── chat_templating.md
│       │   ├── chat_templating_multimodal.md
│       │   ├── chat_templating_writing.md
│       │   ├── community.md
│       │   ├── community_integrations/
│       │   │   ├── axolotl.md
│       │   │   ├── candle.md
│       │   │   ├── executorch.md
│       │   │   ├── llama_cpp.md
│       │   │   ├── mlx.md
│       │   │   ├── nanotron.md
│       │   │   ├── nemo_automodel_finetuning.md
│       │   │   ├── nemo_automodel_pretraining.md
│       │   │   ├── sglang.md
│       │   │   ├── tensorrt-llm.md
│       │   │   ├── torchtitan.md
│       │   │   ├── transformers_as_backend.md
│       │   │   ├── trl.md
│       │   │   ├── unsloth.md
│       │   │   └── vllm.md
│       │   ├── continuous_batching.md
│       │   ├── continuous_batching_architecture.md
│       │   ├── conversations.md
│       │   ├── custom_models.md
│       │   ├── custom_tokenizers.md
│       │   ├── data_collators.md
│       │   ├── debugging.md
│       │   ├── deepspeed.md
│       │   ├── expert_parallelism.md
│       │   ├── experts_interface.md
│       │   ├── fast_tokenizers.md
│       │   ├── feature_extractors.md
│       │   ├── fsdp.md
│       │   ├── generation_features.md
│       │   ├── generation_strategies.md
│       │   ├── gguf.md
│       │   ├── glossary.md
│       │   ├── how_to_hack_models.md
│       │   ├── hpo_train.md
│       │   ├── image_processors.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── internal/
│       │   │   ├── audio_utils.md
│       │   │   ├── file_utils.md
│       │   │   ├── generation_utils.md
│       │   │   ├── image_processing_utils.md
│       │   │   ├── import_utils.md
│       │   │   ├── model_debugging_utils.md
│       │   │   ├── modeling_utils.md
│       │   │   ├── pipelines_utils.md
│       │   │   ├── rope_utils.md
│       │   │   ├── time_series_utils.md
│       │   │   ├── tokenization_utils.md
│       │   │   └── trainer_utils.md
│       │   ├── kernel_doc/
│       │   │   ├── loading_kernels.md
│       │   │   └── overview.md
│       │   ├── kv_cache.md
│       │   ├── llm_tutorial.md
│       │   ├── llm_tutorial_optimization.md
│       │   ├── main_classes/
│       │   │   ├── backbones.md
│       │   │   ├── callback.md
│       │   │   ├── configuration.md
│       │   │   ├── continuous_batching.md
│       │   │   ├── data_collator.md
│       │   │   ├── deepspeed.md
│       │   │   ├── executorch.md
│       │   │   ├── feature_extractor.md
│       │   │   ├── image_processor.md
│       │   │   ├── kernels.md
│       │   │   ├── logging.md
│       │   │   ├── model.md
│       │   │   ├── optimizer_schedules.md
│       │   │   ├── output.md
│       │   │   ├── peft.md
│       │   │   ├── pipelines.md
│       │   │   ├── processors.md
│       │   │   ├── quantization.md
│       │   │   ├── text_generation.md
│       │   │   ├── tokenizer.md
│       │   │   ├── trainer.md
│       │   │   └── video_processor.md
│       │   ├── model_doc/
│       │   │   ├── afmoe.md
│       │   │   ├── aimv2.md
│       │   │   ├── albert.md
│       │   │   ├── align.md
│       │   │   ├── altclip.md
│       │   │   ├── apertus.md
│       │   │   ├── arcee.md
│       │   │   ├── aria.md
│       │   │   ├── audio-spectrogram-transformer.md
│       │   │   ├── audioflamingo3.md
│       │   │   ├── auto.md
│       │   │   ├── autoformer.md
│       │   │   ├── aya_vision.md
│       │   │   ├── bamba.md
│       │   │   ├── bark.md
│       │   │   ├── bart.md
│       │   │   ├── barthez.md
│       │   │   ├── bartpho.md
│       │   │   ├── beit.md
│       │   │   ├── bert-generation.md
│       │   │   ├── bert-japanese.md
│       │   │   ├── bert.md
│       │   │   ├── bertweet.md
│       │   │   ├── big_bird.md
│       │   │   ├── bigbird_pegasus.md
│       │   │   ├── biogpt.md
│       │   │   ├── bit.md
│       │   │   ├── bitnet.md
│       │   │   ├── blenderbot-small.md
│       │   │   ├── blenderbot.md
│       │   │   ├── blip-2.md
│       │   │   ├── blip.md
│       │   │   ├── bloom.md
│       │   │   ├── blt.md
│       │   │   ├── bridgetower.md
│       │   │   ├── bros.md
│       │   │   ├── byt5.md
│       │   │   ├── camembert.md
│       │   │   ├── canine.md
│       │   │   ├── chameleon.md
│       │   │   ├── chinese_clip.md
│       │   │   ├── chmv2.md
│       │   │   ├── clap.md
│       │   │   ├── clip.md
│       │   │   ├── clipseg.md
│       │   │   ├── clvp.md
│       │   │   ├── code_llama.md
│       │   │   ├── codegen.md
│       │   │   ├── cohere.md
│       │   │   ├── cohere2.md
│       │   │   ├── cohere2_vision.md
│       │   │   ├── cohere_asr.md
│       │   │   ├── colmodernvbert.md
│       │   │   ├── colpali.md
│       │   │   ├── colqwen2.md
│       │   │   ├── conditional_detr.md
│       │   │   ├── convbert.md
│       │   │   ├── convnext.md
│       │   │   ├── convnextv2.md
│       │   │   ├── cpm.md
│       │   │   ├── cpmant.md
│       │   │   ├── csm.md
│       │   │   ├── ctrl.md
│       │   │   ├── cvt.md
│       │   │   ├── cwm.md
│       │   │   ├── d_fine.md
│       │   │   ├── dab-detr.md
│       │   │   ├── dac.md
│       │   │   ├── data2vec.md
│       │   │   ├── dbrx.md
│       │   │   ├── deberta-v2.md
│       │   │   ├── deberta.md
│       │   │   ├── decision_transformer.md
│       │   │   ├── deepseek_v2.md
│       │   │   ├── deepseek_v3.md
│       │   │   ├── deepseek_vl.md
│       │   │   ├── deepseek_vl_hybrid.md
│       │   │   ├── deformable_detr.md
│       │   │   ├── deit.md
│       │   │   ├── deplot.md
│       │   │   ├── depth_anything.md
│       │   │   ├── depth_anything_v2.md
│       │   │   ├── depth_pro.md
│       │   │   ├── detr.md
│       │   │   ├── dia.md
│       │   │   ├── dialogpt.md
│       │   │   ├── diffllama.md
│       │   │   ├── dinat.md
│       │   │   ├── dinov2.md
│       │   │   ├── dinov2_with_registers.md
│       │   │   ├── dinov3.md
│       │   │   ├── distilbert.md
│       │   │   ├── dit.md
│       │   │   ├── doge.md
│       │   │   ├── donut.md
│       │   │   ├── dots1.md
│       │   │   ├── dpr.md
│       │   │   ├── dpt.md
│       │   │   ├── edgetam.md
│       │   │   ├── edgetam_video.md
│       │   │   ├── efficientloftr.md
│       │   │   ├── efficientnet.md
│       │   │   ├── electra.md
│       │   │   ├── emu3.md
│       │   │   ├── encodec.md
│       │   │   ├── encoder-decoder.md
│       │   │   ├── eomt.md
│       │   │   ├── eomt_dinov3.md
│       │   │   ├── ernie.md
│       │   │   ├── ernie4_5.md
│       │   │   ├── ernie4_5_moe.md
│       │   │   ├── ernie4_5_vl_moe.md
│       │   │   ├── esm.md
│       │   │   ├── eurobert.md
│       │   │   ├── evolla.md
│       │   │   ├── exaone4.md
│       │   │   ├── exaone_moe.md
│       │   │   ├── falcon.md
│       │   │   ├── falcon3.md
│       │   │   ├── falcon_h1.md
│       │   │   ├── falcon_mamba.md
│       │   │   ├── fast_vlm.md
│       │   │   ├── fastspeech2_conformer.md
│       │   │   ├── flan-t5.md
│       │   │   ├── flan-ul2.md
│       │   │   ├── flaubert.md
│       │   │   ├── flava.md
│       │   │   ├── flex_olmo.md
│       │   │   ├── florence2.md
│       │   │   ├── fnet.md
│       │   │   ├── focalnet.md
│       │   │   ├── fsmt.md
│       │   │   ├── funnel.md
│       │   │   ├── fuyu.md
│       │   │   ├── gemma.md
│       │   │   ├── gemma2.md
│       │   │   ├── gemma3.md
│       │   │   ├── gemma3n.md
│       │   │   ├── git.md
│       │   │   ├── glm.md
│       │   │   ├── glm4.md
│       │   │   ├── glm46v.md
│       │   │   ├── glm4_moe.md
│       │   │   ├── glm4_moe_lite.md
│       │   │   ├── glm4v.md
│       │   │   ├── glm4v_moe.md
│       │   │   ├── glm_image.md
│       │   │   ├── glm_moe_dsa.md
│       │   │   ├── glm_ocr.md
│       │   │   ├── glmasr.md
│       │   │   ├── glpn.md
│       │   │   ├── got_ocr2.md
│       │   │   ├── gpt-sw3.md
│       │   │   ├── gpt2.md
│       │   │   ├── gpt_bigcode.md
│       │   │   ├── gpt_neo.md
│       │   │   ├── gpt_neox.md
│       │   │   ├── gpt_neox_japanese.md
│       │   │   ├── gpt_oss.md
│       │   │   ├── gptj.md
│       │   │   ├── granite.md
│       │   │   ├── granite_speech.md
│       │   │   ├── granitemoe.md
│       │   │   ├── granitemoehybrid.md
│       │   │   ├── granitemoeshared.md
│       │   │   ├── granitevision.md
│       │   │   ├── grounding-dino.md
│       │   │   ├── groupvit.md
│       │   │   ├── helium.md
│       │   │   ├── herbert.md
│       │   │   ├── hgnet_v2.md
│       │   │   ├── hiera.md
│       │   │   ├── higgs_audio_v2.md
│       │   │   ├── higgs_audio_v2_tokenizer.md
│       │   │   ├── hubert.md
│       │   │   ├── hunyuan_v1_dense.md
│       │   │   ├── hunyuan_v1_moe.md
│       │   │   ├── ibert.md
│       │   │   ├── idefics.md
│       │   │   ├── idefics2.md
│       │   │   ├── idefics3.md
│       │   │   ├── ijepa.md
│       │   │   ├── imagegpt.md
│       │   │   ├── informer.md
│       │   │   ├── instructblip.md
│       │   │   ├── instructblipvideo.md
│       │   │   ├── internvl.md
│       │   │   ├── jais2.md
│       │   │   ├── jamba.md
│       │   │   ├── janus.md
│       │   │   ├── jetmoe.md
│       │   │   ├── jina_embeddings_v3.md
│       │   │   ├── kosmos-2.md
│       │   │   ├── kosmos2_5.md
│       │   │   ├── kyutai_speech_to_text.md
│       │   │   ├── lasr.md
│       │   │   ├── layoutlm.md
│       │   │   ├── layoutlmv2.md
│       │   │   ├── layoutlmv3.md
│       │   │   ├── layoutxlm.md
│       │   │   ├── led.md
│       │   │   ├── levit.md
│       │   │   ├── lfm2.md
│       │   │   ├── lfm2_moe.md
│       │   │   ├── lfm2_vl.md
│       │   │   ├── lightglue.md
│       │   │   ├── lighton_ocr.md
│       │   │   ├── lilt.md
│       │   │   ├── llama.md
│       │   │   ├── llama2.md
│       │   │   ├── llama3.md
│       │   │   ├── llama4.md
│       │   │   ├── llava.md
│       │   │   ├── llava_next.md
│       │   │   ├── llava_next_video.md
│       │   │   ├── llava_onevision.md
│       │   │   ├── longcat_flash.md
│       │   │   ├── longformer.md
│       │   │   ├── longt5.md
│       │   │   ├── luke.md
│       │   │   ├── lw_detr.md
│       │   │   ├── lxmert.md
│       │   │   ├── m2m_100.md
│       │   │   ├── madlad-400.md
│       │   │   ├── mamba.md
│       │   │   ├── mamba2.md
│       │   │   ├── marian.md
│       │   │   ├── markuplm.md
│       │   │   ├── mask2former.md
│       │   │   ├── maskformer.md
│       │   │   ├── matcha.md
│       │   │   ├── mbart.md
│       │   │   ├── megatron-bert.md
│       │   │   ├── megatron_gpt2.md
│       │   │   ├── metaclip_2.md
│       │   │   ├── mgp-str.md
│       │   │   ├── mimi.md
│       │   │   ├── minimax.md
│       │   │   ├── minimax_m2.md
│       │   │   ├── ministral.md
│       │   │   ├── ministral3.md
│       │   │   ├── mistral.md
│       │   │   ├── mistral3.md
│       │   │   ├── mistral4.md
│       │   │   ├── mixtral.md
│       │   │   ├── mlcd.md
│       │   │   ├── mllama.md
│       │   │   ├── mluke.md
│       │   │   ├── mm-grounding-dino.md
│       │   │   ├── mms.md
│       │   │   ├── mobilebert.md
│       │   │   ├── mobilenet_v1.md
│       │   │   ├── mobilenet_v2.md
│       │   │   ├── mobilevit.md
│       │   │   ├── mobilevitv2.md
│       │   │   ├── modernbert-decoder.md
│       │   │   ├── modernbert.md
│       │   │   ├── modernvbert.md
│       │   │   ├── moonshine.md
│       │   │   ├── moonshine_streaming.md
│       │   │   ├── moshi.md
│       │   │   ├── mpnet.md
│       │   │   ├── mpt.md
│       │   │   ├── mra.md
│       │   │   ├── mt5.md
│       │   │   ├── musicflamingo.md
│       │   │   ├── musicgen.md
│       │   │   ├── musicgen_melody.md
│       │   │   ├── mvp.md
│       │   │   ├── myt5.md
│       │   │   ├── nanochat.md
│       │   │   ├── nemotron.md
│       │   │   ├── nemotron_h.md
│       │   │   ├── nllb-moe.md
│       │   │   ├── nllb.md
│       │   │   ├── nougat.md
│       │   │   ├── nystromformer.md
│       │   │   ├── olmo.md
│       │   │   ├── olmo2.md
│       │   │   ├── olmo3.md
│       │   │   ├── olmo_hybrid.md
│       │   │   ├── olmoe.md
│       │   │   ├── omdet-turbo.md
│       │   │   ├── oneformer.md
│       │   │   ├── openai-gpt.md
│       │   │   ├── opt.md
│       │   │   ├── ovis2.md
│       │   │   ├── owlv2.md
│       │   │   ├── owlvit.md
│       │   │   ├── paddleocr_vl.md
│       │   │   ├── paligemma.md
│       │   │   ├── parakeet.md
│       │   │   ├── patchtsmixer.md
│       │   │   ├── patchtst.md
│       │   │   ├── pe_audio.md
│       │   │   ├── pe_audio_video.md
│       │   │   ├── pe_video.md
│       │   │   ├── pegasus.md
│       │   │   ├── pegasus_x.md
│       │   │   ├── perceiver.md
│       │   │   ├── perception_lm.md
│       │   │   ├── persimmon.md
│       │   │   ├── phi.md
│       │   │   ├── phi3.md
│       │   │   ├── phi4_multimodal.md
│       │   │   ├── phimoe.md
│       │   │   ├── phobert.md
│       │   │   ├── pi0.md
│       │   │   ├── pix2struct.md
│       │   │   ├── pixio.md
│       │   │   ├── pixtral.md
│       │   │   ├── plbart.md
│       │   │   ├── poolformer.md
│       │   │   ├── pop2piano.md
│       │   │   ├── pp_chart2table.md
│       │   │   ├── pp_doclayout_v2.md
│       │   │   ├── pp_doclayout_v3.md
│       │   │   ├── pp_lcnet.md
│       │   │   ├── pp_lcnet_v3.md
│       │   │   ├── pp_ocrv5_mobile_det.md
│       │   │   ├── pp_ocrv5_mobile_rec.md
│       │   │   ├── pp_ocrv5_server_det.md
│       │   │   ├── pp_ocrv5_server_rec.md
│       │   │   ├── prompt_depth_anything.md
│       │   │   ├── prophetnet.md
│       │   │   ├── pvt.md
│       │   │   ├── pvt_v2.md
│       │   │   ├── qwen2.md
│       │   │   ├── qwen2_5_omni.md
│       │   │   ├── qwen2_5_vl.md
│       │   │   ├── qwen2_audio.md
│       │   │   ├── qwen2_moe.md
│       │   │   ├── qwen2_vl.md
│       │   │   ├── qwen3.md
│       │   │   ├── qwen3_5.md
│       │   │   ├── qwen3_5_moe.md
│       │   │   ├── qwen3_moe.md
│       │   │   ├── qwen3_next.md
│       │   │   ├── qwen3_omni_moe.md
│       │   │   ├── qwen3_vl.md
│       │   │   ├── qwen3_vl_moe.md
│       │   │   ├── rag.md
│       │   │   ├── recurrent_gemma.md
│       │   │   ├── reformer.md
│       │   │   ├── regnet.md
│       │   │   ├── rembert.md
│       │   │   ├── resnet.md
│       │   │   ├── roberta-prelayernorm.md
│       │   │   ├── roberta.md
│       │   │   ├── roc_bert.md
│       │   │   ├── roformer.md
│       │   │   ├── rt_detr.md
│       │   │   ├── rt_detr_v2.md
│       │   │   ├── rwkv.md
│       │   │   ├── sam.md
│       │   │   ├── sam2.md
│       │   │   ├── sam2_video.md
│       │   │   ├── sam3.md
│       │   │   ├── sam3_tracker.md
│       │   │   ├── sam3_tracker_video.md
│       │   │   ├── sam3_video.md
│       │   │   ├── sam_hq.md
│       │   │   ├── seamless_m4t.md
│       │   │   ├── seamless_m4t_v2.md
│       │   │   ├── seed_oss.md
│       │   │   ├── segformer.md
│       │   │   ├── seggpt.md
│       │   │   ├── sew-d.md
│       │   │   ├── sew.md
│       │   │   ├── shieldgemma2.md
│       │   │   ├── siglip.md
│       │   │   ├── siglip2.md
│       │   │   ├── slanext.md
│       │   │   ├── smollm3.md
│       │   │   ├── smolvlm.md
│       │   │   ├── solar_open.md
│       │   │   ├── speech-encoder-decoder.md
│       │   │   ├── speech_to_text.md
│       │   │   ├── speecht5.md
│       │   │   ├── splinter.md
│       │   │   ├── squeezebert.md
│       │   │   ├── stablelm.md
│       │   │   ├── starcoder2.md
│       │   │   ├── superglue.md
│       │   │   ├── superpoint.md
│       │   │   ├── swiftformer.md
│       │   │   ├── swin.md
│       │   │   ├── swin2sr.md
│       │   │   ├── swinv2.md
│       │   │   ├── switch_transformers.md
│       │   │   ├── t5.md
│       │   │   ├── t5gemma.md
│       │   │   ├── t5gemma2.md
│       │   │   ├── t5v1.1.md
│       │   │   ├── table-transformer.md
│       │   │   ├── tapas.md
│       │   │   ├── textnet.md
│       │   │   ├── time_series_transformer.md
│       │   │   ├── timesfm.md
│       │   │   ├── timesfm2_5.md
│       │   │   ├── timesformer.md
│       │   │   ├── timm_wrapper.md
│       │   │   ├── trocr.md
│       │   │   ├── tvp.md
│       │   │   ├── udop.md
│       │   │   ├── ul2.md
│       │   │   ├── umt5.md
│       │   │   ├── unispeech-sat.md
│       │   │   ├── unispeech.md
│       │   │   ├── univnet.md
│       │   │   ├── upernet.md
│       │   │   ├── uvdoc.md
│       │   │   ├── vaultgemma.md
│       │   │   ├── vibevoice_acoustic_tokenizer.md
│       │   │   ├── vibevoice_asr.md
│       │   │   ├── video_llama_3.md
│       │   │   ├── video_llava.md
│       │   │   ├── videomae.md
│       │   │   ├── videomt.md
│       │   │   ├── vilt.md
│       │   │   ├── vipllava.md
│       │   │   ├── vision-encoder-decoder.md
│       │   │   ├── vision-text-dual-encoder.md
│       │   │   ├── visual_bert.md
│       │   │   ├── vit.md
│       │   │   ├── vit_mae.md
│       │   │   ├── vit_msn.md
│       │   │   ├── vitdet.md
│       │   │   ├── vitmatte.md
│       │   │   ├── vitpose.md
│       │   │   ├── vits.md
│       │   │   ├── vivit.md
│       │   │   ├── vjepa2.md
│       │   │   ├── voxtral.md
│       │   │   ├── voxtral_realtime.md
│       │   │   ├── wav2vec2-bert.md
│       │   │   ├── wav2vec2-conformer.md
│       │   │   ├── wav2vec2.md
│       │   │   ├── wav2vec2_phoneme.md
│       │   │   ├── wavlm.md
│       │   │   ├── whisper.md
│       │   │   ├── xclip.md
│       │   │   ├── xcodec.md
│       │   │   ├── xglm.md
│       │   │   ├── xlm-roberta-xl.md
│       │   │   ├── xlm-roberta.md
│       │   │   ├── xlm-v.md
│       │   │   ├── xlm.md
│       │   │   ├── xlnet.md
│       │   │   ├── xls_r.md
│       │   │   ├── xlsr_wav2vec2.md
│       │   │   ├── xlstm.md
│       │   │   ├── xmod.md
│       │   │   ├── yolos.md
│       │   │   ├── yoso.md
│       │   │   ├── youtu.md
│       │   │   ├── zamba.md
│       │   │   ├── zamba2.md
│       │   │   └── zoedepth.md
│       │   ├── model_memory_anatomy.md
│       │   ├── model_output_tracing.md
│       │   ├── model_sharing.md
│       │   ├── models.md
│       │   ├── models_timeline.md
│       │   ├── modular_transformers.md
│       │   ├── monkey_patching.md
│       │   ├── optimization_overview.md
│       │   ├── optimizers.md
│       │   ├── paged_attention.md
│       │   ├── peft.md
│       │   ├── perf_hardware.md
│       │   ├── perf_infer_gpu_multi.md
│       │   ├── perf_torch_compile.md
│       │   ├── perf_train_cpu.md
│       │   ├── perf_train_cpu_many.md
│       │   ├── perf_train_gaudi.md
│       │   ├── perf_train_gpu_many.md
│       │   ├── perf_train_gpu_one.md
│       │   ├── perf_train_special.md
│       │   ├── perplexity.md
│       │   ├── philosophy.md
│       │   ├── pipeline_gradio.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pipeline_webserver.md
│       │   ├── pr_checks.md
│       │   ├── processors.md
│       │   ├── quantization/
│       │   │   ├── aqlm.md
│       │   │   ├── auto_round.md
│       │   │   ├── awq.md
│       │   │   ├── bitnet.md
│       │   │   ├── bitsandbytes.md
│       │   │   ├── compressed_tensors.md
│       │   │   ├── concept_guide.md
│       │   │   ├── contribute.md
│       │   │   ├── eetq.md
│       │   │   ├── fbgemm_fp8.md
│       │   │   ├── finegrained_fp8.md
│       │   │   ├── fouroversix.md
│       │   │   ├── fp_quant.md
│       │   │   ├── gptq.md
│       │   │   ├── higgs.md
│       │   │   ├── hqq.md
│       │   │   ├── metal.md
│       │   │   ├── mxfp4.md
│       │   │   ├── optimum.md
│       │   │   ├── overview.md
│       │   │   ├── quanto.md
│       │   │   ├── quark.md
│       │   │   ├── selecting.md
│       │   │   ├── sinq.md
│       │   │   ├── spqr.md
│       │   │   ├── torchao.md
│       │   │   └── vptq.md
│       │   ├── quicktour.md
│       │   ├── reference/
│       │   │   └── environment_variables.md
│       │   ├── run_scripts.md
│       │   ├── serialization.md
│       │   ├── serve-cli/
│       │   │   ├── cursor.md
│       │   │   ├── jan.md
│       │   │   ├── openweb_ui.md
│       │   │   ├── serving.md
│       │   │   ├── serving_optims.md
│       │   │   └── tiny_agents.md
│       │   ├── tasks/
│       │   │   ├── any_to_any.md
│       │   │   ├── asr.md
│       │   │   ├── audio_classification.md
│       │   │   ├── audio_text_to_text.md
│       │   │   ├── document_question_answering.md
│       │   │   ├── idefics.md
│       │   │   ├── image_captioning.md
│       │   │   ├── image_classification.md
│       │   │   ├── image_feature_extraction.md
│       │   │   ├── image_text_to_text.md
│       │   │   ├── keypoint_detection.md
│       │   │   ├── keypoint_matching.md
│       │   │   ├── knowledge_distillation_for_image_classification.md
│       │   │   ├── language_modeling.md
│       │   │   ├── mask_generation.md
│       │   │   ├── masked_language_modeling.md
│       │   │   ├── monocular_depth_estimation.md
│       │   │   ├── multiple_choice.md
│       │   │   ├── object_detection.md
│       │   │   ├── prompting.md
│       │   │   ├── question_answering.md
│       │   │   ├── semantic_segmentation.md
│       │   │   ├── sequence_classification.md
│       │   │   ├── summarization.md
│       │   │   ├── text-to-speech.md
│       │   │   ├── token_classification.md
│       │   │   ├── training_vision_backbone.md
│       │   │   ├── translation.md
│       │   │   ├── video_classification.md
│       │   │   ├── video_text_to_text.md
│       │   │   ├── visual_document_retrieval.md
│       │   │   ├── visual_question_answering.md
│       │   │   ├── zero_shot_image_classification.md
│       │   │   └── zero_shot_object_detection.md
│       │   ├── testing.md
│       │   ├── tokenizer_summary.md
│       │   ├── trainer.md
│       │   ├── trainer_callbacks.md
│       │   ├── trainer_customize.md
│       │   ├── training.md
│       │   ├── troubleshooting.md
│       │   ├── video_processors.md
│       │   └── weightconverter.md
│       ├── es/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── add_new_pipeline.md
│       │   ├── attention.md
│       │   ├── autoclass_tutorial.md
│       │   ├── bertology.md
│       │   ├── chat_templating.md
│       │   ├── community.md
│       │   ├── conversations.md
│       │   ├── create_a_model.md
│       │   ├── custom_models.md
│       │   ├── debugging.md
│       │   ├── fast_tokenizers.md
│       │   ├── glossary.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── model_memory_anatomy.md
│       │   ├── model_sharing.md
│       │   ├── multilingual.md
│       │   ├── pad_truncation.md
│       │   ├── performance.md
│       │   ├── perplexity.md
│       │   ├── philosophy.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pipeline_webserver.md
│       │   ├── pr_checks.md
│       │   ├── preprocessing.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── sagemaker.md
│       │   ├── task_summary.md
│       │   ├── tasks/
│       │   │   ├── asr.md
│       │   │   ├── audio_classification.md
│       │   │   ├── image_captioning.md
│       │   │   ├── image_classification.md
│       │   │   ├── language_modeling.md
│       │   │   ├── multiple_choice.md
│       │   │   ├── question_answering.md
│       │   │   └── summarization.md
│       │   ├── tasks_explained.md
│       │   ├── tokenizer_summary.md
│       │   ├── trainer.md
│       │   └── training.md
│       ├── fr/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── autoclass_tutorial.md
│       │   ├── in_translation.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── quicktour.md
│       │   ├── run_scripts_fr.md
│       │   ├── task_summary.md
│       │   ├── tasks_explained.md
│       │   └── tutoriel_pipeline.md
│       ├── hi/
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   └── pipeline_tutorial.md
│       ├── it/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── add_new_model.md
│       │   ├── add_new_pipeline.md
│       │   ├── autoclass_tutorial.md
│       │   ├── big_models.md
│       │   ├── community.md
│       │   ├── create_a_model.md
│       │   ├── custom_models.md
│       │   ├── debugging.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── migration.md
│       │   ├── model_sharing.md
│       │   ├── multilingual.md
│       │   ├── perf_hardware.md
│       │   ├── perf_infer_cpu.md
│       │   ├── perf_infer_gpu_many.md
│       │   ├── perf_infer_gpu_one.md
│       │   ├── perf_infer_special.md
│       │   ├── perf_train_cpu.md
│       │   ├── perf_train_cpu_many.md
│       │   ├── perf_train_special.md
│       │   ├── perf_train_tpu.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pr_checks.md
│       │   ├── preprocessing.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   └── training.md
│       ├── ja/
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── add_new_model.md
│       │   ├── attention.md
│       │   ├── autoclass_tutorial.md
│       │   ├── bertology.md
│       │   ├── big_models.md
│       │   ├── chat_templating.md
│       │   ├── community.md
│       │   ├── create_a_model.md
│       │   ├── custom_models.md
│       │   ├── fast_tokenizers.md
│       │   ├── generation_strategies.md
│       │   ├── glossary.md
│       │   ├── hpo_train.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── internal/
│       │   │   ├── audio_utils.md
│       │   │   ├── file_utils.md
│       │   │   ├── generation_utils.md
│       │   │   ├── image_processing_utils.md
│       │   │   ├── modeling_utils.md
│       │   │   ├── pipelines_utils.md
│       │   │   ├── time_series_utils.md
│       │   │   ├── tokenization_utils.md
│       │   │   └── trainer_utils.md
│       │   ├── llm_tutorial.md
│       │   ├── main_classes/
│       │   │   ├── callback.md
│       │   │   ├── configuration.md
│       │   │   ├── data_collator.md
│       │   │   ├── deepspeed.md
│       │   │   ├── feature_extractor.md
│       │   │   ├── image_processor.md
│       │   │   ├── logging.md
│       │   │   ├── model.md
│       │   │   ├── optimizer_schedules.md
│       │   │   ├── output.md
│       │   │   ├── pipelines.md
│       │   │   ├── processors.md
│       │   │   ├── quantization.md
│       │   │   ├── text_generation.md
│       │   │   ├── tokenizer.md
│       │   │   └── trainer.md
│       │   ├── model_doc/
│       │   │   ├── albert.md
│       │   │   ├── align.md
│       │   │   ├── altclip.md
│       │   │   ├── audio-spectrogram-transformer.md
│       │   │   ├── auto.md
│       │   │   ├── autoformer.md
│       │   │   ├── bark.md
│       │   │   ├── bart.md
│       │   │   ├── barthez.md
│       │   │   ├── bartpho.md
│       │   │   ├── beit.md
│       │   │   ├── bert-generation.md
│       │   │   ├── bert-japanese.md
│       │   │   ├── bert.md
│       │   │   ├── bertweet.md
│       │   │   ├── big_bird.md
│       │   │   ├── bigbird_pegasus.md
│       │   │   ├── biogpt.md
│       │   │   ├── bit.md
│       │   │   ├── blenderbot-small.md
│       │   │   ├── blenderbot.md
│       │   │   ├── blip-2.md
│       │   │   ├── blip.md
│       │   │   ├── bloom.md
│       │   │   ├── bridgetower.md
│       │   │   ├── bros.md
│       │   │   ├── byt5.md
│       │   │   ├── camembert.md
│       │   │   ├── canine.md
│       │   │   ├── chinese_clip.md
│       │   │   ├── clap.md
│       │   │   ├── clip.md
│       │   │   ├── clipseg.md
│       │   │   ├── clvp.md
│       │   │   ├── code_llama.md
│       │   │   ├── codegen.md
│       │   │   ├── conditional_detr.md
│       │   │   ├── convbert.md
│       │   │   ├── convnext.md
│       │   │   ├── convnextv2.md
│       │   │   ├── cpm.md
│       │   │   ├── cpmant.md
│       │   │   ├── ctrl.md
│       │   │   ├── cvt.md
│       │   │   ├── data2vec.md
│       │   │   ├── deberta-v2.md
│       │   │   ├── deberta.md
│       │   │   ├── decision_transformer.md
│       │   │   ├── deformable_detr.md
│       │   │   ├── deit.md
│       │   │   ├── deplot.md
│       │   │   ├── detr.md
│       │   │   ├── dialogpt.md
│       │   │   └── dinat.md
│       │   ├── model_memory_anatomy.md
│       │   ├── model_sharing.md
│       │   ├── model_summary.md
│       │   ├── multilingual.md
│       │   ├── pad_truncation.md
│       │   ├── peft.md
│       │   ├── perf_hardware.md
│       │   ├── perf_infer_cpu.md
│       │   ├── perf_infer_gpu_many.md
│       │   ├── perf_infer_gpu_one.md
│       │   ├── perf_infer_special.md
│       │   ├── perf_torch_compile.md
│       │   ├── perf_train_cpu.md
│       │   ├── perf_train_cpu_many.md
│       │   ├── perf_train_gpu_many.md
│       │   ├── perf_train_gpu_one.md
│       │   ├── perf_train_special.md
│       │   ├── perf_train_tpu.md
│       │   ├── performance.md
│       │   ├── perplexity.md
│       │   ├── philosophy.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pipeline_webserver.md
│       │   ├── pr_checks.md
│       │   ├── preprocessing.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── serialization.md
│       │   ├── task_summary.md
│       │   ├── tasks/
│       │   │   ├── asr.md
│       │   │   ├── audio_classification.md
│       │   │   ├── document_question_answering.md
│       │   │   ├── idefics.md
│       │   │   ├── image_captioning.md
│       │   │   ├── image_classification.md
│       │   │   ├── knowledge_distillation_for_image_classification.md
│       │   │   ├── language_modeling.md
│       │   │   ├── masked_language_modeling.md
│       │   │   ├── monocular_depth_estimation.md
│       │   │   ├── multiple_choice.md
│       │   │   ├── object_detection.md
│       │   │   ├── prompting.md
│       │   │   ├── question_answering.md
│       │   │   ├── semantic_segmentation.md
│       │   │   ├── summarization.md
│       │   │   ├── text-to-speech.md
│       │   │   ├── token_classification.md
│       │   │   ├── translation.md
│       │   │   ├── video_classification.md
│       │   │   ├── visual_question_answering.md
│       │   │   ├── zero_shot_image_classification.md
│       │   │   └── zero_shot_object_detection.md
│       │   ├── tasks_explained.md
│       │   ├── testing.md
│       │   ├── tokenizer_summary.md
│       │   ├── training.md
│       │   └── troubleshooting.md
│       ├── ko/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── accelerator_selection.md
│       │   ├── add_new_model.md
│       │   ├── add_new_pipeline.md
│       │   ├── cache_explanation.md
│       │   ├── chat_extras.md
│       │   ├── chat_templating.md
│       │   ├── community.md
│       │   ├── contributing.md
│       │   ├── conversations.md
│       │   ├── custom_models.md
│       │   ├── debugging.md
│       │   ├── deepspeed.md
│       │   ├── executorch.md
│       │   ├── fast_tokenizers.md
│       │   ├── fsdp.md
│       │   ├── generation_strategies.md
│       │   ├── gguf.md
│       │   ├── glossary.md
│       │   ├── how_to_hack_models.md
│       │   ├── hpo_train.md
│       │   ├── image_processors.md
│       │   ├── in_translation.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── internal/
│       │   │   ├── audio_utils.md
│       │   │   ├── file_utils.md
│       │   │   ├── generation_utils.md
│       │   │   ├── image_processing_utils.md
│       │   │   ├── modeling_utils.md
│       │   │   ├── pipelines_utils.md
│       │   │   ├── time_series_utils.md
│       │   │   ├── tokenization_utils.md
│       │   │   └── trainer_utils.md
│       │   ├── llm_optims.md
│       │   ├── llm_tutorial.md
│       │   ├── llm_tutorial_optimization.md
│       │   ├── main_classes/
│       │   │   ├── callback.md
│       │   │   ├── configuration.md
│       │   │   ├── data_collator.md
│       │   │   ├── feature_extractor.md
│       │   │   ├── logging.md
│       │   │   ├── model.md
│       │   │   ├── optimizer_schedules.md
│       │   │   ├── output.md
│       │   │   ├── peft.md
│       │   │   ├── pipelines.md
│       │   │   ├── processors.md
│       │   │   ├── quantization.md
│       │   │   ├── text_generation.md
│       │   │   ├── tokenizer.md
│       │   │   └── trainer.md
│       │   ├── model_doc/
│       │   │   ├── albert.md
│       │   │   ├── altclip.md
│       │   │   ├── auto.md
│       │   │   ├── autoformer.md
│       │   │   ├── bart.md
│       │   │   ├── barthez.md
│       │   │   ├── bartpho.md
│       │   │   ├── bert-japanese.md
│       │   │   ├── bert.md
│       │   │   ├── bertweet.md
│       │   │   ├── big_bird.md
│       │   │   ├── biogpt.md
│       │   │   ├── blip-2.md
│       │   │   ├── blip.md
│       │   │   ├── chameleon.md
│       │   │   ├── clip.md
│       │   │   ├── clipseg.md
│       │   │   ├── code_llama.md
│       │   │   ├── codegen.md
│       │   │   ├── cohere.md
│       │   │   ├── convbert.md
│       │   │   ├── dbrx.md
│       │   │   ├── deberta-v2.md
│       │   │   ├── deberta.md
│       │   │   ├── deepseek_v3.md
│       │   │   ├── electra.md
│       │   │   ├── encoder-decoder.md
│       │   │   ├── esm.md
│       │   │   ├── exaone4.md
│       │   │   ├── exaone_moe.md
│       │   │   ├── gemma.md
│       │   │   ├── gemma2.md
│       │   │   ├── gemma3.md
│       │   │   ├── gemma3n.md
│       │   │   ├── gpt2.md
│       │   │   ├── gpt_neox_japanese.md
│       │   │   ├── grounding-dino.md
│       │   │   ├── informer.md
│       │   │   ├── jamba.md
│       │   │   ├── lfm2.md
│       │   │   ├── llama.md
│       │   │   ├── llama2.md
│       │   │   ├── llama3.md
│       │   │   ├── llama4.md
│       │   │   ├── mamba.md
│       │   │   ├── mamba2.md
│       │   │   ├── marian.md
│       │   │   ├── mistral.md
│       │   │   ├── openai-gpt.md
│       │   │   ├── paligemma.md
│       │   │   ├── patchtsmixer.md
│       │   │   ├── patchtst.md
│       │   │   ├── qwen2_vl.md
│       │   │   ├── rag.md
│       │   │   ├── roberta.md
│       │   │   ├── sam_hq.md
│       │   │   ├── siglip.md
│       │   │   ├── smolvlm.md
│       │   │   ├── swin.md
│       │   │   ├── swin2sr.md
│       │   │   ├── swinv2.md
│       │   │   ├── time_series_transformer.md
│       │   │   ├── timesformer.md
│       │   │   ├── tvp.md
│       │   │   ├── vit.md
│       │   │   ├── vivit.md
│       │   │   ├── whisper.md
│       │   │   └── xclip.md
│       │   ├── model_memory_anatomy.md
│       │   ├── model_sharing.md
│       │   ├── models.md
│       │   ├── modular_transformers.md
│       │   ├── optimizers.md
│       │   ├── pad_truncation.md
│       │   ├── peft.md
│       │   ├── perf_hardware.md
│       │   ├── perf_infer_cpu.md
│       │   ├── perf_infer_gpu_multi.md
│       │   ├── perf_infer_gpu_one.md
│       │   ├── perf_train_cpu.md
│       │   ├── perf_train_cpu_many.md
│       │   ├── perf_train_gpu_many.md
│       │   ├── perf_train_gpu_one.md
│       │   ├── perf_train_special.md
│       │   ├── perplexity.md
│       │   ├── philosophy.md
│       │   ├── pipeline_gradio.md
│       │   ├── pipeline_tutorial.md
│       │   ├── pipeline_webserver.md
│       │   ├── pr_checks.md
│       │   ├── quantization/
│       │   │   ├── awq.md
│       │   │   ├── bitsandbytes.md
│       │   │   ├── eetq.md
│       │   │   ├── gptq.md
│       │   │   ├── quanto.md
│       │   │   └── quark.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── serialization.md
│       │   ├── serving.md
│       │   ├── tasks/
│       │   │   ├── asr.md
│       │   │   ├── audio_classification.md
│       │   │   ├── document_question_answering.md
│       │   │   ├── idefics.md
│       │   │   ├── image_captioning.md
│       │   │   ├── image_classification.md
│       │   │   ├── image_feature_extraction.md
│       │   │   ├── keypoint_detection.md
│       │   │   ├── knowledge_distillation_for_image_classification.md
│       │   │   ├── language_modeling.md
│       │   │   ├── mask_generation.md
│       │   │   ├── masked_language_modeling.md
│       │   │   ├── monocular_depth_estimation.md
│       │   │   ├── multiple_choice.md
│       │   │   ├── object_detection.md
│       │   │   ├── prompting.md
│       │   │   ├── question_answering.md
│       │   │   ├── semantic_segmentation.md
│       │   │   ├── sequence_classification.md
│       │   │   ├── summarization.md
│       │   │   ├── token_classification.md
│       │   │   ├── translation.md
│       │   │   ├── video_classification.md
│       │   │   ├── visual_question_answering.md
│       │   │   ├── zero_shot_image_classification.md
│       │   │   └── zero_shot_object_detection.md
│       │   ├── testing.md
│       │   ├── tiny_agents.md
│       │   ├── tokenizer_summary.md
│       │   ├── trainer.md
│       │   ├── training.md
│       │   └── troubleshooting.md
│       ├── pt/
│       │   ├── _config.py
│       │   ├── _toctree.yml
│       │   ├── accelerate.md
│       │   ├── create_a_model.md
│       │   ├── custom_models.md
│       │   ├── fast_tokenizers.md
│       │   ├── index.md
│       │   ├── installation.md
│       │   ├── multilingual.md
│       │   ├── pipeline_tutorial.md
│       │   ├── quicktour.md
│       │   ├── run_scripts.md
│       │   ├── tasks/
│       │   │   ├── sequence_classification.md
│       │   │   └── token_classification.md
│       │   └── training.md
│       └── zh/
│           ├── _toctree.yml
│           ├── accelerate.md
│           ├── add_new_pipeline.md
│           ├── attention.md
│           ├── autoclass_tutorial.md
│           ├── bertology.md
│           ├── big_models.md
│           ├── chat_templating.md
│           ├── community.md
│           ├── contributing.md
│           ├── create_a_model.md
│           ├── custom_models.md
│           ├── debugging.md
│           ├── fast_tokenizers.md
│           ├── fsdp.md
│           ├── generation_strategies.md
│           ├── gguf.md
│           ├── hpo_train.md
│           ├── index.md
│           ├── installation.md
│           ├── internal/
│           │   ├── audio_utils.md
│           │   ├── file_utils.md
│           │   ├── generation_utils.md
│           │   ├── image_processing_utils.md
│           │   ├── modeling_utils.md
│           │   ├── pipelines_utils.md
│           │   ├── time_series_utils.md
│           │   ├── tokenization_utils.md
│           │   └── trainer_utils.md
│           ├── llm_tutorial.md
│           ├── main_classes/
│           │   ├── callback.md
│           │   ├── configuration.md
│           │   ├── data_collator.md
│           │   ├── deepspeed.md
│           │   ├── feature_extractor.md
│           │   ├── image_processor.md
│           │   ├── logging.md
│           │   ├── model.md
│           │   ├── optimizer_schedules.md
│           │   ├── output.md
│           │   ├── pipelines.md
│           │   ├── processors.md
│           │   ├── quantization.md
│           │   ├── text_generation.md
│           │   ├── tokenizer.md
│           │   └── trainer.md
│           ├── model_doc/
│           │   └── bert.md
│           ├── model_sharing.md
│           ├── multilingual.md
│           ├── peft.md
│           ├── perf_hardware.md
│           ├── perf_infer_gpu_multi.md
│           ├── perf_torch_compile.md
│           ├── perf_train_cpu.md
│           ├── perf_train_special.md
│           ├── performance.md
│           ├── philosophy.md
│           ├── pipeline_tutorial.md
│           ├── preprocessing.md
│           ├── quicktour.md
│           ├── run_scripts.md
│           ├── serialization.md
│           ├── task_summary.md
│           ├── tasks/
│           │   ├── asr.md
│           │   ├── question_answering.md
│           │   ├── sequence_classification.md
│           │   ├── summarization.md
│           │   ├── token_classification.md
│           │   └── translation.md
│           ├── tiktoken.md
│           ├── tokenizer_summary.md
│           └── training.md
├── doctest_list.txt
├── examples/
│   ├── 3D_parallel.py
│   ├── README.md
│   ├── metrics-monitoring/
│   │   ├── README.md
│   │   ├── continuous-batching-dashboard.json
│   │   ├── docker-compose.yml
│   │   ├── grafana-dashboard.yaml
│   │   ├── grafana-datasources.yaml
│   │   ├── metrics_example.py
│   │   ├── prometheus.yml
│   │   └── tempo.yaml
│   ├── modular-transformers/
│   │   ├── README.md
│   │   ├── configuration_dummy.py
│   │   ├── configuration_duplicated_method.py
│   │   ├── configuration_my_new_model.py
│   │   ├── configuration_my_new_model2.py
│   │   ├── configuration_new_model.py
│   │   ├── configuration_super.py
│   │   ├── convert_examples.sh
│   │   ├── image_processing_new_imgproc_model.py
│   │   ├── modeling_add_function.py
│   │   ├── modeling_dummy_bert.py
│   │   ├── modeling_from_uppercase_model.py
│   │   ├── modeling_global_indexing.py
│   │   ├── modeling_multimodal2.py
│   │   ├── modeling_my_new_model2.py
│   │   ├── modeling_new_task_model.py
│   │   ├── modeling_roberta.py
│   │   ├── modeling_super.py
│   │   ├── modeling_switch_function.py
│   │   ├── modeling_test_detr.py
│   │   ├── modeling_test_suffix.py
│   │   ├── modular_add_function.py
│   │   ├── modular_dummy_bert.py
│   │   ├── modular_duplicated_method.py
│   │   ├── modular_from_uppercase_model.py
│   │   ├── modular_global_indexing.py
│   │   ├── modular_multimodal2.py
│   │   ├── modular_my_new_model.py
│   │   ├── modular_my_new_model2.py
│   │   ├── modular_new_imgproc_model.py
│   │   ├── modular_new_model.py
│   │   ├── modular_new_task_model.py
│   │   ├── modular_roberta.py
│   │   ├── modular_super.py
│   │   ├── modular_switch_function.py
│   │   ├── modular_test_detr.py
│   │   └── modular_test_suffix.py
│   ├── pytorch/
│   │   ├── 3d_parallel_checks.py
│   │   ├── README.md
│   │   ├── _tests_requirements.txt
│   │   ├── audio-classification/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── run_audio_classification.py
│   │   ├── conftest.py
│   │   ├── context_parallel.py
│   │   ├── continuous_batching.py
│   │   ├── continuous_batching_simple.py
│   │   ├── contrastive-image-text/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── run_clip.py
│   │   ├── image-classification/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_image_classification.py
│   │   │   └── run_image_classification_no_trainer.py
│   │   ├── image-pretraining/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_mae.py
│   │   │   ├── run_mim.py
│   │   │   └── run_mim_no_trainer.py
│   │   ├── instance-segmentation/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_instance_segmentation.py
│   │   │   └── run_instance_segmentation_no_trainer.py
│   │   ├── language-modeling/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_clm.py
│   │   │   ├── run_clm_no_trainer.py
│   │   │   ├── run_fim.py
│   │   │   ├── run_fim_no_trainer.py
│   │   │   ├── run_mlm.py
│   │   │   ├── run_mlm_no_trainer.py
│   │   │   └── run_plm.py
│   │   ├── multiple-choice/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_no_trainer.sh
│   │   │   ├── run_swag.py
│   │   │   └── run_swag_no_trainer.py
│   │   ├── object-detection/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_object_detection.py
│   │   │   └── run_object_detection_no_trainer.py
│   │   ├── old_test_xla_examples.py
│   │   ├── question-answering/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_qa.py
│   │   │   ├── run_qa_beam_search.py
│   │   │   ├── run_qa_beam_search_no_trainer.py
│   │   │   ├── run_qa_no_trainer.py
│   │   │   ├── run_seq2seq_qa.py
│   │   │   ├── trainer_qa.py
│   │   │   ├── trainer_seq2seq_qa.py
│   │   │   └── utils_qa.py
│   │   ├── semantic-segmentation/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_semantic_segmentation.py
│   │   │   └── run_semantic_segmentation_no_trainer.py
│   │   ├── speech-pretraining/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── run_wav2vec2_pretraining_no_trainer.py
│   │   ├── speech-recognition/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_speech_recognition_ctc.py
│   │   │   ├── run_speech_recognition_ctc_adapter.py
│   │   │   └── run_speech_recognition_seq2seq.py
│   │   ├── summarization/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_summarization.py
│   │   │   └── run_summarization_no_trainer.py
│   │   ├── test_accelerate_examples.py
│   │   ├── test_pytorch_examples.py
│   │   ├── text-classification/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_classification.py
│   │   │   ├── run_glue.py
│   │   │   ├── run_glue_no_trainer.py
│   │   │   └── run_xnli.py
│   │   ├── text-generation/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── run_generation.py
│   │   ├── token-classification/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run.sh
│   │   │   ├── run_ner.py
│   │   │   ├── run_ner_no_trainer.py
│   │   │   └── run_no_trainer.sh
│   │   ├── transformers_serve_cb_eval_job.py
│   │   ├── translation/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   ├── run_translation.py
│   │   │   └── run_translation_no_trainer.py
│   │   └── xla_spawn.py
│   ├── quantization/
│   │   ├── custom_quantization.py
│   │   └── custom_quantization_int8_example.py
│   ├── research_projects/
│   │   └── README.md
│   ├── run_on_remote.py
│   ├── scheduler/
│   │   ├── README.md
│   │   └── run_greedy.py
│   └── training/
│       └── distributed_training.py
├── i18n/
│   ├── README_ar.md
│   ├── README_bn.md
│   ├── README_de.md
│   ├── README_es.md
│   ├── README_fr.md
│   ├── README_hd.md
│   ├── README_it.md
│   ├── README_ja.md
│   ├── README_ko.md
│   ├── README_pt-br.md
│   ├── README_ru.md
│   ├── README_te.md
│   ├── README_ur.md
│   ├── README_vi.md
│   ├── README_zh-hans.md
│   └── README_zh-hant.md
├── notebooks/
│   └── README.md
├── pyproject.toml
├── scripts/
│   ├── check_tokenizers.py
│   ├── distributed/
│   │   └── torch-distributed-gpu-test.py
│   └── stale.py
├── setup.py
├── src/
│   └── transformers/
│       ├── __init__.py
│       ├── _typing.py
│       ├── activations.py
│       ├── audio_utils.py
│       ├── backbone_utils.py
│       ├── cache_utils.py
│       ├── cli/
│       │   ├── __init__.py
│       │   ├── add_new_model_like.py
│       │   ├── chat.py
│       │   ├── download.py
│       │   ├── serve.py
│       │   ├── serving/
│       │   │   ├── __init__.py
│       │   │   ├── chat_completion.py
│       │   │   ├── model_manager.py
│       │   │   ├── response.py
│       │   │   ├── server.py
│       │   │   ├── transcription.py
│       │   │   └── utils.py
│       │   ├── system.py
│       │   └── transformers.py
│       ├── configuration_utils.py
│       ├── conversion_mapping.py
│       ├── convert_slow_tokenizer.py
│       ├── convert_slow_tokenizers_checkpoints_to_fast.py
│       ├── core_model_loading.py
│       ├── data/
│       │   ├── __init__.py
│       │   ├── data_collator.py
│       │   ├── datasets/
│       │   │   ├── __init__.py
│       │   │   ├── glue.py
│       │   │   └── squad.py
│       │   ├── metrics/
│       │   │   ├── __init__.py
│       │   │   └── squad_metrics.py
│       │   └── processors/
│       │       ├── __init__.py
│       │       ├── glue.py
│       │       ├── squad.py
│       │       ├── utils.py
│       │       └── xnli.py
│       ├── debug_utils.py
│       ├── dependency_versions_check.py
│       ├── dependency_versions_table.py
│       ├── distributed/
│       │   ├── __init__.py
│       │   └── configuration_utils.py
│       ├── dynamic_module_utils.py
│       ├── feature_extraction_sequence_utils.py
│       ├── feature_extraction_utils.py
│       ├── file_utils.py
│       ├── generation/
│       │   ├── __init__.py
│       │   ├── candidate_generator.py
│       │   ├── configuration_utils.py
│       │   ├── continuous_batching/
│       │   │   ├── __init__.py
│       │   │   ├── cache.py
│       │   │   ├── cache_manager.py
│       │   │   ├── continuous_api.py
│       │   │   ├── input_outputs.py
│       │   │   ├── requests.py
│       │   │   ├── scheduler.py
│       │   │   └── utils.py
│       │   ├── logits_process.py
│       │   ├── stopping_criteria.py
│       │   ├── streamers.py
│       │   ├── utils.py
│       │   └── watermarking.py
│       ├── hf_argparser.py
│       ├── hyperparameter_search.py
│       ├── image_processing_backends.py
│       ├── image_processing_base.py
│       ├── image_processing_utils.py
│       ├── image_transforms.py
│       ├── image_utils.py
│       ├── initialization.py
│       ├── integrations/
│       │   ├── __init__.py
│       │   ├── accelerate.py
│       │   ├── aqlm.py
│       │   ├── awq.py
│       │   ├── bitnet.py
│       │   ├── bitsandbytes.py
│       │   ├── deepspeed.py
│       │   ├── eager_paged.py
│       │   ├── eetq.py
│       │   ├── executorch.py
│       │   ├── fbgemm_fp8.py
│       │   ├── finegrained_fp8.py
│       │   ├── flash_attention.py
│       │   ├── flash_paged.py
│       │   ├── flex_attention.py
│       │   ├── fouroversix.py
│       │   ├── fp_quant.py
│       │   ├── fsdp.py
│       │   ├── ggml.py
│       │   ├── higgs.py
│       │   ├── hqq.py
│       │   ├── hub_kernels.py
│       │   ├── integration_utils.py
│       │   ├── liger.py
│       │   ├── metal_quantization.py
│       │   ├── mistral.py
│       │   ├── moe.py
│       │   ├── mxfp4.py
│       │   ├── neftune.py
│       │   ├── npu_flash_attention.py
│       │   ├── peft.py
│       │   ├── quanto.py
│       │   ├── quark.py
│       │   ├── sdpa_attention.py
│       │   ├── sdpa_paged.py
│       │   ├── sinq.py
│       │   ├── spqr.py
│       │   ├── tensor_parallel.py
│       │   ├── tiktoken.py
│       │   ├── torchao.py
│       │   ├── tpu.py
│       │   └── vptq.py
│       ├── loss/
│       │   ├── __init__.py
│       │   ├── loss_d_fine.py
│       │   ├── loss_deformable_detr.py
│       │   ├── loss_for_object_detection.py
│       │   ├── loss_grounding_dino.py
│       │   ├── loss_lw_detr.py
│       │   ├── loss_rt_detr.py
│       │   └── loss_utils.py
│       ├── masking_utils.py
│       ├── model_debugging_utils.py
│       ├── modelcard.py
│       ├── modeling_attn_mask_utils.py
│       ├── modeling_flash_attention_utils.py
│       ├── modeling_gguf_pytorch_utils.py
│       ├── modeling_layers.py
│       ├── modeling_outputs.py
│       ├── modeling_rope_utils.py
│       ├── modeling_utils.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── afmoe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_afmoe.py
│       │   │   ├── modeling_afmoe.py
│       │   │   └── modular_afmoe.py
│       │   ├── aimv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_aimv2.py
│       │   │   ├── convert_aimv2_original_pytorch_to_hf.py
│       │   │   ├── modeling_aimv2.py
│       │   │   └── modular_aimv2.py
│       │   ├── albert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_albert.py
│       │   │   ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_albert.py
│       │   │   └── tokenization_albert.py
│       │   ├── align/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_align.py
│       │   │   ├── convert_align_tf_to_hf.py
│       │   │   ├── modeling_align.py
│       │   │   └── processing_align.py
│       │   ├── altclip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_altclip.py
│       │   │   ├── modeling_altclip.py
│       │   │   └── processing_altclip.py
│       │   ├── apertus/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_apertus.py
│       │   │   ├── modeling_apertus.py
│       │   │   └── modular_apertus.py
│       │   ├── arcee/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_arcee.py
│       │   │   ├── modeling_arcee.py
│       │   │   └── modular_arcee.py
│       │   ├── aria/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_aria.py
│       │   │   ├── convert_aria_weights_to_hf.py
│       │   │   ├── image_processing_aria.py
│       │   │   ├── image_processing_pil_aria.py
│       │   │   ├── modeling_aria.py
│       │   │   ├── modular_aria.py
│       │   │   └── processing_aria.py
│       │   ├── audio_spectrogram_transformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_audio_spectrogram_transformer.py
│       │   │   ├── convert_audio_spectrogram_transformer_original_to_pytorch.py
│       │   │   ├── feature_extraction_audio_spectrogram_transformer.py
│       │   │   └── modeling_audio_spectrogram_transformer.py
│       │   ├── audioflamingo3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_audioflamingo3.py
│       │   │   ├── convert_audioflamingo3_to_hf.py
│       │   │   ├── modeling_audioflamingo3.py
│       │   │   ├── modular_audioflamingo3.py
│       │   │   └── processing_audioflamingo3.py
│       │   ├── auto/
│       │   │   ├── __init__.py
│       │   │   ├── auto_factory.py
│       │   │   ├── configuration_auto.py
│       │   │   ├── feature_extraction_auto.py
│       │   │   ├── image_processing_auto.py
│       │   │   ├── modeling_auto.py
│       │   │   ├── processing_auto.py
│       │   │   ├── tokenization_auto.py
│       │   │   └── video_processing_auto.py
│       │   ├── autoformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_autoformer.py
│       │   │   └── modeling_autoformer.py
│       │   ├── aya_vision/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_aya_vision.py
│       │   │   ├── modeling_aya_vision.py
│       │   │   ├── modular_aya_vision.py
│       │   │   └── processing_aya_vision.py
│       │   ├── bamba/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bamba.py
│       │   │   ├── convert_mamba_ssm_checkpoint.py
│       │   │   ├── modeling_bamba.py
│       │   │   └── modular_bamba.py
│       │   ├── bark/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bark.py
│       │   │   ├── convert_suno_to_hf.py
│       │   │   ├── generation_configuration_bark.py
│       │   │   ├── modeling_bark.py
│       │   │   └── processing_bark.py
│       │   ├── bart/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bart.py
│       │   │   ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_bart.py
│       │   │   └── tokenization_bart.py
│       │   ├── barthez/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_barthez.py
│       │   ├── bartpho/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_bartpho.py
│       │   ├── beit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_beit.py
│       │   │   ├── convert_beit_unilm_to_pytorch.py
│       │   │   ├── image_processing_beit.py
│       │   │   ├── image_processing_pil_beit.py
│       │   │   └── modeling_beit.py
│       │   ├── bert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bert.py
│       │   │   ├── convert_bert_original_tf2_checkpoint_to_pytorch.py
│       │   │   ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
│       │   │   ├── modeling_bert.py
│       │   │   ├── tokenization_bert.py
│       │   │   └── tokenization_bert_legacy.py
│       │   ├── bert_generation/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bert_generation.py
│       │   │   ├── modeling_bert_generation.py
│       │   │   └── tokenization_bert_generation.py
│       │   ├── bert_japanese/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_bert_japanese.py
│       │   ├── bertweet/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_bertweet.py
│       │   ├── big_bird/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_big_bird.py
│       │   │   ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_big_bird.py
│       │   │   └── tokenization_big_bird.py
│       │   ├── bigbird_pegasus/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bigbird_pegasus.py
│       │   │   ├── convert_bigbird_pegasus_tf_to_pytorch.py
│       │   │   └── modeling_bigbird_pegasus.py
│       │   ├── biogpt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_biogpt.py
│       │   │   ├── convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_biogpt.py
│       │   │   ├── modular_biogpt.py
│       │   │   └── tokenization_biogpt.py
│       │   ├── bit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bit.py
│       │   │   ├── convert_bit_to_pytorch.py
│       │   │   ├── image_processing_bit.py
│       │   │   ├── image_processing_pil_bit.py
│       │   │   └── modeling_bit.py
│       │   ├── bitnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bitnet.py
│       │   │   ├── modeling_bitnet.py
│       │   │   └── modular_bitnet.py
│       │   ├── blenderbot/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_blenderbot.py
│       │   │   ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_blenderbot.py
│       │   │   └── tokenization_blenderbot.py
│       │   ├── blenderbot_small/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_blenderbot_small.py
│       │   │   ├── modeling_blenderbot_small.py
│       │   │   └── tokenization_blenderbot_small.py
│       │   ├── blip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_blip.py
│       │   │   ├── convert_blip_original_pytorch_to_hf.py
│       │   │   ├── image_processing_blip.py
│       │   │   ├── image_processing_pil_blip.py
│       │   │   ├── modeling_blip.py
│       │   │   ├── modeling_blip_text.py
│       │   │   └── processing_blip.py
│       │   ├── blip_2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_blip_2.py
│       │   │   ├── convert_blip_2_original_to_pytorch.py
│       │   │   ├── modeling_blip_2.py
│       │   │   └── processing_blip_2.py
│       │   ├── bloom/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bloom.py
│       │   │   ├── convert_bloom_original_checkpoint_to_pytorch.py
│       │   │   └── modeling_bloom.py
│       │   ├── blt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_blt.py
│       │   │   ├── convert_blt_weights_to_hf.py
│       │   │   ├── modeling_blt.py
│       │   │   └── modular_blt.py
│       │   ├── bridgetower/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bridgetower.py
│       │   │   ├── image_processing_bridgetower.py
│       │   │   ├── image_processing_pil_bridgetower.py
│       │   │   ├── modeling_bridgetower.py
│       │   │   └── processing_bridgetower.py
│       │   ├── bros/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_bros.py
│       │   │   ├── convert_bros_to_pytorch.py
│       │   │   ├── modeling_bros.py
│       │   │   └── processing_bros.py
│       │   ├── byt5/
│       │   │   ├── __init__.py
│       │   │   ├── convert_byt5_original_tf_checkpoint_to_pytorch.py
│       │   │   └── tokenization_byt5.py
│       │   ├── camembert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_camembert.py
│       │   │   ├── modeling_camembert.py
│       │   │   ├── modular_camembert.py
│       │   │   └── tokenization_camembert.py
│       │   ├── canine/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_canine.py
│       │   │   ├── convert_canine_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_canine.py
│       │   │   └── tokenization_canine.py
│       │   ├── chameleon/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_chameleon.py
│       │   │   ├── convert_chameleon_weights_to_hf.py
│       │   │   ├── image_processing_chameleon.py
│       │   │   ├── image_processing_pil_chameleon.py
│       │   │   ├── modeling_chameleon.py
│       │   │   └── processing_chameleon.py
│       │   ├── chinese_clip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_chinese_clip.py
│       │   │   ├── convert_chinese_clip_original_pytorch_to_hf.py
│       │   │   ├── image_processing_chinese_clip.py
│       │   │   ├── image_processing_chinese_pil_clip.py
│       │   │   ├── modeling_chinese_clip.py
│       │   │   └── processing_chinese_clip.py
│       │   ├── chmv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_chmv2.py
│       │   │   ├── convert_chmv2_to_hf.py
│       │   │   ├── image_processing_chmv2.py
│       │   │   ├── modeling_chmv2.py
│       │   │   └── modular_chmv2.py
│       │   ├── clap/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_clap.py
│       │   │   ├── convert_clap_original_pytorch_to_hf.py
│       │   │   ├── feature_extraction_clap.py
│       │   │   ├── modeling_clap.py
│       │   │   └── processing_clap.py
│       │   ├── clip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_clip.py
│       │   │   ├── convert_clip_original_pytorch_to_hf.py
│       │   │   ├── image_processing_clip.py
│       │   │   ├── image_processing_pil_clip.py
│       │   │   ├── modeling_clip.py
│       │   │   ├── processing_clip.py
│       │   │   └── tokenization_clip.py
│       │   ├── clipseg/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_clipseg.py
│       │   │   ├── convert_clipseg_original_pytorch_to_hf.py
│       │   │   ├── modeling_clipseg.py
│       │   │   └── processing_clipseg.py
│       │   ├── clvp/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_clvp.py
│       │   │   ├── convert_clvp_to_hf.py
│       │   │   ├── feature_extraction_clvp.py
│       │   │   ├── modeling_clvp.py
│       │   │   ├── number_normalizer.py
│       │   │   ├── processing_clvp.py
│       │   │   └── tokenization_clvp.py
│       │   ├── code_llama/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_code_llama.py
│       │   ├── codegen/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_codegen.py
│       │   │   ├── modeling_codegen.py
│       │   │   └── tokenization_codegen.py
│       │   ├── cohere/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cohere.py
│       │   │   ├── modeling_cohere.py
│       │   │   ├── modular_cohere.py
│       │   │   └── tokenization_cohere.py
│       │   ├── cohere2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cohere2.py
│       │   │   ├── modeling_cohere2.py
│       │   │   └── modular_cohere2.py
│       │   ├── cohere2_vision/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cohere2_vision.py
│       │   │   ├── image_processing_cohere2_vision.py
│       │   │   ├── modeling_cohere2_vision.py
│       │   │   ├── modular_cohere2_vision.py
│       │   │   └── processing_cohere2_vision.py
│       │   ├── cohere_asr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cohere_asr.py
│       │   │   ├── feature_extraction_cohere_asr.py
│       │   │   ├── modeling_cohere_asr.py
│       │   │   ├── modular_cohere_asr.py
│       │   │   └── processing_cohere_asr.py
│       │   ├── colmodernvbert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_colmodernvbert.py
│       │   │   ├── modeling_colmodernvbert.py
│       │   │   ├── modular_colmodernvbert.py
│       │   │   └── processing_colmodernvbert.py
│       │   ├── colpali/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_colpali.py
│       │   │   ├── convert_colpali_weights_to_hf.py
│       │   │   ├── modeling_colpali.py
│       │   │   ├── modular_colpali.py
│       │   │   └── processing_colpali.py
│       │   ├── colqwen2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_colqwen2.py
│       │   │   ├── convert_colqwen2_weights_to_hf.py
│       │   │   ├── modeling_colqwen2.py
│       │   │   ├── modular_colqwen2.py
│       │   │   └── processing_colqwen2.py
│       │   ├── conditional_detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_conditional_detr.py
│       │   │   ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── image_processing_conditional_detr.py
│       │   │   ├── image_processing_pil_conditional_detr.py
│       │   │   ├── modeling_conditional_detr.py
│       │   │   └── modular_conditional_detr.py
│       │   ├── convbert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_convbert.py
│       │   │   ├── convert_convbert_original_tf1_checkpoint_to_pytorch.py
│       │   │   ├── modeling_convbert.py
│       │   │   └── tokenization_convbert.py
│       │   ├── convnext/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_convnext.py
│       │   │   ├── convert_convnext_to_pytorch.py
│       │   │   ├── image_processing_convnext.py
│       │   │   ├── image_processing_pil_convnext.py
│       │   │   └── modeling_convnext.py
│       │   ├── convnextv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_convnextv2.py
│       │   │   ├── convert_convnextv2_to_pytorch.py
│       │   │   └── modeling_convnextv2.py
│       │   ├── cpm/
│       │   │   ├── __init__.py
│       │   │   ├── tokenization_cpm.py
│       │   │   └── tokenization_cpm_fast.py
│       │   ├── cpmant/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cpmant.py
│       │   │   ├── modeling_cpmant.py
│       │   │   └── tokenization_cpmant.py
│       │   ├── csm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_csm.py
│       │   │   ├── convert_csm.py
│       │   │   ├── generation_csm.py
│       │   │   ├── modeling_csm.py
│       │   │   ├── modular_csm.py
│       │   │   └── processing_csm.py
│       │   ├── ctrl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ctrl.py
│       │   │   ├── modeling_ctrl.py
│       │   │   └── tokenization_ctrl.py
│       │   ├── cvt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cvt.py
│       │   │   ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_cvt.py
│       │   ├── cwm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_cwm.py
│       │   │   ├── modeling_cwm.py
│       │   │   └── modular_cwm.py
│       │   ├── d_fine/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_d_fine.py
│       │   │   ├── convert_d_fine_original_pytorch_checkpoint_to_hf.py
│       │   │   ├── modeling_d_fine.py
│       │   │   └── modular_d_fine.py
│       │   ├── dab_detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dab_detr.py
│       │   │   ├── convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_dab_detr.py
│       │   ├── dac/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dac.py
│       │   │   ├── convert_dac_checkpoint.py
│       │   │   ├── feature_extraction_dac.py
│       │   │   └── modeling_dac.py
│       │   ├── data2vec/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_data2vec_audio.py
│       │   │   ├── configuration_data2vec_text.py
│       │   │   ├── configuration_data2vec_vision.py
│       │   │   ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_data2vec_audio.py
│       │   │   ├── modeling_data2vec_text.py
│       │   │   ├── modeling_data2vec_vision.py
│       │   │   ├── modular_data2vec_audio.py
│       │   │   └── modular_data2vec_text.py
│       │   ├── dbrx/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dbrx.py
│       │   │   ├── modeling_dbrx.py
│       │   │   └── modular_dbrx.py
│       │   ├── deberta/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deberta.py
│       │   │   ├── modeling_deberta.py
│       │   │   └── tokenization_deberta.py
│       │   ├── deberta_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deberta_v2.py
│       │   │   ├── modeling_deberta_v2.py
│       │   │   └── tokenization_deberta_v2.py
│       │   ├── decision_transformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_decision_transformer.py
│       │   │   └── modeling_decision_transformer.py
│       │   ├── deepseek_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deepseek_v2.py
│       │   │   ├── modeling_deepseek_v2.py
│       │   │   └── modular_deepseek_v2.py
│       │   ├── deepseek_v3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deepseek_v3.py
│       │   │   ├── modeling_deepseek_v3.py
│       │   │   └── modular_deepseek_v3.py
│       │   ├── deepseek_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deepseek_vl.py
│       │   │   ├── convert_deepseek_vl_weights_to_hf.py
│       │   │   ├── image_processing_deepseek_vl.py
│       │   │   ├── image_processing_pil_deepseek_vl.py
│       │   │   ├── modeling_deepseek_vl.py
│       │   │   ├── modular_deepseek_vl.py
│       │   │   └── processing_deepseek_vl.py
│       │   ├── deepseek_vl_hybrid/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deepseek_vl_hybrid.py
│       │   │   ├── convert_deepseek_vl_hybrid_weights_to_hf.py
│       │   │   ├── image_processing_deepseek_vl_hybrid.py
│       │   │   ├── image_processing_pil_deepseek_vl_hybrid.py
│       │   │   ├── modeling_deepseek_vl_hybrid.py
│       │   │   ├── modular_deepseek_vl_hybrid.py
│       │   │   └── processing_deepseek_vl_hybrid.py
│       │   ├── deformable_detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deformable_detr.py
│       │   │   ├── convert_deformable_detr_to_pytorch.py
│       │   │   ├── image_processing_deformable_detr.py
│       │   │   ├── image_processing_pil_deformable_detr.py
│       │   │   ├── modeling_deformable_detr.py
│       │   │   └── modular_deformable_detr.py
│       │   ├── deit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_deit.py
│       │   │   ├── convert_deit_timm_to_pytorch.py
│       │   │   ├── image_processing_deit.py
│       │   │   ├── image_processing_pil_deit.py
│       │   │   └── modeling_deit.py
│       │   ├── deprecated/
│       │   │   └── __init__.py
│       │   ├── depth_anything/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_depth_anything.py
│       │   │   ├── convert_depth_anything_to_hf.py
│       │   │   ├── convert_distill_any_depth_to_hf.py
│       │   │   └── modeling_depth_anything.py
│       │   ├── depth_pro/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_depth_pro.py
│       │   │   ├── convert_depth_pro_weights_to_hf.py
│       │   │   ├── image_processing_depth_pro.py
│       │   │   └── modeling_depth_pro.py
│       │   ├── detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_detr.py
│       │   │   ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_detr_to_pytorch.py
│       │   │   ├── image_processing_detr.py
│       │   │   ├── image_processing_pil_detr.py
│       │   │   └── modeling_detr.py
│       │   ├── dia/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dia.py
│       │   │   ├── convert_dia_to_hf.py
│       │   │   ├── feature_extraction_dia.py
│       │   │   ├── generation_dia.py
│       │   │   ├── modeling_dia.py
│       │   │   ├── modular_dia.py
│       │   │   ├── processing_dia.py
│       │   │   └── tokenization_dia.py
│       │   ├── dialogpt/
│       │   │   ├── __init__.py
│       │   │   └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│       │   ├── diffllama/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_diffllama.py
│       │   │   ├── modeling_diffllama.py
│       │   │   └── modular_diffllama.py
│       │   ├── dinat/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dinat.py
│       │   │   └── modeling_dinat.py
│       │   ├── dinov2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dinov2.py
│       │   │   ├── convert_dinov2_to_hf.py
│       │   │   └── modeling_dinov2.py
│       │   ├── dinov2_with_registers/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dinov2_with_registers.py
│       │   │   ├── convert_dinov2_with_registers_to_hf.py
│       │   │   ├── modeling_dinov2_with_registers.py
│       │   │   └── modular_dinov2_with_registers.py
│       │   ├── dinov3_convnext/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dinov3_convnext.py
│       │   │   ├── convert_dinov3_convnext_to_hf.py
│       │   │   └── modeling_dinov3_convnext.py
│       │   ├── dinov3_vit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dinov3_vit.py
│       │   │   ├── convert_dinov3_vit_to_hf.py
│       │   │   ├── image_processing_dinov3_vit.py
│       │   │   ├── modeling_dinov3_vit.py
│       │   │   └── modular_dinov3_vit.py
│       │   ├── distilbert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_distilbert.py
│       │   │   ├── modeling_distilbert.py
│       │   │   └── tokenization_distilbert.py
│       │   ├── dit/
│       │   │   ├── __init__.py
│       │   │   └── convert_dit_unilm_to_pytorch.py
│       │   ├── doge/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_doge.py
│       │   │   ├── convert_doge_weights_to_hf.py
│       │   │   ├── modeling_doge.py
│       │   │   └── modular_doge.py
│       │   ├── donut/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_donut_swin.py
│       │   │   ├── convert_donut_to_pytorch.py
│       │   │   ├── image_processing_donut.py
│       │   │   ├── image_processing_pil_donut.py
│       │   │   ├── modeling_donut_swin.py
│       │   │   └── processing_donut.py
│       │   ├── dots1/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dots1.py
│       │   │   ├── modeling_dots1.py
│       │   │   └── modular_dots1.py
│       │   ├── dpr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dpr.py
│       │   │   ├── convert_dpr_original_checkpoint_to_pytorch.py
│       │   │   ├── modeling_dpr.py
│       │   │   ├── tokenization_dpr.py
│       │   │   └── tokenization_dpr_fast.py
│       │   ├── dpt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_dpt.py
│       │   │   ├── convert_dinov2_depth_to_hf.py
│       │   │   ├── convert_dpt_beit_to_hf.py
│       │   │   ├── convert_dpt_hybrid_to_pytorch.py
│       │   │   ├── convert_dpt_swinv2_to_hf.py
│       │   │   ├── convert_dpt_to_pytorch.py
│       │   │   ├── image_processing_dpt.py
│       │   │   ├── image_processing_pil_dpt.py
│       │   │   ├── modeling_dpt.py
│       │   │   └── modular_dpt.py
│       │   ├── edgetam/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_edgetam.py
│       │   │   ├── convert_edgetam_to_hf.py
│       │   │   ├── modeling_edgetam.py
│       │   │   └── modular_edgetam.py
│       │   ├── edgetam_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_edgetam_video.py
│       │   │   ├── convert_edgetam_video_to_hf.py
│       │   │   ├── modeling_edgetam_video.py
│       │   │   └── modular_edgetam_video.py
│       │   ├── efficientloftr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_efficientloftr.py
│       │   │   ├── convert_efficientloftr_to_hf.py
│       │   │   ├── image_processing_efficientloftr.py
│       │   │   ├── image_processing_pil_efficientloftr.py
│       │   │   ├── modeling_efficientloftr.py
│       │   │   └── modular_efficientloftr.py
│       │   ├── efficientnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_efficientnet.py
│       │   │   ├── convert_efficientnet_to_pytorch.py
│       │   │   ├── image_processing_efficientnet.py
│       │   │   ├── image_processing_pil_efficientnet.py
│       │   │   └── modeling_efficientnet.py
│       │   ├── electra/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_electra.py
│       │   │   ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│       │   │   └── modeling_electra.py
│       │   ├── emu3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_emu3.py
│       │   │   ├── convert_emu3_weights_to_hf.py
│       │   │   ├── image_processing_emu3.py
│       │   │   ├── modeling_emu3.py
│       │   │   ├── modular_emu3.py
│       │   │   └── processing_emu3.py
│       │   ├── encodec/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_encodec.py
│       │   │   ├── convert_encodec_checkpoint_to_pytorch.py
│       │   │   ├── feature_extraction_encodec.py
│       │   │   └── modeling_encodec.py
│       │   ├── encoder_decoder/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_encoder_decoder.py
│       │   │   └── modeling_encoder_decoder.py
│       │   ├── eomt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_eomt.py
│       │   │   ├── convert_eomt_to_hf.py
│       │   │   ├── image_processing_eomt.py
│       │   │   ├── image_processing_pil_eomt.py
│       │   │   ├── modeling_eomt.py
│       │   │   └── modular_eomt.py
│       │   ├── eomt_dinov3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_eomt_dinov3.py
│       │   │   ├── convert_eomt_dinov3_to_hf.py
│       │   │   ├── modeling_eomt_dinov3.py
│       │   │   └── modular_eomt_dinov3.py
│       │   ├── ernie/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ernie.py
│       │   │   ├── modeling_ernie.py
│       │   │   └── modular_ernie.py
│       │   ├── ernie4_5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ernie4_5.py
│       │   │   ├── convert_ernie4_5_tokenizer.py
│       │   │   ├── modeling_ernie4_5.py
│       │   │   └── modular_ernie4_5.py
│       │   ├── ernie4_5_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ernie4_5_moe.py
│       │   │   ├── modeling_ernie4_5_moe.py
│       │   │   └── modular_ernie4_5_moe.py
│       │   ├── ernie4_5_vl_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ernie4_5_vl_moe.py
│       │   │   ├── convert_ernie4_5_vl_moe_to_hf.py
│       │   │   ├── image_processing_ernie4_5_vl_moe.py
│       │   │   ├── image_processing_pil_ernie4_5_vl_moe.py
│       │   │   ├── modeling_ernie4_5_vl_moe.py
│       │   │   ├── modular_ernie4_5_vl_moe.py
│       │   │   ├── processing_ernie4_5_vl_moe.py
│       │   │   └── video_processing_ernie4_5_vl_moe.py
│       │   ├── esm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_esm.py
│       │   │   ├── convert_esm.py
│       │   │   ├── modeling_esm.py
│       │   │   ├── modeling_esmfold.py
│       │   │   ├── openfold_utils/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── chunk_utils.py
│       │   │   │   ├── data_transforms.py
│       │   │   │   ├── feats.py
│       │   │   │   ├── loss.py
│       │   │   │   ├── protein.py
│       │   │   │   ├── residue_constants.py
│       │   │   │   ├── rigid_utils.py
│       │   │   │   └── tensor_utils.py
│       │   │   └── tokenization_esm.py
│       │   ├── eurobert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_eurobert.py
│       │   │   ├── modeling_eurobert.py
│       │   │   └── modular_eurobert.py
│       │   ├── evolla/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_evolla.py
│       │   │   ├── modeling_evolla.py
│       │   │   ├── modular_evolla.py
│       │   │   └── processing_evolla.py
│       │   ├── exaone4/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_exaone4.py
│       │   │   ├── modeling_exaone4.py
│       │   │   └── modular_exaone4.py
│       │   ├── exaone_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_exaone_moe.py
│       │   │   ├── modeling_exaone_moe.py
│       │   │   └── modular_exaone_moe.py
│       │   ├── falcon/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_falcon.py
│       │   │   ├── convert_custom_code_checkpoint.py
│       │   │   └── modeling_falcon.py
│       │   ├── falcon_h1/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_falcon_h1.py
│       │   │   ├── convert_mamba_ssm_checkpoint.py
│       │   │   ├── modeling_falcon_h1.py
│       │   │   └── modular_falcon_h1.py
│       │   ├── falcon_mamba/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_falcon_mamba.py
│       │   │   ├── modeling_falcon_mamba.py
│       │   │   └── modular_falcon_mamba.py
│       │   ├── fast_vlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_fast_vlm.py
│       │   │   ├── convert_fastvlm_weights_to_hf.py
│       │   │   ├── modeling_fast_vlm.py
│       │   │   └── modular_fast_vlm.py
│       │   ├── fastspeech2_conformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_fastspeech2_conformer.py
│       │   │   ├── convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_hifigan.py
│       │   │   ├── convert_model_with_hifigan.py
│       │   │   ├── modeling_fastspeech2_conformer.py
│       │   │   └── tokenization_fastspeech2_conformer.py
│       │   ├── flaubert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_flaubert.py
│       │   │   ├── modeling_flaubert.py
│       │   │   └── tokenization_flaubert.py
│       │   ├── flava/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_flava.py
│       │   │   ├── convert_dalle_to_flava_codebook.py
│       │   │   ├── convert_flava_original_pytorch_to_hf.py
│       │   │   ├── image_processing_flava.py
│       │   │   ├── image_processing_pil_flava.py
│       │   │   ├── modeling_flava.py
│       │   │   └── processing_flava.py
│       │   ├── flex_olmo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_flex_olmo.py
│       │   │   ├── modeling_flex_olmo.py
│       │   │   └── modular_flex_olmo.py
│       │   ├── florence2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_florence2.py
│       │   │   ├── convert_florence2_original_pytorch_to_hf.py
│       │   │   ├── modeling_florence2.py
│       │   │   ├── modular_florence2.py
│       │   │   └── processing_florence2.py
│       │   ├── fnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_fnet.py
│       │   │   ├── convert_fnet_original_flax_checkpoint_to_pytorch.py
│       │   │   ├── modeling_fnet.py
│       │   │   └── tokenization_fnet.py
│       │   ├── focalnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_focalnet.py
│       │   │   ├── convert_focalnet_to_hf_format.py
│       │   │   └── modeling_focalnet.py
│       │   ├── fsmt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_fsmt.py
│       │   │   ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_fsmt.py
│       │   │   └── tokenization_fsmt.py
│       │   ├── funnel/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_funnel.py
│       │   │   ├── convert_funnel_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_funnel.py
│       │   │   └── tokenization_funnel.py
│       │   ├── fuyu/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_fuyu.py
│       │   │   ├── convert_fuyu_model_weights_to_hf.py
│       │   │   ├── image_processing_fuyu.py
│       │   │   ├── image_processing_pil_fuyu.py
│       │   │   ├── modeling_fuyu.py
│       │   │   └── processing_fuyu.py
│       │   ├── gemma/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gemma.py
│       │   │   ├── convert_gemma_weights_to_hf.py
│       │   │   ├── modeling_gemma.py
│       │   │   ├── modular_gemma.py
│       │   │   └── tokenization_gemma.py
│       │   ├── gemma2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gemma2.py
│       │   │   ├── convert_gemma2_weights_to_hf.py
│       │   │   ├── modeling_gemma2.py
│       │   │   └── modular_gemma2.py
│       │   ├── gemma3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gemma3.py
│       │   │   ├── convert_gemma3_weights.py
│       │   │   ├── image_processing_gemma3.py
│       │   │   ├── image_processing_pil_gemma3.py
│       │   │   ├── modeling_gemma3.py
│       │   │   ├── modular_gemma3.py
│       │   │   └── processing_gemma3.py
│       │   ├── gemma3n/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gemma3n.py
│       │   │   ├── convert_gemma3n_weights.py
│       │   │   ├── feature_extraction_gemma3n.py
│       │   │   ├── modeling_gemma3n.py
│       │   │   ├── modular_gemma3n.py
│       │   │   └── processing_gemma3n.py
│       │   ├── git/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_git.py
│       │   │   ├── convert_git_to_pytorch.py
│       │   │   ├── modeling_git.py
│       │   │   └── processing_git.py
│       │   ├── glm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm.py
│       │   │   ├── convert_glm_weights_to_hf.py
│       │   │   ├── modeling_glm.py
│       │   │   └── modular_glm.py
│       │   ├── glm4/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm4.py
│       │   │   ├── convert_glm4_weights_to_hf.py
│       │   │   ├── modeling_glm4.py
│       │   │   └── modular_glm4.py
│       │   ├── glm46v/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm46v.py
│       │   │   ├── image_processing_glm46v.py
│       │   │   ├── image_processing_pil_glm46v.py
│       │   │   ├── modeling_glm46v.py
│       │   │   ├── modular_glm46v.py
│       │   │   ├── processing_glm46v.py
│       │   │   └── video_processing_glm46v.py
│       │   ├── glm4_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm4_moe.py
│       │   │   ├── modeling_glm4_moe.py
│       │   │   └── modular_glm4_moe.py
│       │   ├── glm4_moe_lite/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm4_moe_lite.py
│       │   │   ├── modeling_glm4_moe_lite.py
│       │   │   └── modular_glm4_moe_lite.py
│       │   ├── glm4v/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm4v.py
│       │   │   ├── convert_glm4v_mgt_weights_to_hf.py
│       │   │   ├── image_processing_glm4v.py
│       │   │   ├── image_processing_pil_glm4v.py
│       │   │   ├── modeling_glm4v.py
│       │   │   ├── modular_glm4v.py
│       │   │   ├── processing_glm4v.py
│       │   │   └── video_processing_glm4v.py
│       │   ├── glm4v_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm4v_moe.py
│       │   │   ├── convert_glm4v_moe_mgt_weights_to_hf.py
│       │   │   ├── modeling_glm4v_moe.py
│       │   │   └── modular_glm4v_moe.py
│       │   ├── glm_image/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm_image.py
│       │   │   ├── image_processing_glm_image.py
│       │   │   ├── image_processing_pil_glm_image.py
│       │   │   ├── modeling_glm_image.py
│       │   │   ├── modular_glm_image.py
│       │   │   └── processing_glm_image.py
│       │   ├── glm_moe_dsa/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm_moe_dsa.py
│       │   │   ├── modeling_glm_moe_dsa.py
│       │   │   └── modular_glm_moe_dsa.py
│       │   ├── glm_ocr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glm_ocr.py
│       │   │   ├── modeling_glm_ocr.py
│       │   │   └── modular_glm_ocr.py
│       │   ├── glmasr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glmasr.py
│       │   │   ├── convert_glmasr_weights_to_hf.py
│       │   │   ├── modeling_glmasr.py
│       │   │   ├── modular_glmasr.py
│       │   │   └── processing_glmasr.py
│       │   ├── glpn/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_glpn.py
│       │   │   ├── convert_glpn_to_pytorch.py
│       │   │   ├── image_processing_glpn.py
│       │   │   ├── image_processing_pil_glpn.py
│       │   │   └── modeling_glpn.py
│       │   ├── got_ocr2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_got_ocr2.py
│       │   │   ├── convert_got_ocr2_weights_to_hf.py
│       │   │   ├── image_processing_got_ocr2.py
│       │   │   ├── image_processing_pil_got_ocr2.py
│       │   │   ├── modeling_got_ocr2.py
│       │   │   ├── modular_got_ocr2.py
│       │   │   └── processing_got_ocr2.py
│       │   ├── gpt2/
│       │   │   ├── CONVERSION.md
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt2.py
│       │   │   ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_gpt2.py
│       │   │   └── tokenization_gpt2.py
│       │   ├── gpt_bigcode/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt_bigcode.py
│       │   │   └── modeling_gpt_bigcode.py
│       │   ├── gpt_neo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt_neo.py
│       │   │   ├── convert_gpt_neo_mesh_tf_to_pytorch.py
│       │   │   └── modeling_gpt_neo.py
│       │   ├── gpt_neox/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt_neox.py
│       │   │   ├── modeling_gpt_neox.py
│       │   │   ├── modular_gpt_neox.py
│       │   │   └── tokenization_gpt_neox.py
│       │   ├── gpt_neox_japanese/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt_neox_japanese.py
│       │   │   ├── modeling_gpt_neox_japanese.py
│       │   │   └── tokenization_gpt_neox_japanese.py
│       │   ├── gpt_oss/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gpt_oss.py
│       │   │   ├── convert_gpt_oss_weights_to_hf.py
│       │   │   ├── modeling_gpt_oss.py
│       │   │   └── modular_gpt_oss.py
│       │   ├── gpt_sw3/
│       │   │   ├── __init__.py
│       │   │   ├── convert_megatron_to_pytorch.py
│       │   │   └── tokenization_gpt_sw3.py
│       │   ├── gptj/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_gptj.py
│       │   │   └── modeling_gptj.py
│       │   ├── granite/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_granite.py
│       │   │   ├── modeling_granite.py
│       │   │   └── modular_granite.py
│       │   ├── granite_speech/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_granite_speech.py
│       │   │   ├── feature_extraction_granite_speech.py
│       │   │   ├── modeling_granite_speech.py
│       │   │   └── processing_granite_speech.py
│       │   ├── granitemoe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_granitemoe.py
│       │   │   ├── modeling_granitemoe.py
│       │   │   └── modular_granitemoe.py
│       │   ├── granitemoehybrid/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_granitemoehybrid.py
│       │   │   ├── modeling_granitemoehybrid.py
│       │   │   └── modular_granitemoehybrid.py
│       │   ├── granitemoeshared/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_granitemoeshared.py
│       │   │   ├── modeling_granitemoeshared.py
│       │   │   └── modular_granitemoeshared.py
│       │   ├── grounding_dino/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_grounding_dino.py
│       │   │   ├── convert_grounding_dino_to_hf.py
│       │   │   ├── image_processing_grounding_dino.py
│       │   │   ├── image_processing_pil_grounding_dino.py
│       │   │   ├── modeling_grounding_dino.py
│       │   │   ├── modular_grounding_dino.py
│       │   │   └── processing_grounding_dino.py
│       │   ├── groupvit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_groupvit.py
│       │   │   ├── convert_groupvit_nvlab_to_hf.py
│       │   │   └── modeling_groupvit.py
│       │   ├── helium/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_helium.py
│       │   │   ├── modeling_helium.py
│       │   │   └── modular_helium.py
│       │   ├── herbert/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_herbert.py
│       │   ├── hgnet_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_hgnet_v2.py
│       │   │   ├── modeling_hgnet_v2.py
│       │   │   └── modular_hgnet_v2.py
│       │   ├── hiera/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_hiera.py
│       │   │   ├── convert_hiera_to_hf.py
│       │   │   └── modeling_hiera.py
│       │   ├── higgs_audio_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_higgs_audio_v2.py
│       │   │   ├── convert_higgs_audio_v2_to_hf.py
│       │   │   ├── generation_higgs_audio_v2.py
│       │   │   ├── modeling_higgs_audio_v2.py
│       │   │   ├── modular_higgs_audio_v2.py
│       │   │   └── processing_higgs_audio_v2.py
│       │   ├── higgs_audio_v2_tokenizer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_higgs_audio_v2_tokenizer.py
│       │   │   ├── convert_higgs_audio_v2_tokenizer_to_hf.py
│       │   │   ├── modeling_higgs_audio_v2_tokenizer.py
│       │   │   └── modular_higgs_audio_v2_tokenizer.py
│       │   ├── hubert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_hubert.py
│       │   │   ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
│       │   │   ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py
│       │   │   ├── modeling_hubert.py
│       │   │   └── modular_hubert.py
│       │   ├── hunyuan_v1_dense/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_hunyuan_v1_dense.py
│       │   │   ├── modeling_hunyuan_v1_dense.py
│       │   │   └── modular_hunyuan_v1_dense.py
│       │   ├── hunyuan_v1_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_hunyuan_v1_moe.py
│       │   │   ├── modeling_hunyuan_v1_moe.py
│       │   │   └── modular_hunyuan_v1_moe.py
│       │   ├── ibert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ibert.py
│       │   │   ├── modeling_ibert.py
│       │   │   └── quant_modules.py
│       │   ├── idefics/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_idefics.py
│       │   │   ├── image_processing_idefics.py
│       │   │   ├── image_processing_pil_idefics.py
│       │   │   ├── modeling_idefics.py
│       │   │   ├── perceiver.py
│       │   │   ├── processing_idefics.py
│       │   │   └── vision.py
│       │   ├── idefics2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_idefics2.py
│       │   │   ├── convert_idefics2_weights_to_hf.py
│       │   │   ├── image_processing_idefics2.py
│       │   │   ├── image_processing_pil_idefics2.py
│       │   │   ├── modeling_idefics2.py
│       │   │   └── processing_idefics2.py
│       │   ├── idefics3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_idefics3.py
│       │   │   ├── convert_idefics3_weights_to_hf.py
│       │   │   ├── image_processing_idefics3.py
│       │   │   ├── image_processing_pil_idefics3.py
│       │   │   ├── modeling_idefics3.py
│       │   │   └── processing_idefics3.py
│       │   ├── ijepa/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ijepa.py
│       │   │   ├── convert_ijepa_to_hf.py
│       │   │   ├── modeling_ijepa.py
│       │   │   └── modular_ijepa.py
│       │   ├── imagegpt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_imagegpt.py
│       │   │   ├── convert_imagegpt_original_tf2_to_pytorch.py
│       │   │   ├── image_processing_imagegpt.py
│       │   │   ├── image_processing_pil_imagegpt.py
│       │   │   └── modeling_imagegpt.py
│       │   ├── informer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_informer.py
│       │   │   ├── modeling_informer.py
│       │   │   └── modular_informer.py
│       │   ├── instructblip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_instructblip.py
│       │   │   ├── convert_instructblip_original_to_pytorch.py
│       │   │   ├── modeling_instructblip.py
│       │   │   └── processing_instructblip.py
│       │   ├── instructblipvideo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_instructblipvideo.py
│       │   │   ├── convert_instructblipvideo_original_to_pytorch.py
│       │   │   ├── modeling_instructblipvideo.py
│       │   │   ├── modular_instructblipvideo.py
│       │   │   ├── processing_instructblipvideo.py
│       │   │   └── video_processing_instructblipvideo.py
│       │   ├── internvl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_internvl.py
│       │   │   ├── convert_internvl_weights_to_hf.py
│       │   │   ├── modeling_internvl.py
│       │   │   ├── modular_internvl.py
│       │   │   ├── processing_internvl.py
│       │   │   └── video_processing_internvl.py
│       │   ├── jais2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_jais2.py
│       │   │   ├── modeling_jais2.py
│       │   │   └── modular_jais2.py
│       │   ├── jamba/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_jamba.py
│       │   │   ├── modeling_jamba.py
│       │   │   └── modular_jamba.py
│       │   ├── janus/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_janus.py
│       │   │   ├── convert_janus_weights_to_hf.py
│       │   │   ├── image_processing_janus.py
│       │   │   ├── image_processing_pil_janus.py
│       │   │   ├── modeling_janus.py
│       │   │   ├── modular_janus.py
│       │   │   └── processing_janus.py
│       │   ├── jetmoe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_jetmoe.py
│       │   │   ├── modeling_jetmoe.py
│       │   │   └── modular_jetmoe.py
│       │   ├── jina_embeddings_v3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_jina_embeddings_v3.py
│       │   │   ├── modeling_jina_embeddings_v3.py
│       │   │   └── modular_jina_embeddings_v3.py
│       │   ├── kosmos2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_kosmos2.py
│       │   │   ├── convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_kosmos2.py
│       │   │   └── processing_kosmos2.py
│       │   ├── kosmos2_5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_kosmos2_5.py
│       │   │   ├── convert_kosmos2_5.py
│       │   │   ├── image_processing_kosmos2_5.py
│       │   │   ├── image_processing_pil_kosmos2_5.py
│       │   │   ├── modeling_kosmos2_5.py
│       │   │   └── processing_kosmos2_5.py
│       │   ├── kyutai_speech_to_text/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_kyutai_speech_to_text.py
│       │   │   ├── convert_kyutai_speech_to_text_to_hf.py
│       │   │   ├── feature_extraction_kyutai_speech_to_text.py
│       │   │   ├── modeling_kyutai_speech_to_text.py
│       │   │   ├── modular_kyutai_speech_to_text.py
│       │   │   └── processing_kyutai_speech_to_text.py
│       │   ├── lasr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lasr.py
│       │   │   ├── feature_extraction_lasr.py
│       │   │   ├── modeling_lasr.py
│       │   │   ├── modular_lasr.py
│       │   │   ├── processing_lasr.py
│       │   │   └── tokenization_lasr.py
│       │   ├── layoutlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_layoutlm.py
│       │   │   └── modeling_layoutlm.py
│       │   ├── layoutlmv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_layoutlmv2.py
│       │   │   ├── image_processing_layoutlmv2.py
│       │   │   ├── image_processing_pil_layoutlmv2.py
│       │   │   ├── modeling_layoutlmv2.py
│       │   │   ├── processing_layoutlmv2.py
│       │   │   └── tokenization_layoutlmv2.py
│       │   ├── layoutlmv3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_layoutlmv3.py
│       │   │   ├── image_processing_layoutlmv3.py
│       │   │   ├── image_processing_pil_layoutlmv3.py
│       │   │   ├── modeling_layoutlmv3.py
│       │   │   ├── processing_layoutlmv3.py
│       │   │   └── tokenization_layoutlmv3.py
│       │   ├── layoutxlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_layoutxlm.py
│       │   │   ├── modular_layoutxlm.py
│       │   │   ├── processing_layoutxlm.py
│       │   │   └── tokenization_layoutxlm.py
│       │   ├── led/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_led.py
│       │   │   └── modeling_led.py
│       │   ├── levit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_levit.py
│       │   │   ├── convert_levit_timm_to_pytorch.py
│       │   │   ├── image_processing_levit.py
│       │   │   ├── image_processing_pil_levit.py
│       │   │   └── modeling_levit.py
│       │   ├── lfm2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lfm2.py
│       │   │   ├── modeling_lfm2.py
│       │   │   └── modular_lfm2.py
│       │   ├── lfm2_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lfm2_moe.py
│       │   │   ├── modeling_lfm2_moe.py
│       │   │   └── modular_lfm2_moe.py
│       │   ├── lfm2_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lfm2_vl.py
│       │   │   ├── image_processing_lfm2_vl.py
│       │   │   ├── modeling_lfm2_vl.py
│       │   │   ├── modular_lfm2_vl.py
│       │   │   └── processing_lfm2_vl.py
│       │   ├── lightglue/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lightglue.py
│       │   │   ├── convert_lightglue_to_hf.py
│       │   │   ├── image_processing_lightglue.py
│       │   │   ├── image_processing_pil_lightglue.py
│       │   │   ├── modeling_lightglue.py
│       │   │   └── modular_lightglue.py
│       │   ├── lighton_ocr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lighton_ocr.py
│       │   │   ├── modeling_lighton_ocr.py
│       │   │   ├── modular_lighton_ocr.py
│       │   │   └── processing_lighton_ocr.py
│       │   ├── lilt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lilt.py
│       │   │   └── modeling_lilt.py
│       │   ├── llama/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llama.py
│       │   │   ├── convert_llama_weights_to_hf.py
│       │   │   ├── modeling_llama.py
│       │   │   └── tokenization_llama.py
│       │   ├── llama4/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llama4.py
│       │   │   ├── convert_llama4_weights_to_hf.py
│       │   │   ├── image_processing_llama4.py
│       │   │   ├── modeling_llama4.py
│       │   │   └── processing_llama4.py
│       │   ├── llava/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llava.py
│       │   │   ├── convert_llava_weights_to_hf.py
│       │   │   ├── image_processing_llava.py
│       │   │   ├── image_processing_pil_llava.py
│       │   │   ├── modeling_llava.py
│       │   │   └── processing_llava.py
│       │   ├── llava_next/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llava_next.py
│       │   │   ├── convert_llava_next_weights_to_hf.py
│       │   │   ├── image_processing_llava_next.py
│       │   │   ├── image_processing_pil_llava_next.py
│       │   │   ├── modeling_llava_next.py
│       │   │   └── processing_llava_next.py
│       │   ├── llava_next_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llava_next_video.py
│       │   │   ├── convert_llava_next_video_weights_to_hf.py
│       │   │   ├── modeling_llava_next_video.py
│       │   │   ├── modular_llava_next_video.py
│       │   │   ├── processing_llava_next_video.py
│       │   │   └── video_processing_llava_next_video.py
│       │   ├── llava_onevision/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_llava_onevision.py
│       │   │   ├── convert_llava_onevision_weights_to_hf.py
│       │   │   ├── image_processing_llava_onevision.py
│       │   │   ├── image_processing_pil_llava_onevision.py
│       │   │   ├── modeling_llava_onevision.py
│       │   │   ├── modular_llava_onevision.py
│       │   │   ├── processing_llava_onevision.py
│       │   │   └── video_processing_llava_onevision.py
│       │   ├── longcat_flash/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_longcat_flash.py
│       │   │   ├── modeling_longcat_flash.py
│       │   │   └── modular_longcat_flash.py
│       │   ├── longformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_longformer.py
│       │   │   ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│       │   │   └── modeling_longformer.py
│       │   ├── longt5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_longt5.py
│       │   │   └── modeling_longt5.py
│       │   ├── luke/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_luke.py
│       │   │   ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_luke.py
│       │   │   └── tokenization_luke.py
│       │   ├── lw_detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lw_detr.py
│       │   │   ├── convert_lw_detr_to_hf.py
│       │   │   ├── modeling_lw_detr.py
│       │   │   └── modular_lw_detr.py
│       │   ├── lxmert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_lxmert.py
│       │   │   ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py
│       │   │   └── modeling_lxmert.py
│       │   ├── m2m_100/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_m2m_100.py
│       │   │   ├── convert_m2m100_original_checkpoint_to_pytorch.py
│       │   │   ├── modeling_m2m_100.py
│       │   │   └── tokenization_m2m_100.py
│       │   ├── mamba/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mamba.py
│       │   │   ├── convert_mamba_ssm_checkpoint_to_pytorch.py
│       │   │   └── modeling_mamba.py
│       │   ├── mamba2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mamba2.py
│       │   │   ├── convert_mamba2_ssm_checkpoint_to_pytorch.py
│       │   │   └── modeling_mamba2.py
│       │   ├── marian/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_marian.py
│       │   │   ├── convert_marian_tatoeba_to_pytorch.py
│       │   │   ├── convert_marian_to_pytorch.py
│       │   │   ├── modeling_marian.py
│       │   │   └── tokenization_marian.py
│       │   ├── markuplm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_markuplm.py
│       │   │   ├── feature_extraction_markuplm.py
│       │   │   ├── modeling_markuplm.py
│       │   │   ├── processing_markuplm.py
│       │   │   └── tokenization_markuplm.py
│       │   ├── mask2former/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mask2former.py
│       │   │   ├── convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── image_processing_mask2former.py
│       │   │   ├── image_processing_pil_mask2former.py
│       │   │   ├── modeling_mask2former.py
│       │   │   └── modular_mask2former.py
│       │   ├── maskformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_maskformer.py
│       │   │   ├── configuration_maskformer_swin.py
│       │   │   ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_maskformer_resnet_to_pytorch.py
│       │   │   ├── convert_maskformer_swin_to_pytorch.py
│       │   │   ├── image_processing_maskformer.py
│       │   │   ├── image_processing_pil_maskformer.py
│       │   │   ├── modeling_maskformer.py
│       │   │   ├── modeling_maskformer_swin.py
│       │   │   └── modular_maskformer.py
│       │   ├── mbart/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mbart.py
│       │   │   ├── convert_mbart_original_checkpoint_to_pytorch.py
│       │   │   ├── modeling_mbart.py
│       │   │   └── tokenization_mbart.py
│       │   ├── mbart50/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_mbart50.py
│       │   ├── megatron_bert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_megatron_bert.py
│       │   │   ├── convert_megatron_bert_checkpoint.py
│       │   │   └── modeling_megatron_bert.py
│       │   ├── megatron_gpt2/
│       │   │   ├── __init__.py
│       │   │   ├── checkpoint_reshaping_and_interoperability.py
│       │   │   └── convert_megatron_gpt2_checkpoint.py
│       │   ├── metaclip_2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_metaclip_2.py
│       │   │   ├── convert_metaclip_2_to_hf.py
│       │   │   ├── modeling_metaclip_2.py
│       │   │   └── modular_metaclip_2.py
│       │   ├── mgp_str/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mgp_str.py
│       │   │   ├── modeling_mgp_str.py
│       │   │   ├── processing_mgp_str.py
│       │   │   └── tokenization_mgp_str.py
│       │   ├── mimi/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mimi.py
│       │   │   ├── convert_mimi_checkpoint_to_pytorch.py
│       │   │   └── modeling_mimi.py
│       │   ├── minimax/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_minimax.py
│       │   │   ├── modeling_minimax.py
│       │   │   └── modular_minimax.py
│       │   ├── minimax_m2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_minimax_m2.py
│       │   │   ├── modeling_minimax_m2.py
│       │   │   └── modular_minimax_m2.py
│       │   ├── ministral/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ministral.py
│       │   │   ├── modeling_ministral.py
│       │   │   └── modular_ministral.py
│       │   ├── ministral3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ministral3.py
│       │   │   ├── convert_ministral3_weights_to_hf.py
│       │   │   ├── modeling_ministral3.py
│       │   │   └── modular_ministral3.py
│       │   ├── mistral/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mistral.py
│       │   │   ├── convert_mistral_weights_to_hf.py
│       │   │   ├── modeling_mistral.py
│       │   │   └── modular_mistral.py
│       │   ├── mistral3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mistral3.py
│       │   │   ├── convert_mistral3_weights_to_hf.py
│       │   │   ├── modeling_mistral3.py
│       │   │   └── modular_mistral3.py
│       │   ├── mistral4/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mistral4.py
│       │   │   ├── convert_mistral4_weight_to_hf.py
│       │   │   ├── modeling_mistral4.py
│       │   │   └── modular_mistral4.py
│       │   ├── mixtral/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mixtral.py
│       │   │   ├── convert_mixtral_weights_to_hf.py
│       │   │   ├── modeling_mixtral.py
│       │   │   └── modular_mixtral.py
│       │   ├── mlcd/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mlcd.py
│       │   │   ├── convert_mlcd_weights_to_hf.py
│       │   │   ├── modeling_mlcd.py
│       │   │   └── modular_mlcd.py
│       │   ├── mllama/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mllama.py
│       │   │   ├── convert_mllama_weights_to_hf.py
│       │   │   ├── image_processing_mllama.py
│       │   │   ├── image_processing_pil_mllama.py
│       │   │   ├── modeling_mllama.py
│       │   │   └── processing_mllama.py
│       │   ├── mluke/
│       │   │   ├── __init__.py
│       │   │   ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── tokenization_mluke.py
│       │   ├── mm_grounding_dino/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mm_grounding_dino.py
│       │   │   ├── convert_mm_grounding_dino_to_hf.py
│       │   │   ├── modeling_mm_grounding_dino.py
│       │   │   └── modular_mm_grounding_dino.py
│       │   ├── mobilebert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mobilebert.py
│       │   │   ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_mobilebert.py
│       │   │   └── tokenization_mobilebert.py
│       │   ├── mobilenet_v1/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mobilenet_v1.py
│       │   │   ├── convert_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── image_processing_mobilenet_pil_v1.py
│       │   │   ├── image_processing_mobilenet_v1.py
│       │   │   └── modeling_mobilenet_v1.py
│       │   ├── mobilenet_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mobilenet_v2.py
│       │   │   ├── convert_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── image_processing_mobilenet_v2.py
│       │   │   ├── image_processing_pil_mobilenet_v2.py
│       │   │   └── modeling_mobilenet_v2.py
│       │   ├── mobilevit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mobilevit.py
│       │   │   ├── convert_mlcvnets_to_pytorch.py
│       │   │   ├── image_processing_mobilevit.py
│       │   │   ├── image_processing_pil_mobilevit.py
│       │   │   └── modeling_mobilevit.py
│       │   ├── mobilevitv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mobilevitv2.py
│       │   │   ├── convert_mlcvnets_to_pytorch.py
│       │   │   └── modeling_mobilevitv2.py
│       │   ├── modernbert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_modernbert.py
│       │   │   ├── modeling_modernbert.py
│       │   │   └── modular_modernbert.py
│       │   ├── modernbert_decoder/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_modernbert_decoder.py
│       │   │   ├── modeling_modernbert_decoder.py
│       │   │   └── modular_modernbert_decoder.py
│       │   ├── modernvbert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_modernvbert.py
│       │   │   ├── modeling_modernvbert.py
│       │   │   └── modular_modernvbert.py
│       │   ├── moonshine/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_moonshine.py
│       │   │   ├── convert_usefulsensors_to_hf.py
│       │   │   ├── modeling_moonshine.py
│       │   │   └── modular_moonshine.py
│       │   ├── moonshine_streaming/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_moonshine_streaming.py
│       │   │   ├── modeling_moonshine_streaming.py
│       │   │   ├── modular_moonshine_streaming.py
│       │   │   └── processing_moonshine_streaming.py
│       │   ├── moshi/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_moshi.py
│       │   │   ├── convert_moshi_transformers.py
│       │   │   └── modeling_moshi.py
│       │   ├── mpnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mpnet.py
│       │   │   ├── modeling_mpnet.py
│       │   │   └── tokenization_mpnet.py
│       │   ├── mpt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mpt.py
│       │   │   └── modeling_mpt.py
│       │   ├── mra/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mra.py
│       │   │   ├── convert_mra_pytorch_to_pytorch.py
│       │   │   └── modeling_mra.py
│       │   ├── mt5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mt5.py
│       │   │   └── modeling_mt5.py
│       │   ├── musicflamingo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_musicflamingo.py
│       │   │   ├── convert_musicflamingo_to_hf.py
│       │   │   ├── modeling_musicflamingo.py
│       │   │   ├── modular_musicflamingo.py
│       │   │   └── processing_musicflamingo.py
│       │   ├── musicgen/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_musicgen.py
│       │   │   ├── convert_musicgen_transformers.py
│       │   │   ├── modeling_musicgen.py
│       │   │   └── processing_musicgen.py
│       │   ├── musicgen_melody/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_musicgen_melody.py
│       │   │   ├── convert_musicgen_melody_transformers.py
│       │   │   ├── feature_extraction_musicgen_melody.py
│       │   │   ├── modeling_musicgen_melody.py
│       │   │   └── processing_musicgen_melody.py
│       │   ├── mvp/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_mvp.py
│       │   │   └── modeling_mvp.py
│       │   ├── myt5/
│       │   │   ├── __init__.py
│       │   │   ├── convert_myt5_original_tf_checkpoint_to_pytorch.py
│       │   │   └── tokenization_myt5.py
│       │   ├── nanochat/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_nanochat.py
│       │   │   ├── convert_nanochat_checkpoints.py
│       │   │   ├── modeling_nanochat.py
│       │   │   └── modular_nanochat.py
│       │   ├── nemotron/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_nemotron.py
│       │   │   ├── convert_nemotron_nemo_to_hf.py
│       │   │   └── modeling_nemotron.py
│       │   ├── nemotron_h/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_nemotron_h.py
│       │   │   ├── modeling_nemotron_h.py
│       │   │   └── modular_nemotron_h.py
│       │   ├── nllb/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_nllb.py
│       │   ├── nllb_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_nllb_moe.py
│       │   │   ├── convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
│       │   │   └── modeling_nllb_moe.py
│       │   ├── nougat/
│       │   │   ├── __init__.py
│       │   │   ├── convert_nougat_to_hf.py
│       │   │   ├── image_processing_nougat.py
│       │   │   ├── image_processing_pil_nougat.py
│       │   │   ├── processing_nougat.py
│       │   │   └── tokenization_nougat.py
│       │   ├── nystromformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_nystromformer.py
│       │   │   ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_nystromformer.py
│       │   ├── olmo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_olmo.py
│       │   │   ├── convert_olmo_weights_to_hf.py
│       │   │   ├── modeling_olmo.py
│       │   │   └── modular_olmo.py
│       │   ├── olmo2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_olmo2.py
│       │   │   ├── convert_olmo2_weights_to_hf.py
│       │   │   ├── modeling_olmo2.py
│       │   │   └── modular_olmo2.py
│       │   ├── olmo3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_olmo3.py
│       │   │   ├── convert_olmo3_weights_to_hf.py
│       │   │   ├── modeling_olmo3.py
│       │   │   └── modular_olmo3.py
│       │   ├── olmo_hybrid/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_olmo_hybrid.py
│       │   │   ├── convert_olmo_hybrid_weights_to_hf.py
│       │   │   ├── modeling_olmo_hybrid.py
│       │   │   └── modular_olmo_hybrid.py
│       │   ├── olmoe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_olmoe.py
│       │   │   ├── convert_olmoe_weights_to_hf.py
│       │   │   ├── modeling_olmoe.py
│       │   │   └── modular_olmoe.py
│       │   ├── omdet_turbo/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_omdet_turbo.py
│       │   │   ├── convert_omdet_turbo_to_hf.py
│       │   │   ├── modeling_omdet_turbo.py
│       │   │   └── processing_omdet_turbo.py
│       │   ├── oneformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_oneformer.py
│       │   │   ├── convert_to_hf_oneformer.py
│       │   │   ├── image_processing_oneformer.py
│       │   │   ├── image_processing_pil_oneformer.py
│       │   │   ├── modeling_oneformer.py
│       │   │   └── processing_oneformer.py
│       │   ├── openai/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_openai.py
│       │   │   ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_openai.py
│       │   │   └── tokenization_openai.py
│       │   ├── opt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_opt.py
│       │   │   ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_opt.py
│       │   ├── ovis2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_ovis2.py
│       │   │   ├── convert_ovis2_weights_to_hf.py
│       │   │   ├── image_processing_ovis2.py
│       │   │   ├── image_processing_pil_ovis2.py
│       │   │   ├── modeling_ovis2.py
│       │   │   ├── modular_ovis2.py
│       │   │   └── processing_ovis2.py
│       │   ├── owlv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_owlv2.py
│       │   │   ├── convert_owlv2_to_hf.py
│       │   │   ├── image_processing_owlv2.py
│       │   │   ├── image_processing_pil_owlv2.py
│       │   │   ├── modeling_owlv2.py
│       │   │   ├── modular_owlv2.py
│       │   │   └── processing_owlv2.py
│       │   ├── owlvit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_owlvit.py
│       │   │   ├── convert_owlvit_original_flax_to_hf.py
│       │   │   ├── image_processing_owlvit.py
│       │   │   ├── image_processing_pil_owlvit.py
│       │   │   ├── modeling_owlvit.py
│       │   │   └── processing_owlvit.py
│       │   ├── paddleocr_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_paddleocr_vl.py
│       │   │   ├── image_processing_paddleocr_vl.py
│       │   │   ├── image_processing_pil_paddleocr_vl.py
│       │   │   ├── modeling_paddleocr_vl.py
│       │   │   ├── modular_paddleocr_vl.py
│       │   │   └── processing_paddleocr_vl.py
│       │   ├── paligemma/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_paligemma.py
│       │   │   ├── convert_paligemma2_weights_to_hf.py
│       │   │   ├── convert_paligemma_weights_to_hf.py
│       │   │   ├── modeling_paligemma.py
│       │   │   └── processing_paligemma.py
│       │   ├── parakeet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_parakeet.py
│       │   │   ├── convert_nemo_to_hf.py
│       │   │   ├── feature_extraction_parakeet.py
│       │   │   ├── modeling_parakeet.py
│       │   │   ├── modular_parakeet.py
│       │   │   ├── processing_parakeet.py
│       │   │   └── tokenization_parakeet.py
│       │   ├── patchtsmixer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_patchtsmixer.py
│       │   │   └── modeling_patchtsmixer.py
│       │   ├── patchtst/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_patchtst.py
│       │   │   └── modeling_patchtst.py
│       │   ├── pe_audio/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pe_audio.py
│       │   │   ├── feature_extraction_pe_audio.py
│       │   │   ├── modeling_pe_audio.py
│       │   │   ├── modular_pe_audio.py
│       │   │   └── processing_pe_audio.py
│       │   ├── pe_audio_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pe_audio_video.py
│       │   │   ├── convert_pe_audio_video_to_hf.py
│       │   │   ├── modeling_pe_audio_video.py
│       │   │   ├── modular_pe_audio_video.py
│       │   │   └── processing_pe_audio_video.py
│       │   ├── pe_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pe_video.py
│       │   │   ├── modeling_pe_video.py
│       │   │   ├── modular_pe_video.py
│       │   │   ├── processing_pe_video.py
│       │   │   └── video_processing_pe_video.py
│       │   ├── pegasus/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pegasus.py
│       │   │   ├── convert_pegasus_tf_to_pytorch.py
│       │   │   ├── modeling_pegasus.py
│       │   │   └── tokenization_pegasus.py
│       │   ├── pegasus_x/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pegasus_x.py
│       │   │   └── modeling_pegasus_x.py
│       │   ├── perceiver/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_perceiver.py
│       │   │   ├── convert_perceiver_haiku_to_pytorch.py
│       │   │   ├── image_processing_perceiver.py
│       │   │   ├── image_processing_pil_perceiver.py
│       │   │   ├── modeling_perceiver.py
│       │   │   └── tokenization_perceiver.py
│       │   ├── perception_lm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_perception_lm.py
│       │   │   ├── convert_perception_lm_weights_to_hf.py
│       │   │   ├── image_processing_perception_lm.py
│       │   │   ├── modeling_perception_lm.py
│       │   │   ├── modular_perception_lm.py
│       │   │   ├── processing_perception_lm.py
│       │   │   └── video_processing_perception_lm.py
│       │   ├── persimmon/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_persimmon.py
│       │   │   ├── convert_persimmon_weights_to_hf.py
│       │   │   └── modeling_persimmon.py
│       │   ├── phi/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_phi.py
│       │   │   ├── convert_phi_weights_to_hf.py
│       │   │   ├── modeling_phi.py
│       │   │   └── modular_phi.py
│       │   ├── phi3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_phi3.py
│       │   │   ├── modeling_phi3.py
│       │   │   └── modular_phi3.py
│       │   ├── phi4_multimodal/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_phi4_multimodal.py
│       │   │   ├── convert_phi4_multimodal_weights_to_hf.py
│       │   │   ├── feature_extraction_phi4_multimodal.py
│       │   │   ├── image_processing_phi4_multimodal.py
│       │   │   ├── modeling_phi4_multimodal.py
│       │   │   ├── modular_phi4_multimodal.py
│       │   │   └── processing_phi4_multimodal.py
│       │   ├── phimoe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_phimoe.py
│       │   │   ├── modeling_phimoe.py
│       │   │   └── modular_phimoe.py
│       │   ├── phobert/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_phobert.py
│       │   ├── pi0/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pi0.py
│       │   │   ├── image_processing_pi0.py
│       │   │   ├── modeling_pi0.py
│       │   │   ├── modular_pi0.py
│       │   │   └── processing_pi0.py
│       │   ├── pix2struct/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pix2struct.py
│       │   │   ├── convert_pix2struct_original_pytorch_to_hf.py
│       │   │   ├── image_processing_pil_pix2struct.py
│       │   │   ├── image_processing_pix2struct.py
│       │   │   ├── modeling_pix2struct.py
│       │   │   └── processing_pix2struct.py
│       │   ├── pixio/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pixio.py
│       │   │   ├── convert_pixio_to_pytorch.py
│       │   │   ├── modeling_pixio.py
│       │   │   └── modular_pixio.py
│       │   ├── pixtral/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pixtral.py
│       │   │   ├── convert_pixtral_weights_to_hf.py
│       │   │   ├── image_processing_pil_pixtral.py
│       │   │   ├── image_processing_pixtral.py
│       │   │   ├── modeling_pixtral.py
│       │   │   └── processing_pixtral.py
│       │   ├── plbart/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_plbart.py
│       │   │   ├── convert_plbart_original_checkpoint_to_torch.py
│       │   │   ├── modeling_plbart.py
│       │   │   ├── modular_plbart.py
│       │   │   └── tokenization_plbart.py
│       │   ├── poolformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_poolformer.py
│       │   │   ├── convert_poolformer_original_to_pytorch.py
│       │   │   ├── image_processing_pil_poolformer.py
│       │   │   ├── image_processing_poolformer.py
│       │   │   └── modeling_poolformer.py
│       │   ├── pop2piano/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pop2piano.py
│       │   │   ├── convert_pop2piano_weights_to_hf.py
│       │   │   ├── feature_extraction_pop2piano.py
│       │   │   ├── modeling_pop2piano.py
│       │   │   ├── processing_pop2piano.py
│       │   │   └── tokenization_pop2piano.py
│       │   ├── pp_chart2table/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_chart2table.py
│       │   │   ├── image_processing_pil_pp_chart2table.py
│       │   │   ├── image_processing_pp_chart2table.py
│       │   │   ├── modular_pp_chart2table.py
│       │   │   └── processing_pp_chart2table.py
│       │   ├── pp_doclayout_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_doclayout_v2.py
│       │   │   ├── image_processing_pp_doclayout_v2.py
│       │   │   ├── modeling_pp_doclayout_v2.py
│       │   │   └── modular_pp_doclayout_v2.py
│       │   ├── pp_doclayout_v3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_doclayout_v3.py
│       │   │   ├── image_processing_pp_doclayout_v3.py
│       │   │   ├── modeling_pp_doclayout_v3.py
│       │   │   └── modular_pp_doclayout_v3.py
│       │   ├── pp_lcnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_lcnet.py
│       │   │   ├── image_processing_pp_lcnet.py
│       │   │   ├── modeling_pp_lcnet.py
│       │   │   └── modular_pp_lcnet.py
│       │   ├── pp_lcnet_v3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_lcnet_v3.py
│       │   │   ├── modeling_pp_lcnet_v3.py
│       │   │   └── modular_pp_lcnet_v3.py
│       │   ├── pp_ocrv5_mobile_det/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_ocrv5_mobile_det.py
│       │   │   ├── modeling_pp_ocrv5_mobile_det.py
│       │   │   └── modular_pp_ocrv5_mobile_det.py
│       │   ├── pp_ocrv5_mobile_rec/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_ocrv5_mobile_rec.py
│       │   │   ├── modeling_pp_ocrv5_mobile_rec.py
│       │   │   └── modular_pp_ocrv5_mobile_rec.py
│       │   ├── pp_ocrv5_server_det/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_ocrv5_server_det.py
│       │   │   ├── image_processing_pp_ocrv5_server_det.py
│       │   │   ├── modeling_pp_ocrv5_server_det.py
│       │   │   └── modular_pp_ocrv5_server_det.py
│       │   ├── pp_ocrv5_server_rec/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pp_ocrv5_server_rec.py
│       │   │   ├── image_processing_pp_ocrv5_server_rec.py
│       │   │   ├── modeling_pp_ocrv5_server_rec.py
│       │   │   └── modular_pp_ocrv5_server_rec.py
│       │   ├── prompt_depth_anything/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_prompt_depth_anything.py
│       │   │   ├── convert_prompt_depth_anything_to_hf.py
│       │   │   ├── image_processing_pil_prompt_depth_anything.py
│       │   │   ├── image_processing_prompt_depth_anything.py
│       │   │   ├── modeling_prompt_depth_anything.py
│       │   │   └── modular_prompt_depth_anything.py
│       │   ├── prophetnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_prophetnet.py
│       │   │   ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_prophetnet.py
│       │   │   └── tokenization_prophetnet.py
│       │   ├── pvt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pvt.py
│       │   │   ├── convert_pvt_to_pytorch.py
│       │   │   ├── image_processing_pil_pvt.py
│       │   │   ├── image_processing_pvt.py
│       │   │   └── modeling_pvt.py
│       │   ├── pvt_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_pvt_v2.py
│       │   │   ├── convert_pvt_v2_to_pytorch.py
│       │   │   └── modeling_pvt_v2.py
│       │   ├── qwen2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2.py
│       │   │   ├── modeling_qwen2.py
│       │   │   ├── modular_qwen2.py
│       │   │   └── tokenization_qwen2.py
│       │   ├── qwen2_5_omni/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2_5_omni.py
│       │   │   ├── modeling_qwen2_5_omni.py
│       │   │   ├── modular_qwen2_5_omni.py
│       │   │   └── processing_qwen2_5_omni.py
│       │   ├── qwen2_5_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2_5_vl.py
│       │   │   ├── modeling_qwen2_5_vl.py
│       │   │   ├── modular_qwen2_5_vl.py
│       │   │   └── processing_qwen2_5_vl.py
│       │   ├── qwen2_audio/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2_audio.py
│       │   │   ├── modeling_qwen2_audio.py
│       │   │   └── processing_qwen2_audio.py
│       │   ├── qwen2_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2_moe.py
│       │   │   ├── modeling_qwen2_moe.py
│       │   │   └── modular_qwen2_moe.py
│       │   ├── qwen2_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen2_vl.py
│       │   │   ├── image_processing_pil_qwen2_vl.py
│       │   │   ├── image_processing_qwen2_vl.py
│       │   │   ├── modeling_qwen2_vl.py
│       │   │   ├── processing_qwen2_vl.py
│       │   │   └── video_processing_qwen2_vl.py
│       │   ├── qwen3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3.py
│       │   │   ├── modeling_qwen3.py
│       │   │   └── modular_qwen3.py
│       │   ├── qwen3_5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_5.py
│       │   │   ├── modeling_qwen3_5.py
│       │   │   ├── modular_qwen3_5.py
│       │   │   └── tokenization_qwen3_5.py
│       │   ├── qwen3_5_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_5_moe.py
│       │   │   ├── modeling_qwen3_5_moe.py
│       │   │   └── modular_qwen3_5_moe.py
│       │   ├── qwen3_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_moe.py
│       │   │   ├── modeling_qwen3_moe.py
│       │   │   └── modular_qwen3_moe.py
│       │   ├── qwen3_next/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_next.py
│       │   │   ├── modeling_qwen3_next.py
│       │   │   └── modular_qwen3_next.py
│       │   ├── qwen3_omni_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_omni_moe.py
│       │   │   ├── modeling_qwen3_omni_moe.py
│       │   │   ├── modular_qwen3_omni_moe.py
│       │   │   └── processing_qwen3_omni_moe.py
│       │   ├── qwen3_vl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_vl.py
│       │   │   ├── modeling_qwen3_vl.py
│       │   │   ├── modular_qwen3_vl.py
│       │   │   ├── processing_qwen3_vl.py
│       │   │   └── video_processing_qwen3_vl.py
│       │   ├── qwen3_vl_moe/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_qwen3_vl_moe.py
│       │   │   ├── modeling_qwen3_vl_moe.py
│       │   │   └── modular_qwen3_vl_moe.py
│       │   ├── rag/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_rag.py
│       │   │   ├── modeling_rag.py
│       │   │   ├── retrieval_rag.py
│       │   │   └── tokenization_rag.py
│       │   ├── recurrent_gemma/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_recurrent_gemma.py
│       │   │   ├── convert_recurrent_gemma_to_hf.py
│       │   │   └── modeling_recurrent_gemma.py
│       │   ├── reformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_reformer.py
│       │   │   ├── convert_reformer_trax_checkpoint_to_pytorch.py
│       │   │   ├── modeling_reformer.py
│       │   │   └── tokenization_reformer.py
│       │   ├── regnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_regnet.py
│       │   │   ├── convert_regnet_seer_10b_to_pytorch.py
│       │   │   ├── convert_regnet_to_pytorch.py
│       │   │   └── modeling_regnet.py
│       │   ├── rembert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_rembert.py
│       │   │   ├── convert_rembert_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_rembert.py
│       │   │   └── tokenization_rembert.py
│       │   ├── resnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_resnet.py
│       │   │   ├── convert_resnet_to_pytorch.py
│       │   │   └── modeling_resnet.py
│       │   ├── roberta/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_roberta.py
│       │   │   ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_roberta.py
│       │   │   ├── modular_roberta.py
│       │   │   ├── tokenization_roberta.py
│       │   │   └── tokenization_roberta_old.py
│       │   ├── roberta_prelayernorm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_roberta_prelayernorm.py
│       │   │   ├── convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_roberta_prelayernorm.py
│       │   ├── roc_bert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_roc_bert.py
│       │   │   ├── modeling_roc_bert.py
│       │   │   └── tokenization_roc_bert.py
│       │   ├── roformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_roformer.py
│       │   │   ├── convert_roformer_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_roformer.py
│       │   │   ├── tokenization_roformer.py
│       │   │   └── tokenization_utils.py
│       │   ├── rt_detr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_rt_detr.py
│       │   │   ├── configuration_rt_detr_resnet.py
│       │   │   ├── convert_rt_detr_original_pytorch_checkpoint_to_hf.py
│       │   │   ├── image_processing_pil_rt_detr.py
│       │   │   ├── image_processing_rt_detr.py
│       │   │   ├── modeling_rt_detr.py
│       │   │   ├── modeling_rt_detr_resnet.py
│       │   │   └── modular_rt_detr.py
│       │   ├── rt_detr_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_rt_detr_v2.py
│       │   │   ├── convert_rt_detr_v2_weights_to_hf.py
│       │   │   ├── modeling_rt_detr_v2.py
│       │   │   └── modular_rt_detr_v2.py
│       │   ├── rwkv/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_rwkv.py
│       │   │   ├── convert_rwkv_checkpoint_to_hf.py
│       │   │   └── modeling_rwkv.py
│       │   ├── sam/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam.py
│       │   │   ├── convert_sam_to_hf.py
│       │   │   ├── image_processing_pil_sam.py
│       │   │   ├── image_processing_sam.py
│       │   │   ├── modeling_sam.py
│       │   │   └── processing_sam.py
│       │   ├── sam2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam2.py
│       │   │   ├── convert_sam2_to_hf.py
│       │   │   ├── image_processing_sam2.py
│       │   │   ├── modeling_sam2.py
│       │   │   ├── modular_sam2.py
│       │   │   └── processing_sam2.py
│       │   ├── sam2_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam2_video.py
│       │   │   ├── convert_sam2_video_to_hf.py
│       │   │   ├── modeling_sam2_video.py
│       │   │   ├── modular_sam2_video.py
│       │   │   ├── processing_sam2_video.py
│       │   │   └── video_processing_sam2_video.py
│       │   ├── sam3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam3.py
│       │   │   ├── convert_sam3_to_hf.py
│       │   │   ├── image_processing_sam3.py
│       │   │   ├── modeling_sam3.py
│       │   │   ├── modular_sam3.py
│       │   │   └── processing_sam3.py
│       │   ├── sam3_tracker/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam3_tracker.py
│       │   │   ├── modeling_sam3_tracker.py
│       │   │   ├── modular_sam3_tracker.py
│       │   │   └── processing_sam3_tracker.py
│       │   ├── sam3_tracker_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam3_tracker_video.py
│       │   │   ├── modeling_sam3_tracker_video.py
│       │   │   ├── modular_sam3_tracker_video.py
│       │   │   └── processing_sam3_tracker_video.py
│       │   ├── sam3_video/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam3_video.py
│       │   │   ├── convert_sam3_video_to_hf.py
│       │   │   ├── modeling_sam3_video.py
│       │   │   └── processing_sam3_video.py
│       │   ├── sam_hq/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sam_hq.py
│       │   │   ├── convert_samhq_to_hf.py
│       │   │   ├── modeling_sam_hq.py
│       │   │   ├── modular_sam_hq.py
│       │   │   └── processing_sam_hq.py
│       │   ├── seamless_m4t/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_seamless_m4t.py
│       │   │   ├── convert_fairseq2_to_hf.py
│       │   │   ├── feature_extraction_seamless_m4t.py
│       │   │   ├── modeling_seamless_m4t.py
│       │   │   ├── processing_seamless_m4t.py
│       │   │   └── tokenization_seamless_m4t.py
│       │   ├── seamless_m4t_v2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_seamless_m4t_v2.py
│       │   │   ├── convert_fairseq2_to_hf.py
│       │   │   └── modeling_seamless_m4t_v2.py
│       │   ├── seed_oss/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_seed_oss.py
│       │   │   ├── modeling_seed_oss.py
│       │   │   └── modular_seed_oss.py
│       │   ├── segformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_segformer.py
│       │   │   ├── convert_segformer_original_to_pytorch.py
│       │   │   ├── image_processing_pil_segformer.py
│       │   │   ├── image_processing_segformer.py
│       │   │   ├── modeling_segformer.py
│       │   │   └── modular_segformer.py
│       │   ├── seggpt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_seggpt.py
│       │   │   ├── convert_seggpt_to_hf.py
│       │   │   ├── image_processing_pil_seggpt.py
│       │   │   ├── image_processing_seggpt.py
│       │   │   └── modeling_seggpt.py
│       │   ├── sew/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sew.py
│       │   │   ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_sew.py
│       │   │   └── modular_sew.py
│       │   ├── sew_d/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_sew_d.py
│       │   │   ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_sew_d.py
│       │   ├── shieldgemma2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_shieldgemma2.py
│       │   │   ├── convert_shieldgemma2_weights_orbax_to_hf.py
│       │   │   ├── modeling_shieldgemma2.py
│       │   │   └── processing_shieldgemma2.py
│       │   ├── siglip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_siglip.py
│       │   │   ├── convert_siglip_to_hf.py
│       │   │   ├── image_processing_pil_siglip.py
│       │   │   ├── image_processing_siglip.py
│       │   │   ├── modeling_siglip.py
│       │   │   ├── processing_siglip.py
│       │   │   └── tokenization_siglip.py
│       │   ├── siglip2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_siglip2.py
│       │   │   ├── convert_siglip2_to_hf.py
│       │   │   ├── image_processing_pil_siglip2.py
│       │   │   ├── image_processing_siglip2.py
│       │   │   ├── modeling_siglip2.py
│       │   │   ├── modular_siglip2.py
│       │   │   ├── processing_siglip2.py
│       │   │   └── tokenization_siglip2.py
│       │   ├── slanext/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_slanext.py
│       │   │   ├── image_processing_slanext.py
│       │   │   ├── modeling_slanext.py
│       │   │   └── modular_slanext.py
│       │   ├── smollm3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_smollm3.py
│       │   │   ├── modeling_smollm3.py
│       │   │   └── modular_smollm3.py
│       │   ├── smolvlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_smolvlm.py
│       │   │   ├── image_processing_pil_smolvlm.py
│       │   │   ├── image_processing_smolvlm.py
│       │   │   ├── modeling_smolvlm.py
│       │   │   ├── modular_smolvlm.py
│       │   │   ├── processing_smolvlm.py
│       │   │   └── video_processing_smolvlm.py
│       │   ├── solar_open/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_solar_open.py
│       │   │   ├── modeling_solar_open.py
│       │   │   └── modular_solar_open.py
│       │   ├── speech_encoder_decoder/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_speech_encoder_decoder.py
│       │   │   ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
│       │   │   ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
│       │   │   └── modeling_speech_encoder_decoder.py
│       │   ├── speech_to_text/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_speech_to_text.py
│       │   │   ├── convert_s2t_fairseq_to_tfms.py
│       │   │   ├── feature_extraction_speech_to_text.py
│       │   │   ├── modeling_speech_to_text.py
│       │   │   ├── processing_speech_to_text.py
│       │   │   └── tokenization_speech_to_text.py
│       │   ├── speecht5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_speecht5.py
│       │   │   ├── convert_hifigan.py
│       │   │   ├── convert_speecht5_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── feature_extraction_speecht5.py
│       │   │   ├── modeling_speecht5.py
│       │   │   ├── number_normalizer.py
│       │   │   ├── processing_speecht5.py
│       │   │   └── tokenization_speecht5.py
│       │   ├── splinter/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_splinter.py
│       │   │   ├── modeling_splinter.py
│       │   │   └── tokenization_splinter.py
│       │   ├── squeezebert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_squeezebert.py
│       │   │   ├── modeling_squeezebert.py
│       │   │   └── tokenization_squeezebert.py
│       │   ├── stablelm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_stablelm.py
│       │   │   └── modeling_stablelm.py
│       │   ├── starcoder2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_starcoder2.py
│       │   │   ├── modeling_starcoder2.py
│       │   │   └── modular_starcoder2.py
│       │   ├── superglue/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_superglue.py
│       │   │   ├── convert_superglue_to_hf.py
│       │   │   ├── image_processing_pil_superglue.py
│       │   │   ├── image_processing_superglue.py
│       │   │   └── modeling_superglue.py
│       │   ├── superpoint/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_superpoint.py
│       │   │   ├── convert_superpoint_to_pytorch.py
│       │   │   ├── image_processing_pil_superpoint.py
│       │   │   ├── image_processing_superpoint.py
│       │   │   └── modeling_superpoint.py
│       │   ├── swiftformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_swiftformer.py
│       │   │   ├── convert_swiftformer_original_to_hf.py
│       │   │   └── modeling_swiftformer.py
│       │   ├── swin/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_swin.py
│       │   │   ├── convert_swin_simmim_to_pytorch.py
│       │   │   ├── convert_swin_timm_to_pytorch.py
│       │   │   └── modeling_swin.py
│       │   ├── swin2sr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_swin2sr.py
│       │   │   ├── convert_swin2sr_original_to_pytorch.py
│       │   │   ├── image_processing_pil_swin2sr.py
│       │   │   ├── image_processing_swin2sr.py
│       │   │   └── modeling_swin2sr.py
│       │   ├── swinv2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_swinv2.py
│       │   │   ├── convert_swinv2_timm_to_pytorch.py
│       │   │   └── modeling_swinv2.py
│       │   ├── switch_transformers/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_switch_transformers.py
│       │   │   ├── convert_big_switch.py
│       │   │   ├── convert_switch_transformers_original_flax_checkpoint_to_pytorch.py
│       │   │   ├── modeling_switch_transformers.py
│       │   │   └── modular_switch_transformers.py
│       │   ├── t5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_t5.py
│       │   │   ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── convert_t5x_checkpoint_to_pytorch.py
│       │   │   ├── download_from_gcp.sh
│       │   │   ├── modeling_t5.py
│       │   │   └── tokenization_t5.py
│       │   ├── t5gemma/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_t5gemma.py
│       │   │   ├── modeling_t5gemma.py
│       │   │   └── modular_t5gemma.py
│       │   ├── t5gemma2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_t5gemma2.py
│       │   │   ├── modeling_t5gemma2.py
│       │   │   └── modular_t5gemma2.py
│       │   ├── table_transformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_table_transformer.py
│       │   │   ├── convert_table_transformer_to_hf.py
│       │   │   ├── convert_table_transformer_to_hf_no_timm.py
│       │   │   └── modeling_table_transformer.py
│       │   ├── tapas/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_tapas.py
│       │   │   ├── convert_tapas_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_tapas.py
│       │   │   └── tokenization_tapas.py
│       │   ├── textnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_textnet.py
│       │   │   ├── convert_textnet_to_hf.py
│       │   │   ├── image_processing_pil_textnet.py
│       │   │   ├── image_processing_textnet.py
│       │   │   └── modeling_textnet.py
│       │   ├── time_series_transformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_time_series_transformer.py
│       │   │   └── modeling_time_series_transformer.py
│       │   ├── timesfm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_timesfm.py
│       │   │   ├── convert_timesfm_orignal_to_hf.py
│       │   │   ├── modeling_timesfm.py
│       │   │   └── modular_timesfm.py
│       │   ├── timesfm2_5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_timesfm2_5.py
│       │   │   ├── convert_timesfm2_5_original_to_hf.py
│       │   │   ├── modeling_timesfm2_5.py
│       │   │   └── modular_timesfm2_5.py
│       │   ├── timesformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_timesformer.py
│       │   │   ├── convert_timesformer_to_pytorch.py
│       │   │   └── modeling_timesformer.py
│       │   ├── timm_backbone/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_timm_backbone.py
│       │   │   └── modeling_timm_backbone.py
│       │   ├── timm_wrapper/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_timm_wrapper.py
│       │   │   ├── image_processing_timm_wrapper.py
│       │   │   └── modeling_timm_wrapper.py
│       │   ├── trocr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_trocr.py
│       │   │   ├── convert_trocr_unilm_to_pytorch.py
│       │   │   ├── modeling_trocr.py
│       │   │   └── processing_trocr.py
│       │   ├── tvp/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_tvp.py
│       │   │   ├── image_processing_pil_tvp.py
│       │   │   ├── image_processing_tvp.py
│       │   │   ├── modeling_tvp.py
│       │   │   └── processing_tvp.py
│       │   ├── udop/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_udop.py
│       │   │   ├── convert_udop_to_hf.py
│       │   │   ├── modeling_udop.py
│       │   │   ├── processing_udop.py
│       │   │   └── tokenization_udop.py
│       │   ├── umt5/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_umt5.py
│       │   │   ├── convert_umt5_checkpoint_to_pytorch.py
│       │   │   └── modeling_umt5.py
│       │   ├── unispeech/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_unispeech.py
│       │   │   ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_unispeech.py
│       │   │   └── modular_unispeech.py
│       │   ├── unispeech_sat/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_unispeech_sat.py
│       │   │   ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py
│       │   │   ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_unispeech_sat.py
│       │   │   └── modular_unispeech_sat.py
│       │   ├── univnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_univnet.py
│       │   │   ├── convert_univnet.py
│       │   │   ├── feature_extraction_univnet.py
│       │   │   └── modeling_univnet.py
│       │   ├── upernet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_upernet.py
│       │   │   ├── convert_convnext_upernet_to_pytorch.py
│       │   │   ├── convert_swin_upernet_to_pytorch.py
│       │   │   └── modeling_upernet.py
│       │   ├── uvdoc/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_uvdoc.py
│       │   │   ├── image_processing_uvdoc.py
│       │   │   ├── modeling_uvdoc.py
│       │   │   └── modular_uvdoc.py
│       │   ├── vaultgemma/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vaultgemma.py
│       │   │   ├── modeling_vaultgemma.py
│       │   │   └── modular_vaultgemma.py
│       │   ├── vibevoice_acoustic_tokenizer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vibevoice_acoustic_tokenizer.py
│       │   │   ├── convert_vibevoice_acoustic_tokenizer_to_hf.py
│       │   │   ├── feature_extraction_vibevoice_acoustic_tokenizer.py
│       │   │   ├── modeling_vibevoice_acoustic_tokenizer.py
│       │   │   └── modular_vibevoice_acoustic_tokenizer.py
│       │   ├── vibevoice_asr/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vibevoice_asr.py
│       │   │   ├── convert_vibevoice_asr_to_hf.py
│       │   │   ├── modeling_vibevoice_asr.py
│       │   │   ├── modular_vibevoice_asr.py
│       │   │   └── processing_vibevoice_asr.py
│       │   ├── video_llama_3/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_video_llama_3.py
│       │   │   ├── image_processing_pil_video_llama_3.py
│       │   │   ├── image_processing_video_llama_3.py
│       │   │   ├── modeling_video_llama_3.py
│       │   │   ├── modular_video_llama_3.py
│       │   │   ├── processing_video_llama_3.py
│       │   │   └── video_processing_video_llama_3.py
│       │   ├── video_llava/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_video_llava.py
│       │   │   ├── convert_video_llava_weights_to_hf.py
│       │   │   ├── image_processing_video_llava.py
│       │   │   ├── modeling_video_llava.py
│       │   │   ├── processing_video_llava.py
│       │   │   └── video_processing_video_llava.py
│       │   ├── videomae/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_videomae.py
│       │   │   ├── convert_videomae_to_pytorch.py
│       │   │   ├── image_processing_pil_videomae.py
│       │   │   ├── image_processing_videomae.py
│       │   │   ├── modeling_videomae.py
│       │   │   └── video_processing_videomae.py
│       │   ├── videomt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_videomt.py
│       │   │   ├── convert_videomt_to_hf.py
│       │   │   ├── modeling_videomt.py
│       │   │   ├── modular_videomt.py
│       │   │   └── video_processing_videomt.py
│       │   ├── vilt/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vilt.py
│       │   │   ├── convert_vilt_original_to_pytorch.py
│       │   │   ├── image_processing_pil_vilt.py
│       │   │   ├── image_processing_vilt.py
│       │   │   ├── modeling_vilt.py
│       │   │   └── processing_vilt.py
│       │   ├── vipllava/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vipllava.py
│       │   │   ├── convert_vipllava_weights_to_hf.py
│       │   │   ├── modeling_vipllava.py
│       │   │   └── modular_vipllava.py
│       │   ├── vision_encoder_decoder/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vision_encoder_decoder.py
│       │   │   └── modeling_vision_encoder_decoder.py
│       │   ├── vision_text_dual_encoder/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vision_text_dual_encoder.py
│       │   │   ├── modeling_vision_text_dual_encoder.py
│       │   │   └── processing_vision_text_dual_encoder.py
│       │   ├── visual_bert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_visual_bert.py
│       │   │   ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_visual_bert.py
│       │   ├── vit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vit.py
│       │   │   ├── convert_dino_to_pytorch.py
│       │   │   ├── convert_vit_timm_to_pytorch.py
│       │   │   ├── image_processing_pil_vit.py
│       │   │   ├── image_processing_vit.py
│       │   │   └── modeling_vit.py
│       │   ├── vit_mae/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vit_mae.py
│       │   │   ├── convert_vit_mae_to_pytorch.py
│       │   │   └── modeling_vit_mae.py
│       │   ├── vit_msn/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vit_msn.py
│       │   │   ├── convert_msn_to_pytorch.py
│       │   │   └── modeling_vit_msn.py
│       │   ├── vitdet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vitdet.py
│       │   │   └── modeling_vitdet.py
│       │   ├── vitmatte/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vitmatte.py
│       │   │   ├── convert_vitmatte_to_hf.py
│       │   │   ├── image_processing_pil_vitmatte.py
│       │   │   ├── image_processing_vitmatte.py
│       │   │   └── modeling_vitmatte.py
│       │   ├── vitpose/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vitpose.py
│       │   │   ├── convert_vitpose_to_hf.py
│       │   │   ├── image_processing_pil_vitpose.py
│       │   │   ├── image_processing_vitpose.py
│       │   │   └── modeling_vitpose.py
│       │   ├── vitpose_backbone/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vitpose_backbone.py
│       │   │   └── modeling_vitpose_backbone.py
│       │   ├── vits/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vits.py
│       │   │   ├── convert_original_checkpoint.py
│       │   │   ├── modeling_vits.py
│       │   │   └── tokenization_vits.py
│       │   ├── vivit/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vivit.py
│       │   │   ├── convert_vivit_flax_to_pytorch.py
│       │   │   ├── image_processing_vivit.py
│       │   │   └── modeling_vivit.py
│       │   ├── vjepa2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_vjepa2.py
│       │   │   ├── convert_vjepa2_classifier_to_hf.py
│       │   │   ├── convert_vjepa2_to_hf.py
│       │   │   ├── modeling_vjepa2.py
│       │   │   └── video_processing_vjepa2.py
│       │   ├── voxtral/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_voxtral.py
│       │   │   ├── convert_voxtral_weights_to_hf.py
│       │   │   ├── modeling_voxtral.py
│       │   │   ├── modular_voxtral.py
│       │   │   └── processing_voxtral.py
│       │   ├── voxtral_realtime/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_voxtral_realtime.py
│       │   │   ├── convert_voxtral_realtime_weights_to_hf.py
│       │   │   ├── feature_extraction_voxtral_realtime.py
│       │   │   ├── modeling_voxtral_realtime.py
│       │   │   ├── modular_voxtral_realtime.py
│       │   │   └── processing_voxtral_realtime.py
│       │   ├── wav2vec2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_wav2vec2.py
│       │   │   ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py
│       │   │   ├── feature_extraction_wav2vec2.py
│       │   │   ├── modeling_wav2vec2.py
│       │   │   ├── processing_wav2vec2.py
│       │   │   └── tokenization_wav2vec2.py
│       │   ├── wav2vec2_bert/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_wav2vec2_bert.py
│       │   │   ├── convert_wav2vec2_seamless_checkpoint.py
│       │   │   ├── modeling_wav2vec2_bert.py
│       │   │   ├── modular_wav2vec2_bert.py
│       │   │   └── processing_wav2vec2_bert.py
│       │   ├── wav2vec2_conformer/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_wav2vec2_conformer.py
│       │   │   ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_wav2vec2_conformer.py
│       │   │   └── modular_wav2vec2_conformer.py
│       │   ├── wav2vec2_phoneme/
│       │   │   ├── __init__.py
│       │   │   └── tokenization_wav2vec2_phoneme.py
│       │   ├── wav2vec2_with_lm/
│       │   │   ├── __init__.py
│       │   │   └── processing_wav2vec2_with_lm.py
│       │   ├── wavlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_wavlm.py
│       │   │   ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py
│       │   │   ├── modeling_wavlm.py
│       │   │   └── modular_wavlm.py
│       │   ├── whisper/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_whisper.py
│       │   │   ├── convert_openai_to_hf.py
│       │   │   ├── english_normalizer.py
│       │   │   ├── feature_extraction_whisper.py
│       │   │   ├── generation_whisper.py
│       │   │   ├── modeling_whisper.py
│       │   │   ├── processing_whisper.py
│       │   │   └── tokenization_whisper.py
│       │   ├── x_clip/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_x_clip.py
│       │   │   ├── convert_x_clip_original_pytorch_to_hf.py
│       │   │   ├── modeling_x_clip.py
│       │   │   └── processing_x_clip.py
│       │   ├── xcodec/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xcodec.py
│       │   │   ├── convert_xcodec_weights_to_hf.py
│       │   │   └── modeling_xcodec.py
│       │   ├── xglm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xglm.py
│       │   │   ├── convert_xglm_original_ckpt_to_trfms.py
│       │   │   ├── modeling_xglm.py
│       │   │   └── tokenization_xglm.py
│       │   ├── xlm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xlm.py
│       │   │   ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_xlm.py
│       │   │   └── tokenization_xlm.py
│       │   ├── xlm_roberta/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xlm_roberta.py
│       │   │   ├── modeling_xlm_roberta.py
│       │   │   ├── modular_xlm_roberta.py
│       │   │   └── tokenization_xlm_roberta.py
│       │   ├── xlm_roberta_xl/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xlm_roberta_xl.py
│       │   │   ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py
│       │   │   ├── modeling_xlm_roberta_xl.py
│       │   │   └── modular_xlm_roberta_xl.py
│       │   ├── xlnet/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xlnet.py
│       │   │   ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│       │   │   ├── modeling_xlnet.py
│       │   │   └── tokenization_xlnet.py
│       │   ├── xlstm/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xlstm.py
│       │   │   └── modeling_xlstm.py
│       │   ├── xmod/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_xmod.py
│       │   │   ├── convert_xmod_original_pytorch_checkpoint_to_pytorch.py
│       │   │   └── modeling_xmod.py
│       │   ├── yolos/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_yolos.py
│       │   │   ├── convert_yolos_to_pytorch.py
│       │   │   ├── image_processing_pil_yolos.py
│       │   │   ├── image_processing_yolos.py
│       │   │   ├── modeling_yolos.py
│       │   │   └── modular_yolos.py
│       │   ├── yoso/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_yoso.py
│       │   │   ├── convert_yoso_pytorch_to_pytorch.py
│       │   │   └── modeling_yoso.py
│       │   ├── youtu/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_youtu.py
│       │   │   ├── modeling_youtu.py
│       │   │   └── modular_youtu.py
│       │   ├── zamba/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_zamba.py
│       │   │   └── modeling_zamba.py
│       │   ├── zamba2/
│       │   │   ├── __init__.py
│       │   │   ├── configuration_zamba2.py
│       │   │   ├── modeling_zamba2.py
│       │   │   └── modular_zamba2.py
│       │   └── zoedepth/
│       │       ├── __init__.py
│       │       ├── configuration_zoedepth.py
│       │       ├── convert_zoedepth_to_hf.py
│       │       ├── image_processing_pil_zoedepth.py
│       │       ├── image_processing_zoedepth.py
│       │       └── modeling_zoedepth.py
│       ├── monkey_patching.py
│       ├── optimization.py
│       ├── pipelines/
│       │   ├── __init__.py
│       │   ├── any_to_any.py
│       │   ├── audio_classification.py
│       │   ├── audio_utils.py
│       │   ├── automatic_speech_recognition.py
│       │   ├── base.py
│       │   ├── depth_estimation.py
│       │   ├── document_question_answering.py
│       │   ├── feature_extraction.py
│       │   ├── fill_mask.py
│       │   ├── image_classification.py
│       │   ├── image_feature_extraction.py
│       │   ├── image_segmentation.py
│       │   ├── image_text_to_text.py
│       │   ├── keypoint_matching.py
│       │   ├── mask_generation.py
│       │   ├── object_detection.py
│       │   ├── pt_utils.py
│       │   ├── table_question_answering.py
│       │   ├── text_classification.py
│       │   ├── text_generation.py
│       │   ├── text_to_audio.py
│       │   ├── token_classification.py
│       │   ├── video_classification.py
│       │   ├── zero_shot_audio_classification.py
│       │   ├── zero_shot_classification.py
│       │   ├── zero_shot_image_classification.py
│       │   └── zero_shot_object_detection.py
│       ├── processing_utils.py
│       ├── py.typed
│       ├── pytorch_utils.py
│       ├── quantizers/
│       │   ├── __init__.py
│       │   ├── auto.py
│       │   ├── base.py
│       │   ├── quantizer_aqlm.py
│       │   ├── quantizer_auto_round.py
│       │   ├── quantizer_awq.py
│       │   ├── quantizer_bitnet.py
│       │   ├── quantizer_bnb_4bit.py
│       │   ├── quantizer_bnb_8bit.py
│       │   ├── quantizer_compressed_tensors.py
│       │   ├── quantizer_eetq.py
│       │   ├── quantizer_fbgemm_fp8.py
│       │   ├── quantizer_finegrained_fp8.py
│       │   ├── quantizer_fouroversix.py
│       │   ├── quantizer_fp_quant.py
│       │   ├── quantizer_gptq.py
│       │   ├── quantizer_higgs.py
│       │   ├── quantizer_hqq.py
│       │   ├── quantizer_metal.py
│       │   ├── quantizer_mxfp4.py
│       │   ├── quantizer_quanto.py
│       │   ├── quantizer_quark.py
│       │   ├── quantizer_sinq.py
│       │   ├── quantizer_spqr.py
│       │   ├── quantizer_torchao.py
│       │   ├── quantizer_vptq.py
│       │   └── quantizers_utils.py
│       ├── safetensors_conversion.py
│       ├── testing_utils.py
│       ├── time_series_utils.py
│       ├── tokenization_mistral_common.py
│       ├── tokenization_python.py
│       ├── tokenization_utils_base.py
│       ├── tokenization_utils_sentencepiece.py
│       ├── tokenization_utils_tokenizers.py
│       ├── trainer.py
│       ├── trainer_callback.py
│       ├── trainer_jit_checkpoint.py
│       ├── trainer_optimizer.py
│       ├── trainer_pt_utils.py
│       ├── trainer_seq2seq.py
│       ├── trainer_utils.py
│       ├── training_args.py
│       ├── training_args_seq2seq.py
│       ├── utils/
│       │   ├── __init__.py
│       │   ├── attention_visualizer.py
│       │   ├── auto_docstring.py
│       │   ├── backbone_utils.py
│       │   ├── chat_parsing_utils.py
│       │   ├── chat_template_utils.py
│       │   ├── constants.py
│       │   ├── deprecation.py
│       │   ├── doc.py
│       │   ├── dummy_detectron2_objects.py
│       │   ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
│       │   ├── dummy_mistral_common_objects.py
│       │   ├── dummy_music_objects.py
│       │   ├── dummy_pt_objects.py
│       │   ├── dummy_sentencepiece_and_tokenizers_objects.py
│       │   ├── dummy_speech_objects.py
│       │   ├── dummy_timm_and_torchvision_objects.py
│       │   ├── dummy_tokenizers_objects.py
│       │   ├── dummy_torchaudio_objects.py
│       │   ├── dummy_torchvision_objects.py
│       │   ├── dummy_vision_objects.py
│       │   ├── generic.py
│       │   ├── hp_naming.py
│       │   ├── hub.py
│       │   ├── import_utils.py
│       │   ├── kernel_config.py
│       │   ├── loading_report.py
│       │   ├── logging.py
│       │   ├── metrics.py
│       │   ├── network_logging.py
│       │   ├── notebook.py
│       │   ├── output_capturing.py
│       │   ├── peft_utils.py
│       │   ├── pytest_helpers.py
│       │   ├── quantization_config.py
│       │   ├── sentencepiece_model_pb2.py
│       │   ├── sentencepiece_model_pb2_new.py
│       │   ├── type_validators.py
│       │   └── versions.py
│       ├── video_processing_utils.py
│       └── video_utils.py
├── tests/
│   ├── __init__.py
│   ├── causal_lm_tester.py
│   ├── cli/
│   │   ├── conftest.py
│   │   ├── test_chat.py
│   │   ├── test_download.py
│   │   ├── test_serve.py
│   │   └── test_system.py
│   ├── fixtures/
│   │   ├── audioflamingo3/
│   │   │   ├── expected_results_batched.json
│   │   │   └── expected_results_single.json
│   │   ├── config.json
│   │   ├── dummy-config.json
│   │   ├── dummy_feature_extractor_config.json
│   │   ├── empty.txt
│   │   ├── gpt_oss/
│   │   │   └── integration_tests.json
│   │   ├── input.txt
│   │   ├── merges.txt
│   │   ├── musicflamingo/
│   │   │   ├── expected_results_batched.json
│   │   │   └── expected_results_single.json
│   │   ├── parakeet/
│   │   │   ├── expected_results_batch.json
│   │   │   └── expected_results_single.json
│   │   ├── preprocessor_config.json
│   │   ├── sample_text.txt
│   │   ├── sample_text_no_unicode.txt
│   │   ├── spiece.model
│   │   ├── test_entity_vocab.json
│   │   ├── test_sentencepiece.model
│   │   ├── test_sentencepiece_bpe.model
│   │   ├── test_sentencepiece_bpe_char.model
│   │   ├── test_sentencepiece_no_bos.model
│   │   ├── test_sentencepiece_with_bytefallback.model
│   │   ├── tests_samples/
│   │   │   ├── .gitignore
│   │   │   ├── COCO/
│   │   │   │   ├── coco_annotations.txt
│   │   │   │   └── coco_panoptic_annotations.txt
│   │   │   ├── GermEval/
│   │   │   │   ├── dev.txt
│   │   │   │   ├── labels.txt
│   │   │   │   └── train.txt
│   │   │   ├── MRPC/
│   │   │   │   ├── dev.csv
│   │   │   │   ├── dev.tsv
│   │   │   │   ├── train.csv
│   │   │   │   └── train.tsv
│   │   │   ├── SQUAD/
│   │   │   │   └── sample.json
│   │   │   ├── STS-B/
│   │   │   │   ├── dev.tsv
│   │   │   │   └── train.tsv
│   │   │   ├── conll/
│   │   │   │   └── sample.json
│   │   │   ├── swag/
│   │   │   │   └── sample.json
│   │   │   ├── wiki_text/
│   │   │   │   └── wiki_00
│   │   │   ├── wmt16/
│   │   │   │   └── sample.json
│   │   │   ├── wmt_en_ro/
│   │   │   │   ├── test.json
│   │   │   │   ├── train.json
│   │   │   │   └── val.json
│   │   │   └── xsum/
│   │   │       └── sample.json
│   │   ├── vibevoice/
│   │   │   └── expected_acoustic_tokenizer_results.json
│   │   ├── vibevoice_asr/
│   │   │   ├── expected_results_batch.json
│   │   │   ├── expected_results_single.json
│   │   │   └── expected_results_with_context.json
│   │   ├── vocab.json
│   │   ├── vocab.txt
│   │   └── xcodec/
│   │       └── integration_tests.json
│   ├── generation/
│   │   ├── __init__.py
│   │   ├── test_candidate_generator.py
│   │   ├── test_configuration_utils.py
│   │   ├── test_continuous_batching.py
│   │   ├── test_flash_attention_parity.py
│   │   ├── test_logits_process.py
│   │   ├── test_paged_attention.py
│   │   ├── test_stopping_criteria.py
│   │   ├── test_streamers.py
│   │   └── test_utils.py
│   ├── kernels/
│   │   └── test_kernels.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── afmoe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_afmoe.py
│   │   ├── aimv2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_aimv2.py
│   │   ├── albert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_albert.py
│   │   │   └── test_tokenization_albert.py
│   │   ├── align/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_align.py
│   │   │   └── test_processing_align.py
│   │   ├── altclip/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_altclip.py
│   │   │   └── test_processing_altclip.py
│   │   ├── apertus/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_apertus.py
│   │   ├── arcee/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_arcee.py
│   │   ├── aria/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_aria.py
│   │   │   ├── test_modeling_aria.py
│   │   │   └── test_processing_aria.py
│   │   ├── audio_spectrogram_transformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_audio_spectrogram_transformer.py
│   │   │   └── test_modeling_audio_spectrogram_transformer.py
│   │   ├── audioflamingo3/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_audioflamingo3.py
│   │   │   └── test_processing_audioflamingo3.py
│   │   ├── auto/
│   │   │   ├── __init__.py
│   │   │   ├── test_configuration_auto.py
│   │   │   ├── test_feature_extraction_auto.py
│   │   │   ├── test_image_processing_auto.py
│   │   │   ├── test_modeling_auto.py
│   │   │   ├── test_processor_auto.py
│   │   │   ├── test_tokenization_auto.py
│   │   │   └── test_video_processing_auto.py
│   │   ├── autoformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_autoformer.py
│   │   ├── aya_vision/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_aya_vision.py
│   │   │   └── test_processing_aya_vision.py
│   │   ├── bamba/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_bamba.py
│   │   ├── bark/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_bark.py
│   │   │   └── test_processing_bark.py
│   │   ├── bart/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_bart.py
│   │   ├── barthez/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_barthez.py
│   │   ├── bartpho/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_bartpho.py
│   │   ├── beit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_beit.py
│   │   │   └── test_modeling_beit.py
│   │   ├── bert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_bert.py
│   │   │   └── test_tokenization_bert.py
│   │   ├── bert_generation/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_bert_generation.py
│   │   │   └── test_tokenization_bert_generation.py
│   │   ├── bert_japanese/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_bert_japanese.py
│   │   ├── bertweet/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_bertweet.py
│   │   ├── big_bird/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_big_bird.py
│   │   │   └── test_tokenization_big_bird.py
│   │   ├── bigbird_pegasus/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_bigbird_pegasus.py
│   │   ├── biogpt/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_biogpt.py
│   │   │   └── test_tokenization_biogpt.py
│   │   ├── bit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_bit.py
│   │   │   └── test_modeling_bit.py
│   │   ├── bitnet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_bitnet.py
│   │   ├── blenderbot/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_blenderbot.py
│   │   │   └── test_tokenization_blenderbot.py
│   │   ├── blenderbot_small/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_blenderbot_small.py
│   │   │   └── test_tokenization_blenderbot_small.py
│   │   ├── blip/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_blip.py
│   │   │   ├── test_modeling_blip.py
│   │   │   ├── test_modeling_blip_text.py
│   │   │   └── test_processing_blip.py
│   │   ├── blip_2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_blip_2.py
│   │   │   └── test_processing_blip_2.py
│   │   ├── bloom/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_bloom.py
│   │   │   └── test_tokenization_bloom.py
│   │   ├── blt/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_blt.py
│   │   ├── bridgetower/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_bridgetower.py
│   │   │   ├── test_modeling_bridgetower.py
│   │   │   └── test_processing_bridgetower.py
│   │   ├── bros/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_bros.py
│   │   ├── byt5/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_byt5.py
│   │   ├── camembert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_camembert.py
│   │   │   └── test_tokenization_camembert.py
│   │   ├── canine/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_canine.py
│   │   │   └── test_tokenization_canine.py
│   │   ├── chameleon/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_chameleon.py
│   │   │   ├── test_modeling_chameleon.py
│   │   │   └── test_processing_chameleon.py
│   │   ├── chinese_clip/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_chinese_clip.py
│   │   │   ├── test_modeling_chinese_clip.py
│   │   │   └── test_processing_chinese_clip.py
│   │   ├── chmv2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_chmv2.py
│   │   │   └── test_modeling_chmv2.py
│   │   ├── clap/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_clap.py
│   │   │   ├── test_modeling_clap.py
│   │   │   └── test_processing_clap.py
│   │   ├── clip/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_clip.py
│   │   │   ├── test_modeling_clip.py
│   │   │   ├── test_processing_clip.py
│   │   │   └── test_tokenization_clip.py
│   │   ├── clipseg/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_clipseg.py
│   │   │   └── test_processing_clipseg.py
│   │   ├── clvp/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_clvp.py
│   │   │   ├── test_modeling_clvp.py
│   │   │   ├── test_processing_clvp.py
│   │   │   └── test_tokenization_clvp.py
│   │   ├── code_llama/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_code_llama.py
│   │   ├── codegen/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_codegen.py
│   │   │   └── test_tokenization_codegen.py
│   │   ├── cohere/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_cohere.py
│   │   │   └── test_tokenization_cohere.py
│   │   ├── cohere2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_cohere2.py
│   │   ├── cohere2_vision/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_cohere2_vision.py
│   │   │   ├── test_modeling_cohere2_vision.py
│   │   │   └── test_processing_cohere2_vision.py
│   │   ├── cohere_asr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_cohere_asr.py
│   │   ├── colmodernvbert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_colmodernvbert.py
│   │   │   └── test_processing_colmodernvbert.py
│   │   ├── colpali/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_colpali.py
│   │   │   └── test_processing_colpali.py
│   │   ├── colqwen2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_colqwen2.py
│   │   │   └── test_processing_colqwen2.py
│   │   ├── conditional_detr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_conditional_detr.py
│   │   │   └── test_modeling_conditional_detr.py
│   │   ├── convbert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_convbert.py
│   │   ├── convnext/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_convnext.py
│   │   │   └── test_modeling_convnext.py
│   │   ├── convnextv2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_convnextv2.py
│   │   ├── cpm/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_cpm.py
│   │   ├── cpmant/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_cpmant.py
│   │   │   └── test_tokenization_cpmant.py
│   │   ├── csm/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_csm.py
│   │   │   └── test_processing_csm.py
│   │   ├── ctrl/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_ctrl.py
│   │   │   └── test_tokenization_ctrl.py
│   │   ├── cvt/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_cvt.py
│   │   ├── cwm/
│   │   │   ├── __init__.py
│   │   │   ├── test_configuration_cwm.py
│   │   │   └── test_modeling_cwm.py
│   │   ├── d_fine/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_d_fine.py
│   │   ├── dab_detr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dab_detr.py
│   │   ├── dac/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_dac.py
│   │   │   └── test_modeling_dac.py
│   │   ├── data2vec/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_data2vec_audio.py
│   │   │   ├── test_modeling_data2vec_text.py
│   │   │   └── test_modeling_data2vec_vision.py
│   │   ├── dbrx/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dbrx.py
│   │   ├── deberta/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_deberta.py
│   │   │   └── test_tokenization_deberta.py
│   │   ├── deberta_v2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_deberta_v2.py
│   │   │   └── test_tokenization_deberta_v2.py
│   │   ├── decision_transformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_decision_transformer.py
│   │   ├── deepseek_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_deepseek_v2.py
│   │   ├── deepseek_v3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_deepseek_v3.py
│   │   ├── deepseek_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_deepseek_vl.py
│   │   │   ├── test_modeling_deepseek_vl.py
│   │   │   └── test_processing_deepseek_vl.py
│   │   ├── deepseek_vl_hybrid/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_deepseek_vl_hybrid.py
│   │   │   ├── test_modeling_deepseek_vl_hybrid.py
│   │   │   └── test_processing_deepseek_vl_hybrid.py
│   │   ├── deformable_detr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_deformable_detr.py
│   │   │   └── test_modeling_deformable_detr.py
│   │   ├── deit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_deit.py
│   │   │   └── test_modeling_deit.py
│   │   ├── depth_anything/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_depth_anything.py
│   │   ├── depth_pro/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_depth_pro.py
│   │   │   └── test_modeling_depth_pro.py
│   │   ├── detr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_detr.py
│   │   │   └── test_modeling_detr.py
│   │   ├── dia/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_dia.py
│   │   │   ├── test_modeling_dia.py
│   │   │   ├── test_processing_dia.py
│   │   │   └── test_tokenization_dia.py
│   │   ├── diffllama/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_diffllama.py
│   │   ├── dinat/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dinat.py
│   │   ├── dinov2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dinov2.py
│   │   ├── dinov2_with_registers/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dinov2_with_registers.py
│   │   ├── dinov3_convnext/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dinov3_convnext.py
│   │   ├── dinov3_vit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_dinov3_vit.py
│   │   │   └── test_modeling_dinov3_vit.py
│   │   ├── distilbert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_distilbert.py
│   │   │   └── test_tokenization_distilbert.py
│   │   ├── dit/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dit.py
│   │   ├── doge/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_doge.py
│   │   ├── donut/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_donut.py
│   │   │   ├── test_modeling_donut_swin.py
│   │   │   └── test_processing_donut.py
│   │   ├── dots1/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dots1.py
│   │   ├── dpr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_dpr.py
│   │   ├── dpt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_dpt.py
│   │   │   ├── test_modeling_dpt.py
│   │   │   ├── test_modeling_dpt_auto_backbone.py
│   │   │   └── test_modeling_dpt_hybrid.py
│   │   ├── edgetam/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_edgetam.py
│   │   ├── edgetam_video/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_edgetam_video.py
│   │   ├── efficientloftr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_efficientloftr.py
│   │   │   └── test_modeling_efficientloftr.py
│   │   ├── efficientnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_efficientnet.py
│   │   │   └── test_modeling_efficientnet.py
│   │   ├── electra/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_electra.py
│   │   ├── emu3/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_emu3.py
│   │   │   └── test_processing_emu3.py
│   │   ├── encodec/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_encodec.py
│   │   │   └── test_modeling_encodec.py
│   │   ├── encoder_decoder/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_encoder_decoder.py
│   │   ├── eomt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_eomt.py
│   │   │   └── test_modeling_eomt.py
│   │   ├── eomt_dinov3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_eomt_dinov3.py
│   │   ├── ernie/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ernie.py
│   │   ├── ernie4_5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ernie4_5.py
│   │   ├── ernie4_5_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ernie4_5_moe.py
│   │   ├── ernie4_5_vl_moe/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_ernie4_5_vl_moe.py
│   │   │   ├── test_modeling_ernie4_5_vl_moe.py
│   │   │   ├── test_processing_ernie4_5_vl_moe.py
│   │   │   └── test_video_processing_ernie4_5_vl_moe.py
│   │   ├── esm/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_esm.py
│   │   │   ├── test_modeling_esmfold.py
│   │   │   └── test_tokenization_esm.py
│   │   ├── eurobert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_eurobert.py
│   │   ├── evolla/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_evolla.py
│   │   │   └── test_processing_evolla.py
│   │   ├── exaone4/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_exaone4.py
│   │   ├── exaone_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_exaone_moe.py
│   │   ├── falcon/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_falcon.py
│   │   ├── falcon_h1/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_falcon_h1.py
│   │   ├── falcon_mamba/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_falcon_mamba.py
│   │   ├── fast_vlm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_fast_vlm.py
│   │   ├── fastspeech2_conformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_fastspeech2_conformer.py
│   │   │   └── test_tokenization_fastspeech2_conformer.py
│   │   ├── flaubert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_flaubert.py
│   │   │   └── test_tokenization_flaubert.py
│   │   ├── flava/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_flava.py
│   │   │   ├── test_modeling_flava.py
│   │   │   └── test_processing_flava.py
│   │   ├── flex_olmo/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_flex_olmo.py
│   │   ├── florence2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_florence2.py
│   │   │   └── test_processing_florence2.py
│   │   ├── fnet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_fnet.py
│   │   ├── focalnet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_focalnet.py
│   │   ├── fsmt/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_fsmt.py
│   │   │   └── test_tokenization_fsmt.py
│   │   ├── funnel/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_funnel.py
│   │   │   └── test_tokenization_funnel.py
│   │   ├── fuyu/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_fuyu.py
│   │   │   ├── test_modeling_fuyu.py
│   │   │   └── test_processing_fuyu.py
│   │   ├── gemma/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_gemma.py
│   │   │   └── test_tokenization_gemma.py
│   │   ├── gemma2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_gemma2.py
│   │   ├── gemma3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_gemma3.py
│   │   │   ├── test_modeling_gemma3.py
│   │   │   └── test_processing_gemma3.py
│   │   ├── gemma3n/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_gemma3n.py
│   │   │   ├── test_modeling_gemma3n.py
│   │   │   └── test_processing_gemma3n.py
│   │   ├── git/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_git.py
│   │   │   └── test_processing_git.py
│   │   ├── glm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm.py
│   │   ├── glm4/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm4.py
│   │   ├── glm46v/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_glm46v.py
│   │   │   ├── test_processor_glm46v.py
│   │   │   └── test_video_processing_glm46v.py
│   │   ├── glm4_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm4_moe.py
│   │   ├── glm4_moe_lite/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm4_moe_lite.py
│   │   ├── glm4v/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_glm4v.py
│   │   │   ├── test_modeling_glm4v.py
│   │   │   ├── test_processor_glm4v.py
│   │   │   └── test_video_processing_glm4v.py
│   │   ├── glm4v_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm4v_moe.py
│   │   ├── glm_image/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_glm_image.py
│   │   │   └── test_processor_glm_image.py
│   │   ├── glm_moe_dsa/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm_moe_dsa.py
│   │   ├── glm_ocr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glm_ocr.py
│   │   ├── glmasr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_glmasr.py
│   │   ├── glpn/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_glpn.py
│   │   │   └── test_modeling_glpn.py
│   │   ├── got_ocr2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_got_ocr2.py
│   │   │   ├── test_modeling_got_ocr2.py
│   │   │   └── test_processing_got_ocr2.py
│   │   ├── gpt2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_gpt2.py
│   │   │   └── test_tokenization_gpt2.py
│   │   ├── gpt_bigcode/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_gpt_bigcode.py
│   │   ├── gpt_neo/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_gpt_neo.py
│   │   ├── gpt_neox/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_gpt_neox.py
│   │   │   └── test_tokenization_gpt_neox.py
│   │   ├── gpt_neox_japanese/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_gpt_neox_japanese.py
│   │   │   └── test_tokenization_gpt_neox_japanese.py
│   │   ├── gpt_oss/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_gpt_oss.py
│   │   ├── gpt_sw3/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_gpt_sw3.py
│   │   ├── gptj/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_gptj.py
│   │   ├── granite/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_granite.py
│   │   ├── granite_speech/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_granite_speech.py
│   │   │   └── test_processing_granite_speech.py
│   │   ├── granitemoe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_granitemoe.py
│   │   ├── granitemoehybrid/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_granitemoehybrid.py
│   │   ├── granitemoeshared/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_granitemoeshared.py
│   │   ├── grounding_dino/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_grounding_dino.py
│   │   │   ├── test_modeling_grounding_dino.py
│   │   │   └── test_processing_grounding_dino.py
│   │   ├── groupvit/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_groupvit.py
│   │   ├── helium/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_helium.py
│   │   ├── herbert/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_herbert.py
│   │   ├── hgnet_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_hgnet_v2.py
│   │   ├── hiera/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_hiera.py
│   │   ├── higgs_audio_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_higgs_audio_v2.py
│   │   ├── higgs_audio_v2_tokenizer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_higgs_audio_v2_tokenizer.py
│   │   ├── hubert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_hubert.py
│   │   ├── hunyuan_v1_dense/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_hunyuan_v1_dense.py
│   │   ├── hunyuan_v1_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_hunyuan_v1_moe.py
│   │   ├── ibert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ibert.py
│   │   ├── idefics/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_idefics.py
│   │   │   ├── test_modeling_idefics.py
│   │   │   └── test_processing_idefics.py
│   │   ├── idefics2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_idefics2.py
│   │   │   ├── test_modeling_idefics2.py
│   │   │   └── test_processing_idefics2.py
│   │   ├── idefics3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_idefics3.py
│   │   │   ├── test_modeling_idefics3.py
│   │   │   └── test_processing_idefics3.py
│   │   ├── ijepa/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ijepa.py
│   │   ├── imagegpt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_imagegpt.py
│   │   │   └── test_modeling_imagegpt.py
│   │   ├── informer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_informer.py
│   │   ├── instructblip/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_instructblip.py
│   │   │   └── test_processing_instructblip.py
│   │   ├── instructblipvideo/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_instructblipvideo.py
│   │   │   ├── test_processing_instructblipvideo.py
│   │   │   └── test_video_processing_instructblipvideo.py
│   │   ├── internvl/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_internvl.py
│   │   │   ├── test_processing_internvl.py
│   │   │   └── test_video_processing_internvl.py
│   │   ├── jais2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_jais2.py
│   │   ├── jamba/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_jamba.py
│   │   ├── janus/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_janus.py
│   │   │   ├── test_modeling_janus.py
│   │   │   └── test_processing_janus.py
│   │   ├── jetmoe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_jetmoe.py
│   │   ├── jina_embeddings_v3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_jina_embeddings_v3.py
│   │   ├── kosmos2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_kosmos2.py
│   │   │   └── test_processing_kosmos2.py
│   │   ├── kosmos2_5/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_kosmos2_5.py
│   │   │   ├── test_modeling_kosmos2_5.py
│   │   │   └── test_processor_kosmos2_5.py
│   │   ├── kyutai_speech_to_text/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_kyutai_speech_to_text.py
│   │   ├── lasr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lasr.py
│   │   ├── layoutlm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_layoutlm.py
│   │   ├── layoutlmv2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_layoutlmv2.py
│   │   │   ├── test_modeling_layoutlmv2.py
│   │   │   ├── test_processing_layoutlmv2.py
│   │   │   └── test_tokenization_layoutlmv2.py
│   │   ├── layoutlmv3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_layoutlmv3.py
│   │   │   ├── test_modeling_layoutlmv3.py
│   │   │   ├── test_processing_layoutlmv3.py
│   │   │   └── test_tokenization_layoutlmv3.py
│   │   ├── layoutxlm/
│   │   │   ├── __init__.py
│   │   │   ├── test_processing_layoutxlm.py
│   │   │   └── test_tokenization_layoutxlm.py
│   │   ├── led/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_led.py
│   │   ├── levit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_levit.py
│   │   │   └── test_modeling_levit.py
│   │   ├── lfm2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lfm2.py
│   │   ├── lfm2_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lfm2_moe.py
│   │   ├── lfm2_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_lfm2_vl.py
│   │   │   ├── test_modeling_lfm2_vl.py
│   │   │   └── test_processing_lfm2_vl.py
│   │   ├── lightglue/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_lightglue.py
│   │   │   └── test_modeling_lightglue.py
│   │   ├── lighton_ocr/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_lighton_ocr.py
│   │   │   └── test_processor_lighton_ocr.py
│   │   ├── lilt/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lilt.py
│   │   ├── llama/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_llama.py
│   │   │   └── test_tokenization_llama.py
│   │   ├── llama4/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_llama4.py
│   │   │   ├── test_modeling_llama4.py
│   │   │   └── test_processing_llama4.py
│   │   ├── llava/
│   │   │   ├── __init__.py
│   │   │   ├── test_configuration_llava.py
│   │   │   ├── test_image_processing_llava.py
│   │   │   ├── test_modeling_llava.py
│   │   │   └── test_processing_llava.py
│   │   ├── llava_next/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_llava_next.py
│   │   │   ├── test_modeling_llava_next.py
│   │   │   └── test_processing_llava_next.py
│   │   ├── llava_next_video/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_llava_next_video.py
│   │   │   ├── test_processing_llava_next_video.py
│   │   │   └── test_video_processing_llava_next_video.py
│   │   ├── llava_onevision/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_llava_onevision.py
│   │   │   ├── test_modeling_llava_onevision.py
│   │   │   ├── test_processing_llava_onevision.py
│   │   │   └── test_video_processing_llava_onevision.py
│   │   ├── longcat_flash/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_longcat_flash.py
│   │   ├── longformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_longformer.py
│   │   ├── longt5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_longt5.py
│   │   ├── luke/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_luke.py
│   │   │   └── test_tokenization_luke.py
│   │   ├── lw_detr/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lw_detr.py
│   │   ├── lxmert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_lxmert.py
│   │   ├── m2m_100/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_m2m_100.py
│   │   │   └── test_tokenization_m2m_100.py
│   │   ├── mamba/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mamba.py
│   │   ├── mamba2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mamba2.py
│   │   ├── marian/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_marian.py
│   │   │   └── test_tokenization_marian.py
│   │   ├── markuplm/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_markuplm.py
│   │   │   ├── test_modeling_markuplm.py
│   │   │   ├── test_processing_markuplm.py
│   │   │   └── test_tokenization_markuplm.py
│   │   ├── mask2former/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_mask2former.py
│   │   │   └── test_modeling_mask2former.py
│   │   ├── maskformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_maskformer.py
│   │   │   ├── test_modeling_maskformer.py
│   │   │   └── test_modeling_maskformer_swin.py
│   │   ├── mbart/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_mbart.py
│   │   │   └── test_tokenization_mbart.py
│   │   ├── mbart50/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_mbart50.py
│   │   ├── megatron_bert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_megatron_bert.py
│   │   ├── megatron_gpt2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_megatron_gpt2.py
│   │   ├── metaclip_2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_metaclip_2.py
│   │   ├── mgp_str/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_mgp_str.py
│   │   │   ├── test_processing_mgp_str.py
│   │   │   └── test_tokenization_mgp_str.py
│   │   ├── mimi/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mimi.py
│   │   ├── minimax/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_minimax.py
│   │   ├── minimax_m2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_minimax_m2.py
│   │   ├── ministral/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ministral.py
│   │   ├── ministral3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_ministral3.py
│   │   ├── mistral/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mistral.py
│   │   ├── mistral3/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_mistral3.py
│   │   │   └── test_processing_mistral3.py
│   │   ├── mistral4/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mistral4.py
│   │   ├── mixtral/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mixtral.py
│   │   ├── mlcd/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mlcd.py
│   │   ├── mllama/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_mllama.py
│   │   │   ├── test_modeling_mllama.py
│   │   │   └── test_processing_mllama.py
│   │   ├── mluke/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_mluke.py
│   │   ├── mm_grounding_dino/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mm_grounding_dino.py
│   │   ├── mobilebert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mobilebert.py
│   │   ├── mobilenet_v1/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_mobilenet_v1.py
│   │   │   └── test_modeling_mobilenet_v1.py
│   │   ├── mobilenet_v2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_mobilenet_v2.py
│   │   │   └── test_modeling_mobilenet_v2.py
│   │   ├── mobilevit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_mobilevit.py
│   │   │   └── test_modeling_mobilevit.py
│   │   ├── mobilevitv2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mobilevitv2.py
│   │   ├── modernbert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_modernbert.py
│   │   ├── modernbert_decoder/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_modernbert_decoder.py
│   │   ├── modernvbert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_modernvbert.py
│   │   ├── moonshine/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_moonshine.py
│   │   ├── moonshine_streaming/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_moonshine_streaming.py
│   │   ├── moshi/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_moshi.py
│   │   │   └── test_tokenization_moshi.py
│   │   ├── mpnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_mpnet.py
│   │   │   └── test_tokenization_mpnet.py
│   │   ├── mpt/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mpt.py
│   │   ├── mra/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mra.py
│   │   ├── mt5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mt5.py
│   │   ├── musicflamingo/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_musicflamingo.py
│   │   │   └── test_processing_musicflamingo.py
│   │   ├── musicgen/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_musicgen.py
│   │   │   └── test_processing_musicgen.py
│   │   ├── musicgen_melody/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_musicgen_melody.py
│   │   │   ├── test_modeling_musicgen_melody.py
│   │   │   └── test_processing_musicgen_melody.py
│   │   ├── mvp/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_mvp.py
│   │   ├── myt5/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_myt5.py
│   │   ├── nanochat/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_nanochat.py
│   │   ├── nemotron/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_nemotron.py
│   │   ├── nemotron_h/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_nemotron_h.py
│   │   ├── nllb/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_nllb.py
│   │   ├── nllb_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_nllb_moe.py
│   │   ├── nougat/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_nougat.py
│   │   │   └── test_tokenization_nougat.py
│   │   ├── nystromformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_nystromformer.py
│   │   ├── olmo/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_olmo.py
│   │   ├── olmo2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_olmo2.py
│   │   ├── olmo3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_olmo3.py
│   │   ├── olmo_hybrid/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_olmo_hybrid.py
│   │   ├── olmoe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_olmoe.py
│   │   ├── omdet_turbo/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_omdet_turbo.py
│   │   │   └── test_processing_omdet_turbo.py
│   │   ├── oneformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_oneformer.py
│   │   │   ├── test_modeling_oneformer.py
│   │   │   └── test_processing_oneformer.py
│   │   ├── openai/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_openai.py
│   │   │   └── test_tokenization_openai.py
│   │   ├── opt/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_opt.py
│   │   ├── ovis2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_ovis2.py
│   │   │   ├── test_modeling_ovis2.py
│   │   │   └── test_processor_ovis2.py
│   │   ├── owlv2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_owlv2.py
│   │   │   ├── test_modeling_owlv2.py
│   │   │   └── test_processing_owlv2.py
│   │   ├── owlvit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_owlvit.py
│   │   │   ├── test_modeling_owlvit.py
│   │   │   └── test_processing_owlvit.py
│   │   ├── paddleocr_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_paddleocr_vl.py
│   │   │   └── test_modeling_paddleocr_vl.py
│   │   ├── paligemma/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_paligemma.py
│   │   │   └── test_processing_paligemma.py
│   │   ├── paligemma2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_paligemma2.py
│   │   ├── parakeet/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_parakeet.py
│   │   │   ├── test_modeling_parakeet.py
│   │   │   ├── test_processing_parakeet.py
│   │   │   └── test_tokenization_parakeet.py
│   │   ├── patchtsmixer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_patchtsmixer.py
│   │   ├── patchtst/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_patchtst.py
│   │   ├── pe_audio/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pe_audio.py
│   │   ├── pe_audio_video/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pe_audio_video.py
│   │   ├── pe_video/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pe_video.py
│   │   ├── pegasus/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_pegasus.py
│   │   │   └── test_tokenization_pegasus.py
│   │   ├── pegasus_x/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pegasus_x.py
│   │   ├── perceiver/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_perceiver.py
│   │   │   ├── test_modeling_perceiver.py
│   │   │   └── test_tokenization_perceiver.py
│   │   ├── perception_lm/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_perception_lm.py
│   │   │   ├── test_modeling_perception_lm.py
│   │   │   ├── test_processing_perception_lm.py
│   │   │   └── test_video_processing_perception_lm.py
│   │   ├── persimmon/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_persimmon.py
│   │   ├── phi/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_phi.py
│   │   ├── phi3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_phi3.py
│   │   ├── phi4_multimodal/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_phi4_multimodal.py
│   │   │   ├── test_image_processing_phi4_multimodal.py
│   │   │   ├── test_modeling_phi4_multimodal.py
│   │   │   └── test_processing_phi4_multimodal.py
│   │   ├── phimoe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_phimoe.py
│   │   ├── phobert/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_phobert.py
│   │   ├── pi0/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_pi0.py
│   │   │   └── test_processing_pi0.py
│   │   ├── pix2struct/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pix2struct.py
│   │   │   ├── test_modeling_pix2struct.py
│   │   │   └── test_processing_pix2struct.py
│   │   ├── pixio/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pixio.py
│   │   ├── pixtral/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pixtral.py
│   │   │   ├── test_modeling_pixtral.py
│   │   │   └── test_processing_pixtral.py
│   │   ├── plbart/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_plbart.py
│   │   │   └── test_tokenization_plbart.py
│   │   ├── poolformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_poolformer.py
│   │   │   └── test_modeling_poolformer.py
│   │   ├── pop2piano/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_pop2piano.py
│   │   │   ├── test_modeling_pop2piano.py
│   │   │   ├── test_processing_pop2piano.py
│   │   │   └── test_tokenization_pop2piano.py
│   │   ├── pp_chart2table/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_chart2table.py
│   │   │   ├── test_modeling_pp_chart2table.py
│   │   │   └── test_processing_pp_chart2table.py
│   │   ├── pp_doclayout_v2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_doclayout_v2.py
│   │   │   └── test_modeling_pp_doclayout_v2.py
│   │   ├── pp_doclayout_v3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_doclayout_v3.py
│   │   │   └── test_modeling_pp_doclayout_v3.py
│   │   ├── pp_lcnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_lcnet.py
│   │   │   └── test_modeling_pp_lcnet.py
│   │   ├── pp_lcnet_v3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pp_lcnet_v3.py
│   │   ├── pp_ocrv5_mobile_det/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pp_ocrv5_mobile_det.py
│   │   ├── pp_ocrv5_mobile_rec/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pp_ocrv5_mobile_rec.py
│   │   ├── pp_ocrv5_server_det/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_ocrv5_server_det.py
│   │   │   └── test_modeling_pp_ocrv5_server_det.py
│   │   ├── pp_ocrv5_server_rec/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pp_ocrv5_server_det.py
│   │   │   └── test_modeling_pp_ocrv5_server_rec.py
│   │   ├── prompt_depth_anything/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_prompt_depth_anything.py
│   │   │   └── test_modeling_prompt_depth_anything.py
│   │   ├── prophetnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_prophetnet.py
│   │   │   └── test_tokenization_prophetnet.py
│   │   ├── pvt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_pvt.py
│   │   │   └── test_modeling_pvt.py
│   │   ├── pvt_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_pvt_v2.py
│   │   ├── qwen2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen2.py
│   │   │   └── test_tokenization_qwen2.py
│   │   ├── qwen2_5_omni/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen2_5_omni.py
│   │   │   └── test_processing_qwen2_5_omni.py
│   │   ├── qwen2_5_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen2_5_vl.py
│   │   │   └── test_processing_qwen2_5_vl.py
│   │   ├── qwen2_audio/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen2_audio.py
│   │   │   └── test_processing_qwen2_audio.py
│   │   ├── qwen2_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen2_moe.py
│   │   ├── qwen2_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_qwen2_vl.py
│   │   │   ├── test_modeling_qwen2_vl.py
│   │   │   ├── test_processing_qwen2_vl.py
│   │   │   └── test_video_processing_qwen2_vl.py
│   │   ├── qwen3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3.py
│   │   ├── qwen3_5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3_5.py
│   │   ├── qwen3_5_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3_5_moe.py
│   │   ├── qwen3_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3_moe.py
│   │   ├── qwen3_next/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3_next.py
│   │   ├── qwen3_omni_moe/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen3_omni_moe.py
│   │   │   └── test_processing_qwen3_omni_moe.py
│   │   ├── qwen3_vl/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_qwen3_vl.py
│   │   │   ├── test_processing_qwen3_vl.py
│   │   │   └── test_video_processing_qwen3_vl.py
│   │   ├── qwen3_vl_moe/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_qwen3_vl_moe.py
│   │   ├── rag/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_rag.py
│   │   │   ├── test_retrieval_rag.py
│   │   │   └── test_tokenization_rag.py
│   │   ├── recurrent_gemma/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_recurrent_gemma.py
│   │   ├── reformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_reformer.py
│   │   │   └── test_tokenization_reformer.py
│   │   ├── regnet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_regnet.py
│   │   ├── rembert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_rembert.py
│   │   │   └── test_tokenization_rembert.py
│   │   ├── resnet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_resnet.py
│   │   ├── roberta/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_roberta.py
│   │   │   └── test_tokenization_roberta.py
│   │   ├── roberta_prelayernorm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_roberta_prelayernorm.py
│   │   ├── roc_bert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_roc_bert.py
│   │   │   └── test_tokenization_roc_bert.py
│   │   ├── roformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_roformer.py
│   │   │   └── test_tokenization_roformer.py
│   │   ├── rt_detr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_rt_detr.py
│   │   │   ├── test_modeling_rt_detr.py
│   │   │   └── test_modeling_rt_detr_resnet.py
│   │   ├── rt_detr_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_rt_detr_v2.py
│   │   ├── rwkv/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_rwkv.py
│   │   ├── sam/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_sam.py
│   │   │   ├── test_modeling_sam.py
│   │   │   └── test_processing_sam.py
│   │   ├── sam2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_sam2.py
│   │   │   ├── test_modeling_sam2.py
│   │   │   └── test_processor_sam2.py
│   │   ├── sam2_video/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_sam2_video.py
│   │   │   └── test_processor_sam2_video.py
│   │   ├── sam3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_sam3.py
│   │   │   └── test_modeling_sam3.py
│   │   ├── sam3_tracker/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_sam3_tracker.py
│   │   ├── sam3_tracker_video/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_sam3_tracker_video.py
│   │   ├── sam3_video/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_sam3_video.py
│   │   ├── sam_hq/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_sam_hq.py
│   │   │   └── test_processing_sam_hq.py
│   │   ├── seamless_m4t/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_seamless_m4t.py
│   │   │   ├── test_modeling_seamless_m4t.py
│   │   │   ├── test_processing_seamless_m4t.py
│   │   │   └── test_tokenization_seamless_m4t.py
│   │   ├── seamless_m4t_v2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_seamless_m4t_v2.py
│   │   ├── seed_oss/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_seed_oss.py
│   │   ├── segformer/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_segformer.py
│   │   │   └── test_modeling_segformer.py
│   │   ├── seggpt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_seggpt.py
│   │   │   └── test_modeling_seggpt.py
│   │   ├── sew/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_sew.py
│   │   ├── sew_d/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_sew_d.py
│   │   ├── shieldgemma2/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_shieldgemma2.py
│   │   │   └── test_processing_shieldgemma2.py
│   │   ├── siglip/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_siglip.py
│   │   │   ├── test_modeling_siglip.py
│   │   │   └── test_tokenization_siglip.py
│   │   ├── siglip2/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_siglip2.py
│   │   │   ├── test_modeling_siglip2.py
│   │   │   └── test_tokenization_siglip2.py
│   │   ├── slanext/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_slanext.py
│   │   │   └── test_modeling_slanext.py
│   │   ├── smollm3/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_smollm3.py
│   │   ├── smolvlm/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_smolvlm.py
│   │   │   ├── test_modeling_smolvlm.py
│   │   │   ├── test_processing_smolvlm.py
│   │   │   └── test_video_processing_smolvlm.py
│   │   ├── solar_open/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_solar_open.py
│   │   ├── speech_encoder_decoder/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_speech_encoder_decoder.py
│   │   ├── speech_to_text/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_speech_to_text.py
│   │   │   ├── test_modeling_speech_to_text.py
│   │   │   ├── test_processing_speech_to_text.py
│   │   │   └── test_tokenization_speech_to_text.py
│   │   ├── speecht5/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_speecht5.py
│   │   │   ├── test_modeling_speecht5.py
│   │   │   ├── test_processing_speecht5.py
│   │   │   └── test_tokenization_speecht5.py
│   │   ├── splinter/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_splinter.py
│   │   │   └── test_tokenization_splinter.py
│   │   ├── squeezebert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_squeezebert.py
│   │   ├── stablelm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_stablelm.py
│   │   ├── starcoder2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_starcoder2.py
│   │   ├── superglue/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_superglue.py
│   │   │   └── test_modeling_superglue.py
│   │   ├── superpoint/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_superpoint.py
│   │   │   └── test_modeling_superpoint.py
│   │   ├── swiftformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_swiftformer.py
│   │   ├── swin/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_swin.py
│   │   ├── swin2sr/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_swin2sr.py
│   │   │   └── test_modeling_swin2sr.py
│   │   ├── swinv2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_swinv2.py
│   │   ├── switch_transformers/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_switch_transformers.py
│   │   ├── t5/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_t5.py
│   │   │   └── test_tokenization_t5.py
│   │   ├── t5gemma/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_t5gemma.py
│   │   ├── t5gemma2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_t5gemma2.py
│   │   ├── table_transformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_table_transformer.py
│   │   ├── tapas/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_tapas.py
│   │   │   └── test_tokenization_tapas.py
│   │   ├── textnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_textnet.py
│   │   │   └── test_modeling_textnet.py
│   │   ├── time_series_transformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_time_series_transformer.py
│   │   ├── timesfm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_timesfm.py
│   │   ├── timesfm2_5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_timesfm2_5.py
│   │   ├── timesformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_timesformer.py
│   │   ├── timm_backbone/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_timm_backbone.py
│   │   ├── timm_wrapper/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_timm_wrapper.py
│   │   │   └── test_modeling_timm_wrapper.py
│   │   ├── trocr/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_trocr.py
│   │   │   └── test_processing_trocr.py
│   │   ├── tvp/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_tvp.py
│   │   │   └── test_modeling_tvp.py
│   │   ├── udop/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_udop.py
│   │   │   ├── test_processing_udop.py
│   │   │   └── test_tokenization_udop.py
│   │   ├── umt5/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_umt5.py
│   │   ├── unispeech/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_unispeech.py
│   │   ├── unispeech_sat/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_unispeech_sat.py
│   │   ├── univnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_univnet.py
│   │   │   └── test_modeling_univnet.py
│   │   ├── upernet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_upernet.py
│   │   ├── uvdoc/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_uvdoc.py
│   │   │   └── test_modeling_uvdoc.py
│   │   ├── vaultgemma/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vaultgemma.py
│   │   ├── vibevoice_acoustic_tokenizer/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_vibevoice_acoustic_tokenizer.py
│   │   │   └── test_modeling_vibevoice_acoustic_tokenizer.py
│   │   ├── vibevoice_asr/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_vibevoice_asr.py
│   │   │   └── test_processing_vibevoice_asr.py
│   │   ├── video_llama_3/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_video_llama_3.py
│   │   │   ├── test_modeling_video_llama_3.py
│   │   │   ├── test_processing_video_llama_3.py
│   │   │   └── test_video_processing_video_llama_3.py
│   │   ├── video_llava/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_video_llava.py
│   │   │   └── test_video_processing_video_llava.py
│   │   ├── videomae/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_videomae.py
│   │   │   ├── test_modeling_videomae.py
│   │   │   └── test_video_processing_videomae.py
│   │   ├── videomt/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_videomt.py
│   │   │   └── test_video_processing_videomt.py
│   │   ├── vilt/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_vilt.py
│   │   │   └── test_modeling_vilt.py
│   │   ├── vipllava/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vipllava.py
│   │   ├── vision_encoder_decoder/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vision_encoder_decoder.py
│   │   ├── vision_text_dual_encoder/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_vision_text_dual_encoder.py
│   │   │   └── test_processing_vision_text_dual_encoder.py
│   │   ├── visual_bert/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_visual_bert.py
│   │   ├── vit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_vit.py
│   │   │   └── test_modeling_vit.py
│   │   ├── vit_mae/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vit_mae.py
│   │   ├── vit_msn/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vit_msn.py
│   │   ├── vitdet/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vitdet.py
│   │   ├── vitmatte/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_vitmatte.py
│   │   │   └── test_modeling_vitmatte.py
│   │   ├── vitpose/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_vitpose.py
│   │   │   └── test_modeling_vitpose.py
│   │   ├── vitpose_backbone/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vitpose_backbone.py
│   │   ├── vits/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_vits.py
│   │   │   └── test_tokenization_vits.py
│   │   ├── vivit/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_vivit.py
│   │   │   └── test_modeling_vivit.py
│   │   ├── vjepa2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_vjepa2.py
│   │   ├── voxtral/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_voxtral.py
│   │   ├── voxtral_realtime/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_voxtral_realtime.py
│   │   ├── wav2vec2/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_wav2vec2.py
│   │   │   ├── test_modeling_wav2vec2.py
│   │   │   ├── test_processing_wav2vec2.py
│   │   │   └── test_tokenization_wav2vec2.py
│   │   ├── wav2vec2_bert/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_wav2vec2_bert.py
│   │   │   └── test_processing_wav2vec2_bert.py
│   │   ├── wav2vec2_conformer/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_wav2vec2_conformer.py
│   │   ├── wav2vec2_phoneme/
│   │   │   ├── __init__.py
│   │   │   └── test_tokenization_wav2vec2_phoneme.py
│   │   ├── wav2vec2_with_lm/
│   │   │   ├── __init__.py
│   │   │   └── test_processing_wav2vec2_with_lm.py
│   │   ├── wavlm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_wavlm.py
│   │   ├── whisper/
│   │   │   ├── __init__.py
│   │   │   ├── test_feature_extraction_whisper.py
│   │   │   ├── test_modeling_whisper.py
│   │   │   ├── test_processing_whisper.py
│   │   │   └── test_tokenization_whisper.py
│   │   ├── x_clip/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_x_clip.py
│   │   ├── xcodec/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_xcodec.py
│   │   ├── xglm/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_xglm.py
│   │   │   └── test_tokenization_xglm.py
│   │   ├── xlm/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_xlm.py
│   │   │   └── test_tokenization_xlm.py
│   │   ├── xlm_roberta/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_xlm_roberta.py
│   │   │   └── test_tokenization_xlm_roberta.py
│   │   ├── xlm_roberta_xl/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_xlm_roberta_xl.py
│   │   ├── xlnet/
│   │   │   ├── __init__.py
│   │   │   ├── test_modeling_xlnet.py
│   │   │   └── test_tokenization_xlnet.py
│   │   ├── xlstm/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_xlstm.py
│   │   ├── xmod/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_xmod.py
│   │   ├── yolos/
│   │   │   ├── __init__.py
│   │   │   ├── test_image_processing_yolos.py
│   │   │   └── test_modeling_yolos.py
│   │   ├── yoso/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_yoso.py
│   │   ├── youtu/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_youtu.py
│   │   ├── zamba/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_zamba.py
│   │   ├── zamba2/
│   │   │   ├── __init__.py
│   │   │   └── test_modeling_zamba2.py
│   │   └── zoedepth/
│   │       ├── __init__.py
│   │       ├── test_image_processing_zoedepth.py
│   │       └── test_modeling_zoedepth.py
│   ├── optimization/
│   │   ├── __init__.py
│   │   ├── test_greedy_lr.py
│   │   └── test_optimization.py
│   ├── peft_integration/
│   │   └── test_peft_integration.py
│   ├── pipelines/
│   │   ├── __init__.py
│   │   ├── test_pipelines_any_to_any.py
│   │   ├── test_pipelines_audio_classification.py
│   │   ├── test_pipelines_automatic_speech_recognition.py
│   │   ├── test_pipelines_common.py
│   │   ├── test_pipelines_depth_estimation.py
│   │   ├── test_pipelines_document_question_answering.py
│   │   ├── test_pipelines_feature_extraction.py
│   │   ├── test_pipelines_fill_mask.py
│   │   ├── test_pipelines_image_classification.py
│   │   ├── test_pipelines_image_feature_extraction.py
│   │   ├── test_pipelines_image_segmentation.py
│   │   ├── test_pipelines_image_text_to_text.py
│   │   ├── test_pipelines_keypoint_matching.py
│   │   ├── test_pipelines_mask_generation.py
│   │   ├── test_pipelines_object_detection.py
│   │   ├── test_pipelines_question_answering.py
│   │   ├── test_pipelines_table_question_answering.py
│   │   ├── test_pipelines_text_classification.py
│   │   ├── test_pipelines_text_generation.py
│   │   ├── test_pipelines_text_to_audio.py
│   │   ├── test_pipelines_token_classification.py
│   │   ├── test_pipelines_video_classification.py
│   │   ├── test_pipelines_zero_shot.py
│   │   ├── test_pipelines_zero_shot_audio_classification.py
│   │   ├── test_pipelines_zero_shot_image_classification.py
│   │   └── test_pipelines_zero_shot_object_detection.py
│   ├── quantization/
│   │   ├── aqlm_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_aqlm.py
│   │   ├── autoawq/
│   │   │   ├── __init__.py
│   │   │   └── test_awq.py
│   │   ├── autoround/
│   │   │   ├── __init__.py
│   │   │   └── test_auto_round.py
│   │   ├── bitnet_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_bitnet.py
│   │   ├── bnb/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── test_4bit.py
│   │   │   └── test_mixed_int8.py
│   │   ├── compressed_tensors_integration/
│   │   │   ├── __init__.py
│   │   │   ├── test_compressed_models.py
│   │   │   └── test_compressed_tensors.py
│   │   ├── eetq_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_eetq.py
│   │   ├── fbgemm_fp8/
│   │   │   ├── __init__.py
│   │   │   └── test_fbgemm_fp8.py
│   │   ├── finegrained_fp8/
│   │   │   ├── __init__.py
│   │   │   └── test_fp8.py
│   │   ├── fouroversix_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_fouroversix.py
│   │   ├── fp_quant_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_fp_quant.py
│   │   ├── ggml/
│   │   │   ├── __init__.py
│   │   │   └── test_ggml.py
│   │   ├── gptq/
│   │   │   ├── __init__.py
│   │   │   └── test_gptq.py
│   │   ├── higgs/
│   │   │   ├── __init__.py
│   │   │   └── test_higgs.py
│   │   ├── hqq/
│   │   │   └── test_hqq.py
│   │   ├── metal/
│   │   │   └── test_metal.py
│   │   ├── mxfp4/
│   │   │   ├── __init__.py
│   │   │   └── test_mxfp4.py
│   │   ├── quanto_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_quanto.py
│   │   ├── quark_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_quark.py
│   │   ├── sinq/
│   │   │   └── test_sinq.py
│   │   ├── spqr_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_spqr.py
│   │   ├── torchao_integration/
│   │   │   ├── __init__.py
│   │   │   └── test_torchao.py
│   │   └── vptq_integration/
│   │       ├── __init__.py
│   │       └── test_vptq.py
│   ├── repo_utils/
│   │   ├── modular/
│   │   │   └── test_conversion_order.py
│   │   ├── test_check_copies.py
│   │   ├── test_check_docstrings.py
│   │   ├── test_check_repo.py
│   │   ├── test_checkers.py
│   │   ├── test_get_test_info.py
│   │   ├── test_mlinter.py
│   │   └── test_tests_fetcher.py
│   ├── sagemaker/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── scripts/
│   │   │   └── pytorch/
│   │   │       ├── requirements.txt
│   │   │       ├── run_ddp.py
│   │   │       └── run_glue_model_parallelism.py
│   │   ├── test_multi_node_data_parallel.py
│   │   ├── test_multi_node_model_parallel.py
│   │   └── test_single_node_gpu.py
│   ├── tensor_parallel/
│   │   └── test_tensor_parallel.py
│   ├── test_backbone_common.py
│   ├── test_configuration_common.py
│   ├── test_executorch.py
│   ├── test_feature_extraction_common.py
│   ├── test_image_processing_common.py
│   ├── test_image_transforms.py
│   ├── test_modeling_common.py
│   ├── test_monkey_patching.py
│   ├── test_pipeline_mixin.py
│   ├── test_processing_common.py
│   ├── test_sentencepiece_backend_mixin.py
│   ├── test_sequence_feature_extraction_common.py
│   ├── test_tensor_parallel_mixin.py
│   ├── test_tokenization_common.py
│   ├── test_tokenization_mistral_common.py
│   ├── test_tokenizers_backend_mixin.py
│   ├── test_training_mixin.py
│   ├── test_video_processing_common.py
│   ├── tokenization/
│   │   ├── __init__.py
│   │   ├── test_tokenization_fast.py
│   │   └── test_tokenization_utils.py
│   ├── trainer/
│   │   ├── TESTING_GUIDE.md
│   │   ├── __init__.py
│   │   ├── distributed/
│   │   │   ├── __init__.py
│   │   │   ├── accelerate_configs/
│   │   │   │   ├── ddp.yaml
│   │   │   │   ├── deepspeed_zero2.yaml
│   │   │   │   ├── deepspeed_zero2_sp.yaml
│   │   │   │   ├── deepspeed_zero3.yaml
│   │   │   │   ├── fsdp.yaml
│   │   │   │   ├── fsdp2.yaml
│   │   │   │   └── fsdp2_cp.yaml
│   │   │   ├── scripts/
│   │   │   │   ├── dispatch_batches.py
│   │   │   │   ├── ds_config_zero2.json
│   │   │   │   ├── ds_config_zero3.json
│   │   │   │   ├── eval_ddp.py
│   │   │   │   ├── fsdp_generate.py
│   │   │   │   ├── loss_averaging.py
│   │   │   │   ├── torchrun_env_check.py
│   │   │   │   ├── train.py
│   │   │   │   ├── vit_feature_extractor.json
│   │   │   │   └── worker_seed.py
│   │   │   ├── test_trainer_distributed.py
│   │   │   ├── test_trainer_distributed_ddp.py
│   │   │   ├── test_trainer_distributed_deepspeed.py
│   │   │   └── test_trainer_distributed_fsdp.py
│   │   ├── test_data_collator.py
│   │   ├── test_trainer.py
│   │   ├── test_trainer_accelerator.py
│   │   ├── test_trainer_callback.py
│   │   ├── test_trainer_checkpointing.py
│   │   ├── test_trainer_data.py
│   │   ├── test_trainer_evaluation.py
│   │   ├── test_trainer_hyperparameter.py
│   │   ├── test_trainer_optimizers.py
│   │   ├── test_trainer_seq2seq.py
│   │   ├── test_training_args.py
│   │   └── trainer_test_utils.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── import_structures/
│   │   │   ├── failing_export.py
│   │   │   ├── import_structure_raw_register.py
│   │   │   ├── import_structure_raw_register_with_versions.py
│   │   │   ├── import_structure_register_with_comments.py
│   │   │   └── import_structure_register_with_duplicates.py
│   │   ├── test_activations.py
│   │   ├── test_add_new_model_like.py
│   │   ├── test_attention_visualizer.py
│   │   ├── test_audio_utils.py
│   │   ├── test_auto_docstring.py
│   │   ├── test_backbone_utils.py
│   │   ├── test_cache_utils.py
│   │   ├── test_chat_parsing_utils.py
│   │   ├── test_chat_template_utils.py
│   │   ├── test_configuration_utils.py
│   │   ├── test_convert_slow_tokenizer.py
│   │   ├── test_core_model_loading.py
│   │   ├── test_deprecation.py
│   │   ├── test_doc_samples.py
│   │   ├── test_dynamic_module_utils.py
│   │   ├── test_expectations.py
│   │   ├── test_feature_extraction_utils.py
│   │   ├── test_file_utils.py
│   │   ├── test_generic.py
│   │   ├── test_hf_argparser.py
│   │   ├── test_hub_utils.py
│   │   ├── test_image_processing_utils.py
│   │   ├── test_image_utils.py
│   │   ├── test_import_structure.py
│   │   ├── test_import_utils.py
│   │   ├── test_logging.py
│   │   ├── test_masking_utils.py
│   │   ├── test_model_debugging_utils.py
│   │   ├── test_model_output.py
│   │   ├── test_modeling_rope_utils.py
│   │   ├── test_modeling_utils.py
│   │   ├── test_network_logging.py
│   │   ├── test_offline.py
│   │   ├── test_skip_decorators.py
│   │   ├── test_tokenization_utils.py
│   │   ├── test_versions_utils.py
│   │   ├── test_video_utils.py
│   │   └── tiny_model_summary.json
│   └── vlm_tester.py
└── utils/
    ├── add_dates.py
    ├── add_pipeline_model_mapping_to_test.py
    ├── aggregate_failure_reports.py
    ├── check_bad_commit.py
    ├── check_config_attributes.py
    ├── check_config_docstrings.py
    ├── check_copies.py
    ├── check_doc_toc.py
    ├── check_docstrings.py
    ├── check_doctest_list.py
    ├── check_dummies.py
    ├── check_import_complexity.py
    ├── check_inits.py
    ├── check_model_tester.py
    ├── check_modeling_structure.py
    ├── check_modular_conversion.py
    ├── check_pipeline_typing.py
    ├── check_repo.py
    ├── check_self_hosted_runner.py
    ├── check_types.py
    ├── checkers.py
    ├── collated_reports.py
    ├── compare_test_runs.py
    ├── create_dependency_mapping.py
    ├── create_dummy_models.py
    ├── custom_init_isort.py
    ├── deprecate_models.py
    ├── download_glue_data.py
    ├── extract_metadata.py
    ├── extract_pr_number_from_circleci.py
    ├── extract_warnings.py
    ├── fetch_hub_objects_for_ci.py
    ├── format_extras_slack_message.py
    ├── get_ci_error_statistics.py
    ├── get_github_job_time.py
    ├── get_modified_files.py
    ├── get_pr_run_slow_jobs.py
    ├── get_previous_daily_ci.py
    ├── get_test_info.py
    ├── get_test_reports.py
    ├── important_files.py
    ├── important_models.txt
    ├── mlinter/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── _helpers.py
    │   ├── mlinter.py
    │   ├── rules.toml
    │   ├── trf001.py
    │   ├── trf002.py
    │   ├── trf003.py
    │   ├── trf004.py
    │   ├── trf005.py
    │   ├── trf006.py
    │   ├── trf007.py
    │   ├── trf008.py
    │   ├── trf009.py
    │   ├── trf010.py
    │   ├── trf011.py
    │   ├── trf012.py
    │   ├── trf013.py
    │   └── trf014.py
    ├── models_to_deprecate.py
    ├── modular_integrations.py
    ├── modular_model_converter.py
    ├── modular_model_detector.py
    ├── not_doctested.txt
    ├── notification_service.py
    ├── notification_service_doc_tests.py
    ├── patch_helper.py
    ├── pr_slow_ci_models.py
    ├── print_env.py
    ├── process_bad_commit_report.py
    ├── process_circleci_workflow_test_reports.py
    ├── process_test_artifacts.py
    ├── release.py
    ├── scan_skipped_tests.py
    ├── set_cuda_devices_for_ci.py
    ├── slow_documentation_tests.txt
    ├── sort_auto_mappings.py
    ├── split_doctest_jobs.py
    ├── split_model_tests.py
    ├── test_module/
    │   ├── __init__.py
    │   ├── custom_configuration.py
    │   ├── custom_feature_extraction.py
    │   ├── custom_image_processing.py
    │   ├── custom_modeling.py
    │   ├── custom_pipeline.py
    │   ├── custom_processing.py
    │   ├── custom_tokenization.py
    │   ├── custom_tokenization_fast.py
    │   └── custom_video_processing.py
    ├── tests_fetcher.py
    ├── update_metadata.py
    └── update_tiny_models.py