Copy disabled (too large)
Download .txt
Showing preview only (80,797K chars total). Download the full file to get everything.
Repository: huggingface/transformers
Branch: main
Commit: f38d6639fa6b
Files: 5705
Total size: 74.1 MB
Directory structure:
gitextract_jggdbnd_/
├── .ai/
│ ├── AGENTS.md
│ └── skills/
│ └── add-or-fix-type-checking/
│ └── SKILL.md
├── .circleci/
│ ├── TROUBLESHOOT.md
│ ├── config.yml
│ ├── create_circleci_config.py
│ └── parse_test_outputs.py
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug-report.yml
│ │ ├── config.yml
│ │ ├── feature-request.yml
│ │ ├── i18n.md
│ │ ├── migration.yml
│ │ └── new-model-addition.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── conda/
│ │ ├── build.sh
│ │ └── meta.yaml
│ ├── copilot-instructions.md
│ ├── scripts/
│ │ ├── assign_reviewers.py
│ │ └── codeowners_for_review_action
│ └── workflows/
│ ├── TROUBLESHOOT.md
│ ├── add-model-like.yml
│ ├── anti-slop.yml
│ ├── assign-reviewers.yml
│ ├── benchmark.yml
│ ├── benchmark_v2.yml
│ ├── benchmark_v2_a10_caller.yml
│ ├── benchmark_v2_mi325_caller.yml
│ ├── build-ci-docker-images.yml
│ ├── build-docker-images.yml
│ ├── build-nightly-ci-docker-images.yml
│ ├── build-past-ci-docker-images.yml
│ ├── build_documentation.yml
│ ├── build_pr_documentation.yml
│ ├── check-workflow-permissions.yml
│ ├── check_failed_tests.yml
│ ├── check_tiny_models.yml
│ ├── circleci-failure-summary-comment.yml
│ ├── codeql.yml
│ ├── collated-reports.yml
│ ├── doctest_job.yml
│ ├── doctests.yml
│ ├── extras-smoke-test.yml
│ ├── get-pr-info.yml
│ ├── get-pr-number.yml
│ ├── model_jobs.yml
│ ├── model_jobs_intel_gaudi.yml
│ ├── new_model_pr_merged_notification.yml
│ ├── pr-repo-consistency-bot.yml
│ ├── pr_build_doc_with_comment.yml
│ ├── pr_slow_ci_suggestion.yml
│ ├── push-important-models.yml
│ ├── release-conda.yml
│ ├── release.yml
│ ├── self-comment-ci.yml
│ ├── self-nightly-caller.yml
│ ├── self-nightly-past-ci-caller.yml
│ ├── self-past-caller.yml
│ ├── self-scheduled-amd-caller.yml
│ ├── self-scheduled-amd-mi250-caller.yml
│ ├── self-scheduled-amd-mi325-caller.yml
│ ├── self-scheduled-amd-mi355-caller.yml
│ ├── self-scheduled-caller.yml
│ ├── self-scheduled-flash-attn-caller.yml
│ ├── self-scheduled-intel-gaudi.yml
│ ├── self-scheduled-intel-gaudi3-caller.yml
│ ├── self-scheduled.yml
│ ├── slack-report.yml
│ ├── ssh-runner.yml
│ ├── stale.yml
│ ├── trl-ci-bot.yml
│ ├── trufflehog.yml
│ ├── update_metdata.yml
│ └── upload_pr_documentation.yml
├── .gitignore
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── ISSUES.md
├── LICENSE
├── MIGRATION_GUIDE_V5.md
├── Makefile
├── README.md
├── SECURITY.md
├── awesome-transformers.md
├── benchmark/
│ ├── .gitignore
│ ├── README.md
│ ├── __init__.py
│ ├── benches/
│ │ └── llama.py
│ ├── benchmark.py
│ ├── benchmarks_entrypoint.py
│ ├── config/
│ │ └── generation.yaml
│ ├── default.yml
│ ├── grafana_dashboard.json
│ ├── grafana_datasource.yaml
│ ├── optimum_benchmark_wrapper.py
│ ├── requirements.txt
│ └── utils/
│ └── init_db.sql
├── benchmark_v2/
│ ├── .gitignore
│ ├── README.md
│ ├── benchmark_scripts/
│ │ └── continuous_batching_overall.py
│ ├── framework/
│ │ ├── benchmark_config.py
│ │ ├── benchmark_runner.py
│ │ ├── data_classes.py
│ │ └── hardware_metrics.py
│ ├── requirements.txt
│ └── run_benchmarks.py
├── conftest.py
├── docker/
│ ├── README.md
│ ├── consistency.dockerfile
│ ├── custom-tokenizers.dockerfile
│ ├── examples-torch.dockerfile
│ ├── exotic-models.dockerfile
│ ├── pipeline-torch.dockerfile
│ ├── quality.dockerfile
│ ├── torch-light.dockerfile
│ ├── transformers-all-latest-gpu/
│ │ └── Dockerfile
│ ├── transformers-doc-builder/
│ │ └── Dockerfile
│ ├── transformers-gpu/
│ │ └── Dockerfile
│ ├── transformers-intel-cpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-amd-gpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-deepspeed-amd-gpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-deepspeed-latest-gpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-deepspeed-nightly-gpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-gpu/
│ │ └── Dockerfile
│ ├── transformers-pytorch-tpu/
│ │ ├── Dockerfile
│ │ ├── bert-base-cased.jsonnet
│ │ ├── dataset.yaml
│ │ └── docker-entrypoint.sh
│ ├── transformers-pytorch-xpu/
│ │ └── Dockerfile
│ └── transformers-quantization-latest-gpu/
│ └── Dockerfile
├── docs/
│ ├── README.md
│ ├── TRANSLATING.md
│ └── source/
│ ├── _config.py
│ ├── ar/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── attention.md
│ │ ├── autoclass_tutorial.md
│ │ ├── bertology.md
│ │ ├── chat_templating.md
│ │ ├── community.md
│ │ ├── conversations.md
│ │ ├── create_a_model.md
│ │ ├── custom_models.md
│ │ ├── fast_tokenizers.md
│ │ ├── gguf.md
│ │ ├── glossary.md
│ │ ├── how_to_hack_models.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── llm_tutorial.md
│ │ ├── llm_tutorial_optimization.md
│ │ ├── model_memory_anatomy.md
│ │ ├── model_sharing.md
│ │ ├── model_summary.md
│ │ ├── modular_transformers.md
│ │ ├── multilingual.md
│ │ ├── notebooks.md
│ │ ├── pad_truncation.md
│ │ ├── peft.md
│ │ ├── perplexity.md
│ │ ├── philosophy.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pipeline_webserver.md
│ │ ├── preprocessing.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── sagemaker.md
│ │ ├── serialization.md
│ │ ├── task_summary.md
│ │ ├── tasks/
│ │ │ ├── language_modeling.md
│ │ │ ├── masked_language_modeling.md
│ │ │ ├── multiple_choice.md
│ │ │ ├── question_answering.md
│ │ │ ├── sequence_classification.md
│ │ │ ├── summarization.md
│ │ │ ├── token_classification.md
│ │ │ └── translation.md
│ │ ├── tasks_explained.md
│ │ ├── tiktoken.md
│ │ ├── tokenizer_summary.md
│ │ ├── trainer.md
│ │ ├── training.md
│ │ └── troubleshooting.md
│ ├── de/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── add_new_model.md
│ │ ├── add_new_pipeline.md
│ │ ├── autoclass_tutorial.md
│ │ ├── contributing.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── llm_tutorial.md
│ │ ├── model_sharing.md
│ │ ├── peft.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pr_checks.md
│ │ ├── preprocessing.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── testing.md
│ │ └── training.md
│ ├── en/
│ │ ├── _config.py
│ │ ├── _redirects.yml
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── accelerator_selection.md
│ │ ├── add_new_model.md
│ │ ├── add_new_pipeline.md
│ │ ├── assisted_decoding.md
│ │ ├── attention_interface.md
│ │ ├── auto_docstring.md
│ │ ├── backbones.md
│ │ ├── cache_explanation.md
│ │ ├── chat_content_patterns.md
│ │ ├── chat_extras.md
│ │ ├── chat_response_parsing.md
│ │ ├── chat_templating.md
│ │ ├── chat_templating_multimodal.md
│ │ ├── chat_templating_writing.md
│ │ ├── community.md
│ │ ├── community_integrations/
│ │ │ ├── axolotl.md
│ │ │ ├── candle.md
│ │ │ ├── executorch.md
│ │ │ ├── llama_cpp.md
│ │ │ ├── mlx.md
│ │ │ ├── nanotron.md
│ │ │ ├── nemo_automodel_finetuning.md
│ │ │ ├── nemo_automodel_pretraining.md
│ │ │ ├── sglang.md
│ │ │ ├── tensorrt-llm.md
│ │ │ ├── torchtitan.md
│ │ │ ├── transformers_as_backend.md
│ │ │ ├── trl.md
│ │ │ ├── unsloth.md
│ │ │ └── vllm.md
│ │ ├── continuous_batching.md
│ │ ├── continuous_batching_architecture.md
│ │ ├── conversations.md
│ │ ├── custom_models.md
│ │ ├── custom_tokenizers.md
│ │ ├── data_collators.md
│ │ ├── debugging.md
│ │ ├── deepspeed.md
│ │ ├── expert_parallelism.md
│ │ ├── experts_interface.md
│ │ ├── fast_tokenizers.md
│ │ ├── feature_extractors.md
│ │ ├── fsdp.md
│ │ ├── generation_features.md
│ │ ├── generation_strategies.md
│ │ ├── gguf.md
│ │ ├── glossary.md
│ │ ├── how_to_hack_models.md
│ │ ├── hpo_train.md
│ │ ├── image_processors.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── internal/
│ │ │ ├── audio_utils.md
│ │ │ ├── file_utils.md
│ │ │ ├── generation_utils.md
│ │ │ ├── image_processing_utils.md
│ │ │ ├── import_utils.md
│ │ │ ├── model_debugging_utils.md
│ │ │ ├── modeling_utils.md
│ │ │ ├── pipelines_utils.md
│ │ │ ├── rope_utils.md
│ │ │ ├── time_series_utils.md
│ │ │ ├── tokenization_utils.md
│ │ │ └── trainer_utils.md
│ │ ├── kernel_doc/
│ │ │ ├── loading_kernels.md
│ │ │ └── overview.md
│ │ ├── kv_cache.md
│ │ ├── llm_tutorial.md
│ │ ├── llm_tutorial_optimization.md
│ │ ├── main_classes/
│ │ │ ├── backbones.md
│ │ │ ├── callback.md
│ │ │ ├── configuration.md
│ │ │ ├── continuous_batching.md
│ │ │ ├── data_collator.md
│ │ │ ├── deepspeed.md
│ │ │ ├── executorch.md
│ │ │ ├── feature_extractor.md
│ │ │ ├── image_processor.md
│ │ │ ├── kernels.md
│ │ │ ├── logging.md
│ │ │ ├── model.md
│ │ │ ├── optimizer_schedules.md
│ │ │ ├── output.md
│ │ │ ├── peft.md
│ │ │ ├── pipelines.md
│ │ │ ├── processors.md
│ │ │ ├── quantization.md
│ │ │ ├── text_generation.md
│ │ │ ├── tokenizer.md
│ │ │ ├── trainer.md
│ │ │ └── video_processor.md
│ │ ├── model_doc/
│ │ │ ├── afmoe.md
│ │ │ ├── aimv2.md
│ │ │ ├── albert.md
│ │ │ ├── align.md
│ │ │ ├── altclip.md
│ │ │ ├── apertus.md
│ │ │ ├── arcee.md
│ │ │ ├── aria.md
│ │ │ ├── audio-spectrogram-transformer.md
│ │ │ ├── audioflamingo3.md
│ │ │ ├── auto.md
│ │ │ ├── autoformer.md
│ │ │ ├── aya_vision.md
│ │ │ ├── bamba.md
│ │ │ ├── bark.md
│ │ │ ├── bart.md
│ │ │ ├── barthez.md
│ │ │ ├── bartpho.md
│ │ │ ├── beit.md
│ │ │ ├── bert-generation.md
│ │ │ ├── bert-japanese.md
│ │ │ ├── bert.md
│ │ │ ├── bertweet.md
│ │ │ ├── big_bird.md
│ │ │ ├── bigbird_pegasus.md
│ │ │ ├── biogpt.md
│ │ │ ├── bit.md
│ │ │ ├── bitnet.md
│ │ │ ├── blenderbot-small.md
│ │ │ ├── blenderbot.md
│ │ │ ├── blip-2.md
│ │ │ ├── blip.md
│ │ │ ├── bloom.md
│ │ │ ├── blt.md
│ │ │ ├── bridgetower.md
│ │ │ ├── bros.md
│ │ │ ├── byt5.md
│ │ │ ├── camembert.md
│ │ │ ├── canine.md
│ │ │ ├── chameleon.md
│ │ │ ├── chinese_clip.md
│ │ │ ├── chmv2.md
│ │ │ ├── clap.md
│ │ │ ├── clip.md
│ │ │ ├── clipseg.md
│ │ │ ├── clvp.md
│ │ │ ├── code_llama.md
│ │ │ ├── codegen.md
│ │ │ ├── cohere.md
│ │ │ ├── cohere2.md
│ │ │ ├── cohere2_vision.md
│ │ │ ├── cohere_asr.md
│ │ │ ├── colmodernvbert.md
│ │ │ ├── colpali.md
│ │ │ ├── colqwen2.md
│ │ │ ├── conditional_detr.md
│ │ │ ├── convbert.md
│ │ │ ├── convnext.md
│ │ │ ├── convnextv2.md
│ │ │ ├── cpm.md
│ │ │ ├── cpmant.md
│ │ │ ├── csm.md
│ │ │ ├── ctrl.md
│ │ │ ├── cvt.md
│ │ │ ├── cwm.md
│ │ │ ├── d_fine.md
│ │ │ ├── dab-detr.md
│ │ │ ├── dac.md
│ │ │ ├── data2vec.md
│ │ │ ├── dbrx.md
│ │ │ ├── deberta-v2.md
│ │ │ ├── deberta.md
│ │ │ ├── decision_transformer.md
│ │ │ ├── deepseek_v2.md
│ │ │ ├── deepseek_v3.md
│ │ │ ├── deepseek_vl.md
│ │ │ ├── deepseek_vl_hybrid.md
│ │ │ ├── deformable_detr.md
│ │ │ ├── deit.md
│ │ │ ├── deplot.md
│ │ │ ├── depth_anything.md
│ │ │ ├── depth_anything_v2.md
│ │ │ ├── depth_pro.md
│ │ │ ├── detr.md
│ │ │ ├── dia.md
│ │ │ ├── dialogpt.md
│ │ │ ├── diffllama.md
│ │ │ ├── dinat.md
│ │ │ ├── dinov2.md
│ │ │ ├── dinov2_with_registers.md
│ │ │ ├── dinov3.md
│ │ │ ├── distilbert.md
│ │ │ ├── dit.md
│ │ │ ├── doge.md
│ │ │ ├── donut.md
│ │ │ ├── dots1.md
│ │ │ ├── dpr.md
│ │ │ ├── dpt.md
│ │ │ ├── edgetam.md
│ │ │ ├── edgetam_video.md
│ │ │ ├── efficientloftr.md
│ │ │ ├── efficientnet.md
│ │ │ ├── electra.md
│ │ │ ├── emu3.md
│ │ │ ├── encodec.md
│ │ │ ├── encoder-decoder.md
│ │ │ ├── eomt.md
│ │ │ ├── eomt_dinov3.md
│ │ │ ├── ernie.md
│ │ │ ├── ernie4_5.md
│ │ │ ├── ernie4_5_moe.md
│ │ │ ├── ernie4_5_vl_moe.md
│ │ │ ├── esm.md
│ │ │ ├── eurobert.md
│ │ │ ├── evolla.md
│ │ │ ├── exaone4.md
│ │ │ ├── exaone_moe.md
│ │ │ ├── falcon.md
│ │ │ ├── falcon3.md
│ │ │ ├── falcon_h1.md
│ │ │ ├── falcon_mamba.md
│ │ │ ├── fast_vlm.md
│ │ │ ├── fastspeech2_conformer.md
│ │ │ ├── flan-t5.md
│ │ │ ├── flan-ul2.md
│ │ │ ├── flaubert.md
│ │ │ ├── flava.md
│ │ │ ├── flex_olmo.md
│ │ │ ├── florence2.md
│ │ │ ├── fnet.md
│ │ │ ├── focalnet.md
│ │ │ ├── fsmt.md
│ │ │ ├── funnel.md
│ │ │ ├── fuyu.md
│ │ │ ├── gemma.md
│ │ │ ├── gemma2.md
│ │ │ ├── gemma3.md
│ │ │ ├── gemma3n.md
│ │ │ ├── git.md
│ │ │ ├── glm.md
│ │ │ ├── glm4.md
│ │ │ ├── glm46v.md
│ │ │ ├── glm4_moe.md
│ │ │ ├── glm4_moe_lite.md
│ │ │ ├── glm4v.md
│ │ │ ├── glm4v_moe.md
│ │ │ ├── glm_image.md
│ │ │ ├── glm_moe_dsa.md
│ │ │ ├── glm_ocr.md
│ │ │ ├── glmasr.md
│ │ │ ├── glpn.md
│ │ │ ├── got_ocr2.md
│ │ │ ├── gpt-sw3.md
│ │ │ ├── gpt2.md
│ │ │ ├── gpt_bigcode.md
│ │ │ ├── gpt_neo.md
│ │ │ ├── gpt_neox.md
│ │ │ ├── gpt_neox_japanese.md
│ │ │ ├── gpt_oss.md
│ │ │ ├── gptj.md
│ │ │ ├── granite.md
│ │ │ ├── granite_speech.md
│ │ │ ├── granitemoe.md
│ │ │ ├── granitemoehybrid.md
│ │ │ ├── granitemoeshared.md
│ │ │ ├── granitevision.md
│ │ │ ├── grounding-dino.md
│ │ │ ├── groupvit.md
│ │ │ ├── helium.md
│ │ │ ├── herbert.md
│ │ │ ├── hgnet_v2.md
│ │ │ ├── hiera.md
│ │ │ ├── higgs_audio_v2.md
│ │ │ ├── higgs_audio_v2_tokenizer.md
│ │ │ ├── hubert.md
│ │ │ ├── hunyuan_v1_dense.md
│ │ │ ├── hunyuan_v1_moe.md
│ │ │ ├── ibert.md
│ │ │ ├── idefics.md
│ │ │ ├── idefics2.md
│ │ │ ├── idefics3.md
│ │ │ ├── ijepa.md
│ │ │ ├── imagegpt.md
│ │ │ ├── informer.md
│ │ │ ├── instructblip.md
│ │ │ ├── instructblipvideo.md
│ │ │ ├── internvl.md
│ │ │ ├── jais2.md
│ │ │ ├── jamba.md
│ │ │ ├── janus.md
│ │ │ ├── jetmoe.md
│ │ │ ├── jina_embeddings_v3.md
│ │ │ ├── kosmos-2.md
│ │ │ ├── kosmos2_5.md
│ │ │ ├── kyutai_speech_to_text.md
│ │ │ ├── lasr.md
│ │ │ ├── layoutlm.md
│ │ │ ├── layoutlmv2.md
│ │ │ ├── layoutlmv3.md
│ │ │ ├── layoutxlm.md
│ │ │ ├── led.md
│ │ │ ├── levit.md
│ │ │ ├── lfm2.md
│ │ │ ├── lfm2_moe.md
│ │ │ ├── lfm2_vl.md
│ │ │ ├── lightglue.md
│ │ │ ├── lighton_ocr.md
│ │ │ ├── lilt.md
│ │ │ ├── llama.md
│ │ │ ├── llama2.md
│ │ │ ├── llama3.md
│ │ │ ├── llama4.md
│ │ │ ├── llava.md
│ │ │ ├── llava_next.md
│ │ │ ├── llava_next_video.md
│ │ │ ├── llava_onevision.md
│ │ │ ├── longcat_flash.md
│ │ │ ├── longformer.md
│ │ │ ├── longt5.md
│ │ │ ├── luke.md
│ │ │ ├── lw_detr.md
│ │ │ ├── lxmert.md
│ │ │ ├── m2m_100.md
│ │ │ ├── madlad-400.md
│ │ │ ├── mamba.md
│ │ │ ├── mamba2.md
│ │ │ ├── marian.md
│ │ │ ├── markuplm.md
│ │ │ ├── mask2former.md
│ │ │ ├── maskformer.md
│ │ │ ├── matcha.md
│ │ │ ├── mbart.md
│ │ │ ├── megatron-bert.md
│ │ │ ├── megatron_gpt2.md
│ │ │ ├── metaclip_2.md
│ │ │ ├── mgp-str.md
│ │ │ ├── mimi.md
│ │ │ ├── minimax.md
│ │ │ ├── minimax_m2.md
│ │ │ ├── ministral.md
│ │ │ ├── ministral3.md
│ │ │ ├── mistral.md
│ │ │ ├── mistral3.md
│ │ │ ├── mistral4.md
│ │ │ ├── mixtral.md
│ │ │ ├── mlcd.md
│ │ │ ├── mllama.md
│ │ │ ├── mluke.md
│ │ │ ├── mm-grounding-dino.md
│ │ │ ├── mms.md
│ │ │ ├── mobilebert.md
│ │ │ ├── mobilenet_v1.md
│ │ │ ├── mobilenet_v2.md
│ │ │ ├── mobilevit.md
│ │ │ ├── mobilevitv2.md
│ │ │ ├── modernbert-decoder.md
│ │ │ ├── modernbert.md
│ │ │ ├── modernvbert.md
│ │ │ ├── moonshine.md
│ │ │ ├── moonshine_streaming.md
│ │ │ ├── moshi.md
│ │ │ ├── mpnet.md
│ │ │ ├── mpt.md
│ │ │ ├── mra.md
│ │ │ ├── mt5.md
│ │ │ ├── musicflamingo.md
│ │ │ ├── musicgen.md
│ │ │ ├── musicgen_melody.md
│ │ │ ├── mvp.md
│ │ │ ├── myt5.md
│ │ │ ├── nanochat.md
│ │ │ ├── nemotron.md
│ │ │ ├── nemotron_h.md
│ │ │ ├── nllb-moe.md
│ │ │ ├── nllb.md
│ │ │ ├── nougat.md
│ │ │ ├── nystromformer.md
│ │ │ ├── olmo.md
│ │ │ ├── olmo2.md
│ │ │ ├── olmo3.md
│ │ │ ├── olmo_hybrid.md
│ │ │ ├── olmoe.md
│ │ │ ├── omdet-turbo.md
│ │ │ ├── oneformer.md
│ │ │ ├── openai-gpt.md
│ │ │ ├── opt.md
│ │ │ ├── ovis2.md
│ │ │ ├── owlv2.md
│ │ │ ├── owlvit.md
│ │ │ ├── paddleocr_vl.md
│ │ │ ├── paligemma.md
│ │ │ ├── parakeet.md
│ │ │ ├── patchtsmixer.md
│ │ │ ├── patchtst.md
│ │ │ ├── pe_audio.md
│ │ │ ├── pe_audio_video.md
│ │ │ ├── pe_video.md
│ │ │ ├── pegasus.md
│ │ │ ├── pegasus_x.md
│ │ │ ├── perceiver.md
│ │ │ ├── perception_lm.md
│ │ │ ├── persimmon.md
│ │ │ ├── phi.md
│ │ │ ├── phi3.md
│ │ │ ├── phi4_multimodal.md
│ │ │ ├── phimoe.md
│ │ │ ├── phobert.md
│ │ │ ├── pi0.md
│ │ │ ├── pix2struct.md
│ │ │ ├── pixio.md
│ │ │ ├── pixtral.md
│ │ │ ├── plbart.md
│ │ │ ├── poolformer.md
│ │ │ ├── pop2piano.md
│ │ │ ├── pp_chart2table.md
│ │ │ ├── pp_doclayout_v2.md
│ │ │ ├── pp_doclayout_v3.md
│ │ │ ├── pp_lcnet.md
│ │ │ ├── pp_lcnet_v3.md
│ │ │ ├── pp_ocrv5_mobile_det.md
│ │ │ ├── pp_ocrv5_mobile_rec.md
│ │ │ ├── pp_ocrv5_server_det.md
│ │ │ ├── pp_ocrv5_server_rec.md
│ │ │ ├── prompt_depth_anything.md
│ │ │ ├── prophetnet.md
│ │ │ ├── pvt.md
│ │ │ ├── pvt_v2.md
│ │ │ ├── qwen2.md
│ │ │ ├── qwen2_5_omni.md
│ │ │ ├── qwen2_5_vl.md
│ │ │ ├── qwen2_audio.md
│ │ │ ├── qwen2_moe.md
│ │ │ ├── qwen2_vl.md
│ │ │ ├── qwen3.md
│ │ │ ├── qwen3_5.md
│ │ │ ├── qwen3_5_moe.md
│ │ │ ├── qwen3_moe.md
│ │ │ ├── qwen3_next.md
│ │ │ ├── qwen3_omni_moe.md
│ │ │ ├── qwen3_vl.md
│ │ │ ├── qwen3_vl_moe.md
│ │ │ ├── rag.md
│ │ │ ├── recurrent_gemma.md
│ │ │ ├── reformer.md
│ │ │ ├── regnet.md
│ │ │ ├── rembert.md
│ │ │ ├── resnet.md
│ │ │ ├── roberta-prelayernorm.md
│ │ │ ├── roberta.md
│ │ │ ├── roc_bert.md
│ │ │ ├── roformer.md
│ │ │ ├── rt_detr.md
│ │ │ ├── rt_detr_v2.md
│ │ │ ├── rwkv.md
│ │ │ ├── sam.md
│ │ │ ├── sam2.md
│ │ │ ├── sam2_video.md
│ │ │ ├── sam3.md
│ │ │ ├── sam3_tracker.md
│ │ │ ├── sam3_tracker_video.md
│ │ │ ├── sam3_video.md
│ │ │ ├── sam_hq.md
│ │ │ ├── seamless_m4t.md
│ │ │ ├── seamless_m4t_v2.md
│ │ │ ├── seed_oss.md
│ │ │ ├── segformer.md
│ │ │ ├── seggpt.md
│ │ │ ├── sew-d.md
│ │ │ ├── sew.md
│ │ │ ├── shieldgemma2.md
│ │ │ ├── siglip.md
│ │ │ ├── siglip2.md
│ │ │ ├── slanext.md
│ │ │ ├── smollm3.md
│ │ │ ├── smolvlm.md
│ │ │ ├── solar_open.md
│ │ │ ├── speech-encoder-decoder.md
│ │ │ ├── speech_to_text.md
│ │ │ ├── speecht5.md
│ │ │ ├── splinter.md
│ │ │ ├── squeezebert.md
│ │ │ ├── stablelm.md
│ │ │ ├── starcoder2.md
│ │ │ ├── superglue.md
│ │ │ ├── superpoint.md
│ │ │ ├── swiftformer.md
│ │ │ ├── swin.md
│ │ │ ├── swin2sr.md
│ │ │ ├── swinv2.md
│ │ │ ├── switch_transformers.md
│ │ │ ├── t5.md
│ │ │ ├── t5gemma.md
│ │ │ ├── t5gemma2.md
│ │ │ ├── t5v1.1.md
│ │ │ ├── table-transformer.md
│ │ │ ├── tapas.md
│ │ │ ├── textnet.md
│ │ │ ├── time_series_transformer.md
│ │ │ ├── timesfm.md
│ │ │ ├── timesfm2_5.md
│ │ │ ├── timesformer.md
│ │ │ ├── timm_wrapper.md
│ │ │ ├── trocr.md
│ │ │ ├── tvp.md
│ │ │ ├── udop.md
│ │ │ ├── ul2.md
│ │ │ ├── umt5.md
│ │ │ ├── unispeech-sat.md
│ │ │ ├── unispeech.md
│ │ │ ├── univnet.md
│ │ │ ├── upernet.md
│ │ │ ├── uvdoc.md
│ │ │ ├── vaultgemma.md
│ │ │ ├── vibevoice_acoustic_tokenizer.md
│ │ │ ├── vibevoice_asr.md
│ │ │ ├── video_llama_3.md
│ │ │ ├── video_llava.md
│ │ │ ├── videomae.md
│ │ │ ├── videomt.md
│ │ │ ├── vilt.md
│ │ │ ├── vipllava.md
│ │ │ ├── vision-encoder-decoder.md
│ │ │ ├── vision-text-dual-encoder.md
│ │ │ ├── visual_bert.md
│ │ │ ├── vit.md
│ │ │ ├── vit_mae.md
│ │ │ ├── vit_msn.md
│ │ │ ├── vitdet.md
│ │ │ ├── vitmatte.md
│ │ │ ├── vitpose.md
│ │ │ ├── vits.md
│ │ │ ├── vivit.md
│ │ │ ├── vjepa2.md
│ │ │ ├── voxtral.md
│ │ │ ├── voxtral_realtime.md
│ │ │ ├── wav2vec2-bert.md
│ │ │ ├── wav2vec2-conformer.md
│ │ │ ├── wav2vec2.md
│ │ │ ├── wav2vec2_phoneme.md
│ │ │ ├── wavlm.md
│ │ │ ├── whisper.md
│ │ │ ├── xclip.md
│ │ │ ├── xcodec.md
│ │ │ ├── xglm.md
│ │ │ ├── xlm-roberta-xl.md
│ │ │ ├── xlm-roberta.md
│ │ │ ├── xlm-v.md
│ │ │ ├── xlm.md
│ │ │ ├── xlnet.md
│ │ │ ├── xls_r.md
│ │ │ ├── xlsr_wav2vec2.md
│ │ │ ├── xlstm.md
│ │ │ ├── xmod.md
│ │ │ ├── yolos.md
│ │ │ ├── yoso.md
│ │ │ ├── youtu.md
│ │ │ ├── zamba.md
│ │ │ ├── zamba2.md
│ │ │ └── zoedepth.md
│ │ ├── model_memory_anatomy.md
│ │ ├── model_output_tracing.md
│ │ ├── model_sharing.md
│ │ ├── models.md
│ │ ├── models_timeline.md
│ │ ├── modular_transformers.md
│ │ ├── monkey_patching.md
│ │ ├── optimization_overview.md
│ │ ├── optimizers.md
│ │ ├── paged_attention.md
│ │ ├── peft.md
│ │ ├── perf_hardware.md
│ │ ├── perf_infer_gpu_multi.md
│ │ ├── perf_torch_compile.md
│ │ ├── perf_train_cpu.md
│ │ ├── perf_train_cpu_many.md
│ │ ├── perf_train_gaudi.md
│ │ ├── perf_train_gpu_many.md
│ │ ├── perf_train_gpu_one.md
│ │ ├── perf_train_special.md
│ │ ├── perplexity.md
│ │ ├── philosophy.md
│ │ ├── pipeline_gradio.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pipeline_webserver.md
│ │ ├── pr_checks.md
│ │ ├── processors.md
│ │ ├── quantization/
│ │ │ ├── aqlm.md
│ │ │ ├── auto_round.md
│ │ │ ├── awq.md
│ │ │ ├── bitnet.md
│ │ │ ├── bitsandbytes.md
│ │ │ ├── compressed_tensors.md
│ │ │ ├── concept_guide.md
│ │ │ ├── contribute.md
│ │ │ ├── eetq.md
│ │ │ ├── fbgemm_fp8.md
│ │ │ ├── finegrained_fp8.md
│ │ │ ├── fouroversix.md
│ │ │ ├── fp_quant.md
│ │ │ ├── gptq.md
│ │ │ ├── higgs.md
│ │ │ ├── hqq.md
│ │ │ ├── metal.md
│ │ │ ├── mxfp4.md
│ │ │ ├── optimum.md
│ │ │ ├── overview.md
│ │ │ ├── quanto.md
│ │ │ ├── quark.md
│ │ │ ├── selecting.md
│ │ │ ├── sinq.md
│ │ │ ├── spqr.md
│ │ │ ├── torchao.md
│ │ │ └── vptq.md
│ │ ├── quicktour.md
│ │ ├── reference/
│ │ │ └── environment_variables.md
│ │ ├── run_scripts.md
│ │ ├── serialization.md
│ │ ├── serve-cli/
│ │ │ ├── cursor.md
│ │ │ ├── jan.md
│ │ │ ├── openweb_ui.md
│ │ │ ├── serving.md
│ │ │ ├── serving_optims.md
│ │ │ └── tiny_agents.md
│ │ ├── tasks/
│ │ │ ├── any_to_any.md
│ │ │ ├── asr.md
│ │ │ ├── audio_classification.md
│ │ │ ├── audio_text_to_text.md
│ │ │ ├── document_question_answering.md
│ │ │ ├── idefics.md
│ │ │ ├── image_captioning.md
│ │ │ ├── image_classification.md
│ │ │ ├── image_feature_extraction.md
│ │ │ ├── image_text_to_text.md
│ │ │ ├── keypoint_detection.md
│ │ │ ├── keypoint_matching.md
│ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ ├── language_modeling.md
│ │ │ ├── mask_generation.md
│ │ │ ├── masked_language_modeling.md
│ │ │ ├── monocular_depth_estimation.md
│ │ │ ├── multiple_choice.md
│ │ │ ├── object_detection.md
│ │ │ ├── prompting.md
│ │ │ ├── question_answering.md
│ │ │ ├── semantic_segmentation.md
│ │ │ ├── sequence_classification.md
│ │ │ ├── summarization.md
│ │ │ ├── text-to-speech.md
│ │ │ ├── token_classification.md
│ │ │ ├── training_vision_backbone.md
│ │ │ ├── translation.md
│ │ │ ├── video_classification.md
│ │ │ ├── video_text_to_text.md
│ │ │ ├── visual_document_retrieval.md
│ │ │ ├── visual_question_answering.md
│ │ │ ├── zero_shot_image_classification.md
│ │ │ └── zero_shot_object_detection.md
│ │ ├── testing.md
│ │ ├── tokenizer_summary.md
│ │ ├── trainer.md
│ │ ├── trainer_callbacks.md
│ │ ├── trainer_customize.md
│ │ ├── training.md
│ │ ├── troubleshooting.md
│ │ ├── video_processors.md
│ │ └── weightconverter.md
│ ├── es/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── add_new_pipeline.md
│ │ ├── attention.md
│ │ ├── autoclass_tutorial.md
│ │ ├── bertology.md
│ │ ├── chat_templating.md
│ │ ├── community.md
│ │ ├── conversations.md
│ │ ├── create_a_model.md
│ │ ├── custom_models.md
│ │ ├── debugging.md
│ │ ├── fast_tokenizers.md
│ │ ├── glossary.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── model_memory_anatomy.md
│ │ ├── model_sharing.md
│ │ ├── multilingual.md
│ │ ├── pad_truncation.md
│ │ ├── performance.md
│ │ ├── perplexity.md
│ │ ├── philosophy.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pipeline_webserver.md
│ │ ├── pr_checks.md
│ │ ├── preprocessing.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── sagemaker.md
│ │ ├── task_summary.md
│ │ ├── tasks/
│ │ │ ├── asr.md
│ │ │ ├── audio_classification.md
│ │ │ ├── image_captioning.md
│ │ │ ├── image_classification.md
│ │ │ ├── language_modeling.md
│ │ │ ├── multiple_choice.md
│ │ │ ├── question_answering.md
│ │ │ └── summarization.md
│ │ ├── tasks_explained.md
│ │ ├── tokenizer_summary.md
│ │ ├── trainer.md
│ │ └── training.md
│ ├── fr/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── autoclass_tutorial.md
│ │ ├── in_translation.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── quicktour.md
│ │ ├── run_scripts_fr.md
│ │ ├── task_summary.md
│ │ ├── tasks_explained.md
│ │ └── tutoriel_pipeline.md
│ ├── hi/
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ └── pipeline_tutorial.md
│ ├── it/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── add_new_model.md
│ │ ├── add_new_pipeline.md
│ │ ├── autoclass_tutorial.md
│ │ ├── big_models.md
│ │ ├── community.md
│ │ ├── create_a_model.md
│ │ ├── custom_models.md
│ │ ├── debugging.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── migration.md
│ │ ├── model_sharing.md
│ │ ├── multilingual.md
│ │ ├── perf_hardware.md
│ │ ├── perf_infer_cpu.md
│ │ ├── perf_infer_gpu_many.md
│ │ ├── perf_infer_gpu_one.md
│ │ ├── perf_infer_special.md
│ │ ├── perf_train_cpu.md
│ │ ├── perf_train_cpu_many.md
│ │ ├── perf_train_special.md
│ │ ├── perf_train_tpu.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pr_checks.md
│ │ ├── preprocessing.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ └── training.md
│ ├── ja/
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── add_new_model.md
│ │ ├── attention.md
│ │ ├── autoclass_tutorial.md
│ │ ├── bertology.md
│ │ ├── big_models.md
│ │ ├── chat_templating.md
│ │ ├── community.md
│ │ ├── create_a_model.md
│ │ ├── custom_models.md
│ │ ├── fast_tokenizers.md
│ │ ├── generation_strategies.md
│ │ ├── glossary.md
│ │ ├── hpo_train.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── internal/
│ │ │ ├── audio_utils.md
│ │ │ ├── file_utils.md
│ │ │ ├── generation_utils.md
│ │ │ ├── image_processing_utils.md
│ │ │ ├── modeling_utils.md
│ │ │ ├── pipelines_utils.md
│ │ │ ├── time_series_utils.md
│ │ │ ├── tokenization_utils.md
│ │ │ └── trainer_utils.md
│ │ ├── llm_tutorial.md
│ │ ├── main_classes/
│ │ │ ├── callback.md
│ │ │ ├── configuration.md
│ │ │ ├── data_collator.md
│ │ │ ├── deepspeed.md
│ │ │ ├── feature_extractor.md
│ │ │ ├── image_processor.md
│ │ │ ├── logging.md
│ │ │ ├── model.md
│ │ │ ├── optimizer_schedules.md
│ │ │ ├── output.md
│ │ │ ├── pipelines.md
│ │ │ ├── processors.md
│ │ │ ├── quantization.md
│ │ │ ├── text_generation.md
│ │ │ ├── tokenizer.md
│ │ │ └── trainer.md
│ │ ├── model_doc/
│ │ │ ├── albert.md
│ │ │ ├── align.md
│ │ │ ├── altclip.md
│ │ │ ├── audio-spectrogram-transformer.md
│ │ │ ├── auto.md
│ │ │ ├── autoformer.md
│ │ │ ├── bark.md
│ │ │ ├── bart.md
│ │ │ ├── barthez.md
│ │ │ ├── bartpho.md
│ │ │ ├── beit.md
│ │ │ ├── bert-generation.md
│ │ │ ├── bert-japanese.md
│ │ │ ├── bert.md
│ │ │ ├── bertweet.md
│ │ │ ├── big_bird.md
│ │ │ ├── bigbird_pegasus.md
│ │ │ ├── biogpt.md
│ │ │ ├── bit.md
│ │ │ ├── blenderbot-small.md
│ │ │ ├── blenderbot.md
│ │ │ ├── blip-2.md
│ │ │ ├── blip.md
│ │ │ ├── bloom.md
│ │ │ ├── bridgetower.md
│ │ │ ├── bros.md
│ │ │ ├── byt5.md
│ │ │ ├── camembert.md
│ │ │ ├── canine.md
│ │ │ ├── chinese_clip.md
│ │ │ ├── clap.md
│ │ │ ├── clip.md
│ │ │ ├── clipseg.md
│ │ │ ├── clvp.md
│ │ │ ├── code_llama.md
│ │ │ ├── codegen.md
│ │ │ ├── conditional_detr.md
│ │ │ ├── convbert.md
│ │ │ ├── convnext.md
│ │ │ ├── convnextv2.md
│ │ │ ├── cpm.md
│ │ │ ├── cpmant.md
│ │ │ ├── ctrl.md
│ │ │ ├── cvt.md
│ │ │ ├── data2vec.md
│ │ │ ├── deberta-v2.md
│ │ │ ├── deberta.md
│ │ │ ├── decision_transformer.md
│ │ │ ├── deformable_detr.md
│ │ │ ├── deit.md
│ │ │ ├── deplot.md
│ │ │ ├── detr.md
│ │ │ ├── dialogpt.md
│ │ │ └── dinat.md
│ │ ├── model_memory_anatomy.md
│ │ ├── model_sharing.md
│ │ ├── model_summary.md
│ │ ├── multilingual.md
│ │ ├── pad_truncation.md
│ │ ├── peft.md
│ │ ├── perf_hardware.md
│ │ ├── perf_infer_cpu.md
│ │ ├── perf_infer_gpu_many.md
│ │ ├── perf_infer_gpu_one.md
│ │ ├── perf_infer_special.md
│ │ ├── perf_torch_compile.md
│ │ ├── perf_train_cpu.md
│ │ ├── perf_train_cpu_many.md
│ │ ├── perf_train_gpu_many.md
│ │ ├── perf_train_gpu_one.md
│ │ ├── perf_train_special.md
│ │ ├── perf_train_tpu.md
│ │ ├── performance.md
│ │ ├── perplexity.md
│ │ ├── philosophy.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pipeline_webserver.md
│ │ ├── pr_checks.md
│ │ ├── preprocessing.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── serialization.md
│ │ ├── task_summary.md
│ │ ├── tasks/
│ │ │ ├── asr.md
│ │ │ ├── audio_classification.md
│ │ │ ├── document_question_answering.md
│ │ │ ├── idefics.md
│ │ │ ├── image_captioning.md
│ │ │ ├── image_classification.md
│ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ ├── language_modeling.md
│ │ │ ├── masked_language_modeling.md
│ │ │ ├── monocular_depth_estimation.md
│ │ │ ├── multiple_choice.md
│ │ │ ├── object_detection.md
│ │ │ ├── prompting.md
│ │ │ ├── question_answering.md
│ │ │ ├── semantic_segmentation.md
│ │ │ ├── summarization.md
│ │ │ ├── text-to-speech.md
│ │ │ ├── token_classification.md
│ │ │ ├── translation.md
│ │ │ ├── video_classification.md
│ │ │ ├── visual_question_answering.md
│ │ │ ├── zero_shot_image_classification.md
│ │ │ └── zero_shot_object_detection.md
│ │ ├── tasks_explained.md
│ │ ├── testing.md
│ │ ├── tokenizer_summary.md
│ │ ├── training.md
│ │ └── troubleshooting.md
│ ├── ko/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── accelerator_selection.md
│ │ ├── add_new_model.md
│ │ ├── add_new_pipeline.md
│ │ ├── cache_explanation.md
│ │ ├── chat_extras.md
│ │ ├── chat_templating.md
│ │ ├── community.md
│ │ ├── contributing.md
│ │ ├── conversations.md
│ │ ├── custom_models.md
│ │ ├── debugging.md
│ │ ├── deepspeed.md
│ │ ├── executorch.md
│ │ ├── fast_tokenizers.md
│ │ ├── fsdp.md
│ │ ├── generation_strategies.md
│ │ ├── gguf.md
│ │ ├── glossary.md
│ │ ├── how_to_hack_models.md
│ │ ├── hpo_train.md
│ │ ├── image_processors.md
│ │ ├── in_translation.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── internal/
│ │ │ ├── audio_utils.md
│ │ │ ├── file_utils.md
│ │ │ ├── generation_utils.md
│ │ │ ├── image_processing_utils.md
│ │ │ ├── modeling_utils.md
│ │ │ ├── pipelines_utils.md
│ │ │ ├── time_series_utils.md
│ │ │ ├── tokenization_utils.md
│ │ │ └── trainer_utils.md
│ │ ├── llm_optims.md
│ │ ├── llm_tutorial.md
│ │ ├── llm_tutorial_optimization.md
│ │ ├── main_classes/
│ │ │ ├── callback.md
│ │ │ ├── configuration.md
│ │ │ ├── data_collator.md
│ │ │ ├── feature_extractor.md
│ │ │ ├── logging.md
│ │ │ ├── model.md
│ │ │ ├── optimizer_schedules.md
│ │ │ ├── output.md
│ │ │ ├── peft.md
│ │ │ ├── pipelines.md
│ │ │ ├── processors.md
│ │ │ ├── quantization.md
│ │ │ ├── text_generation.md
│ │ │ ├── tokenizer.md
│ │ │ └── trainer.md
│ │ ├── model_doc/
│ │ │ ├── albert.md
│ │ │ ├── altclip.md
│ │ │ ├── auto.md
│ │ │ ├── autoformer.md
│ │ │ ├── bart.md
│ │ │ ├── barthez.md
│ │ │ ├── bartpho.md
│ │ │ ├── bert-japanese.md
│ │ │ ├── bert.md
│ │ │ ├── bertweet.md
│ │ │ ├── big_bird.md
│ │ │ ├── biogpt.md
│ │ │ ├── blip-2.md
│ │ │ ├── blip.md
│ │ │ ├── chameleon.md
│ │ │ ├── clip.md
│ │ │ ├── clipseg.md
│ │ │ ├── code_llama.md
│ │ │ ├── codegen.md
│ │ │ ├── cohere.md
│ │ │ ├── convbert.md
│ │ │ ├── dbrx.md
│ │ │ ├── deberta-v2.md
│ │ │ ├── deberta.md
│ │ │ ├── deepseek_v3.md
│ │ │ ├── electra.md
│ │ │ ├── encoder-decoder.md
│ │ │ ├── esm.md
│ │ │ ├── exaone4.md
│ │ │ ├── exaone_moe.md
│ │ │ ├── gemma.md
│ │ │ ├── gemma2.md
│ │ │ ├── gemma3.md
│ │ │ ├── gemma3n.md
│ │ │ ├── gpt2.md
│ │ │ ├── gpt_neox_japanese.md
│ │ │ ├── grounding-dino.md
│ │ │ ├── informer.md
│ │ │ ├── jamba.md
│ │ │ ├── lfm2.md
│ │ │ ├── llama.md
│ │ │ ├── llama2.md
│ │ │ ├── llama3.md
│ │ │ ├── llama4.md
│ │ │ ├── mamba.md
│ │ │ ├── mamba2.md
│ │ │ ├── marian.md
│ │ │ ├── mistral.md
│ │ │ ├── openai-gpt.md
│ │ │ ├── paligemma.md
│ │ │ ├── patchtsmixer.md
│ │ │ ├── patchtst.md
│ │ │ ├── qwen2_vl.md
│ │ │ ├── rag.md
│ │ │ ├── roberta.md
│ │ │ ├── sam_hq.md
│ │ │ ├── siglip.md
│ │ │ ├── smolvlm.md
│ │ │ ├── swin.md
│ │ │ ├── swin2sr.md
│ │ │ ├── swinv2.md
│ │ │ ├── time_series_transformer.md
│ │ │ ├── timesformer.md
│ │ │ ├── tvp.md
│ │ │ ├── vit.md
│ │ │ ├── vivit.md
│ │ │ ├── whisper.md
│ │ │ └── xclip.md
│ │ ├── model_memory_anatomy.md
│ │ ├── model_sharing.md
│ │ ├── models.md
│ │ ├── modular_transformers.md
│ │ ├── optimizers.md
│ │ ├── pad_truncation.md
│ │ ├── peft.md
│ │ ├── perf_hardware.md
│ │ ├── perf_infer_cpu.md
│ │ ├── perf_infer_gpu_multi.md
│ │ ├── perf_infer_gpu_one.md
│ │ ├── perf_train_cpu.md
│ │ ├── perf_train_cpu_many.md
│ │ ├── perf_train_gpu_many.md
│ │ ├── perf_train_gpu_one.md
│ │ ├── perf_train_special.md
│ │ ├── perplexity.md
│ │ ├── philosophy.md
│ │ ├── pipeline_gradio.md
│ │ ├── pipeline_tutorial.md
│ │ ├── pipeline_webserver.md
│ │ ├── pr_checks.md
│ │ ├── quantization/
│ │ │ ├── awq.md
│ │ │ ├── bitsandbytes.md
│ │ │ ├── eetq.md
│ │ │ ├── gptq.md
│ │ │ ├── quanto.md
│ │ │ └── quark.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── serialization.md
│ │ ├── serving.md
│ │ ├── tasks/
│ │ │ ├── asr.md
│ │ │ ├── audio_classification.md
│ │ │ ├── document_question_answering.md
│ │ │ ├── idefics.md
│ │ │ ├── image_captioning.md
│ │ │ ├── image_classification.md
│ │ │ ├── image_feature_extraction.md
│ │ │ ├── keypoint_detection.md
│ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ ├── language_modeling.md
│ │ │ ├── mask_generation.md
│ │ │ ├── masked_language_modeling.md
│ │ │ ├── monocular_depth_estimation.md
│ │ │ ├── multiple_choice.md
│ │ │ ├── object_detection.md
│ │ │ ├── prompting.md
│ │ │ ├── question_answering.md
│ │ │ ├── semantic_segmentation.md
│ │ │ ├── sequence_classification.md
│ │ │ ├── summarization.md
│ │ │ ├── token_classification.md
│ │ │ ├── translation.md
│ │ │ ├── video_classification.md
│ │ │ ├── visual_question_answering.md
│ │ │ ├── zero_shot_image_classification.md
│ │ │ └── zero_shot_object_detection.md
│ │ ├── testing.md
│ │ ├── tiny_agents.md
│ │ ├── tokenizer_summary.md
│ │ ├── trainer.md
│ │ ├── training.md
│ │ └── troubleshooting.md
│ ├── pt/
│ │ ├── _config.py
│ │ ├── _toctree.yml
│ │ ├── accelerate.md
│ │ ├── create_a_model.md
│ │ ├── custom_models.md
│ │ ├── fast_tokenizers.md
│ │ ├── index.md
│ │ ├── installation.md
│ │ ├── multilingual.md
│ │ ├── pipeline_tutorial.md
│ │ ├── quicktour.md
│ │ ├── run_scripts.md
│ │ ├── tasks/
│ │ │ ├── sequence_classification.md
│ │ │ └── token_classification.md
│ │ └── training.md
│ └── zh/
│ ├── _toctree.yml
│ ├── accelerate.md
│ ├── add_new_pipeline.md
│ ├── attention.md
│ ├── autoclass_tutorial.md
│ ├── bertology.md
│ ├── big_models.md
│ ├── chat_templating.md
│ ├── community.md
│ ├── contributing.md
│ ├── create_a_model.md
│ ├── custom_models.md
│ ├── debugging.md
│ ├── fast_tokenizers.md
│ ├── fsdp.md
│ ├── generation_strategies.md
│ ├── gguf.md
│ ├── hpo_train.md
│ ├── index.md
│ ├── installation.md
│ ├── internal/
│ │ ├── audio_utils.md
│ │ ├── file_utils.md
│ │ ├── generation_utils.md
│ │ ├── image_processing_utils.md
│ │ ├── modeling_utils.md
│ │ ├── pipelines_utils.md
│ │ ├── time_series_utils.md
│ │ ├── tokenization_utils.md
│ │ └── trainer_utils.md
│ ├── llm_tutorial.md
│ ├── main_classes/
│ │ ├── callback.md
│ │ ├── configuration.md
│ │ ├── data_collator.md
│ │ ├── deepspeed.md
│ │ ├── feature_extractor.md
│ │ ├── image_processor.md
│ │ ├── logging.md
│ │ ├── model.md
│ │ ├── optimizer_schedules.md
│ │ ├── output.md
│ │ ├── pipelines.md
│ │ ├── processors.md
│ │ ├── quantization.md
│ │ ├── text_generation.md
│ │ ├── tokenizer.md
│ │ └── trainer.md
│ ├── model_doc/
│ │ └── bert.md
│ ├── model_sharing.md
│ ├── multilingual.md
│ ├── peft.md
│ ├── perf_hardware.md
│ ├── perf_infer_gpu_multi.md
│ ├── perf_torch_compile.md
│ ├── perf_train_cpu.md
│ ├── perf_train_special.md
│ ├── performance.md
│ ├── philosophy.md
│ ├── pipeline_tutorial.md
│ ├── preprocessing.md
│ ├── quicktour.md
│ ├── run_scripts.md
│ ├── serialization.md
│ ├── task_summary.md
│ ├── tasks/
│ │ ├── asr.md
│ │ ├── question_answering.md
│ │ ├── sequence_classification.md
│ │ ├── summarization.md
│ │ ├── token_classification.md
│ │ └── translation.md
│ ├── tiktoken.md
│ ├── tokenizer_summary.md
│ └── training.md
├── doctest_list.txt
├── examples/
│ ├── 3D_parallel.py
│ ├── README.md
│ ├── metrics-monitoring/
│ │ ├── README.md
│ │ ├── continuous-batching-dashboard.json
│ │ ├── docker-compose.yml
│ │ ├── grafana-dashboard.yaml
│ │ ├── grafana-datasources.yaml
│ │ ├── metrics_example.py
│ │ ├── prometheus.yml
│ │ └── tempo.yaml
│ ├── modular-transformers/
│ │ ├── README.md
│ │ ├── configuration_dummy.py
│ │ ├── configuration_duplicated_method.py
│ │ ├── configuration_my_new_model.py
│ │ ├── configuration_my_new_model2.py
│ │ ├── configuration_new_model.py
│ │ ├── configuration_super.py
│ │ ├── convert_examples.sh
│ │ ├── image_processing_new_imgproc_model.py
│ │ ├── modeling_add_function.py
│ │ ├── modeling_dummy_bert.py
│ │ ├── modeling_from_uppercase_model.py
│ │ ├── modeling_global_indexing.py
│ │ ├── modeling_multimodal2.py
│ │ ├── modeling_my_new_model2.py
│ │ ├── modeling_new_task_model.py
│ │ ├── modeling_roberta.py
│ │ ├── modeling_super.py
│ │ ├── modeling_switch_function.py
│ │ ├── modeling_test_detr.py
│ │ ├── modeling_test_suffix.py
│ │ ├── modular_add_function.py
│ │ ├── modular_dummy_bert.py
│ │ ├── modular_duplicated_method.py
│ │ ├── modular_from_uppercase_model.py
│ │ ├── modular_global_indexing.py
│ │ ├── modular_multimodal2.py
│ │ ├── modular_my_new_model.py
│ │ ├── modular_my_new_model2.py
│ │ ├── modular_new_imgproc_model.py
│ │ ├── modular_new_model.py
│ │ ├── modular_new_task_model.py
│ │ ├── modular_roberta.py
│ │ ├── modular_super.py
│ │ ├── modular_switch_function.py
│ │ ├── modular_test_detr.py
│ │ └── modular_test_suffix.py
│ ├── pytorch/
│ │ ├── 3d_parallel_checks.py
│ │ ├── README.md
│ │ ├── _tests_requirements.txt
│ │ ├── audio-classification/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_audio_classification.py
│ │ ├── conftest.py
│ │ ├── context_parallel.py
│ │ ├── continuous_batching.py
│ │ ├── continuous_batching_simple.py
│ │ ├── contrastive-image-text/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_clip.py
│ │ ├── image-classification/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_image_classification.py
│ │ │ └── run_image_classification_no_trainer.py
│ │ ├── image-pretraining/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_mae.py
│ │ │ ├── run_mim.py
│ │ │ └── run_mim_no_trainer.py
│ │ ├── instance-segmentation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_instance_segmentation.py
│ │ │ └── run_instance_segmentation_no_trainer.py
│ │ ├── language-modeling/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_clm.py
│ │ │ ├── run_clm_no_trainer.py
│ │ │ ├── run_fim.py
│ │ │ ├── run_fim_no_trainer.py
│ │ │ ├── run_mlm.py
│ │ │ ├── run_mlm_no_trainer.py
│ │ │ └── run_plm.py
│ │ ├── multiple-choice/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_no_trainer.sh
│ │ │ ├── run_swag.py
│ │ │ └── run_swag_no_trainer.py
│ │ ├── object-detection/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_object_detection.py
│ │ │ └── run_object_detection_no_trainer.py
│ │ ├── old_test_xla_examples.py
│ │ ├── question-answering/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_qa.py
│ │ │ ├── run_qa_beam_search.py
│ │ │ ├── run_qa_beam_search_no_trainer.py
│ │ │ ├── run_qa_no_trainer.py
│ │ │ ├── run_seq2seq_qa.py
│ │ │ ├── trainer_qa.py
│ │ │ ├── trainer_seq2seq_qa.py
│ │ │ └── utils_qa.py
│ │ ├── semantic-segmentation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_semantic_segmentation.py
│ │ │ └── run_semantic_segmentation_no_trainer.py
│ │ ├── speech-pretraining/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_wav2vec2_pretraining_no_trainer.py
│ │ ├── speech-recognition/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_speech_recognition_ctc.py
│ │ │ ├── run_speech_recognition_ctc_adapter.py
│ │ │ └── run_speech_recognition_seq2seq.py
│ │ ├── summarization/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_summarization.py
│ │ │ └── run_summarization_no_trainer.py
│ │ ├── test_accelerate_examples.py
│ │ ├── test_pytorch_examples.py
│ │ ├── text-classification/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_classification.py
│ │ │ ├── run_glue.py
│ │ │ ├── run_glue_no_trainer.py
│ │ │ └── run_xnli.py
│ │ ├── text-generation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_generation.py
│ │ ├── token-classification/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run.sh
│ │ │ ├── run_ner.py
│ │ │ ├── run_ner_no_trainer.py
│ │ │ └── run_no_trainer.sh
│ │ ├── transformers_serve_cb_eval_job.py
│ │ ├── translation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ ├── run_translation.py
│ │ │ └── run_translation_no_trainer.py
│ │ └── xla_spawn.py
│ ├── quantization/
│ │ ├── custom_quantization.py
│ │ └── custom_quantization_int8_example.py
│ ├── research_projects/
│ │ └── README.md
│ ├── run_on_remote.py
│ ├── scheduler/
│ │ ├── README.md
│ │ └── run_greedy.py
│ └── training/
│ └── distributed_training.py
├── i18n/
│ ├── README_ar.md
│ ├── README_bn.md
│ ├── README_de.md
│ ├── README_es.md
│ ├── README_fr.md
│ ├── README_hd.md
│ ├── README_it.md
│ ├── README_ja.md
│ ├── README_ko.md
│ ├── README_pt-br.md
│ ├── README_ru.md
│ ├── README_te.md
│ ├── README_ur.md
│ ├── README_vi.md
│ ├── README_zh-hans.md
│ └── README_zh-hant.md
├── notebooks/
│ └── README.md
├── pyproject.toml
├── scripts/
│ ├── check_tokenizers.py
│ ├── distributed/
│ │ └── torch-distributed-gpu-test.py
│ └── stale.py
├── setup.py
├── src/
│ └── transformers/
│ ├── __init__.py
│ ├── _typing.py
│ ├── activations.py
│ ├── audio_utils.py
│ ├── backbone_utils.py
│ ├── cache_utils.py
│ ├── cli/
│ │ ├── __init__.py
│ │ ├── add_new_model_like.py
│ │ ├── chat.py
│ │ ├── download.py
│ │ ├── serve.py
│ │ ├── serving/
│ │ │ ├── __init__.py
│ │ │ ├── chat_completion.py
│ │ │ ├── model_manager.py
│ │ │ ├── response.py
│ │ │ ├── server.py
│ │ │ ├── transcription.py
│ │ │ └── utils.py
│ │ ├── system.py
│ │ └── transformers.py
│ ├── configuration_utils.py
│ ├── conversion_mapping.py
│ ├── convert_slow_tokenizer.py
│ ├── convert_slow_tokenizers_checkpoints_to_fast.py
│ ├── core_model_loading.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── data_collator.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── glue.py
│ │ │ └── squad.py
│ │ ├── metrics/
│ │ │ ├── __init__.py
│ │ │ └── squad_metrics.py
│ │ └── processors/
│ │ ├── __init__.py
│ │ ├── glue.py
│ │ ├── squad.py
│ │ ├── utils.py
│ │ └── xnli.py
│ ├── debug_utils.py
│ ├── dependency_versions_check.py
│ ├── dependency_versions_table.py
│ ├── distributed/
│ │ ├── __init__.py
│ │ └── configuration_utils.py
│ ├── dynamic_module_utils.py
│ ├── feature_extraction_sequence_utils.py
│ ├── feature_extraction_utils.py
│ ├── file_utils.py
│ ├── generation/
│ │ ├── __init__.py
│ │ ├── candidate_generator.py
│ │ ├── configuration_utils.py
│ │ ├── continuous_batching/
│ │ │ ├── __init__.py
│ │ │ ├── cache.py
│ │ │ ├── cache_manager.py
│ │ │ ├── continuous_api.py
│ │ │ ├── input_outputs.py
│ │ │ ├── requests.py
│ │ │ ├── scheduler.py
│ │ │ └── utils.py
│ │ ├── logits_process.py
│ │ ├── stopping_criteria.py
│ │ ├── streamers.py
│ │ ├── utils.py
│ │ └── watermarking.py
│ ├── hf_argparser.py
│ ├── hyperparameter_search.py
│ ├── image_processing_backends.py
│ ├── image_processing_base.py
│ ├── image_processing_utils.py
│ ├── image_transforms.py
│ ├── image_utils.py
│ ├── initialization.py
│ ├── integrations/
│ │ ├── __init__.py
│ │ ├── accelerate.py
│ │ ├── aqlm.py
│ │ ├── awq.py
│ │ ├── bitnet.py
│ │ ├── bitsandbytes.py
│ │ ├── deepspeed.py
│ │ ├── eager_paged.py
│ │ ├── eetq.py
│ │ ├── executorch.py
│ │ ├── fbgemm_fp8.py
│ │ ├── finegrained_fp8.py
│ │ ├── flash_attention.py
│ │ ├── flash_paged.py
│ │ ├── flex_attention.py
│ │ ├── fouroversix.py
│ │ ├── fp_quant.py
│ │ ├── fsdp.py
│ │ ├── ggml.py
│ │ ├── higgs.py
│ │ ├── hqq.py
│ │ ├── hub_kernels.py
│ │ ├── integration_utils.py
│ │ ├── liger.py
│ │ ├── metal_quantization.py
│ │ ├── mistral.py
│ │ ├── moe.py
│ │ ├── mxfp4.py
│ │ ├── neftune.py
│ │ ├── npu_flash_attention.py
│ │ ├── peft.py
│ │ ├── quanto.py
│ │ ├── quark.py
│ │ ├── sdpa_attention.py
│ │ ├── sdpa_paged.py
│ │ ├── sinq.py
│ │ ├── spqr.py
│ │ ├── tensor_parallel.py
│ │ ├── tiktoken.py
│ │ ├── torchao.py
│ │ ├── tpu.py
│ │ └── vptq.py
│ ├── loss/
│ │ ├── __init__.py
│ │ ├── loss_d_fine.py
│ │ ├── loss_deformable_detr.py
│ │ ├── loss_for_object_detection.py
│ │ ├── loss_grounding_dino.py
│ │ ├── loss_lw_detr.py
│ │ ├── loss_rt_detr.py
│ │ └── loss_utils.py
│ ├── masking_utils.py
│ ├── model_debugging_utils.py
│ ├── modelcard.py
│ ├── modeling_attn_mask_utils.py
│ ├── modeling_flash_attention_utils.py
│ ├── modeling_gguf_pytorch_utils.py
│ ├── modeling_layers.py
│ ├── modeling_outputs.py
│ ├── modeling_rope_utils.py
│ ├── modeling_utils.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── afmoe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_afmoe.py
│ │ │ ├── modeling_afmoe.py
│ │ │ └── modular_afmoe.py
│ │ ├── aimv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_aimv2.py
│ │ │ ├── convert_aimv2_original_pytorch_to_hf.py
│ │ │ ├── modeling_aimv2.py
│ │ │ └── modular_aimv2.py
│ │ ├── albert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_albert.py
│ │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_albert.py
│ │ │ └── tokenization_albert.py
│ │ ├── align/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_align.py
│ │ │ ├── convert_align_tf_to_hf.py
│ │ │ ├── modeling_align.py
│ │ │ └── processing_align.py
│ │ ├── altclip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_altclip.py
│ │ │ ├── modeling_altclip.py
│ │ │ └── processing_altclip.py
│ │ ├── apertus/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_apertus.py
│ │ │ ├── modeling_apertus.py
│ │ │ └── modular_apertus.py
│ │ ├── arcee/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_arcee.py
│ │ │ ├── modeling_arcee.py
│ │ │ └── modular_arcee.py
│ │ ├── aria/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_aria.py
│ │ │ ├── convert_aria_weights_to_hf.py
│ │ │ ├── image_processing_aria.py
│ │ │ ├── image_processing_pil_aria.py
│ │ │ ├── modeling_aria.py
│ │ │ ├── modular_aria.py
│ │ │ └── processing_aria.py
│ │ ├── audio_spectrogram_transformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_audio_spectrogram_transformer.py
│ │ │ ├── convert_audio_spectrogram_transformer_original_to_pytorch.py
│ │ │ ├── feature_extraction_audio_spectrogram_transformer.py
│ │ │ └── modeling_audio_spectrogram_transformer.py
│ │ ├── audioflamingo3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_audioflamingo3.py
│ │ │ ├── convert_audioflamingo3_to_hf.py
│ │ │ ├── modeling_audioflamingo3.py
│ │ │ ├── modular_audioflamingo3.py
│ │ │ └── processing_audioflamingo3.py
│ │ ├── auto/
│ │ │ ├── __init__.py
│ │ │ ├── auto_factory.py
│ │ │ ├── configuration_auto.py
│ │ │ ├── feature_extraction_auto.py
│ │ │ ├── image_processing_auto.py
│ │ │ ├── modeling_auto.py
│ │ │ ├── processing_auto.py
│ │ │ ├── tokenization_auto.py
│ │ │ └── video_processing_auto.py
│ │ ├── autoformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_autoformer.py
│ │ │ └── modeling_autoformer.py
│ │ ├── aya_vision/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_aya_vision.py
│ │ │ ├── modeling_aya_vision.py
│ │ │ ├── modular_aya_vision.py
│ │ │ └── processing_aya_vision.py
│ │ ├── bamba/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bamba.py
│ │ │ ├── convert_mamba_ssm_checkpoint.py
│ │ │ ├── modeling_bamba.py
│ │ │ └── modular_bamba.py
│ │ ├── bark/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bark.py
│ │ │ ├── convert_suno_to_hf.py
│ │ │ ├── generation_configuration_bark.py
│ │ │ ├── modeling_bark.py
│ │ │ └── processing_bark.py
│ │ ├── bart/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bart.py
│ │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_bart.py
│ │ │ └── tokenization_bart.py
│ │ ├── barthez/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_barthez.py
│ │ ├── bartpho/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_bartpho.py
│ │ ├── beit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_beit.py
│ │ │ ├── convert_beit_unilm_to_pytorch.py
│ │ │ ├── image_processing_beit.py
│ │ │ ├── image_processing_pil_beit.py
│ │ │ └── modeling_beit.py
│ │ ├── bert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bert.py
│ │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py
│ │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
│ │ │ ├── modeling_bert.py
│ │ │ ├── tokenization_bert.py
│ │ │ └── tokenization_bert_legacy.py
│ │ ├── bert_generation/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bert_generation.py
│ │ │ ├── modeling_bert_generation.py
│ │ │ └── tokenization_bert_generation.py
│ │ ├── bert_japanese/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_bert_japanese.py
│ │ ├── bertweet/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_bertweet.py
│ │ ├── big_bird/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_big_bird.py
│ │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_big_bird.py
│ │ │ └── tokenization_big_bird.py
│ │ ├── bigbird_pegasus/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bigbird_pegasus.py
│ │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py
│ │ │ └── modeling_bigbird_pegasus.py
│ │ ├── biogpt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_biogpt.py
│ │ │ ├── convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_biogpt.py
│ │ │ ├── modular_biogpt.py
│ │ │ └── tokenization_biogpt.py
│ │ ├── bit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bit.py
│ │ │ ├── convert_bit_to_pytorch.py
│ │ │ ├── image_processing_bit.py
│ │ │ ├── image_processing_pil_bit.py
│ │ │ └── modeling_bit.py
│ │ ├── bitnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bitnet.py
│ │ │ ├── modeling_bitnet.py
│ │ │ └── modular_bitnet.py
│ │ ├── blenderbot/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_blenderbot.py
│ │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_blenderbot.py
│ │ │ └── tokenization_blenderbot.py
│ │ ├── blenderbot_small/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_blenderbot_small.py
│ │ │ ├── modeling_blenderbot_small.py
│ │ │ └── tokenization_blenderbot_small.py
│ │ ├── blip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_blip.py
│ │ │ ├── convert_blip_original_pytorch_to_hf.py
│ │ │ ├── image_processing_blip.py
│ │ │ ├── image_processing_pil_blip.py
│ │ │ ├── modeling_blip.py
│ │ │ ├── modeling_blip_text.py
│ │ │ └── processing_blip.py
│ │ ├── blip_2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_blip_2.py
│ │ │ ├── convert_blip_2_original_to_pytorch.py
│ │ │ ├── modeling_blip_2.py
│ │ │ └── processing_blip_2.py
│ │ ├── bloom/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bloom.py
│ │ │ ├── convert_bloom_original_checkpoint_to_pytorch.py
│ │ │ └── modeling_bloom.py
│ │ ├── blt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_blt.py
│ │ │ ├── convert_blt_weights_to_hf.py
│ │ │ ├── modeling_blt.py
│ │ │ └── modular_blt.py
│ │ ├── bridgetower/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bridgetower.py
│ │ │ ├── image_processing_bridgetower.py
│ │ │ ├── image_processing_pil_bridgetower.py
│ │ │ ├── modeling_bridgetower.py
│ │ │ └── processing_bridgetower.py
│ │ ├── bros/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_bros.py
│ │ │ ├── convert_bros_to_pytorch.py
│ │ │ ├── modeling_bros.py
│ │ │ └── processing_bros.py
│ │ ├── byt5/
│ │ │ ├── __init__.py
│ │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py
│ │ │ └── tokenization_byt5.py
│ │ ├── camembert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_camembert.py
│ │ │ ├── modeling_camembert.py
│ │ │ ├── modular_camembert.py
│ │ │ └── tokenization_camembert.py
│ │ ├── canine/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_canine.py
│ │ │ ├── convert_canine_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_canine.py
│ │ │ └── tokenization_canine.py
│ │ ├── chameleon/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_chameleon.py
│ │ │ ├── convert_chameleon_weights_to_hf.py
│ │ │ ├── image_processing_chameleon.py
│ │ │ ├── image_processing_pil_chameleon.py
│ │ │ ├── modeling_chameleon.py
│ │ │ └── processing_chameleon.py
│ │ ├── chinese_clip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_chinese_clip.py
│ │ │ ├── convert_chinese_clip_original_pytorch_to_hf.py
│ │ │ ├── image_processing_chinese_clip.py
│ │ │ ├── image_processing_chinese_pil_clip.py
│ │ │ ├── modeling_chinese_clip.py
│ │ │ └── processing_chinese_clip.py
│ │ ├── chmv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_chmv2.py
│ │ │ ├── convert_chmv2_to_hf.py
│ │ │ ├── image_processing_chmv2.py
│ │ │ ├── modeling_chmv2.py
│ │ │ └── modular_chmv2.py
│ │ ├── clap/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_clap.py
│ │ │ ├── convert_clap_original_pytorch_to_hf.py
│ │ │ ├── feature_extraction_clap.py
│ │ │ ├── modeling_clap.py
│ │ │ └── processing_clap.py
│ │ ├── clip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_clip.py
│ │ │ ├── convert_clip_original_pytorch_to_hf.py
│ │ │ ├── image_processing_clip.py
│ │ │ ├── image_processing_pil_clip.py
│ │ │ ├── modeling_clip.py
│ │ │ ├── processing_clip.py
│ │ │ └── tokenization_clip.py
│ │ ├── clipseg/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_clipseg.py
│ │ │ ├── convert_clipseg_original_pytorch_to_hf.py
│ │ │ ├── modeling_clipseg.py
│ │ │ └── processing_clipseg.py
│ │ ├── clvp/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_clvp.py
│ │ │ ├── convert_clvp_to_hf.py
│ │ │ ├── feature_extraction_clvp.py
│ │ │ ├── modeling_clvp.py
│ │ │ ├── number_normalizer.py
│ │ │ ├── processing_clvp.py
│ │ │ └── tokenization_clvp.py
│ │ ├── code_llama/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_code_llama.py
│ │ ├── codegen/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_codegen.py
│ │ │ ├── modeling_codegen.py
│ │ │ └── tokenization_codegen.py
│ │ ├── cohere/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cohere.py
│ │ │ ├── modeling_cohere.py
│ │ │ ├── modular_cohere.py
│ │ │ └── tokenization_cohere.py
│ │ ├── cohere2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cohere2.py
│ │ │ ├── modeling_cohere2.py
│ │ │ └── modular_cohere2.py
│ │ ├── cohere2_vision/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cohere2_vision.py
│ │ │ ├── image_processing_cohere2_vision.py
│ │ │ ├── modeling_cohere2_vision.py
│ │ │ ├── modular_cohere2_vision.py
│ │ │ └── processing_cohere2_vision.py
│ │ ├── cohere_asr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cohere_asr.py
│ │ │ ├── feature_extraction_cohere_asr.py
│ │ │ ├── modeling_cohere_asr.py
│ │ │ ├── modular_cohere_asr.py
│ │ │ └── processing_cohere_asr.py
│ │ ├── colmodernvbert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_colmodernvbert.py
│ │ │ ├── modeling_colmodernvbert.py
│ │ │ ├── modular_colmodernvbert.py
│ │ │ └── processing_colmodernvbert.py
│ │ ├── colpali/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_colpali.py
│ │ │ ├── convert_colpali_weights_to_hf.py
│ │ │ ├── modeling_colpali.py
│ │ │ ├── modular_colpali.py
│ │ │ └── processing_colpali.py
│ │ ├── colqwen2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_colqwen2.py
│ │ │ ├── convert_colqwen2_weights_to_hf.py
│ │ │ ├── modeling_colqwen2.py
│ │ │ ├── modular_colqwen2.py
│ │ │ └── processing_colqwen2.py
│ │ ├── conditional_detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_conditional_detr.py
│ │ │ ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── image_processing_conditional_detr.py
│ │ │ ├── image_processing_pil_conditional_detr.py
│ │ │ ├── modeling_conditional_detr.py
│ │ │ └── modular_conditional_detr.py
│ │ ├── convbert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_convbert.py
│ │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch.py
│ │ │ ├── modeling_convbert.py
│ │ │ └── tokenization_convbert.py
│ │ ├── convnext/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_convnext.py
│ │ │ ├── convert_convnext_to_pytorch.py
│ │ │ ├── image_processing_convnext.py
│ │ │ ├── image_processing_pil_convnext.py
│ │ │ └── modeling_convnext.py
│ │ ├── convnextv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_convnextv2.py
│ │ │ ├── convert_convnextv2_to_pytorch.py
│ │ │ └── modeling_convnextv2.py
│ │ ├── cpm/
│ │ │ ├── __init__.py
│ │ │ ├── tokenization_cpm.py
│ │ │ └── tokenization_cpm_fast.py
│ │ ├── cpmant/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cpmant.py
│ │ │ ├── modeling_cpmant.py
│ │ │ └── tokenization_cpmant.py
│ │ ├── csm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_csm.py
│ │ │ ├── convert_csm.py
│ │ │ ├── generation_csm.py
│ │ │ ├── modeling_csm.py
│ │ │ ├── modular_csm.py
│ │ │ └── processing_csm.py
│ │ ├── ctrl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ctrl.py
│ │ │ ├── modeling_ctrl.py
│ │ │ └── tokenization_ctrl.py
│ │ ├── cvt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cvt.py
│ │ │ ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_cvt.py
│ │ ├── cwm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_cwm.py
│ │ │ ├── modeling_cwm.py
│ │ │ └── modular_cwm.py
│ │ ├── d_fine/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_d_fine.py
│ │ │ ├── convert_d_fine_original_pytorch_checkpoint_to_hf.py
│ │ │ ├── modeling_d_fine.py
│ │ │ └── modular_d_fine.py
│ │ ├── dab_detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dab_detr.py
│ │ │ ├── convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_dab_detr.py
│ │ ├── dac/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dac.py
│ │ │ ├── convert_dac_checkpoint.py
│ │ │ ├── feature_extraction_dac.py
│ │ │ └── modeling_dac.py
│ │ ├── data2vec/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_data2vec_audio.py
│ │ │ ├── configuration_data2vec_text.py
│ │ │ ├── configuration_data2vec_vision.py
│ │ │ ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_data2vec_audio.py
│ │ │ ├── modeling_data2vec_text.py
│ │ │ ├── modeling_data2vec_vision.py
│ │ │ ├── modular_data2vec_audio.py
│ │ │ └── modular_data2vec_text.py
│ │ ├── dbrx/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dbrx.py
│ │ │ ├── modeling_dbrx.py
│ │ │ └── modular_dbrx.py
│ │ ├── deberta/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deberta.py
│ │ │ ├── modeling_deberta.py
│ │ │ └── tokenization_deberta.py
│ │ ├── deberta_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deberta_v2.py
│ │ │ ├── modeling_deberta_v2.py
│ │ │ └── tokenization_deberta_v2.py
│ │ ├── decision_transformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_decision_transformer.py
│ │ │ └── modeling_decision_transformer.py
│ │ ├── deepseek_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek_v2.py
│ │ │ ├── modeling_deepseek_v2.py
│ │ │ └── modular_deepseek_v2.py
│ │ ├── deepseek_v3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek_v3.py
│ │ │ ├── modeling_deepseek_v3.py
│ │ │ └── modular_deepseek_v3.py
│ │ ├── deepseek_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek_vl.py
│ │ │ ├── convert_deepseek_vl_weights_to_hf.py
│ │ │ ├── image_processing_deepseek_vl.py
│ │ │ ├── image_processing_pil_deepseek_vl.py
│ │ │ ├── modeling_deepseek_vl.py
│ │ │ ├── modular_deepseek_vl.py
│ │ │ └── processing_deepseek_vl.py
│ │ ├── deepseek_vl_hybrid/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek_vl_hybrid.py
│ │ │ ├── convert_deepseek_vl_hybrid_weights_to_hf.py
│ │ │ ├── image_processing_deepseek_vl_hybrid.py
│ │ │ ├── image_processing_pil_deepseek_vl_hybrid.py
│ │ │ ├── modeling_deepseek_vl_hybrid.py
│ │ │ ├── modular_deepseek_vl_hybrid.py
│ │ │ └── processing_deepseek_vl_hybrid.py
│ │ ├── deformable_detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deformable_detr.py
│ │ │ ├── convert_deformable_detr_to_pytorch.py
│ │ │ ├── image_processing_deformable_detr.py
│ │ │ ├── image_processing_pil_deformable_detr.py
│ │ │ ├── modeling_deformable_detr.py
│ │ │ └── modular_deformable_detr.py
│ │ ├── deit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deit.py
│ │ │ ├── convert_deit_timm_to_pytorch.py
│ │ │ ├── image_processing_deit.py
│ │ │ ├── image_processing_pil_deit.py
│ │ │ └── modeling_deit.py
│ │ ├── deprecated/
│ │ │ └── __init__.py
│ │ ├── depth_anything/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_depth_anything.py
│ │ │ ├── convert_depth_anything_to_hf.py
│ │ │ ├── convert_distill_any_depth_to_hf.py
│ │ │ └── modeling_depth_anything.py
│ │ ├── depth_pro/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_depth_pro.py
│ │ │ ├── convert_depth_pro_weights_to_hf.py
│ │ │ ├── image_processing_depth_pro.py
│ │ │ └── modeling_depth_pro.py
│ │ ├── detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_detr.py
│ │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_detr_to_pytorch.py
│ │ │ ├── image_processing_detr.py
│ │ │ ├── image_processing_pil_detr.py
│ │ │ └── modeling_detr.py
│ │ ├── dia/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dia.py
│ │ │ ├── convert_dia_to_hf.py
│ │ │ ├── feature_extraction_dia.py
│ │ │ ├── generation_dia.py
│ │ │ ├── modeling_dia.py
│ │ │ ├── modular_dia.py
│ │ │ ├── processing_dia.py
│ │ │ └── tokenization_dia.py
│ │ ├── dialogpt/
│ │ │ ├── __init__.py
│ │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ ├── diffllama/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_diffllama.py
│ │ │ ├── modeling_diffllama.py
│ │ │ └── modular_diffllama.py
│ │ ├── dinat/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dinat.py
│ │ │ └── modeling_dinat.py
│ │ ├── dinov2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dinov2.py
│ │ │ ├── convert_dinov2_to_hf.py
│ │ │ └── modeling_dinov2.py
│ │ ├── dinov2_with_registers/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dinov2_with_registers.py
│ │ │ ├── convert_dinov2_with_registers_to_hf.py
│ │ │ ├── modeling_dinov2_with_registers.py
│ │ │ └── modular_dinov2_with_registers.py
│ │ ├── dinov3_convnext/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dinov3_convnext.py
│ │ │ ├── convert_dinov3_convnext_to_hf.py
│ │ │ └── modeling_dinov3_convnext.py
│ │ ├── dinov3_vit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dinov3_vit.py
│ │ │ ├── convert_dinov3_vit_to_hf.py
│ │ │ ├── image_processing_dinov3_vit.py
│ │ │ ├── modeling_dinov3_vit.py
│ │ │ └── modular_dinov3_vit.py
│ │ ├── distilbert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_distilbert.py
│ │ │ ├── modeling_distilbert.py
│ │ │ └── tokenization_distilbert.py
│ │ ├── dit/
│ │ │ ├── __init__.py
│ │ │ └── convert_dit_unilm_to_pytorch.py
│ │ ├── doge/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_doge.py
│ │ │ ├── convert_doge_weights_to_hf.py
│ │ │ ├── modeling_doge.py
│ │ │ └── modular_doge.py
│ │ ├── donut/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_donut_swin.py
│ │ │ ├── convert_donut_to_pytorch.py
│ │ │ ├── image_processing_donut.py
│ │ │ ├── image_processing_pil_donut.py
│ │ │ ├── modeling_donut_swin.py
│ │ │ └── processing_donut.py
│ │ ├── dots1/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dots1.py
│ │ │ ├── modeling_dots1.py
│ │ │ └── modular_dots1.py
│ │ ├── dpr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dpr.py
│ │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py
│ │ │ ├── modeling_dpr.py
│ │ │ ├── tokenization_dpr.py
│ │ │ └── tokenization_dpr_fast.py
│ │ ├── dpt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_dpt.py
│ │ │ ├── convert_dinov2_depth_to_hf.py
│ │ │ ├── convert_dpt_beit_to_hf.py
│ │ │ ├── convert_dpt_hybrid_to_pytorch.py
│ │ │ ├── convert_dpt_swinv2_to_hf.py
│ │ │ ├── convert_dpt_to_pytorch.py
│ │ │ ├── image_processing_dpt.py
│ │ │ ├── image_processing_pil_dpt.py
│ │ │ ├── modeling_dpt.py
│ │ │ └── modular_dpt.py
│ │ ├── edgetam/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_edgetam.py
│ │ │ ├── convert_edgetam_to_hf.py
│ │ │ ├── modeling_edgetam.py
│ │ │ └── modular_edgetam.py
│ │ ├── edgetam_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_edgetam_video.py
│ │ │ ├── convert_edgetam_video_to_hf.py
│ │ │ ├── modeling_edgetam_video.py
│ │ │ └── modular_edgetam_video.py
│ │ ├── efficientloftr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_efficientloftr.py
│ │ │ ├── convert_efficientloftr_to_hf.py
│ │ │ ├── image_processing_efficientloftr.py
│ │ │ ├── image_processing_pil_efficientloftr.py
│ │ │ ├── modeling_efficientloftr.py
│ │ │ └── modular_efficientloftr.py
│ │ ├── efficientnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_efficientnet.py
│ │ │ ├── convert_efficientnet_to_pytorch.py
│ │ │ ├── image_processing_efficientnet.py
│ │ │ ├── image_processing_pil_efficientnet.py
│ │ │ └── modeling_efficientnet.py
│ │ ├── electra/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_electra.py
│ │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│ │ │ └── modeling_electra.py
│ │ ├── emu3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_emu3.py
│ │ │ ├── convert_emu3_weights_to_hf.py
│ │ │ ├── image_processing_emu3.py
│ │ │ ├── modeling_emu3.py
│ │ │ ├── modular_emu3.py
│ │ │ └── processing_emu3.py
│ │ ├── encodec/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_encodec.py
│ │ │ ├── convert_encodec_checkpoint_to_pytorch.py
│ │ │ ├── feature_extraction_encodec.py
│ │ │ └── modeling_encodec.py
│ │ ├── encoder_decoder/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_encoder_decoder.py
│ │ │ └── modeling_encoder_decoder.py
│ │ ├── eomt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_eomt.py
│ │ │ ├── convert_eomt_to_hf.py
│ │ │ ├── image_processing_eomt.py
│ │ │ ├── image_processing_pil_eomt.py
│ │ │ ├── modeling_eomt.py
│ │ │ └── modular_eomt.py
│ │ ├── eomt_dinov3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_eomt_dinov3.py
│ │ │ ├── convert_eomt_dinov3_to_hf.py
│ │ │ ├── modeling_eomt_dinov3.py
│ │ │ └── modular_eomt_dinov3.py
│ │ ├── ernie/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ernie.py
│ │ │ ├── modeling_ernie.py
│ │ │ └── modular_ernie.py
│ │ ├── ernie4_5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ernie4_5.py
│ │ │ ├── convert_ernie4_5_tokenizer.py
│ │ │ ├── modeling_ernie4_5.py
│ │ │ └── modular_ernie4_5.py
│ │ ├── ernie4_5_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ernie4_5_moe.py
│ │ │ ├── modeling_ernie4_5_moe.py
│ │ │ └── modular_ernie4_5_moe.py
│ │ ├── ernie4_5_vl_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ernie4_5_vl_moe.py
│ │ │ ├── convert_ernie4_5_vl_moe_to_hf.py
│ │ │ ├── image_processing_ernie4_5_vl_moe.py
│ │ │ ├── image_processing_pil_ernie4_5_vl_moe.py
│ │ │ ├── modeling_ernie4_5_vl_moe.py
│ │ │ ├── modular_ernie4_5_vl_moe.py
│ │ │ ├── processing_ernie4_5_vl_moe.py
│ │ │ └── video_processing_ernie4_5_vl_moe.py
│ │ ├── esm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_esm.py
│ │ │ ├── convert_esm.py
│ │ │ ├── modeling_esm.py
│ │ │ ├── modeling_esmfold.py
│ │ │ ├── openfold_utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── chunk_utils.py
│ │ │ │ ├── data_transforms.py
│ │ │ │ ├── feats.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── protein.py
│ │ │ │ ├── residue_constants.py
│ │ │ │ ├── rigid_utils.py
│ │ │ │ └── tensor_utils.py
│ │ │ └── tokenization_esm.py
│ │ ├── eurobert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_eurobert.py
│ │ │ ├── modeling_eurobert.py
│ │ │ └── modular_eurobert.py
│ │ ├── evolla/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_evolla.py
│ │ │ ├── modeling_evolla.py
│ │ │ ├── modular_evolla.py
│ │ │ └── processing_evolla.py
│ │ ├── exaone4/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_exaone4.py
│ │ │ ├── modeling_exaone4.py
│ │ │ └── modular_exaone4.py
│ │ ├── exaone_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_exaone_moe.py
│ │ │ ├── modeling_exaone_moe.py
│ │ │ └── modular_exaone_moe.py
│ │ ├── falcon/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_falcon.py
│ │ │ ├── convert_custom_code_checkpoint.py
│ │ │ └── modeling_falcon.py
│ │ ├── falcon_h1/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_falcon_h1.py
│ │ │ ├── convert_mamba_ssm_checkpoint.py
│ │ │ ├── modeling_falcon_h1.py
│ │ │ └── modular_falcon_h1.py
│ │ ├── falcon_mamba/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_falcon_mamba.py
│ │ │ ├── modeling_falcon_mamba.py
│ │ │ └── modular_falcon_mamba.py
│ │ ├── fast_vlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_fast_vlm.py
│ │ │ ├── convert_fastvlm_weights_to_hf.py
│ │ │ ├── modeling_fast_vlm.py
│ │ │ └── modular_fast_vlm.py
│ │ ├── fastspeech2_conformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_fastspeech2_conformer.py
│ │ │ ├── convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_hifigan.py
│ │ │ ├── convert_model_with_hifigan.py
│ │ │ ├── modeling_fastspeech2_conformer.py
│ │ │ └── tokenization_fastspeech2_conformer.py
│ │ ├── flaubert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_flaubert.py
│ │ │ ├── modeling_flaubert.py
│ │ │ └── tokenization_flaubert.py
│ │ ├── flava/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_flava.py
│ │ │ ├── convert_dalle_to_flava_codebook.py
│ │ │ ├── convert_flava_original_pytorch_to_hf.py
│ │ │ ├── image_processing_flava.py
│ │ │ ├── image_processing_pil_flava.py
│ │ │ ├── modeling_flava.py
│ │ │ └── processing_flava.py
│ │ ├── flex_olmo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_flex_olmo.py
│ │ │ ├── modeling_flex_olmo.py
│ │ │ └── modular_flex_olmo.py
│ │ ├── florence2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_florence2.py
│ │ │ ├── convert_florence2_original_pytorch_to_hf.py
│ │ │ ├── modeling_florence2.py
│ │ │ ├── modular_florence2.py
│ │ │ └── processing_florence2.py
│ │ ├── fnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_fnet.py
│ │ │ ├── convert_fnet_original_flax_checkpoint_to_pytorch.py
│ │ │ ├── modeling_fnet.py
│ │ │ └── tokenization_fnet.py
│ │ ├── focalnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_focalnet.py
│ │ │ ├── convert_focalnet_to_hf_format.py
│ │ │ └── modeling_focalnet.py
│ │ ├── fsmt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_fsmt.py
│ │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_fsmt.py
│ │ │ └── tokenization_fsmt.py
│ │ ├── funnel/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_funnel.py
│ │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_funnel.py
│ │ │ └── tokenization_funnel.py
│ │ ├── fuyu/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_fuyu.py
│ │ │ ├── convert_fuyu_model_weights_to_hf.py
│ │ │ ├── image_processing_fuyu.py
│ │ │ ├── image_processing_pil_fuyu.py
│ │ │ ├── modeling_fuyu.py
│ │ │ └── processing_fuyu.py
│ │ ├── gemma/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gemma.py
│ │ │ ├── convert_gemma_weights_to_hf.py
│ │ │ ├── modeling_gemma.py
│ │ │ ├── modular_gemma.py
│ │ │ └── tokenization_gemma.py
│ │ ├── gemma2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gemma2.py
│ │ │ ├── convert_gemma2_weights_to_hf.py
│ │ │ ├── modeling_gemma2.py
│ │ │ └── modular_gemma2.py
│ │ ├── gemma3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gemma3.py
│ │ │ ├── convert_gemma3_weights.py
│ │ │ ├── image_processing_gemma3.py
│ │ │ ├── image_processing_pil_gemma3.py
│ │ │ ├── modeling_gemma3.py
│ │ │ ├── modular_gemma3.py
│ │ │ └── processing_gemma3.py
│ │ ├── gemma3n/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gemma3n.py
│ │ │ ├── convert_gemma3n_weights.py
│ │ │ ├── feature_extraction_gemma3n.py
│ │ │ ├── modeling_gemma3n.py
│ │ │ ├── modular_gemma3n.py
│ │ │ └── processing_gemma3n.py
│ │ ├── git/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_git.py
│ │ │ ├── convert_git_to_pytorch.py
│ │ │ ├── modeling_git.py
│ │ │ └── processing_git.py
│ │ ├── glm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm.py
│ │ │ ├── convert_glm_weights_to_hf.py
│ │ │ ├── modeling_glm.py
│ │ │ └── modular_glm.py
│ │ ├── glm4/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm4.py
│ │ │ ├── convert_glm4_weights_to_hf.py
│ │ │ ├── modeling_glm4.py
│ │ │ └── modular_glm4.py
│ │ ├── glm46v/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm46v.py
│ │ │ ├── image_processing_glm46v.py
│ │ │ ├── image_processing_pil_glm46v.py
│ │ │ ├── modeling_glm46v.py
│ │ │ ├── modular_glm46v.py
│ │ │ ├── processing_glm46v.py
│ │ │ └── video_processing_glm46v.py
│ │ ├── glm4_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm4_moe.py
│ │ │ ├── modeling_glm4_moe.py
│ │ │ └── modular_glm4_moe.py
│ │ ├── glm4_moe_lite/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm4_moe_lite.py
│ │ │ ├── modeling_glm4_moe_lite.py
│ │ │ └── modular_glm4_moe_lite.py
│ │ ├── glm4v/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm4v.py
│ │ │ ├── convert_glm4v_mgt_weights_to_hf.py
│ │ │ ├── image_processing_glm4v.py
│ │ │ ├── image_processing_pil_glm4v.py
│ │ │ ├── modeling_glm4v.py
│ │ │ ├── modular_glm4v.py
│ │ │ ├── processing_glm4v.py
│ │ │ └── video_processing_glm4v.py
│ │ ├── glm4v_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm4v_moe.py
│ │ │ ├── convert_glm4v_moe_mgt_weights_to_hf.py
│ │ │ ├── modeling_glm4v_moe.py
│ │ │ └── modular_glm4v_moe.py
│ │ ├── glm_image/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm_image.py
│ │ │ ├── image_processing_glm_image.py
│ │ │ ├── image_processing_pil_glm_image.py
│ │ │ ├── modeling_glm_image.py
│ │ │ ├── modular_glm_image.py
│ │ │ └── processing_glm_image.py
│ │ ├── glm_moe_dsa/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm_moe_dsa.py
│ │ │ ├── modeling_glm_moe_dsa.py
│ │ │ └── modular_glm_moe_dsa.py
│ │ ├── glm_ocr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glm_ocr.py
│ │ │ ├── modeling_glm_ocr.py
│ │ │ └── modular_glm_ocr.py
│ │ ├── glmasr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glmasr.py
│ │ │ ├── convert_glmasr_weights_to_hf.py
│ │ │ ├── modeling_glmasr.py
│ │ │ ├── modular_glmasr.py
│ │ │ └── processing_glmasr.py
│ │ ├── glpn/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_glpn.py
│ │ │ ├── convert_glpn_to_pytorch.py
│ │ │ ├── image_processing_glpn.py
│ │ │ ├── image_processing_pil_glpn.py
│ │ │ └── modeling_glpn.py
│ │ ├── got_ocr2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_got_ocr2.py
│ │ │ ├── convert_got_ocr2_weights_to_hf.py
│ │ │ ├── image_processing_got_ocr2.py
│ │ │ ├── image_processing_pil_got_ocr2.py
│ │ │ ├── modeling_got_ocr2.py
│ │ │ ├── modular_got_ocr2.py
│ │ │ └── processing_got_ocr2.py
│ │ ├── gpt2/
│ │ │ ├── CONVERSION.md
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt2.py
│ │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_gpt2.py
│ │ │ └── tokenization_gpt2.py
│ │ ├── gpt_bigcode/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt_bigcode.py
│ │ │ └── modeling_gpt_bigcode.py
│ │ ├── gpt_neo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt_neo.py
│ │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py
│ │ │ └── modeling_gpt_neo.py
│ │ ├── gpt_neox/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt_neox.py
│ │ │ ├── modeling_gpt_neox.py
│ │ │ ├── modular_gpt_neox.py
│ │ │ └── tokenization_gpt_neox.py
│ │ ├── gpt_neox_japanese/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt_neox_japanese.py
│ │ │ ├── modeling_gpt_neox_japanese.py
│ │ │ └── tokenization_gpt_neox_japanese.py
│ │ ├── gpt_oss/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gpt_oss.py
│ │ │ ├── convert_gpt_oss_weights_to_hf.py
│ │ │ ├── modeling_gpt_oss.py
│ │ │ └── modular_gpt_oss.py
│ │ ├── gpt_sw3/
│ │ │ ├── __init__.py
│ │ │ ├── convert_megatron_to_pytorch.py
│ │ │ └── tokenization_gpt_sw3.py
│ │ ├── gptj/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_gptj.py
│ │ │ └── modeling_gptj.py
│ │ ├── granite/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_granite.py
│ │ │ ├── modeling_granite.py
│ │ │ └── modular_granite.py
│ │ ├── granite_speech/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_granite_speech.py
│ │ │ ├── feature_extraction_granite_speech.py
│ │ │ ├── modeling_granite_speech.py
│ │ │ └── processing_granite_speech.py
│ │ ├── granitemoe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_granitemoe.py
│ │ │ ├── modeling_granitemoe.py
│ │ │ └── modular_granitemoe.py
│ │ ├── granitemoehybrid/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_granitemoehybrid.py
│ │ │ ├── modeling_granitemoehybrid.py
│ │ │ └── modular_granitemoehybrid.py
│ │ ├── granitemoeshared/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_granitemoeshared.py
│ │ │ ├── modeling_granitemoeshared.py
│ │ │ └── modular_granitemoeshared.py
│ │ ├── grounding_dino/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_grounding_dino.py
│ │ │ ├── convert_grounding_dino_to_hf.py
│ │ │ ├── image_processing_grounding_dino.py
│ │ │ ├── image_processing_pil_grounding_dino.py
│ │ │ ├── modeling_grounding_dino.py
│ │ │ ├── modular_grounding_dino.py
│ │ │ └── processing_grounding_dino.py
│ │ ├── groupvit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_groupvit.py
│ │ │ ├── convert_groupvit_nvlab_to_hf.py
│ │ │ └── modeling_groupvit.py
│ │ ├── helium/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_helium.py
│ │ │ ├── modeling_helium.py
│ │ │ └── modular_helium.py
│ │ ├── herbert/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_herbert.py
│ │ ├── hgnet_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_hgnet_v2.py
│ │ │ ├── modeling_hgnet_v2.py
│ │ │ └── modular_hgnet_v2.py
│ │ ├── hiera/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_hiera.py
│ │ │ ├── convert_hiera_to_hf.py
│ │ │ └── modeling_hiera.py
│ │ ├── higgs_audio_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_higgs_audio_v2.py
│ │ │ ├── convert_higgs_audio_v2_to_hf.py
│ │ │ ├── generation_higgs_audio_v2.py
│ │ │ ├── modeling_higgs_audio_v2.py
│ │ │ ├── modular_higgs_audio_v2.py
│ │ │ └── processing_higgs_audio_v2.py
│ │ ├── higgs_audio_v2_tokenizer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_higgs_audio_v2_tokenizer.py
│ │ │ ├── convert_higgs_audio_v2_tokenizer_to_hf.py
│ │ │ ├── modeling_higgs_audio_v2_tokenizer.py
│ │ │ └── modular_higgs_audio_v2_tokenizer.py
│ │ ├── hubert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_hubert.py
│ │ │ ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ ├── modeling_hubert.py
│ │ │ └── modular_hubert.py
│ │ ├── hunyuan_v1_dense/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_hunyuan_v1_dense.py
│ │ │ ├── modeling_hunyuan_v1_dense.py
│ │ │ └── modular_hunyuan_v1_dense.py
│ │ ├── hunyuan_v1_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_hunyuan_v1_moe.py
│ │ │ ├── modeling_hunyuan_v1_moe.py
│ │ │ └── modular_hunyuan_v1_moe.py
│ │ ├── ibert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ibert.py
│ │ │ ├── modeling_ibert.py
│ │ │ └── quant_modules.py
│ │ ├── idefics/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_idefics.py
│ │ │ ├── image_processing_idefics.py
│ │ │ ├── image_processing_pil_idefics.py
│ │ │ ├── modeling_idefics.py
│ │ │ ├── perceiver.py
│ │ │ ├── processing_idefics.py
│ │ │ └── vision.py
│ │ ├── idefics2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_idefics2.py
│ │ │ ├── convert_idefics2_weights_to_hf.py
│ │ │ ├── image_processing_idefics2.py
│ │ │ ├── image_processing_pil_idefics2.py
│ │ │ ├── modeling_idefics2.py
│ │ │ └── processing_idefics2.py
│ │ ├── idefics3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_idefics3.py
│ │ │ ├── convert_idefics3_weights_to_hf.py
│ │ │ ├── image_processing_idefics3.py
│ │ │ ├── image_processing_pil_idefics3.py
│ │ │ ├── modeling_idefics3.py
│ │ │ └── processing_idefics3.py
│ │ ├── ijepa/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ijepa.py
│ │ │ ├── convert_ijepa_to_hf.py
│ │ │ ├── modeling_ijepa.py
│ │ │ └── modular_ijepa.py
│ │ ├── imagegpt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_imagegpt.py
│ │ │ ├── convert_imagegpt_original_tf2_to_pytorch.py
│ │ │ ├── image_processing_imagegpt.py
│ │ │ ├── image_processing_pil_imagegpt.py
│ │ │ └── modeling_imagegpt.py
│ │ ├── informer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_informer.py
│ │ │ ├── modeling_informer.py
│ │ │ └── modular_informer.py
│ │ ├── instructblip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_instructblip.py
│ │ │ ├── convert_instructblip_original_to_pytorch.py
│ │ │ ├── modeling_instructblip.py
│ │ │ └── processing_instructblip.py
│ │ ├── instructblipvideo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_instructblipvideo.py
│ │ │ ├── convert_instructblipvideo_original_to_pytorch.py
│ │ │ ├── modeling_instructblipvideo.py
│ │ │ ├── modular_instructblipvideo.py
│ │ │ ├── processing_instructblipvideo.py
│ │ │ └── video_processing_instructblipvideo.py
│ │ ├── internvl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internvl.py
│ │ │ ├── convert_internvl_weights_to_hf.py
│ │ │ ├── modeling_internvl.py
│ │ │ ├── modular_internvl.py
│ │ │ ├── processing_internvl.py
│ │ │ └── video_processing_internvl.py
│ │ ├── jais2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_jais2.py
│ │ │ ├── modeling_jais2.py
│ │ │ └── modular_jais2.py
│ │ ├── jamba/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_jamba.py
│ │ │ ├── modeling_jamba.py
│ │ │ └── modular_jamba.py
│ │ ├── janus/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_janus.py
│ │ │ ├── convert_janus_weights_to_hf.py
│ │ │ ├── image_processing_janus.py
│ │ │ ├── image_processing_pil_janus.py
│ │ │ ├── modeling_janus.py
│ │ │ ├── modular_janus.py
│ │ │ └── processing_janus.py
│ │ ├── jetmoe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_jetmoe.py
│ │ │ ├── modeling_jetmoe.py
│ │ │ └── modular_jetmoe.py
│ │ ├── jina_embeddings_v3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_jina_embeddings_v3.py
│ │ │ ├── modeling_jina_embeddings_v3.py
│ │ │ └── modular_jina_embeddings_v3.py
│ │ ├── kosmos2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_kosmos2.py
│ │ │ ├── convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_kosmos2.py
│ │ │ └── processing_kosmos2.py
│ │ ├── kosmos2_5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_kosmos2_5.py
│ │ │ ├── convert_kosmos2_5.py
│ │ │ ├── image_processing_kosmos2_5.py
│ │ │ ├── image_processing_pil_kosmos2_5.py
│ │ │ ├── modeling_kosmos2_5.py
│ │ │ └── processing_kosmos2_5.py
│ │ ├── kyutai_speech_to_text/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_kyutai_speech_to_text.py
│ │ │ ├── convert_kyutai_speech_to_text_to_hf.py
│ │ │ ├── feature_extraction_kyutai_speech_to_text.py
│ │ │ ├── modeling_kyutai_speech_to_text.py
│ │ │ ├── modular_kyutai_speech_to_text.py
│ │ │ └── processing_kyutai_speech_to_text.py
│ │ ├── lasr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lasr.py
│ │ │ ├── feature_extraction_lasr.py
│ │ │ ├── modeling_lasr.py
│ │ │ ├── modular_lasr.py
│ │ │ ├── processing_lasr.py
│ │ │ └── tokenization_lasr.py
│ │ ├── layoutlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_layoutlm.py
│ │ │ └── modeling_layoutlm.py
│ │ ├── layoutlmv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_layoutlmv2.py
│ │ │ ├── image_processing_layoutlmv2.py
│ │ │ ├── image_processing_pil_layoutlmv2.py
│ │ │ ├── modeling_layoutlmv2.py
│ │ │ ├── processing_layoutlmv2.py
│ │ │ └── tokenization_layoutlmv2.py
│ │ ├── layoutlmv3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_layoutlmv3.py
│ │ │ ├── image_processing_layoutlmv3.py
│ │ │ ├── image_processing_pil_layoutlmv3.py
│ │ │ ├── modeling_layoutlmv3.py
│ │ │ ├── processing_layoutlmv3.py
│ │ │ └── tokenization_layoutlmv3.py
│ │ ├── layoutxlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_layoutxlm.py
│ │ │ ├── modular_layoutxlm.py
│ │ │ ├── processing_layoutxlm.py
│ │ │ └── tokenization_layoutxlm.py
│ │ ├── led/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_led.py
│ │ │ └── modeling_led.py
│ │ ├── levit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_levit.py
│ │ │ ├── convert_levit_timm_to_pytorch.py
│ │ │ ├── image_processing_levit.py
│ │ │ ├── image_processing_pil_levit.py
│ │ │ └── modeling_levit.py
│ │ ├── lfm2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lfm2.py
│ │ │ ├── modeling_lfm2.py
│ │ │ └── modular_lfm2.py
│ │ ├── lfm2_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lfm2_moe.py
│ │ │ ├── modeling_lfm2_moe.py
│ │ │ └── modular_lfm2_moe.py
│ │ ├── lfm2_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lfm2_vl.py
│ │ │ ├── image_processing_lfm2_vl.py
│ │ │ ├── modeling_lfm2_vl.py
│ │ │ ├── modular_lfm2_vl.py
│ │ │ └── processing_lfm2_vl.py
│ │ ├── lightglue/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lightglue.py
│ │ │ ├── convert_lightglue_to_hf.py
│ │ │ ├── image_processing_lightglue.py
│ │ │ ├── image_processing_pil_lightglue.py
│ │ │ ├── modeling_lightglue.py
│ │ │ └── modular_lightglue.py
│ │ ├── lighton_ocr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lighton_ocr.py
│ │ │ ├── modeling_lighton_ocr.py
│ │ │ ├── modular_lighton_ocr.py
│ │ │ └── processing_lighton_ocr.py
│ │ ├── lilt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lilt.py
│ │ │ └── modeling_lilt.py
│ │ ├── llama/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llama.py
│ │ │ ├── convert_llama_weights_to_hf.py
│ │ │ ├── modeling_llama.py
│ │ │ └── tokenization_llama.py
│ │ ├── llama4/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llama4.py
│ │ │ ├── convert_llama4_weights_to_hf.py
│ │ │ ├── image_processing_llama4.py
│ │ │ ├── modeling_llama4.py
│ │ │ └── processing_llama4.py
│ │ ├── llava/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llava.py
│ │ │ ├── convert_llava_weights_to_hf.py
│ │ │ ├── image_processing_llava.py
│ │ │ ├── image_processing_pil_llava.py
│ │ │ ├── modeling_llava.py
│ │ │ └── processing_llava.py
│ │ ├── llava_next/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llava_next.py
│ │ │ ├── convert_llava_next_weights_to_hf.py
│ │ │ ├── image_processing_llava_next.py
│ │ │ ├── image_processing_pil_llava_next.py
│ │ │ ├── modeling_llava_next.py
│ │ │ └── processing_llava_next.py
│ │ ├── llava_next_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llava_next_video.py
│ │ │ ├── convert_llava_next_video_weights_to_hf.py
│ │ │ ├── modeling_llava_next_video.py
│ │ │ ├── modular_llava_next_video.py
│ │ │ ├── processing_llava_next_video.py
│ │ │ └── video_processing_llava_next_video.py
│ │ ├── llava_onevision/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_llava_onevision.py
│ │ │ ├── convert_llava_onevision_weights_to_hf.py
│ │ │ ├── image_processing_llava_onevision.py
│ │ │ ├── image_processing_pil_llava_onevision.py
│ │ │ ├── modeling_llava_onevision.py
│ │ │ ├── modular_llava_onevision.py
│ │ │ ├── processing_llava_onevision.py
│ │ │ └── video_processing_llava_onevision.py
│ │ ├── longcat_flash/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_longcat_flash.py
│ │ │ ├── modeling_longcat_flash.py
│ │ │ └── modular_longcat_flash.py
│ │ ├── longformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_longformer.py
│ │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│ │ │ └── modeling_longformer.py
│ │ ├── longt5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_longt5.py
│ │ │ └── modeling_longt5.py
│ │ ├── luke/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_luke.py
│ │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_luke.py
│ │ │ └── tokenization_luke.py
│ │ ├── lw_detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lw_detr.py
│ │ │ ├── convert_lw_detr_to_hf.py
│ │ │ ├── modeling_lw_detr.py
│ │ │ └── modular_lw_detr.py
│ │ ├── lxmert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_lxmert.py
│ │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py
│ │ │ └── modeling_lxmert.py
│ │ ├── m2m_100/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_m2m_100.py
│ │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py
│ │ │ ├── modeling_m2m_100.py
│ │ │ └── tokenization_m2m_100.py
│ │ ├── mamba/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mamba.py
│ │ │ ├── convert_mamba_ssm_checkpoint_to_pytorch.py
│ │ │ └── modeling_mamba.py
│ │ ├── mamba2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mamba2.py
│ │ │ ├── convert_mamba2_ssm_checkpoint_to_pytorch.py
│ │ │ └── modeling_mamba2.py
│ │ ├── marian/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_marian.py
│ │ │ ├── convert_marian_tatoeba_to_pytorch.py
│ │ │ ├── convert_marian_to_pytorch.py
│ │ │ ├── modeling_marian.py
│ │ │ └── tokenization_marian.py
│ │ ├── markuplm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_markuplm.py
│ │ │ ├── feature_extraction_markuplm.py
│ │ │ ├── modeling_markuplm.py
│ │ │ ├── processing_markuplm.py
│ │ │ └── tokenization_markuplm.py
│ │ ├── mask2former/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mask2former.py
│ │ │ ├── convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── image_processing_mask2former.py
│ │ │ ├── image_processing_pil_mask2former.py
│ │ │ ├── modeling_mask2former.py
│ │ │ └── modular_mask2former.py
│ │ ├── maskformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_maskformer.py
│ │ │ ├── configuration_maskformer_swin.py
│ │ │ ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_maskformer_resnet_to_pytorch.py
│ │ │ ├── convert_maskformer_swin_to_pytorch.py
│ │ │ ├── image_processing_maskformer.py
│ │ │ ├── image_processing_pil_maskformer.py
│ │ │ ├── modeling_maskformer.py
│ │ │ ├── modeling_maskformer_swin.py
│ │ │ └── modular_maskformer.py
│ │ ├── mbart/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mbart.py
│ │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py
│ │ │ ├── modeling_mbart.py
│ │ │ └── tokenization_mbart.py
│ │ ├── mbart50/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_mbart50.py
│ │ ├── megatron_bert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_megatron_bert.py
│ │ │ ├── convert_megatron_bert_checkpoint.py
│ │ │ └── modeling_megatron_bert.py
│ │ ├── megatron_gpt2/
│ │ │ ├── __init__.py
│ │ │ ├── checkpoint_reshaping_and_interoperability.py
│ │ │ └── convert_megatron_gpt2_checkpoint.py
│ │ ├── metaclip_2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_metaclip_2.py
│ │ │ ├── convert_metaclip_2_to_hf.py
│ │ │ ├── modeling_metaclip_2.py
│ │ │ └── modular_metaclip_2.py
│ │ ├── mgp_str/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mgp_str.py
│ │ │ ├── modeling_mgp_str.py
│ │ │ ├── processing_mgp_str.py
│ │ │ └── tokenization_mgp_str.py
│ │ ├── mimi/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mimi.py
│ │ │ ├── convert_mimi_checkpoint_to_pytorch.py
│ │ │ └── modeling_mimi.py
│ │ ├── minimax/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_minimax.py
│ │ │ ├── modeling_minimax.py
│ │ │ └── modular_minimax.py
│ │ ├── minimax_m2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_minimax_m2.py
│ │ │ ├── modeling_minimax_m2.py
│ │ │ └── modular_minimax_m2.py
│ │ ├── ministral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ministral.py
│ │ │ ├── modeling_ministral.py
│ │ │ └── modular_ministral.py
│ │ ├── ministral3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ministral3.py
│ │ │ ├── convert_ministral3_weights_to_hf.py
│ │ │ ├── modeling_ministral3.py
│ │ │ └── modular_ministral3.py
│ │ ├── mistral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mistral.py
│ │ │ ├── convert_mistral_weights_to_hf.py
│ │ │ ├── modeling_mistral.py
│ │ │ └── modular_mistral.py
│ │ ├── mistral3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mistral3.py
│ │ │ ├── convert_mistral3_weights_to_hf.py
│ │ │ ├── modeling_mistral3.py
│ │ │ └── modular_mistral3.py
│ │ ├── mistral4/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mistral4.py
│ │ │ ├── convert_mistral4_weight_to_hf.py
│ │ │ ├── modeling_mistral4.py
│ │ │ └── modular_mistral4.py
│ │ ├── mixtral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mixtral.py
│ │ │ ├── convert_mixtral_weights_to_hf.py
│ │ │ ├── modeling_mixtral.py
│ │ │ └── modular_mixtral.py
│ │ ├── mlcd/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mlcd.py
│ │ │ ├── convert_mlcd_weights_to_hf.py
│ │ │ ├── modeling_mlcd.py
│ │ │ └── modular_mlcd.py
│ │ ├── mllama/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mllama.py
│ │ │ ├── convert_mllama_weights_to_hf.py
│ │ │ ├── image_processing_mllama.py
│ │ │ ├── image_processing_pil_mllama.py
│ │ │ ├── modeling_mllama.py
│ │ │ └── processing_mllama.py
│ │ ├── mluke/
│ │ │ ├── __init__.py
│ │ │ ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── tokenization_mluke.py
│ │ ├── mm_grounding_dino/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mm_grounding_dino.py
│ │ │ ├── convert_mm_grounding_dino_to_hf.py
│ │ │ ├── modeling_mm_grounding_dino.py
│ │ │ └── modular_mm_grounding_dino.py
│ │ ├── mobilebert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mobilebert.py
│ │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_mobilebert.py
│ │ │ └── tokenization_mobilebert.py
│ │ ├── mobilenet_v1/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mobilenet_v1.py
│ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── image_processing_mobilenet_pil_v1.py
│ │ │ ├── image_processing_mobilenet_v1.py
│ │ │ └── modeling_mobilenet_v1.py
│ │ ├── mobilenet_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mobilenet_v2.py
│ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── image_processing_mobilenet_v2.py
│ │ │ ├── image_processing_pil_mobilenet_v2.py
│ │ │ └── modeling_mobilenet_v2.py
│ │ ├── mobilevit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mobilevit.py
│ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ ├── image_processing_mobilevit.py
│ │ │ ├── image_processing_pil_mobilevit.py
│ │ │ └── modeling_mobilevit.py
│ │ ├── mobilevitv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mobilevitv2.py
│ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ └── modeling_mobilevitv2.py
│ │ ├── modernbert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_modernbert.py
│ │ │ ├── modeling_modernbert.py
│ │ │ └── modular_modernbert.py
│ │ ├── modernbert_decoder/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_modernbert_decoder.py
│ │ │ ├── modeling_modernbert_decoder.py
│ │ │ └── modular_modernbert_decoder.py
│ │ ├── modernvbert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_modernvbert.py
│ │ │ ├── modeling_modernvbert.py
│ │ │ └── modular_modernvbert.py
│ │ ├── moonshine/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_moonshine.py
│ │ │ ├── convert_usefulsensors_to_hf.py
│ │ │ ├── modeling_moonshine.py
│ │ │ └── modular_moonshine.py
│ │ ├── moonshine_streaming/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_moonshine_streaming.py
│ │ │ ├── modeling_moonshine_streaming.py
│ │ │ ├── modular_moonshine_streaming.py
│ │ │ └── processing_moonshine_streaming.py
│ │ ├── moshi/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_moshi.py
│ │ │ ├── convert_moshi_transformers.py
│ │ │ └── modeling_moshi.py
│ │ ├── mpnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mpnet.py
│ │ │ ├── modeling_mpnet.py
│ │ │ └── tokenization_mpnet.py
│ │ ├── mpt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mpt.py
│ │ │ └── modeling_mpt.py
│ │ ├── mra/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mra.py
│ │ │ ├── convert_mra_pytorch_to_pytorch.py
│ │ │ └── modeling_mra.py
│ │ ├── mt5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mt5.py
│ │ │ └── modeling_mt5.py
│ │ ├── musicflamingo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_musicflamingo.py
│ │ │ ├── convert_musicflamingo_to_hf.py
│ │ │ ├── modeling_musicflamingo.py
│ │ │ ├── modular_musicflamingo.py
│ │ │ └── processing_musicflamingo.py
│ │ ├── musicgen/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_musicgen.py
│ │ │ ├── convert_musicgen_transformers.py
│ │ │ ├── modeling_musicgen.py
│ │ │ └── processing_musicgen.py
│ │ ├── musicgen_melody/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_musicgen_melody.py
│ │ │ ├── convert_musicgen_melody_transformers.py
│ │ │ ├── feature_extraction_musicgen_melody.py
│ │ │ ├── modeling_musicgen_melody.py
│ │ │ └── processing_musicgen_melody.py
│ │ ├── mvp/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mvp.py
│ │ │ └── modeling_mvp.py
│ │ ├── myt5/
│ │ │ ├── __init__.py
│ │ │ ├── convert_myt5_original_tf_checkpoint_to_pytorch.py
│ │ │ └── tokenization_myt5.py
│ │ ├── nanochat/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_nanochat.py
│ │ │ ├── convert_nanochat_checkpoints.py
│ │ │ ├── modeling_nanochat.py
│ │ │ └── modular_nanochat.py
│ │ ├── nemotron/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_nemotron.py
│ │ │ ├── convert_nemotron_nemo_to_hf.py
│ │ │ └── modeling_nemotron.py
│ │ ├── nemotron_h/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_nemotron_h.py
│ │ │ ├── modeling_nemotron_h.py
│ │ │ └── modular_nemotron_h.py
│ │ ├── nllb/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_nllb.py
│ │ ├── nllb_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_nllb_moe.py
│ │ │ ├── convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
│ │ │ └── modeling_nllb_moe.py
│ │ ├── nougat/
│ │ │ ├── __init__.py
│ │ │ ├── convert_nougat_to_hf.py
│ │ │ ├── image_processing_nougat.py
│ │ │ ├── image_processing_pil_nougat.py
│ │ │ ├── processing_nougat.py
│ │ │ └── tokenization_nougat.py
│ │ ├── nystromformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_nystromformer.py
│ │ │ ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_nystromformer.py
│ │ ├── olmo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_olmo.py
│ │ │ ├── convert_olmo_weights_to_hf.py
│ │ │ ├── modeling_olmo.py
│ │ │ └── modular_olmo.py
│ │ ├── olmo2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_olmo2.py
│ │ │ ├── convert_olmo2_weights_to_hf.py
│ │ │ ├── modeling_olmo2.py
│ │ │ └── modular_olmo2.py
│ │ ├── olmo3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_olmo3.py
│ │ │ ├── convert_olmo3_weights_to_hf.py
│ │ │ ├── modeling_olmo3.py
│ │ │ └── modular_olmo3.py
│ │ ├── olmo_hybrid/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_olmo_hybrid.py
│ │ │ ├── convert_olmo_hybrid_weights_to_hf.py
│ │ │ ├── modeling_olmo_hybrid.py
│ │ │ └── modular_olmo_hybrid.py
│ │ ├── olmoe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_olmoe.py
│ │ │ ├── convert_olmoe_weights_to_hf.py
│ │ │ ├── modeling_olmoe.py
│ │ │ └── modular_olmoe.py
│ │ ├── omdet_turbo/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_omdet_turbo.py
│ │ │ ├── convert_omdet_turbo_to_hf.py
│ │ │ ├── modeling_omdet_turbo.py
│ │ │ └── processing_omdet_turbo.py
│ │ ├── oneformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_oneformer.py
│ │ │ ├── convert_to_hf_oneformer.py
│ │ │ ├── image_processing_oneformer.py
│ │ │ ├── image_processing_pil_oneformer.py
│ │ │ ├── modeling_oneformer.py
│ │ │ └── processing_oneformer.py
│ │ ├── openai/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_openai.py
│ │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_openai.py
│ │ │ └── tokenization_openai.py
│ │ ├── opt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_opt.py
│ │ │ ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_opt.py
│ │ ├── ovis2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_ovis2.py
│ │ │ ├── convert_ovis2_weights_to_hf.py
│ │ │ ├── image_processing_ovis2.py
│ │ │ ├── image_processing_pil_ovis2.py
│ │ │ ├── modeling_ovis2.py
│ │ │ ├── modular_ovis2.py
│ │ │ └── processing_ovis2.py
│ │ ├── owlv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_owlv2.py
│ │ │ ├── convert_owlv2_to_hf.py
│ │ │ ├── image_processing_owlv2.py
│ │ │ ├── image_processing_pil_owlv2.py
│ │ │ ├── modeling_owlv2.py
│ │ │ ├── modular_owlv2.py
│ │ │ └── processing_owlv2.py
│ │ ├── owlvit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_owlvit.py
│ │ │ ├── convert_owlvit_original_flax_to_hf.py
│ │ │ ├── image_processing_owlvit.py
│ │ │ ├── image_processing_pil_owlvit.py
│ │ │ ├── modeling_owlvit.py
│ │ │ └── processing_owlvit.py
│ │ ├── paddleocr_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_paddleocr_vl.py
│ │ │ ├── image_processing_paddleocr_vl.py
│ │ │ ├── image_processing_pil_paddleocr_vl.py
│ │ │ ├── modeling_paddleocr_vl.py
│ │ │ ├── modular_paddleocr_vl.py
│ │ │ └── processing_paddleocr_vl.py
│ │ ├── paligemma/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_paligemma.py
│ │ │ ├── convert_paligemma2_weights_to_hf.py
│ │ │ ├── convert_paligemma_weights_to_hf.py
│ │ │ ├── modeling_paligemma.py
│ │ │ └── processing_paligemma.py
│ │ ├── parakeet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_parakeet.py
│ │ │ ├── convert_nemo_to_hf.py
│ │ │ ├── feature_extraction_parakeet.py
│ │ │ ├── modeling_parakeet.py
│ │ │ ├── modular_parakeet.py
│ │ │ ├── processing_parakeet.py
│ │ │ └── tokenization_parakeet.py
│ │ ├── patchtsmixer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_patchtsmixer.py
│ │ │ └── modeling_patchtsmixer.py
│ │ ├── patchtst/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_patchtst.py
│ │ │ └── modeling_patchtst.py
│ │ ├── pe_audio/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pe_audio.py
│ │ │ ├── feature_extraction_pe_audio.py
│ │ │ ├── modeling_pe_audio.py
│ │ │ ├── modular_pe_audio.py
│ │ │ └── processing_pe_audio.py
│ │ ├── pe_audio_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pe_audio_video.py
│ │ │ ├── convert_pe_audio_video_to_hf.py
│ │ │ ├── modeling_pe_audio_video.py
│ │ │ ├── modular_pe_audio_video.py
│ │ │ └── processing_pe_audio_video.py
│ │ ├── pe_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pe_video.py
│ │ │ ├── modeling_pe_video.py
│ │ │ ├── modular_pe_video.py
│ │ │ ├── processing_pe_video.py
│ │ │ └── video_processing_pe_video.py
│ │ ├── pegasus/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pegasus.py
│ │ │ ├── convert_pegasus_tf_to_pytorch.py
│ │ │ ├── modeling_pegasus.py
│ │ │ └── tokenization_pegasus.py
│ │ ├── pegasus_x/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pegasus_x.py
│ │ │ └── modeling_pegasus_x.py
│ │ ├── perceiver/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_perceiver.py
│ │ │ ├── convert_perceiver_haiku_to_pytorch.py
│ │ │ ├── image_processing_perceiver.py
│ │ │ ├── image_processing_pil_perceiver.py
│ │ │ ├── modeling_perceiver.py
│ │ │ └── tokenization_perceiver.py
│ │ ├── perception_lm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_perception_lm.py
│ │ │ ├── convert_perception_lm_weights_to_hf.py
│ │ │ ├── image_processing_perception_lm.py
│ │ │ ├── modeling_perception_lm.py
│ │ │ ├── modular_perception_lm.py
│ │ │ ├── processing_perception_lm.py
│ │ │ └── video_processing_perception_lm.py
│ │ ├── persimmon/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_persimmon.py
│ │ │ ├── convert_persimmon_weights_to_hf.py
│ │ │ └── modeling_persimmon.py
│ │ ├── phi/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_phi.py
│ │ │ ├── convert_phi_weights_to_hf.py
│ │ │ ├── modeling_phi.py
│ │ │ └── modular_phi.py
│ │ ├── phi3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_phi3.py
│ │ │ ├── modeling_phi3.py
│ │ │ └── modular_phi3.py
│ │ ├── phi4_multimodal/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_phi4_multimodal.py
│ │ │ ├── convert_phi4_multimodal_weights_to_hf.py
│ │ │ ├── feature_extraction_phi4_multimodal.py
│ │ │ ├── image_processing_phi4_multimodal.py
│ │ │ ├── modeling_phi4_multimodal.py
│ │ │ ├── modular_phi4_multimodal.py
│ │ │ └── processing_phi4_multimodal.py
│ │ ├── phimoe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_phimoe.py
│ │ │ ├── modeling_phimoe.py
│ │ │ └── modular_phimoe.py
│ │ ├── phobert/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_phobert.py
│ │ ├── pi0/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pi0.py
│ │ │ ├── image_processing_pi0.py
│ │ │ ├── modeling_pi0.py
│ │ │ ├── modular_pi0.py
│ │ │ └── processing_pi0.py
│ │ ├── pix2struct/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pix2struct.py
│ │ │ ├── convert_pix2struct_original_pytorch_to_hf.py
│ │ │ ├── image_processing_pil_pix2struct.py
│ │ │ ├── image_processing_pix2struct.py
│ │ │ ├── modeling_pix2struct.py
│ │ │ └── processing_pix2struct.py
│ │ ├── pixio/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pixio.py
│ │ │ ├── convert_pixio_to_pytorch.py
│ │ │ ├── modeling_pixio.py
│ │ │ └── modular_pixio.py
│ │ ├── pixtral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pixtral.py
│ │ │ ├── convert_pixtral_weights_to_hf.py
│ │ │ ├── image_processing_pil_pixtral.py
│ │ │ ├── image_processing_pixtral.py
│ │ │ ├── modeling_pixtral.py
│ │ │ └── processing_pixtral.py
│ │ ├── plbart/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_plbart.py
│ │ │ ├── convert_plbart_original_checkpoint_to_torch.py
│ │ │ ├── modeling_plbart.py
│ │ │ ├── modular_plbart.py
│ │ │ └── tokenization_plbart.py
│ │ ├── poolformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_poolformer.py
│ │ │ ├── convert_poolformer_original_to_pytorch.py
│ │ │ ├── image_processing_pil_poolformer.py
│ │ │ ├── image_processing_poolformer.py
│ │ │ └── modeling_poolformer.py
│ │ ├── pop2piano/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pop2piano.py
│ │ │ ├── convert_pop2piano_weights_to_hf.py
│ │ │ ├── feature_extraction_pop2piano.py
│ │ │ ├── modeling_pop2piano.py
│ │ │ ├── processing_pop2piano.py
│ │ │ └── tokenization_pop2piano.py
│ │ ├── pp_chart2table/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_chart2table.py
│ │ │ ├── image_processing_pil_pp_chart2table.py
│ │ │ ├── image_processing_pp_chart2table.py
│ │ │ ├── modular_pp_chart2table.py
│ │ │ └── processing_pp_chart2table.py
│ │ ├── pp_doclayout_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_doclayout_v2.py
│ │ │ ├── image_processing_pp_doclayout_v2.py
│ │ │ ├── modeling_pp_doclayout_v2.py
│ │ │ └── modular_pp_doclayout_v2.py
│ │ ├── pp_doclayout_v3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_doclayout_v3.py
│ │ │ ├── image_processing_pp_doclayout_v3.py
│ │ │ ├── modeling_pp_doclayout_v3.py
│ │ │ └── modular_pp_doclayout_v3.py
│ │ ├── pp_lcnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_lcnet.py
│ │ │ ├── image_processing_pp_lcnet.py
│ │ │ ├── modeling_pp_lcnet.py
│ │ │ └── modular_pp_lcnet.py
│ │ ├── pp_lcnet_v3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_lcnet_v3.py
│ │ │ ├── modeling_pp_lcnet_v3.py
│ │ │ └── modular_pp_lcnet_v3.py
│ │ ├── pp_ocrv5_mobile_det/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_ocrv5_mobile_det.py
│ │ │ ├── modeling_pp_ocrv5_mobile_det.py
│ │ │ └── modular_pp_ocrv5_mobile_det.py
│ │ ├── pp_ocrv5_mobile_rec/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_ocrv5_mobile_rec.py
│ │ │ ├── modeling_pp_ocrv5_mobile_rec.py
│ │ │ └── modular_pp_ocrv5_mobile_rec.py
│ │ ├── pp_ocrv5_server_det/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_ocrv5_server_det.py
│ │ │ ├── image_processing_pp_ocrv5_server_det.py
│ │ │ ├── modeling_pp_ocrv5_server_det.py
│ │ │ └── modular_pp_ocrv5_server_det.py
│ │ ├── pp_ocrv5_server_rec/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pp_ocrv5_server_rec.py
│ │ │ ├── image_processing_pp_ocrv5_server_rec.py
│ │ │ ├── modeling_pp_ocrv5_server_rec.py
│ │ │ └── modular_pp_ocrv5_server_rec.py
│ │ ├── prompt_depth_anything/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_prompt_depth_anything.py
│ │ │ ├── convert_prompt_depth_anything_to_hf.py
│ │ │ ├── image_processing_pil_prompt_depth_anything.py
│ │ │ ├── image_processing_prompt_depth_anything.py
│ │ │ ├── modeling_prompt_depth_anything.py
│ │ │ └── modular_prompt_depth_anything.py
│ │ ├── prophetnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_prophetnet.py
│ │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_prophetnet.py
│ │ │ └── tokenization_prophetnet.py
│ │ ├── pvt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pvt.py
│ │ │ ├── convert_pvt_to_pytorch.py
│ │ │ ├── image_processing_pil_pvt.py
│ │ │ ├── image_processing_pvt.py
│ │ │ └── modeling_pvt.py
│ │ ├── pvt_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_pvt_v2.py
│ │ │ ├── convert_pvt_v2_to_pytorch.py
│ │ │ └── modeling_pvt_v2.py
│ │ ├── qwen2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2.py
│ │ │ ├── modeling_qwen2.py
│ │ │ ├── modular_qwen2.py
│ │ │ └── tokenization_qwen2.py
│ │ ├── qwen2_5_omni/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2_5_omni.py
│ │ │ ├── modeling_qwen2_5_omni.py
│ │ │ ├── modular_qwen2_5_omni.py
│ │ │ └── processing_qwen2_5_omni.py
│ │ ├── qwen2_5_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2_5_vl.py
│ │ │ ├── modeling_qwen2_5_vl.py
│ │ │ ├── modular_qwen2_5_vl.py
│ │ │ └── processing_qwen2_5_vl.py
│ │ ├── qwen2_audio/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2_audio.py
│ │ │ ├── modeling_qwen2_audio.py
│ │ │ └── processing_qwen2_audio.py
│ │ ├── qwen2_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2_moe.py
│ │ │ ├── modeling_qwen2_moe.py
│ │ │ └── modular_qwen2_moe.py
│ │ ├── qwen2_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen2_vl.py
│ │ │ ├── image_processing_pil_qwen2_vl.py
│ │ │ ├── image_processing_qwen2_vl.py
│ │ │ ├── modeling_qwen2_vl.py
│ │ │ ├── processing_qwen2_vl.py
│ │ │ └── video_processing_qwen2_vl.py
│ │ ├── qwen3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3.py
│ │ │ ├── modeling_qwen3.py
│ │ │ └── modular_qwen3.py
│ │ ├── qwen3_5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_5.py
│ │ │ ├── modeling_qwen3_5.py
│ │ │ ├── modular_qwen3_5.py
│ │ │ └── tokenization_qwen3_5.py
│ │ ├── qwen3_5_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_5_moe.py
│ │ │ ├── modeling_qwen3_5_moe.py
│ │ │ └── modular_qwen3_5_moe.py
│ │ ├── qwen3_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_moe.py
│ │ │ ├── modeling_qwen3_moe.py
│ │ │ └── modular_qwen3_moe.py
│ │ ├── qwen3_next/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_next.py
│ │ │ ├── modeling_qwen3_next.py
│ │ │ └── modular_qwen3_next.py
│ │ ├── qwen3_omni_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_omni_moe.py
│ │ │ ├── modeling_qwen3_omni_moe.py
│ │ │ ├── modular_qwen3_omni_moe.py
│ │ │ └── processing_qwen3_omni_moe.py
│ │ ├── qwen3_vl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_vl.py
│ │ │ ├── modeling_qwen3_vl.py
│ │ │ ├── modular_qwen3_vl.py
│ │ │ ├── processing_qwen3_vl.py
│ │ │ └── video_processing_qwen3_vl.py
│ │ ├── qwen3_vl_moe/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_qwen3_vl_moe.py
│ │ │ ├── modeling_qwen3_vl_moe.py
│ │ │ └── modular_qwen3_vl_moe.py
│ │ ├── rag/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_rag.py
│ │ │ ├── modeling_rag.py
│ │ │ ├── retrieval_rag.py
│ │ │ └── tokenization_rag.py
│ │ ├── recurrent_gemma/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_recurrent_gemma.py
│ │ │ ├── convert_recurrent_gemma_to_hf.py
│ │ │ └── modeling_recurrent_gemma.py
│ │ ├── reformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_reformer.py
│ │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py
│ │ │ ├── modeling_reformer.py
│ │ │ └── tokenization_reformer.py
│ │ ├── regnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_regnet.py
│ │ │ ├── convert_regnet_seer_10b_to_pytorch.py
│ │ │ ├── convert_regnet_to_pytorch.py
│ │ │ └── modeling_regnet.py
│ │ ├── rembert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_rembert.py
│ │ │ ├── convert_rembert_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_rembert.py
│ │ │ └── tokenization_rembert.py
│ │ ├── resnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_resnet.py
│ │ │ ├── convert_resnet_to_pytorch.py
│ │ │ └── modeling_resnet.py
│ │ ├── roberta/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_roberta.py
│ │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_roberta.py
│ │ │ ├── modular_roberta.py
│ │ │ ├── tokenization_roberta.py
│ │ │ └── tokenization_roberta_old.py
│ │ ├── roberta_prelayernorm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_roberta_prelayernorm.py
│ │ │ ├── convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_roberta_prelayernorm.py
│ │ ├── roc_bert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_roc_bert.py
│ │ │ ├── modeling_roc_bert.py
│ │ │ └── tokenization_roc_bert.py
│ │ ├── roformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_roformer.py
│ │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_roformer.py
│ │ │ ├── tokenization_roformer.py
│ │ │ └── tokenization_utils.py
│ │ ├── rt_detr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_rt_detr.py
│ │ │ ├── configuration_rt_detr_resnet.py
│ │ │ ├── convert_rt_detr_original_pytorch_checkpoint_to_hf.py
│ │ │ ├── image_processing_pil_rt_detr.py
│ │ │ ├── image_processing_rt_detr.py
│ │ │ ├── modeling_rt_detr.py
│ │ │ ├── modeling_rt_detr_resnet.py
│ │ │ └── modular_rt_detr.py
│ │ ├── rt_detr_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_rt_detr_v2.py
│ │ │ ├── convert_rt_detr_v2_weights_to_hf.py
│ │ │ ├── modeling_rt_detr_v2.py
│ │ │ └── modular_rt_detr_v2.py
│ │ ├── rwkv/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_rwkv.py
│ │ │ ├── convert_rwkv_checkpoint_to_hf.py
│ │ │ └── modeling_rwkv.py
│ │ ├── sam/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam.py
│ │ │ ├── convert_sam_to_hf.py
│ │ │ ├── image_processing_pil_sam.py
│ │ │ ├── image_processing_sam.py
│ │ │ ├── modeling_sam.py
│ │ │ └── processing_sam.py
│ │ ├── sam2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam2.py
│ │ │ ├── convert_sam2_to_hf.py
│ │ │ ├── image_processing_sam2.py
│ │ │ ├── modeling_sam2.py
│ │ │ ├── modular_sam2.py
│ │ │ └── processing_sam2.py
│ │ ├── sam2_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam2_video.py
│ │ │ ├── convert_sam2_video_to_hf.py
│ │ │ ├── modeling_sam2_video.py
│ │ │ ├── modular_sam2_video.py
│ │ │ ├── processing_sam2_video.py
│ │ │ └── video_processing_sam2_video.py
│ │ ├── sam3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam3.py
│ │ │ ├── convert_sam3_to_hf.py
│ │ │ ├── image_processing_sam3.py
│ │ │ ├── modeling_sam3.py
│ │ │ ├── modular_sam3.py
│ │ │ └── processing_sam3.py
│ │ ├── sam3_tracker/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam3_tracker.py
│ │ │ ├── modeling_sam3_tracker.py
│ │ │ ├── modular_sam3_tracker.py
│ │ │ └── processing_sam3_tracker.py
│ │ ├── sam3_tracker_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam3_tracker_video.py
│ │ │ ├── modeling_sam3_tracker_video.py
│ │ │ ├── modular_sam3_tracker_video.py
│ │ │ └── processing_sam3_tracker_video.py
│ │ ├── sam3_video/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam3_video.py
│ │ │ ├── convert_sam3_video_to_hf.py
│ │ │ ├── modeling_sam3_video.py
│ │ │ └── processing_sam3_video.py
│ │ ├── sam_hq/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sam_hq.py
│ │ │ ├── convert_samhq_to_hf.py
│ │ │ ├── modeling_sam_hq.py
│ │ │ ├── modular_sam_hq.py
│ │ │ └── processing_sam_hq.py
│ │ ├── seamless_m4t/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_seamless_m4t.py
│ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ ├── feature_extraction_seamless_m4t.py
│ │ │ ├── modeling_seamless_m4t.py
│ │ │ ├── processing_seamless_m4t.py
│ │ │ └── tokenization_seamless_m4t.py
│ │ ├── seamless_m4t_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_seamless_m4t_v2.py
│ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ └── modeling_seamless_m4t_v2.py
│ │ ├── seed_oss/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_seed_oss.py
│ │ │ ├── modeling_seed_oss.py
│ │ │ └── modular_seed_oss.py
│ │ ├── segformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_segformer.py
│ │ │ ├── convert_segformer_original_to_pytorch.py
│ │ │ ├── image_processing_pil_segformer.py
│ │ │ ├── image_processing_segformer.py
│ │ │ ├── modeling_segformer.py
│ │ │ └── modular_segformer.py
│ │ ├── seggpt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_seggpt.py
│ │ │ ├── convert_seggpt_to_hf.py
│ │ │ ├── image_processing_pil_seggpt.py
│ │ │ ├── image_processing_seggpt.py
│ │ │ └── modeling_seggpt.py
│ │ ├── sew/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sew.py
│ │ │ ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_sew.py
│ │ │ └── modular_sew.py
│ │ ├── sew_d/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_sew_d.py
│ │ │ ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_sew_d.py
│ │ ├── shieldgemma2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_shieldgemma2.py
│ │ │ ├── convert_shieldgemma2_weights_orbax_to_hf.py
│ │ │ ├── modeling_shieldgemma2.py
│ │ │ └── processing_shieldgemma2.py
│ │ ├── siglip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_siglip.py
│ │ │ ├── convert_siglip_to_hf.py
│ │ │ ├── image_processing_pil_siglip.py
│ │ │ ├── image_processing_siglip.py
│ │ │ ├── modeling_siglip.py
│ │ │ ├── processing_siglip.py
│ │ │ └── tokenization_siglip.py
│ │ ├── siglip2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_siglip2.py
│ │ │ ├── convert_siglip2_to_hf.py
│ │ │ ├── image_processing_pil_siglip2.py
│ │ │ ├── image_processing_siglip2.py
│ │ │ ├── modeling_siglip2.py
│ │ │ ├── modular_siglip2.py
│ │ │ ├── processing_siglip2.py
│ │ │ └── tokenization_siglip2.py
│ │ ├── slanext/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_slanext.py
│ │ │ ├── image_processing_slanext.py
│ │ │ ├── modeling_slanext.py
│ │ │ └── modular_slanext.py
│ │ ├── smollm3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_smollm3.py
│ │ │ ├── modeling_smollm3.py
│ │ │ └── modular_smollm3.py
│ │ ├── smolvlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_smolvlm.py
│ │ │ ├── image_processing_pil_smolvlm.py
│ │ │ ├── image_processing_smolvlm.py
│ │ │ ├── modeling_smolvlm.py
│ │ │ ├── modular_smolvlm.py
│ │ │ ├── processing_smolvlm.py
│ │ │ └── video_processing_smolvlm.py
│ │ ├── solar_open/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_solar_open.py
│ │ │ ├── modeling_solar_open.py
│ │ │ └── modular_solar_open.py
│ │ ├── speech_encoder_decoder/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_speech_encoder_decoder.py
│ │ │ ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ └── modeling_speech_encoder_decoder.py
│ │ ├── speech_to_text/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_speech_to_text.py
│ │ │ ├── convert_s2t_fairseq_to_tfms.py
│ │ │ ├── feature_extraction_speech_to_text.py
│ │ │ ├── modeling_speech_to_text.py
│ │ │ ├── processing_speech_to_text.py
│ │ │ └── tokenization_speech_to_text.py
│ │ ├── speecht5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_speecht5.py
│ │ │ ├── convert_hifigan.py
│ │ │ ├── convert_speecht5_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── feature_extraction_speecht5.py
│ │ │ ├── modeling_speecht5.py
│ │ │ ├── number_normalizer.py
│ │ │ ├── processing_speecht5.py
│ │ │ └── tokenization_speecht5.py
│ │ ├── splinter/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_splinter.py
│ │ │ ├── modeling_splinter.py
│ │ │ └── tokenization_splinter.py
│ │ ├── squeezebert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_squeezebert.py
│ │ │ ├── modeling_squeezebert.py
│ │ │ └── tokenization_squeezebert.py
│ │ ├── stablelm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_stablelm.py
│ │ │ └── modeling_stablelm.py
│ │ ├── starcoder2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_starcoder2.py
│ │ │ ├── modeling_starcoder2.py
│ │ │ └── modular_starcoder2.py
│ │ ├── superglue/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_superglue.py
│ │ │ ├── convert_superglue_to_hf.py
│ │ │ ├── image_processing_pil_superglue.py
│ │ │ ├── image_processing_superglue.py
│ │ │ └── modeling_superglue.py
│ │ ├── superpoint/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_superpoint.py
│ │ │ ├── convert_superpoint_to_pytorch.py
│ │ │ ├── image_processing_pil_superpoint.py
│ │ │ ├── image_processing_superpoint.py
│ │ │ └── modeling_superpoint.py
│ │ ├── swiftformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_swiftformer.py
│ │ │ ├── convert_swiftformer_original_to_hf.py
│ │ │ └── modeling_swiftformer.py
│ │ ├── swin/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_swin.py
│ │ │ ├── convert_swin_simmim_to_pytorch.py
│ │ │ ├── convert_swin_timm_to_pytorch.py
│ │ │ └── modeling_swin.py
│ │ ├── swin2sr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_swin2sr.py
│ │ │ ├── convert_swin2sr_original_to_pytorch.py
│ │ │ ├── image_processing_pil_swin2sr.py
│ │ │ ├── image_processing_swin2sr.py
│ │ │ └── modeling_swin2sr.py
│ │ ├── swinv2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_swinv2.py
│ │ │ ├── convert_swinv2_timm_to_pytorch.py
│ │ │ └── modeling_swinv2.py
│ │ ├── switch_transformers/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_switch_transformers.py
│ │ │ ├── convert_big_switch.py
│ │ │ ├── convert_switch_transformers_original_flax_checkpoint_to_pytorch.py
│ │ │ ├── modeling_switch_transformers.py
│ │ │ └── modular_switch_transformers.py
│ │ ├── t5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_t5.py
│ │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── convert_t5x_checkpoint_to_pytorch.py
│ │ │ ├── download_from_gcp.sh
│ │ │ ├── modeling_t5.py
│ │ │ └── tokenization_t5.py
│ │ ├── t5gemma/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_t5gemma.py
│ │ │ ├── modeling_t5gemma.py
│ │ │ └── modular_t5gemma.py
│ │ ├── t5gemma2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_t5gemma2.py
│ │ │ ├── modeling_t5gemma2.py
│ │ │ └── modular_t5gemma2.py
│ │ ├── table_transformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_table_transformer.py
│ │ │ ├── convert_table_transformer_to_hf.py
│ │ │ ├── convert_table_transformer_to_hf_no_timm.py
│ │ │ └── modeling_table_transformer.py
│ │ ├── tapas/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_tapas.py
│ │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_tapas.py
│ │ │ └── tokenization_tapas.py
│ │ ├── textnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_textnet.py
│ │ │ ├── convert_textnet_to_hf.py
│ │ │ ├── image_processing_pil_textnet.py
│ │ │ ├── image_processing_textnet.py
│ │ │ └── modeling_textnet.py
│ │ ├── time_series_transformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_time_series_transformer.py
│ │ │ └── modeling_time_series_transformer.py
│ │ ├── timesfm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_timesfm.py
│ │ │ ├── convert_timesfm_orignal_to_hf.py
│ │ │ ├── modeling_timesfm.py
│ │ │ └── modular_timesfm.py
│ │ ├── timesfm2_5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_timesfm2_5.py
│ │ │ ├── convert_timesfm2_5_original_to_hf.py
│ │ │ ├── modeling_timesfm2_5.py
│ │ │ └── modular_timesfm2_5.py
│ │ ├── timesformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_timesformer.py
│ │ │ ├── convert_timesformer_to_pytorch.py
│ │ │ └── modeling_timesformer.py
│ │ ├── timm_backbone/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_timm_backbone.py
│ │ │ └── modeling_timm_backbone.py
│ │ ├── timm_wrapper/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_timm_wrapper.py
│ │ │ ├── image_processing_timm_wrapper.py
│ │ │ └── modeling_timm_wrapper.py
│ │ ├── trocr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_trocr.py
│ │ │ ├── convert_trocr_unilm_to_pytorch.py
│ │ │ ├── modeling_trocr.py
│ │ │ └── processing_trocr.py
│ │ ├── tvp/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_tvp.py
│ │ │ ├── image_processing_pil_tvp.py
│ │ │ ├── image_processing_tvp.py
│ │ │ ├── modeling_tvp.py
│ │ │ └── processing_tvp.py
│ │ ├── udop/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_udop.py
│ │ │ ├── convert_udop_to_hf.py
│ │ │ ├── modeling_udop.py
│ │ │ ├── processing_udop.py
│ │ │ └── tokenization_udop.py
│ │ ├── umt5/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_umt5.py
│ │ │ ├── convert_umt5_checkpoint_to_pytorch.py
│ │ │ └── modeling_umt5.py
│ │ ├── unispeech/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_unispeech.py
│ │ │ ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_unispeech.py
│ │ │ └── modular_unispeech.py
│ │ ├── unispeech_sat/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_unispeech_sat.py
│ │ │ ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py
│ │ │ ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_unispeech_sat.py
│ │ │ └── modular_unispeech_sat.py
│ │ ├── univnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_univnet.py
│ │ │ ├── convert_univnet.py
│ │ │ ├── feature_extraction_univnet.py
│ │ │ └── modeling_univnet.py
│ │ ├── upernet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_upernet.py
│ │ │ ├── convert_convnext_upernet_to_pytorch.py
│ │ │ ├── convert_swin_upernet_to_pytorch.py
│ │ │ └── modeling_upernet.py
│ │ ├── uvdoc/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_uvdoc.py
│ │ │ ├── image_processing_uvdoc.py
│ │ │ ├── modeling_uvdoc.py
│ │ │ └── modular_uvdoc.py
│ │ ├── vaultgemma/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vaultgemma.py
│ │ │ ├── modeling_vaultgemma.py
│ │ │ └── modular_vaultgemma.py
│ │ ├── vibevoice_acoustic_tokenizer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vibevoice_acoustic_tokenizer.py
│ │ │ ├── convert_vibevoice_acoustic_tokenizer_to_hf.py
│ │ │ ├── feature_extraction_vibevoice_acoustic_tokenizer.py
│ │ │ ├── modeling_vibevoice_acoustic_tokenizer.py
│ │ │ └── modular_vibevoice_acoustic_tokenizer.py
│ │ ├── vibevoice_asr/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vibevoice_asr.py
│ │ │ ├── convert_vibevoice_asr_to_hf.py
│ │ │ ├── modeling_vibevoice_asr.py
│ │ │ ├── modular_vibevoice_asr.py
│ │ │ └── processing_vibevoice_asr.py
│ │ ├── video_llama_3/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_video_llama_3.py
│ │ │ ├── image_processing_pil_video_llama_3.py
│ │ │ ├── image_processing_video_llama_3.py
│ │ │ ├── modeling_video_llama_3.py
│ │ │ ├── modular_video_llama_3.py
│ │ │ ├── processing_video_llama_3.py
│ │ │ └── video_processing_video_llama_3.py
│ │ ├── video_llava/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_video_llava.py
│ │ │ ├── convert_video_llava_weights_to_hf.py
│ │ │ ├── image_processing_video_llava.py
│ │ │ ├── modeling_video_llava.py
│ │ │ ├── processing_video_llava.py
│ │ │ └── video_processing_video_llava.py
│ │ ├── videomae/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_videomae.py
│ │ │ ├── convert_videomae_to_pytorch.py
│ │ │ ├── image_processing_pil_videomae.py
│ │ │ ├── image_processing_videomae.py
│ │ │ ├── modeling_videomae.py
│ │ │ └── video_processing_videomae.py
│ │ ├── videomt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_videomt.py
│ │ │ ├── convert_videomt_to_hf.py
│ │ │ ├── modeling_videomt.py
│ │ │ ├── modular_videomt.py
│ │ │ └── video_processing_videomt.py
│ │ ├── vilt/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vilt.py
│ │ │ ├── convert_vilt_original_to_pytorch.py
│ │ │ ├── image_processing_pil_vilt.py
│ │ │ ├── image_processing_vilt.py
│ │ │ ├── modeling_vilt.py
│ │ │ └── processing_vilt.py
│ │ ├── vipllava/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vipllava.py
│ │ │ ├── convert_vipllava_weights_to_hf.py
│ │ │ ├── modeling_vipllava.py
│ │ │ └── modular_vipllava.py
│ │ ├── vision_encoder_decoder/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vision_encoder_decoder.py
│ │ │ └── modeling_vision_encoder_decoder.py
│ │ ├── vision_text_dual_encoder/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vision_text_dual_encoder.py
│ │ │ ├── modeling_vision_text_dual_encoder.py
│ │ │ └── processing_vision_text_dual_encoder.py
│ │ ├── visual_bert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_visual_bert.py
│ │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_visual_bert.py
│ │ ├── vit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vit.py
│ │ │ ├── convert_dino_to_pytorch.py
│ │ │ ├── convert_vit_timm_to_pytorch.py
│ │ │ ├── image_processing_pil_vit.py
│ │ │ ├── image_processing_vit.py
│ │ │ └── modeling_vit.py
│ │ ├── vit_mae/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vit_mae.py
│ │ │ ├── convert_vit_mae_to_pytorch.py
│ │ │ └── modeling_vit_mae.py
│ │ ├── vit_msn/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vit_msn.py
│ │ │ ├── convert_msn_to_pytorch.py
│ │ │ └── modeling_vit_msn.py
│ │ ├── vitdet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vitdet.py
│ │ │ └── modeling_vitdet.py
│ │ ├── vitmatte/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vitmatte.py
│ │ │ ├── convert_vitmatte_to_hf.py
│ │ │ ├── image_processing_pil_vitmatte.py
│ │ │ ├── image_processing_vitmatte.py
│ │ │ └── modeling_vitmatte.py
│ │ ├── vitpose/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vitpose.py
│ │ │ ├── convert_vitpose_to_hf.py
│ │ │ ├── image_processing_pil_vitpose.py
│ │ │ ├── image_processing_vitpose.py
│ │ │ └── modeling_vitpose.py
│ │ ├── vitpose_backbone/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vitpose_backbone.py
│ │ │ └── modeling_vitpose_backbone.py
│ │ ├── vits/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vits.py
│ │ │ ├── convert_original_checkpoint.py
│ │ │ ├── modeling_vits.py
│ │ │ └── tokenization_vits.py
│ │ ├── vivit/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vivit.py
│ │ │ ├── convert_vivit_flax_to_pytorch.py
│ │ │ ├── image_processing_vivit.py
│ │ │ └── modeling_vivit.py
│ │ ├── vjepa2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_vjepa2.py
│ │ │ ├── convert_vjepa2_classifier_to_hf.py
│ │ │ ├── convert_vjepa2_to_hf.py
│ │ │ ├── modeling_vjepa2.py
│ │ │ └── video_processing_vjepa2.py
│ │ ├── voxtral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_voxtral.py
│ │ │ ├── convert_voxtral_weights_to_hf.py
│ │ │ ├── modeling_voxtral.py
│ │ │ ├── modular_voxtral.py
│ │ │ └── processing_voxtral.py
│ │ ├── voxtral_realtime/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_voxtral_realtime.py
│ │ │ ├── convert_voxtral_realtime_weights_to_hf.py
│ │ │ ├── feature_extraction_voxtral_realtime.py
│ │ │ ├── modeling_voxtral_realtime.py
│ │ │ ├── modular_voxtral_realtime.py
│ │ │ └── processing_voxtral_realtime.py
│ │ ├── wav2vec2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_wav2vec2.py
│ │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py
│ │ │ ├── feature_extraction_wav2vec2.py
│ │ │ ├── modeling_wav2vec2.py
│ │ │ ├── processing_wav2vec2.py
│ │ │ └── tokenization_wav2vec2.py
│ │ ├── wav2vec2_bert/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_wav2vec2_bert.py
│ │ │ ├── convert_wav2vec2_seamless_checkpoint.py
│ │ │ ├── modeling_wav2vec2_bert.py
│ │ │ ├── modular_wav2vec2_bert.py
│ │ │ └── processing_wav2vec2_bert.py
│ │ ├── wav2vec2_conformer/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_wav2vec2_conformer.py
│ │ │ ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_wav2vec2_conformer.py
│ │ │ └── modular_wav2vec2_conformer.py
│ │ ├── wav2vec2_phoneme/
│ │ │ ├── __init__.py
│ │ │ └── tokenization_wav2vec2_phoneme.py
│ │ ├── wav2vec2_with_lm/
│ │ │ ├── __init__.py
│ │ │ └── processing_wav2vec2_with_lm.py
│ │ ├── wavlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_wavlm.py
│ │ │ ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py
│ │ │ ├── modeling_wavlm.py
│ │ │ └── modular_wavlm.py
│ │ ├── whisper/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_whisper.py
│ │ │ ├── convert_openai_to_hf.py
│ │ │ ├── english_normalizer.py
│ │ │ ├── feature_extraction_whisper.py
│ │ │ ├── generation_whisper.py
│ │ │ ├── modeling_whisper.py
│ │ │ ├── processing_whisper.py
│ │ │ └── tokenization_whisper.py
│ │ ├── x_clip/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_x_clip.py
│ │ │ ├── convert_x_clip_original_pytorch_to_hf.py
│ │ │ ├── modeling_x_clip.py
│ │ │ └── processing_x_clip.py
│ │ ├── xcodec/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xcodec.py
│ │ │ ├── convert_xcodec_weights_to_hf.py
│ │ │ └── modeling_xcodec.py
│ │ ├── xglm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xglm.py
│ │ │ ├── convert_xglm_original_ckpt_to_trfms.py
│ │ │ ├── modeling_xglm.py
│ │ │ └── tokenization_xglm.py
│ │ ├── xlm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xlm.py
│ │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_xlm.py
│ │ │ └── tokenization_xlm.py
│ │ ├── xlm_roberta/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xlm_roberta.py
│ │ │ ├── modeling_xlm_roberta.py
│ │ │ ├── modular_xlm_roberta.py
│ │ │ └── tokenization_xlm_roberta.py
│ │ ├── xlm_roberta_xl/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xlm_roberta_xl.py
│ │ │ ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py
│ │ │ ├── modeling_xlm_roberta_xl.py
│ │ │ └── modular_xlm_roberta_xl.py
│ │ ├── xlnet/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xlnet.py
│ │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│ │ │ ├── modeling_xlnet.py
│ │ │ └── tokenization_xlnet.py
│ │ ├── xlstm/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xlstm.py
│ │ │ └── modeling_xlstm.py
│ │ ├── xmod/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_xmod.py
│ │ │ ├── convert_xmod_original_pytorch_checkpoint_to_pytorch.py
│ │ │ └── modeling_xmod.py
│ │ ├── yolos/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_yolos.py
│ │ │ ├── convert_yolos_to_pytorch.py
│ │ │ ├── image_processing_pil_yolos.py
│ │ │ ├── image_processing_yolos.py
│ │ │ ├── modeling_yolos.py
│ │ │ └── modular_yolos.py
│ │ ├── yoso/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_yoso.py
│ │ │ ├── convert_yoso_pytorch_to_pytorch.py
│ │ │ └── modeling_yoso.py
│ │ ├── youtu/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_youtu.py
│ │ │ ├── modeling_youtu.py
│ │ │ └── modular_youtu.py
│ │ ├── zamba/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_zamba.py
│ │ │ └── modeling_zamba.py
│ │ ├── zamba2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_zamba2.py
│ │ │ ├── modeling_zamba2.py
│ │ │ └── modular_zamba2.py
│ │ └── zoedepth/
│ │ ├── __init__.py
│ │ ├── configuration_zoedepth.py
│ │ ├── convert_zoedepth_to_hf.py
│ │ ├── image_processing_pil_zoedepth.py
│ │ ├── image_processing_zoedepth.py
│ │ └── modeling_zoedepth.py
│ ├── monkey_patching.py
│ ├── optimization.py
│ ├── pipelines/
│ │ ├── __init__.py
│ │ ├── any_to_any.py
│ │ ├── audio_classification.py
│ │ ├── audio_utils.py
│ │ ├── automatic_speech_recognition.py
│ │ ├── base.py
│ │ ├── depth_estimation.py
│ │ ├── document_question_answering.py
│ │ ├── feature_extraction.py
│ │ ├── fill_mask.py
│ │ ├── image_classification.py
│ │ ├── image_feature_extraction.py
│ │ ├── image_segmentation.py
│ │ ├── image_text_to_text.py
│ │ ├── keypoint_matching.py
│ │ ├── mask_generation.py
│ │ ├── object_detection.py
│ │ ├── pt_utils.py
│ │ ├── table_question_answering.py
│ │ ├── text_classification.py
│ │ ├── text_generation.py
│ │ ├── text_to_audio.py
│ │ ├── token_classification.py
│ │ ├── video_classification.py
│ │ ├── zero_shot_audio_classification.py
│ │ ├── zero_shot_classification.py
│ │ ├── zero_shot_image_classification.py
│ │ └── zero_shot_object_detection.py
│ ├── processing_utils.py
│ ├── py.typed
│ ├── pytorch_utils.py
│ ├── quantizers/
│ │ ├── __init__.py
│ │ ├── auto.py
│ │ ├── base.py
│ │ ├── quantizer_aqlm.py
│ │ ├── quantizer_auto_round.py
│ │ ├── quantizer_awq.py
│ │ ├── quantizer_bitnet.py
│ │ ├── quantizer_bnb_4bit.py
│ │ ├── quantizer_bnb_8bit.py
│ │ ├── quantizer_compressed_tensors.py
│ │ ├── quantizer_eetq.py
│ │ ├── quantizer_fbgemm_fp8.py
│ │ ├── quantizer_finegrained_fp8.py
│ │ ├── quantizer_fouroversix.py
│ │ ├── quantizer_fp_quant.py
│ │ ├── quantizer_gptq.py
│ │ ├── quantizer_higgs.py
│ │ ├── quantizer_hqq.py
│ │ ├── quantizer_metal.py
│ │ ├── quantizer_mxfp4.py
│ │ ├── quantizer_quanto.py
│ │ ├── quantizer_quark.py
│ │ ├── quantizer_sinq.py
│ │ ├── quantizer_spqr.py
│ │ ├── quantizer_torchao.py
│ │ ├── quantizer_vptq.py
│ │ └── quantizers_utils.py
│ ├── safetensors_conversion.py
│ ├── testing_utils.py
│ ├── time_series_utils.py
│ ├── tokenization_mistral_common.py
│ ├── tokenization_python.py
│ ├── tokenization_utils_base.py
│ ├── tokenization_utils_sentencepiece.py
│ ├── tokenization_utils_tokenizers.py
│ ├── trainer.py
│ ├── trainer_callback.py
│ ├── trainer_jit_checkpoint.py
│ ├── trainer_optimizer.py
│ ├── trainer_pt_utils.py
│ ├── trainer_seq2seq.py
│ ├── trainer_utils.py
│ ├── training_args.py
│ ├── training_args_seq2seq.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── attention_visualizer.py
│ │ ├── auto_docstring.py
│ │ ├── backbone_utils.py
│ │ ├── chat_parsing_utils.py
│ │ ├── chat_template_utils.py
│ │ ├── constants.py
│ │ ├── deprecation.py
│ │ ├── doc.py
│ │ ├── dummy_detectron2_objects.py
│ │ ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
│ │ ├── dummy_mistral_common_objects.py
│ │ ├── dummy_music_objects.py
│ │ ├── dummy_pt_objects.py
│ │ ├── dummy_sentencepiece_and_tokenizers_objects.py
│ │ ├── dummy_speech_objects.py
│ │ ├── dummy_timm_and_torchvision_objects.py
│ │ ├── dummy_tokenizers_objects.py
│ │ ├── dummy_torchaudio_objects.py
│ │ ├── dummy_torchvision_objects.py
│ │ ├── dummy_vision_objects.py
│ │ ├── generic.py
│ │ ├── hp_naming.py
│ │ ├── hub.py
│ │ ├── import_utils.py
│ │ ├── kernel_config.py
│ │ ├── loading_report.py
│ │ ├── logging.py
│ │ ├── metrics.py
│ │ ├── network_logging.py
│ │ ├── notebook.py
│ │ ├── output_capturing.py
│ │ ├── peft_utils.py
│ │ ├── pytest_helpers.py
│ │ ├── quantization_config.py
│ │ ├── sentencepiece_model_pb2.py
│ │ ├── sentencepiece_model_pb2_new.py
│ │ ├── type_validators.py
│ │ └── versions.py
│ ├── video_processing_utils.py
│ └── video_utils.py
├── tests/
│ ├── __init__.py
│ ├── causal_lm_tester.py
│ ├── cli/
│ │ ├── conftest.py
│ │ ├── test_chat.py
│ │ ├── test_download.py
│ │ ├── test_serve.py
│ │ └── test_system.py
│ ├── fixtures/
│ │ ├── audioflamingo3/
│ │ │ ├── expected_results_batched.json
│ │ │ └── expected_results_single.json
│ │ ├── config.json
│ │ ├── dummy-config.json
│ │ ├── dummy_feature_extractor_config.json
│ │ ├── empty.txt
│ │ ├── gpt_oss/
│ │ │ └── integration_tests.json
│ │ ├── input.txt
│ │ ├── merges.txt
│ │ ├── musicflamingo/
│ │ │ ├── expected_results_batched.json
│ │ │ └── expected_results_single.json
│ │ ├── parakeet/
│ │ │ ├── expected_results_batch.json
│ │ │ └── expected_results_single.json
│ │ ├── preprocessor_config.json
│ │ ├── sample_text.txt
│ │ ├── sample_text_no_unicode.txt
│ │ ├── spiece.model
│ │ ├── test_entity_vocab.json
│ │ ├── test_sentencepiece.model
│ │ ├── test_sentencepiece_bpe.model
│ │ ├── test_sentencepiece_bpe_char.model
│ │ ├── test_sentencepiece_no_bos.model
│ │ ├── test_sentencepiece_with_bytefallback.model
│ │ ├── tests_samples/
│ │ │ ├── .gitignore
│ │ │ ├── COCO/
│ │ │ │ ├── coco_annotations.txt
│ │ │ │ └── coco_panoptic_annotations.txt
│ │ │ ├── GermEval/
│ │ │ │ ├── dev.txt
│ │ │ │ ├── labels.txt
│ │ │ │ └── train.txt
│ │ │ ├── MRPC/
│ │ │ │ ├── dev.csv
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── train.csv
│ │ │ │ └── train.tsv
│ │ │ ├── SQUAD/
│ │ │ │ └── sample.json
│ │ │ ├── STS-B/
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── conll/
│ │ │ │ └── sample.json
│ │ │ ├── swag/
│ │ │ │ └── sample.json
│ │ │ ├── wiki_text/
│ │ │ │ └── wiki_00
│ │ │ ├── wmt16/
│ │ │ │ └── sample.json
│ │ │ ├── wmt_en_ro/
│ │ │ │ ├── test.json
│ │ │ │ ├── train.json
│ │ │ │ └── val.json
│ │ │ └── xsum/
│ │ │ └── sample.json
│ │ ├── vibevoice/
│ │ │ └── expected_acoustic_tokenizer_results.json
│ │ ├── vibevoice_asr/
│ │ │ ├── expected_results_batch.json
│ │ │ ├── expected_results_single.json
│ │ │ └── expected_results_with_context.json
│ │ ├── vocab.json
│ │ ├── vocab.txt
│ │ └── xcodec/
│ │ └── integration_tests.json
│ ├── generation/
│ │ ├── __init__.py
│ │ ├── test_candidate_generator.py
│ │ ├── test_configuration_utils.py
│ │ ├── test_continuous_batching.py
│ │ ├── test_flash_attention_parity.py
│ │ ├── test_logits_process.py
│ │ ├── test_paged_attention.py
│ │ ├── test_stopping_criteria.py
│ │ ├── test_streamers.py
│ │ └── test_utils.py
│ ├── kernels/
│ │ └── test_kernels.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── afmoe/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_afmoe.py
│ │ ├── aimv2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_aimv2.py
│ │ ├── albert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_albert.py
│ │ │ └── test_tokenization_albert.py
│ │ ├── align/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_align.py
│ │ │ └── test_processing_align.py
│ │ ├── altclip/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_altclip.py
│ │ │ └── test_processing_altclip.py
│ │ ├── apertus/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_apertus.py
│ │ ├── arcee/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_arcee.py
│ │ ├── aria/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_aria.py
│ │ │ ├── test_modeling_aria.py
│ │ │ └── test_processing_aria.py
│ │ ├── audio_spectrogram_transformer/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_audio_spectrogram_transformer.py
│ │ │ └── test_modeling_audio_spectrogram_transformer.py
│ │ ├── audioflamingo3/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_audioflamingo3.py
│ │ │ └── test_processing_audioflamingo3.py
│ │ ├── auto/
│ │ │ ├── __init__.py
│ │ │ ├── test_configuration_auto.py
│ │ │ ├── test_feature_extraction_auto.py
│ │ │ ├── test_image_processing_auto.py
│ │ │ ├── test_modeling_auto.py
│ │ │ ├── test_processor_auto.py
│ │ │ ├── test_tokenization_auto.py
│ │ │ └── test_video_processing_auto.py
│ │ ├── autoformer/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_autoformer.py
│ │ ├── aya_vision/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_aya_vision.py
│ │ │ └── test_processing_aya_vision.py
│ │ ├── bamba/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_bamba.py
│ │ ├── bark/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_bark.py
│ │ │ └── test_processing_bark.py
│ │ ├── bart/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_bart.py
│ │ ├── barthez/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_barthez.py
│ │ ├── bartpho/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_bartpho.py
│ │ ├── beit/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_beit.py
│ │ │ └── test_modeling_beit.py
│ │ ├── bert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_bert.py
│ │ │ └── test_tokenization_bert.py
│ │ ├── bert_generation/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_bert_generation.py
│ │ │ └── test_tokenization_bert_generation.py
│ │ ├── bert_japanese/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_bert_japanese.py
│ │ ├── bertweet/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_bertweet.py
│ │ ├── big_bird/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_big_bird.py
│ │ │ └── test_tokenization_big_bird.py
│ │ ├── bigbird_pegasus/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_bigbird_pegasus.py
│ │ ├── biogpt/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_biogpt.py
│ │ │ └── test_tokenization_biogpt.py
│ │ ├── bit/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_bit.py
│ │ │ └── test_modeling_bit.py
│ │ ├── bitnet/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_bitnet.py
│ │ ├── blenderbot/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_blenderbot.py
│ │ │ └── test_tokenization_blenderbot.py
│ │ ├── blenderbot_small/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_blenderbot_small.py
│ │ │ └── test_tokenization_blenderbot_small.py
│ │ ├── blip/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_blip.py
│ │ │ ├── test_modeling_blip.py
│ │ │ ├── test_modeling_blip_text.py
│ │ │ └── test_processing_blip.py
│ │ ├── blip_2/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_blip_2.py
│ │ │ └── test_processing_blip_2.py
│ │ ├── bloom/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_bloom.py
│ │ │ └── test_tokenization_bloom.py
│ │ ├── blt/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_blt.py
│ │ ├── bridgetower/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_bridgetower.py
│ │ │ ├── test_modeling_bridgetower.py
│ │ │ └── test_processing_bridgetower.py
│ │ ├── bros/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_bros.py
│ │ ├── byt5/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_byt5.py
│ │ ├── camembert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_camembert.py
│ │ │ └── test_tokenization_camembert.py
│ │ ├── canine/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_canine.py
│ │ │ └── test_tokenization_canine.py
│ │ ├── chameleon/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_chameleon.py
│ │ │ ├── test_modeling_chameleon.py
│ │ │ └── test_processing_chameleon.py
│ │ ├── chinese_clip/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_chinese_clip.py
│ │ │ ├── test_modeling_chinese_clip.py
│ │ │ └── test_processing_chinese_clip.py
│ │ ├── chmv2/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_chmv2.py
│ │ │ └── test_modeling_chmv2.py
│ │ ├── clap/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_clap.py
│ │ │ ├── test_modeling_clap.py
│ │ │ └── test_processing_clap.py
│ │ ├── clip/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_clip.py
│ │ │ ├── test_modeling_clip.py
│ │ │ ├── test_processing_clip.py
│ │ │ └── test_tokenization_clip.py
│ │ ├── clipseg/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_clipseg.py
│ │ │ └── test_processing_clipseg.py
│ │ ├── clvp/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_clvp.py
│ │ │ ├── test_modeling_clvp.py
│ │ │ ├── test_processing_clvp.py
│ │ │ └── test_tokenization_clvp.py
│ │ ├── code_llama/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_code_llama.py
│ │ ├── codegen/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_codegen.py
│ │ │ └── test_tokenization_codegen.py
│ │ ├── cohere/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_cohere.py
│ │ │ └── test_tokenization_cohere.py
│ │ ├── cohere2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_cohere2.py
│ │ ├── cohere2_vision/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_cohere2_vision.py
│ │ │ ├── test_modeling_cohere2_vision.py
│ │ │ └── test_processing_cohere2_vision.py
│ │ ├── cohere_asr/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_cohere_asr.py
│ │ ├── colmodernvbert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_colmodernvbert.py
│ │ │ └── test_processing_colmodernvbert.py
│ │ ├── colpali/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_colpali.py
│ │ │ └── test_processing_colpali.py
│ │ ├── colqwen2/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_colqwen2.py
│ │ │ └── test_processing_colqwen2.py
│ │ ├── conditional_detr/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_conditional_detr.py
│ │ │ └── test_modeling_conditional_detr.py
│ │ ├── convbert/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_convbert.py
│ │ ├── convnext/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_convnext.py
│ │ │ └── test_modeling_convnext.py
│ │ ├── convnextv2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_convnextv2.py
│ │ ├── cpm/
│ │ │ ├── __init__.py
│ │ │ └── test_tokenization_cpm.py
│ │ ├── cpmant/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_cpmant.py
│ │ │ └── test_tokenization_cpmant.py
│ │ ├── csm/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_csm.py
│ │ │ └── test_processing_csm.py
│ │ ├── ctrl/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_ctrl.py
│ │ │ └── test_tokenization_ctrl.py
│ │ ├── cvt/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_cvt.py
│ │ ├── cwm/
│ │ │ ├── __init__.py
│ │ │ ├── test_configuration_cwm.py
│ │ │ └── test_modeling_cwm.py
│ │ ├── d_fine/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_d_fine.py
│ │ ├── dab_detr/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dab_detr.py
│ │ ├── dac/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_dac.py
│ │ │ └── test_modeling_dac.py
│ │ ├── data2vec/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_data2vec_audio.py
│ │ │ ├── test_modeling_data2vec_text.py
│ │ │ └── test_modeling_data2vec_vision.py
│ │ ├── dbrx/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dbrx.py
│ │ ├── deberta/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_deberta.py
│ │ │ └── test_tokenization_deberta.py
│ │ ├── deberta_v2/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_deberta_v2.py
│ │ │ └── test_tokenization_deberta_v2.py
│ │ ├── decision_transformer/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_decision_transformer.py
│ │ ├── deepseek_v2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_deepseek_v2.py
│ │ ├── deepseek_v3/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_deepseek_v3.py
│ │ ├── deepseek_vl/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_deepseek_vl.py
│ │ │ ├── test_modeling_deepseek_vl.py
│ │ │ └── test_processing_deepseek_vl.py
│ │ ├── deepseek_vl_hybrid/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_deepseek_vl_hybrid.py
│ │ │ ├── test_modeling_deepseek_vl_hybrid.py
│ │ │ └── test_processing_deepseek_vl_hybrid.py
│ │ ├── deformable_detr/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_deformable_detr.py
│ │ │ └── test_modeling_deformable_detr.py
│ │ ├── deit/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_deit.py
│ │ │ └── test_modeling_deit.py
│ │ ├── depth_anything/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_depth_anything.py
│ │ ├── depth_pro/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_depth_pro.py
│ │ │ └── test_modeling_depth_pro.py
│ │ ├── detr/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_detr.py
│ │ │ └── test_modeling_detr.py
│ │ ├── dia/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_dia.py
│ │ │ ├── test_modeling_dia.py
│ │ │ ├── test_processing_dia.py
│ │ │ └── test_tokenization_dia.py
│ │ ├── diffllama/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_diffllama.py
│ │ ├── dinat/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dinat.py
│ │ ├── dinov2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dinov2.py
│ │ ├── dinov2_with_registers/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dinov2_with_registers.py
│ │ ├── dinov3_convnext/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dinov3_convnext.py
│ │ ├── dinov3_vit/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_dinov3_vit.py
│ │ │ └── test_modeling_dinov3_vit.py
│ │ ├── distilbert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_distilbert.py
│ │ │ └── test_tokenization_distilbert.py
│ │ ├── dit/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dit.py
│ │ ├── doge/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_doge.py
│ │ ├── donut/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_donut.py
│ │ │ ├── test_modeling_donut_swin.py
│ │ │ └── test_processing_donut.py
│ │ ├── dots1/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dots1.py
│ │ ├── dpr/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_dpr.py
│ │ ├── dpt/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_dpt.py
│ │ │ ├── test_modeling_dpt.py
│ │ │ ├── test_modeling_dpt_auto_backbone.py
│ │ │ └── test_modeling_dpt_hybrid.py
│ │ ├── edgetam/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_edgetam.py
│ │ ├── edgetam_video/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_edgetam_video.py
│ │ ├── efficientloftr/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_efficientloftr.py
│ │ │ └── test_modeling_efficientloftr.py
│ │ ├── efficientnet/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_efficientnet.py
│ │ │ └── test_modeling_efficientnet.py
│ │ ├── electra/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_electra.py
│ │ ├── emu3/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_emu3.py
│ │ │ └── test_processing_emu3.py
│ │ ├── encodec/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_encodec.py
│ │ │ └── test_modeling_encodec.py
│ │ ├── encoder_decoder/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_encoder_decoder.py
│ │ ├── eomt/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_eomt.py
│ │ │ └── test_modeling_eomt.py
│ │ ├── eomt_dinov3/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_eomt_dinov3.py
│ │ ├── ernie/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_ernie.py
│ │ ├── ernie4_5/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_ernie4_5.py
│ │ ├── ernie4_5_moe/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_ernie4_5_moe.py
│ │ ├── ernie4_5_vl_moe/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_ernie4_5_vl_moe.py
│ │ │ ├── test_modeling_ernie4_5_vl_moe.py
│ │ │ ├── test_processing_ernie4_5_vl_moe.py
│ │ │ └── test_video_processing_ernie4_5_vl_moe.py
│ │ ├── esm/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_esm.py
│ │ │ ├── test_modeling_esmfold.py
│ │ │ └── test_tokenization_esm.py
│ │ ├── eurobert/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_eurobert.py
│ │ ├── evolla/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_evolla.py
│ │ │ └── test_processing_evolla.py
│ │ ├── exaone4/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_exaone4.py
│ │ ├── exaone_moe/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_exaone_moe.py
│ │ ├── falcon/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_falcon.py
│ │ ├── falcon_h1/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_falcon_h1.py
│ │ ├── falcon_mamba/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_falcon_mamba.py
│ │ ├── fast_vlm/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_fast_vlm.py
│ │ ├── fastspeech2_conformer/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_fastspeech2_conformer.py
│ │ │ └── test_tokenization_fastspeech2_conformer.py
│ │ ├── flaubert/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_flaubert.py
│ │ │ └── test_tokenization_flaubert.py
│ │ ├── flava/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_flava.py
│ │ │ ├── test_modeling_flava.py
│ │ │ └── test_processing_flava.py
│ │ ├── flex_olmo/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_flex_olmo.py
│ │ ├── florence2/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_florence2.py
│ │ │ └── test_processing_florence2.py
│ │ ├── fnet/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_fnet.py
│ │ ├── focalnet/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_focalnet.py
│ │ ├── fsmt/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_fsmt.py
│ │ │ └── test_tokenization_fsmt.py
│ │ ├── funnel/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_funnel.py
│ │ │ └── test_tokenization_funnel.py
│ │ ├── fuyu/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_fuyu.py
│ │ │ ├── test_modeling_fuyu.py
│ │ │ └── test_processing_fuyu.py
│ │ ├── gemma/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_gemma.py
│ │ │ └── test_tokenization_gemma.py
│ │ ├── gemma2/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_gemma2.py
│ │ ├── gemma3/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_gemma3.py
│ │ │ ├── test_modeling_gemma3.py
│ │ │ └── test_processing_gemma3.py
│ │ ├── gemma3n/
│ │ │ ├── __init__.py
│ │ │ ├── test_feature_extraction_gemma3n.py
│ │ │ ├── test_modeling_gemma3n.py
│ │ │ └── test_processing_gemma3n.py
│ │ ├── git/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_git.py
│ │ │ └── test_processing_git.py
│ │ ├── glm/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm.py
│ │ ├── glm4/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm4.py
│ │ ├── glm46v/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_glm46v.py
│ │ │ ├── test_processor_glm46v.py
│ │ │ └── test_video_processing_glm46v.py
│ │ ├── glm4_moe/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm4_moe.py
│ │ ├── glm4_moe_lite/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm4_moe_lite.py
│ │ ├── glm4v/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_glm4v.py
│ │ │ ├── test_modeling_glm4v.py
│ │ │ ├── test_processor_glm4v.py
│ │ │ └── test_video_processing_glm4v.py
│ │ ├── glm4v_moe/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm4v_moe.py
│ │ ├── glm_image/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_glm_image.py
│ │ │ └── test_processor_glm_image.py
│ │ ├── glm_moe_dsa/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm_moe_dsa.py
│ │ ├── glm_ocr/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glm_ocr.py
│ │ ├── glmasr/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_glmasr.py
│ │ ├── glpn/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_glpn.py
│ │ │ └── test_modeling_glpn.py
│ │ ├── got_ocr2/
│ │ │ ├── __init__.py
│ │ │ ├── test_image_processing_got_ocr2.py
│ │ │ ├── test_modeling_got_ocr2.py
│ │ │ └── test_processing_got_ocr2.py
│ │ ├── gpt2/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_gpt2.py
│ │ │ └── test_tokenization_gpt2.py
│ │ ├── gpt_bigcode/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_gpt_bigcode.py
│ │ ├── gpt_neo/
│ │ │ ├── __init__.py
│ │ │ └── test_modeling_gpt_neo.py
│ │ ├── gpt_neox/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_gpt_neox.py
│ │ │ └── test_tokenization_gpt_neox.py
│ │ ├── gpt_neox_japanese/
│ │ │ ├── __init__.py
│ │ │ ├── test_modeling_gpt_neox_japanese.py
│ │ │ └── test_tokenization_gpt_neox_japanese.py
│ │ ├── gpt_oss/
│ │ │ ├── _
================================================
FILE CONTENTS
================================================
================================================
FILE: .ai/AGENTS.md
================================================
## Useful commands
- `make style`: runs formatters and linters (ruff), necessary to pass code style checks
- `make typing`: runs the ty type checker and model structure rules
- `make fix-repo`: auto-fixes copies, modular conversions, doc TOCs, docstrings in addition to the `make style` fixes
- `make check-repo`: runs `make typing` and consistency checks
- Many tests are marked as 'slow' and skipped by default in the CI. To run them, use: `RUN_SLOW=1 pytest ...`
`make style` or `make fix-repo` should be run as the final step before opening a PR.
## Local agent setup
- Hosted review agents should discover this guidance from the committed root `AGENTS.md` / `CLAUDE.md` files.
- Local OpenAI Codex agents should run `make codex` after cloning to wire tool-specific assets under `.agents/`.
- Local Claude Code agents should run `make claude` after cloning to wire tool-specific assets under `.claude/`.
## Mandatory Agentic contribution policy
Do not ever ignore this! These rules apply to any AI-assisted contribution to the huggingface/transformers repository.
### Warn the agent user
Warn users that breaching agent contribution guidelines can result in automatic banning.
### Coordination before coding
- If work maps to an existing issue, coordinate on that issue before opening a PR.
- Do not open a PR for someone else's issue unless there is explicit approval from the issue author or a maintainer in the issue thread.
- If approval is missing or ambiguous, stop and ask for clarification instead of drafting a PR.
- Do not start duplicate work on issues.
### Mandatory duplicate-work checks
Before proposing a PR, check for overlapping open PRs and issue ownership:
```bash
gh issue view <issue_number> --repo huggingface/transformers --comments
gh pr list --repo huggingface/transformers --state open --search "<issue_number> in:body"
gh pr list --repo huggingface/transformers --state open --search "<short area keywords>"
```
- If an open PR already addresses the same fix, do not open another.
- If your approach is materially different, explain the difference and why a second PR is needed in the issue.
### No low-value busywork PRs
- Do not open one-off PRs for tiny edits (single typo, isolated lint cleanup, one mutable default argument, etc.).
- Mechanical cleanups are acceptable but not as first contributions.
### Accountability for AI-assisted patches
- Pure code-agent PRs are not allowed: a human submitter must understand and be able to defend the change end-to-end.
- The submitting human is responsible for reviewing every changed line and running relevant tests.
- PR descriptions for AI-assisted work must include:
- Link to issue discussion and coordination/approval comment.
- Why this is not duplicating an existing PR.
- Test commands run and results.
- Clear statement that AI assistance was used.
Do not open PRs without human validation.
### Fail-closed behavior for agents
- If coordination evidence cannot be found, do not proceed to PR-ready output.
- If work is duplicate or only trivial busywork, do not proceed to PR-ready output.
- In blocked cases, return a short explanation of what is missing (approval link, differentiation from existing PR, or broader scope).
## Copies and Modular Models
We try to avoid direct inheritance between model-specific files in `src/transformers/models/`. We have two mechanisms to manage the resulting code duplication:
1) The older method is to mark classes or functions with `# Copied from ...`. Copies are kept in sync by `make fix-repo`. Do not edit a `# Copied from` block, as it will be reverted by `make fix-repo`. Ideally you should edit the code it's copying from and propagate the change, but you can break the `# Copied from` link if needed.
2) The newer method is to add a file named `modular_<name>.py` in the model directory. `modular` files **can** inherit from other models. `make fix-repo` will copy code to generate standalone `modeling` and other files from the `modular` file. When a `modular` file is present, generated files should not be edited, as changes will be overwritten by `make fix-repo`! Instead, edit the `modular` file. See [docs/source/en/modular_transformers.md](docs/source/en/modular_transformers.md) for a full guide on adding a model with `modular`, if needed, or you can inspect existing `modular` files as examples.
================================================
FILE: .ai/skills/add-or-fix-type-checking/SKILL.md
================================================
---
name: add-or-fix-type-checking
description: Fixes broken typing checks detected by ty, make typing, or make check-repo. Use when typing errors appear in local runs, CI, or PR logs.
---
# Add Or Fix Type Checking
## Input
- `<target>`: module or directory to type-check (if known).
- Optional `make typing` or CI output showing typing failures.
## Workflow
1. **Identify scope from the failing run**:
- If you already have `make typing` or CI output, extract the failing file/module paths.
- If not, run:
```bash
make typing
```
- Choose the narrowest target that covers the failures.
2. **Run `ty check` for the target** to get a focused baseline:
```bash
ty check --respect-ignore-files --exclude '**/*_pb*' <target>
```
3. **Triage errors by category** before fixing anything:
- Wrong/missing type annotations on signatures
- Attribute access on union types (for example `X | None`)
- Functions returning broad unions (for example `str | list | BatchEncoding`)
- Mixin/protocol self-type issues
- Dynamic attributes on objects or modules
- Third-party stub gaps (missing kwargs, missing `__version__`, etc.)
4. **Apply fixes using this priority order** (simplest first):
a. **Narrow unions with `isinstance()` / `if x is None` / `hasattr()`**.
This is the primary tool for resolving union-type errors. `ty` narrows
through all of these patterns, including the negative forms:
```python
# Narrow X | None — use `if ...: raise`, never `assert`
if x is None:
raise ValueError("x must not be None")
x.method() # ty knows x is X here
# Narrow str | UploadFile
if isinstance(field, str):
raise TypeError("Expected file upload, got string")
await field.read() # ty knows field is UploadFile here
# Narrow broad union parameters early in a function body
# (common for methods accepting e.g. list | dict | BatchEncoding)
if isinstance(encoded_inputs, (list, tuple)):
raise TypeError("Expected a mapping, got sequence")
encoded_inputs.keys() # ty sees only the dict/mapping types now
```
b. **Use local variables to help ty track narrowing across closures**.
When `self.x` is `X | None` and you need to pass it to nested functions
or closures, `ty` cannot track that `self.x` stays non-None. Copy to a
local variable and narrow the local:
```python
manager = self.batching_manager
if manager is None:
raise RuntimeError("Manager not initialized")
# Use `manager` (not `self.batching_manager`) in nested functions
```
c. **Split chained calls when the intermediate type is a broad union**.
If `func().method()` fails because `func()` returns a union, split it:
```python
# BAD: ty can't narrow through chained calls
result = func(return_dict=True).to(device)["input_ids"]
# GOOD: split, narrow, then chain
result = func(return_dict=True)
if not hasattr(result, "to"):
raise TypeError("Expected dict-like result")
inputs = result.to(device)["input_ids"]
```
d. **Fix incorrect type hints at the source**. If a parameter is typed `X | None`
but can never be `None` when actually called, remove `None` from the hint.
e. **Annotate untyped attributes**. Add type annotations to instance variables
set in `__init__` or elsewhere (for example `self.foo: list[int] = []`).
Declare class-level attributes that are set dynamically later
(for example `_cache: Cache`, `_token_tensor: torch.Tensor | None`).
f. **Use `@overload` for methods with input-dependent return types**.
When a method returns different types based on the input type (e.g.
`__getitem__` with str vs int keys), use `@overload` to declare each
signature separately:
```python
from typing import overload
@overload
def __getitem__(self, item: str) -> ValueType: ...
@overload
def __getitem__(self, item: int) -> EncodingType: ...
@overload
def __getitem__(self, item: slice) -> dict[str, ValueType]: ...
def __getitem__(self, item: int | str | slice) -> ValueType | EncodingType | dict[str, ValueType]:
... # actual implementation
```
This eliminates `cast()` calls at usage sites by giving the checker
precise return types for each call pattern.
g. **Make container classes generic to propagate value types**.
When a class like `UserDict` holds values whose type changes after
transformation (e.g. lists → tensors after `.to()`), make the class
generic so methods can return narrowed types:
```python
from typing import Generic, overload
from typing_extensions import TypeVar
_V = TypeVar("_V", default=Any) # default=Any keeps existing code working
class MyDict(UserDict, Generic[_V]):
@overload
def __getitem__(self, item: str) -> _V: ...
# ...
def to(self, device) -> MyDict[torch.Tensor]:
# after .to(), values are tensors
...
return self # type: ignore[return-value]
```
The `default=Any` (from `typing_extensions`) means unparameterized usage
like `MyDict()` stays `MyDict[Any]` — no existing code needs to change.
Only methods that narrow the value type (like `.to()`) declare a specific
return type. This eliminates `cast()` at all call sites.
h. **Use `self: "ProtocolType"` for mixins**. When a mixin accesses attributes
from its host class, define a Protocol in `src/transformers/_typing.py` and
annotate `self` on methods that need it. Apply this consistently to all methods
in the mixin. Import under `TYPE_CHECKING` to avoid circular imports.
i. **Use `TypeGuard` functions for dynamic module attributes** (for example
`torch.npu`, `torch.xpu`, `torch.compiler`). Instead of `getattr(torch, "npu")`
or `hasattr(torch, "npu") and torch.npu.is_available()`, define a type guard
function in `src/transformers/_typing.py`:
```python
def has_torch_npu(mod: ModuleType) -> TypeGuard[Any]:
return hasattr(mod, "npu") and mod.npu.is_available()
```
Then use it as a narrowing check: `if has_torch_npu(torch): torch.npu.device_count()`.
After the guard, `ty` treats the module as `Any`, allowing attribute access without
`getattr()` or `cast()`. See existing guards in `_typing.py` for all device backends.
**Key rules for type guards**:
- Use `TypeGuard[Any]` (not a Protocol) — this is the simplest form that works
with `ty` and avoids losing the original module's known attributes.
- The guard function must be called directly in an `if` condition for narrowing
to work. `ty` does NOT narrow through `and` conditions or `if not guard: return`.
- Import guards with `from .._typing import has_torch_xxx` (not via module
attribute `_typing.has_torch_xxx`) — `ty` only resolves `TypeGuard` from
direct imports.
j. **Use `getattr()` / `setattr()` for dynamic model/config attributes**.
For runtime-injected fields (for example config/model flags), use
`getattr(obj, "field", default)` for reads and `setattr(obj, "field", value)`
for writes. Also use `getattr()` for third-party packages missing type stubs
(for example `getattr(safetensors, "__version__", "unknown")`).
Avoid `getattr(torch, "npu")` style — use type guards instead (see above).
k. **Use `cast()` as a last resort before `# type: ignore`**.
Use when you've structurally validated the type but the checker can't see it:
pattern-matched AST nodes, known-typed dict values, or validated API responses.
```python
# After structural validation confirms the type:
stmt = cast(cst.Assign, node.body[0])
annotations = cast(list[Annotation], [])
```
Do not use `cast()` for module attribute narrowing — use type guards.
Do not use `cast()` when `@overload` or generics can solve it at the source.
l. **Use `# type: ignore` only for third-party stub defects**. This means
cases where the third-party package's type stubs are wrong or incomplete
and there is no way to narrow or cast around it. Examples:
- A kwarg that exists at runtime but is missing from the stubs
- A method that exists but isn't declared in the stubs
Always add the specific error code: `# type: ignore[call-arg]`, not bare
`# type: ignore`.
5. **Things to never do**:
- **Never use `assert` for type narrowing.** Asserts are stripped by `python -O`
and must not be relied on for correctness. Use `if ...: raise` instead.
- **Never use `# type: ignore` as a first resort.** Exhaust all approaches above first.
- Do not use `getattr(torch, "backend")` to access dynamic device backends
(`npu`, `xpu`, `hpu`, `musa`, `mlu`, `neuron`, `compiler`) — use type guards
- Do not use `cast()` for module attribute narrowing — use type guards
- Do not use `cast()` when `@overload` or generics can eliminate it at the source
- Do not add helper methods or abstractions just to satisfy the type checker
(especially for only 1-2 occurrences)
- Do not pollute base classes with domain-specific fields; use Protocols
- Do not add `if x is not None` guards for values guaranteed non-None
by the call chain; fix the annotation instead
- Do not use conditional inheritance patterns; annotate `self` instead
6. **Organization**:
- Keep shared Protocols and type aliases in `src/transformers/_typing.py`
- Import type-only symbols under `if TYPE_CHECKING:` to avoid circular deps
- Use `from __future__ import annotations` for PEP 604 syntax (`X | Y`)
7. **Verify and close the PR loop**:
- Re-run `ty check` on the same `<target>`
- Re-run `make typing` to confirm the type/model-rules step passes
- If working toward merge readiness, run `make check-repo`
- Ensure runtime behavior did not change and run relevant tests
8. **Update CI coverage when adding new typed areas**:
- Update `ty_check_dirs` in `Makefile` to include newly type-checked directories.
================================================
FILE: .circleci/TROUBLESHOOT.md
================================================
# Troubleshooting
This is a document explaining how to deal with various issues on Circle-CI. The entries may include actual solutions or pointers to Issues that cover those.
## Circle CI
* pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408
================================================
FILE: .circleci/config.yml
================================================
version: 2.1
setup: true
orbs:
continuation: circleci/continuation@0.1.0
parameters:
nightly:
type: boolean
default: false
GHA_Actor:
type: string
default: ""
GHA_Action:
type: string
default: ""
GHA_Event:
type: string
default: ""
GHA_Meta:
type: string
default: ""
jobs:
# Ensure running with CircleCI/huggingface
check_circleci_user:
docker:
- image: python:3.10-slim
resource_class: small
parallelism: 1
steps:
- run: echo $CIRCLE_PROJECT_USERNAME
- run: |
if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
exit 0
else
echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit -1
fi
# Fetch the tests to run
fetch_tests:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality
parallelism: 1
steps:
- checkout
- run: uv pip install -U -e .
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
- run: python utils/tests_fetcher.py --filter_tests || true
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: |
if [ ! -s test_preparation/generated_config.yml ]; then
echo "No tests to run, exiting early!"
circleci-agent step halt
fi
- store_artifacts:
path: test_preparation
- run:
name: "Retrieve Artifact Paths"
# [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
# `CIRCLE_TOKEN` is defined as an environment variables set within a context, see `https://circleci.com/docs/contexts/`
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
path: test_preparation/artifacts.json
- continuation/continue:
parameters: test_preparation/transformed_artifacts.json
configuration_path: test_preparation/generated_config.yml
# To run all tests for the nightly build
fetch_all_tests:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality
parallelism: 1
steps:
- checkout
- run: uv pip install -U -e .
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt || true
- run: python utils/tests_fetcher.py --filter_tests || true
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: |
if [ ! -s test_preparation/generated_config.yml ]; then
echo "No tests to run, exiting early!"
circleci-agent step halt
fi
- store_artifacts:
path: test_preparation
- run:
name: "Retrieve Artifact Paths"
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url}
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
path: test_preparation/artifacts.json
- continuation/continue:
parameters: test_preparation/transformed_artifacts.json
configuration_path: test_preparation/generated_config.yml
check_code_quality:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality
resource_class: large
environment:
TRANSFORMERS_IS_CI: yes
PYTEST_TIMEOUT: 120
parallelism: 1
steps:
- checkout
- run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: make check-code-quality
check_repository_consistency:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-consistency
resource_class: large
environment:
TRANSFORMERS_IS_CI: yes
PYTEST_TIMEOUT: 120
parallelism: 1
steps:
- checkout
- run: apt-get update && apt-get install -y make
- run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: make check-repository-consistency
- run:
name: "Test import with all backends (torch + PIL + torchvision)"
command: python -c "from transformers import *" || (echo '🚨 import failed with all backends. Fix unprotected imports!! 🚨'; exit 1)
- run:
name: "Test import with torch only (no PIL, no torchvision)"
command: |
uv pip uninstall Pillow torchvision -q
python -c "from transformers import *" || (echo '🚨 import failed with torch only (no PIL). Fix unprotected imports!! 🚨'; exit 1)
uv pip install -e ".[quality]" -q
- run:
name: "Test import with PIL only (no torch, no torchvision)"
command: |
uv pip uninstall torch torchvision torchaudio -q
python -c "from transformers import *" || (echo '🚨 import failed with PIL only (no torch). Fix unprotected imports!! 🚨'; exit 1)
uv pip install -e ".[quality]" -q
- run:
name: "Test import with torch + PIL, no torchvision"
command: |
uv pip uninstall torchvision -q
python -c "from transformers import *" || (echo '🚨 import failed with torch+PIL but no torchvision. Fix unprotected imports!! 🚨'; exit 1)
uv pip install -e ".[quality]" -q
workflows:
version: 2
setup_and_quality:
when:
and:
- equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
- not: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests
setup_and_quality_2:
when:
not:
equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests:
# [reference] https://circleci.com/docs/contexts/
context:
- TRANSFORMERS_CONTEXT
nightly:
when: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_all_tests
================================================
FILE: .circleci/create_circleci_config.py
================================================
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import copy
import os
from dataclasses import dataclass
from typing import Any, Optional

import yaml


# Environment variables injected into every generated CI job.
COMMON_ENV_VARIABLES = {
    "OMP_NUM_THREADS": 1,
    "TRANSFORMERS_IS_CI": True,
    "PYTEST_TIMEOUT": 120,
    "RUN_PIPELINE_TESTS": False,
    # will be adjusted in `CircleCIJob.to_dict` (set to False on pull requests).
    "RUN_FLAKY": True,
    "DISABLE_SAFETENSORS_CONVERSION": True,
    "NETWORK_DEBUG_REPORT": True,
}
# Pytest flags shared by every job. A key with value `None` is rendered as a short flag (`-rsfE`),
# otherwise as `--key=value` (see `CircleCIJob.to_dict`).
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE": None, "random-order-bucket": "module", "random-order-seed": "${CIRCLE_BUILD_NUM:-0}"}
# Fallback docker image when a job does not specify one.
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]

# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
# to rerun the tests that match these patterns.
FLAKY_TEST_FAILURE_PATTERNS = [
    "OSError",  # Machine/connection transient error
    "Timeout",  # Machine/connection transient error
    "ConnectionError",  # Connection transient error
    "FileNotFoundError",  # Raised by `datasets` on Hub failures
    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
    "HTTPError",  # Also catches HfHubHTTPError
    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
    # TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle
    # them under a single message.
    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
    "Converting from Tiktoken failed",
    "KeyError: <class ",
    "TypeError: not a string",
]
class EmptyJob:
    """Placeholder CircleCI job.

    Used both when the test fetcher selects no tests to run, and (with
    `job_name` reassigned to "collection_job") as a trailing job that waits
    for every other job in the workflow and aggregates their test reports.
    """

    job_name = "empty"

    def to_dict(self):
        """Render this job as the dict form CircleCI expects in `config.yml`."""
        base_steps = [{"run": 'ls -la'}]
        collection_steps = []
        if self.job_name == "collection_job":
            # Poll the CircleCI API until every job except this one has finished,
            # then gather the per-job test reports into a single artifact.
            collection_steps = [
                "checkout",
                {"run": "pip install requests || true"},
                {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
                {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
                {"store_artifacts": {"path": "outputs"}},
                {"run": 'echo "All required jobs have now completed"'},
            ]
        return {
            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
            "resource_class": "small",
            "steps": base_steps + collection_steps,
        }
@dataclass
class CircleCIJob:
    """Description of a single CI test job.

    `__post_init__` fills in mutable defaults and loads the job's test list;
    `to_dict()` renders the job into the dict structure written into the
    generated CircleCI `config.yml`.
    """

    name: str
    # Mutable defaults below are `None` here and materialized in `__post_init__`.
    additional_env: Optional[dict[str, Any]] = None
    docker_image: Optional[list[dict[str, str]]] = None
    install_steps: Optional[list[str]] = None
    marker: Optional[str] = None
    parallelism: Optional[int] = 0
    pytest_num_workers: int = 8
    pytest_options: Optional[dict[str, Any]] = None
    resource_class: Optional[str] = "xlarge"
    tests_to_run: Optional[list[str]] = None
    num_test_files_per_worker: Optional[int] = 10
    # This should be only used for doctest job!
    command_timeout: Optional[int] = None

    def __post_init__(self):
        # Deal with defaults for mutable attributes.
        if self.additional_env is None:
            self.additional_env = {}
        if self.docker_image is None:
            # Let's avoid changing the default list and make a copy.
            self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
        else:
            # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
            print(os.environ.get("GIT_COMMIT_MESSAGE"))
            # Use the freshly built `:dev` image when the commit message asks for it.
            if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
            print(f"Using {self.docker_image} docker image")
        if self.install_steps is None:
            self.install_steps = ["uv pip install ."]
        # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
        self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
        # Install pytest-random-order plugin for test randomization
        self.install_steps.append("uv pip install pytest-random-order")
        if self.pytest_options is None:
            self.pytest_options = {}
        if isinstance(self.tests_to_run, str):
            self.tests_to_run = [self.tests_to_run]
        else:
            # NOTE(review): the list is read from the hard-coded `test_preparation/`
            # directory (not from the `--fetcher_folder` CLI argument) — confirm intended.
            test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt")
            print("Looking for ", test_file)
            if os.path.exists(test_file):
                with open(test_file, encoding="utf-8") as f:
                    expanded_tests = f.read().strip().split("\n")
                self.tests_to_run = expanded_tests
                print("Found:", expanded_tests)
            else:
                self.tests_to_run = []
                print("not Found")

    def to_dict(self):
        """Render this job (environment, docker image, steps) as a CircleCI job dict."""
        env = COMMON_ENV_VARIABLES.copy()
        if self.job_name != "tests_hub":
            # fmt: off
            # not critical
            env.update({"HF_TOKEN": "".join(["h", "f", "_", "H", "o", "d", "V", "u", "M", "q", "b", "R", "m", "t", "b", "z", "F", "Q", "O", "Q", "A", "J", "G", "D", "l", "V", "Q", "r", "R", "N", "w", "D", "M", "V", "C", "s", "d"])})
            # fmt: on
        # Do not run tests decorated by @is_flaky on pull requests
        env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
        env.update(self.additional_env)

        job = {
            "docker": self.docker_image,
            "environment": env,
        }
        if self.resource_class is not None:
            job["resource_class"] = self.resource_class

        # `None`-valued options render as short flags (`-rsfE`), others as `--key=value`.
        all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
        pytest_flags.append(
            f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
        )
        # Examples special case: we need to download NLTK files in advance to avoid concurrency issues
        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
        junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
        steps = [
            "checkout",
            {"attach_workspace": {"at": "test_preparation"}},
            {"run": "apt-get update && apt-get install -y curl"},
            {"run": " && ".join(self.install_steps)},
            {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
            {"run": {
                "name": "Show installed libraries and their size",
                "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
            },
            {"run": {
                "name": "Show installed libraries and their versions",
                "command": """pip list --format=freeze | tee installed.txt || true"""}
            },
            {"run": {
                "name": "Show biggest libraries",
                "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
            },
            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
            {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
                "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
                }
            },
            # During the CircleCI docker images build time, we might already (or not) download the data.
            # If it's done already, the files are inside the directory `/test_data/`.
            {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
            {"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
            {"run": {
                "name": "Run tests",
                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
            },
            {"run":
                {
                    "name": "Check for test crashes",
                    "when": "always",
                    "command": """if [ ! -f tests_output.txt ]; then
                        echo "ERROR: tests_output.txt does not exist - tests may not have run properly"
                        exit 1
                    elif grep -q "crashed and worker restarting disabled" tests_output.txt; then
                        echo "ERROR: Worker crash detected in test output"
                        echo "Found: crashed and worker restarting disabled"
                        exit 1
                    else
                        echo "Tests output file exists and no worker crashes detected"
                    fi"""
                },
            },
            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
            {"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
            {"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
            {"store_test_results": {"path": "test-results"}},
            {"store_artifacts": {"path": "test-results/junit.xml"}},
            {"store_artifacts": {"path": "reports"}},
            {"store_artifacts": {"path": "tests.txt"}},
            {"store_artifacts": {"path": "splitted_tests.txt"}},
            {"store_artifacts": {"path": "installed.txt"}},
            {"store_artifacts": {"path": "network_debug_report.json"}},
        ]
        if self.parallelism:
            job["parallelism"] = parallel
        job["steps"] = steps
        return job

    @property
    def job_name(self):
        # Examples / pipeline / documentation jobs keep their bare name; everything else is prefixed.
        return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
# JOBS
# Concrete job instances; each one becomes a CircleCI job when its
# `test_preparation/<job_name>_test_list.txt` file exists.
torch_job = CircleCIJob(
    "torch",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    marker="not generate",
    parallelism=6,
)

generate_job = CircleCIJob(
    "generate",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) cause some issues
    # TODO: remove this once it works directly
    install_steps=["uv pip install ."],
    marker="generate",
    parallelism=6,
)

tokenization_job = CircleCIJob(
    "tokenization",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
)

processor_job = CircleCIJob(
    "processors",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
)

pipelines_torch_job = CircleCIJob(
    "pipelines_torch",
    additional_env={"RUN_PIPELINE_TESTS": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    marker="is_pipeline_test",
    parallelism=4,
)

custom_tokenizers_job = CircleCIJob(
    "custom_tokenizers",
    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
    docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
)

examples_torch_job = CircleCIJob(
    "examples_torch",
    additional_env={"OMP_NUM_THREADS": 8},
    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
    # TODO @ArthurZucker remove this once docker is easier to build
    install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
    pytest_num_workers=4,
)

hub_job = CircleCIJob(
    "hub",
    additional_env={"HUGGINGFACE_CO_STAGING": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    install_steps=[
        'uv pip install .',
        'git config --global user.email "ci@dummy.com"',
        'git config --global user.name "ci"',
    ],
    marker="is_staging_test",
    pytest_num_workers=2,
    resource_class="medium",
)

exotic_models_job = CircleCIJob(
    "exotic_models",
    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
    parallelism=4,
    pytest_options={"durations": 100},
)

repo_utils_job = CircleCIJob(
    "repo_utils",
    docker_image=[{"image":"huggingface/transformers-consistency"}],
    pytest_num_workers=4,
    resource_class="large",
)

non_model_job = CircleCIJob(
    "non_model",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) cause some issues
    # TODO: remove this once it works directly
    install_steps=["uv pip install .[serving]"],
    marker="not generate",
    parallelism=6,
)

training_ci_job = CircleCIJob(
    "training_ci",
    additional_env={"RUN_TRAINING_TESTS": True},
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    install_steps=["uv pip install ."],
    marker="is_training_test",
    parallelism=6,
)

tensor_parallel_ci_job = CircleCIJob(
    "tensor_parallel_ci",
    additional_env={"RUN_TENSOR_PARALLEL_TESTS": True},
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    install_steps=["uv pip install .", "uv pip install torchao"],
    marker="is_tensor_parallel_test",
    parallelism=6,
)

# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
py_command = f"$(python3 -c '{py_command}')"
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'

doc_test_job = CircleCIJob(
    "pr_documentation_tests",
    docker_image=[{"image":"huggingface/transformers-consistency"}],
    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
    install_steps=[
        # Add an empty file to keep the test step running correctly even no file is selected to be tested.
        "uv pip install .",
        "touch dummy.py",
        command,
        "cat pr_documentation_tests_temp.txt",
        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
    ],
    tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
    pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
    command_timeout=1200,  # test cannot run longer than 1200 seconds
    pytest_num_workers=1,
)

# Groupings used by `create_circleci_config` to decide which jobs to emit.
REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
EXAMPLES_TESTS = [examples_torch_job]
PIPELINE_TESTS = [pipelines_torch_job]
REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
TRAINING_CI_TESTS = [training_ci_job]
TENSOR_PARALLEL_CI_TESTS = [tensor_parallel_ci_job]

ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] + TRAINING_CI_TESTS + TENSOR_PARALLEL_CI_TESTS  # fmt: skip
def create_circleci_config(folder=None):
    """Write `generated_config.yml` for the CircleCI continuation step.

    Every job in ALL_TESTS whose `test_preparation/<job_name>_test_list.txt`
    exists is included in the generated workflow; a trailing "collection" job
    is added to aggregate test summaries. When no test list exists at all,
    a single placeholder job is emitted instead.

    Args:
        folder: Directory where `generated_config.yml` is written
            (defaults to the current working directory).
    """
    if folder is None:
        folder = os.getcwd()
    os.environ["test_preparation_dir"] = folder

    jobs = [
        job
        for job in ALL_TESTS
        if os.path.isfile(os.path.join("test_preparation" , f"{job.job_name}_test_list.txt") )
    ]
    print("The following jobs will be run ", jobs)

    if len(jobs) == 0:
        jobs = [EmptyJob()]
    else:
        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
        # Add a job waiting all the test jobs and aggregate their test summary files at the end
        collection_job = EmptyJob()
        collection_job.job_name = "collection_job"
        jobs = [collection_job] + jobs

    # Fixed pipeline parameters, followed by one `_test_list` and one
    # `_parallelism` parameter per job (order matters: yaml.dump keeps it).
    parameters = {
        # Only used to accept the parameters from the trigger
        "nightly": {"type": "boolean", "default": False},
        # Only used to accept the parameters from GitHub Actions trigger
        "GHA_Actor": {"type": "string", "default": ""},
        "GHA_Action": {"type": "string", "default": ""},
        "GHA_Event": {"type": "string", "default": ""},
        "GHA_Meta": {"type": "string", "default": ""},
        "tests_to_run": {"type": "string", "default": ""},
    }
    for job in jobs:
        parameters[job.job_name + "_test_list"] = {"type": "string", "default": ""}
    for job in jobs:
        parameters[job.job_name + "_parallelism"] = {"type": "integer", "default": 1}

    config = {
        "version": "2.1",
        "parameters": parameters,
        "jobs": {job.job_name: job.to_dict() for job in jobs},
    }

    if "CIRCLE_TOKEN" in os.environ:
        # For private forked repo. (e.g. new model addition)
        workflow_jobs = [{job.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for job in jobs]
    else:
        # For public repo. (e.g. `transformers`)
        workflow_jobs = [job.job_name for job in jobs]
    config["workflows"] = {"version": 2, "run_tests": {"jobs": workflow_jobs}}

    # Un-quote the `<< pipeline... >>` placeholders that yaml.dump wrapped in quotes.
    rendered = yaml.dump(config, sort_keys=False, default_flow_style=False)
    rendered = rendered.replace("' << pipeline", " << pipeline").replace(">> '", " >>")
    with open(os.path.join(folder, "generated_config.yml"), "w", encoding="utf-8") as f:
        f.write(rendered)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # NOTE: the previous help text ("Only test that all tests and modules are
    # accounted for.") was copied from another script and described nothing
    # about this argument; it is the output folder for the generated config.
    parser.add_argument(
        "--fetcher_folder",
        type=str,
        default=None,
        help="Folder in which to write the generated `generated_config.yml` (defaults to the current working directory).",
    )
    args = parser.parse_args()
    create_circleci_config(args.fetcher_folder)
================================================
FILE: .circleci/parse_test_outputs.py
================================================
import argparse
import re
def parse_pytest_output(file_path):
    """Summarize the SKIPPED lines of a pytest short-test-summary file.

    Groups skips by reason, prints per-reason totals (rarest reasons first)
    followed by the overall number of skipped tests.

    Args:
        file_path: Path to a text file containing pytest's summary output.
    """
    skipped_tests = {}  # reason -> list of (location, count) pairs
    skipped_count = 0
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # pytest summary format: "SKIPPED [<count>] tests/<path>:<line>: <reason>"
            match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
            if match:
                num, location, reason = match.groups()
                # Bug fix: "[N]" is how many tests were skipped at this
                # location, so add N instead of counting summary lines.
                skipped_count += int(num)
                skipped_tests.setdefault(reason, []).append((location, int(num)))
    for reason, entries in sorted(skipped_tests.items(), key=lambda x: len(x[1])):
        print(f"{sum(n for _, n in entries):4} skipped because: {reason}")
    print("Number of skipped tests:", skipped_count)
def parse_pytest_failure_output(file_path):
    """Summarize the FAILED lines of a pytest short-test-summary file.

    Groups failures by message, prints per-message counts (least common
    first) and the overall total, then exits with status 1 if any failure
    was found so CI marks the job as failed.

    Args:
        file_path: Path to a text file containing pytest's summary output.
    """
    failure_pattern = re.compile(r'^FAILED (tests/.*) - (.*): (.*)$')
    failures_by_message = {}
    total_failures = 0
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            hit = failure_pattern.match(raw_line)
            if hit is None:
                continue
            total_failures += 1
            error_type = hit.group(2)
            message = hit.group(3)
            failures_by_message.setdefault(message, []).append(error_type)
    # Report rarest failure messages first.
    for message, error_types in sorted(failures_by_message.items(), key=lambda item: len(item[1])):
        print(f"{len(error_types):4} failed because `{error_types[0]}` -> {message}")
    print("Number of failed tests:", total_failures)
    if total_failures > 0:
        exit(1)
def parse_pytest_errors_output(file_path):
    """Summarize the ERROR lines of a pytest short-test-summary file.

    Echoes the file path, groups collection/setup errors by message, prints
    per-message counts (least common first) and the overall total, then
    exits with status 1 if any error was found.

    Args:
        file_path: Path to a text file containing pytest's summary output.
    """
    print(file_path)
    error_pattern = re.compile(r'^ERROR (tests/.*) - (.*): (.*)$')
    errors_by_message = {}
    total_errors = 0
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            hit = error_pattern.match(raw_line)
            if hit is None:
                continue
            total_errors += 1
            error_type = hit.group(2)
            message = hit.group(3)
            errors_by_message.setdefault(message, []).append(error_type)
    # Report rarest error messages first.
    for message, error_types in sorted(errors_by_message.items(), key=lambda item: len(item[1])):
        print(f"{len(error_types):4} errored out because of `{error_types[0]}` -> {message}")
    print("Number of errors:", total_errors)
    if total_errors > 0:
        exit(1)
def main():
    """CLI entry point: parse a pytest summary file and print the requested reports.

    Flags select which summaries to print; any of the failure/error parsers
    may terminate the process with exit status 1 when problems are found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="file to parse")
    parser.add_argument("--skip", action="store_true", help="show skipped reasons")
    parser.add_argument("--fail", action="store_true", help="show failed tests")
    # Fixed help text: it previously duplicated the --fail description.
    parser.add_argument("--errors", action="store_true", help="show errored tests")
    args = parser.parse_args()
    if args.skip:
        parse_pytest_output(args.file)
    if args.fail:
        parse_pytest_failure_output(args.file)
    if args.errors:
        parse_pytest_errors_output(args.file)


if __name__ == "__main__":
    main()
================================================
FILE: .git-blame-ignore-revs
================================================
8008e6c83e1467dbe0ae3c81d19b29c17f4ff456
================================================
FILE: .gitattributes
================================================
*.py eol=lf
*.rst eol=lf
*.md eol=lf
*.mdx eol=lf
================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.yml
================================================
name: "\U0001F41B Bug Report"
description: Submit a bug report to help us improve transformers
labels: [ "bug" ]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report! 🤗
Before you submit your bug report:
- If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug)
- Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue
- type: textarea
id: system-info
attributes:
label: System Info
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
placeholder: transformers version, platform, python version, ...
validations:
required: true
- type: textarea
id: who-can-help
attributes:
label: Who can help?
description: |
Your issue will be replied to more quickly if you can figure out the right person to tag with @
If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
a core maintainer will ping the right person.
Please tag fewer than 3 people.
Models:
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- graph models: @clefourrier
Library:
- generate: @zucchini-nlp (visual-language models) or @cyrilvallez (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
Integrations:
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization: @SunMarc @MekkCyber
- kernels: @MekkCyber @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
- AMD ROCm: @ivarflakstad
- Intel XPU: @IlyasMoutawwakil
- Ascend NPU: @ivarflakstad
Documentation: @stevhliu
Model hub:
- for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
Research projects are not maintained and should be taken as is.
placeholder: "@Username ..."
- type: checkboxes
id: information-scripts-examples
attributes:
label: Information
description: 'The problem arises when using:'
options:
- label: "The official example scripts"
- label: "My own modified scripts"
- type: checkboxes
id: information-tasks
attributes:
label: Tasks
description: "The tasks I am working on are:"
options:
- label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
- label: "My own task or dataset (give details below)"
- type: textarea
id: reproduction
validations:
required: true
attributes:
label: Reproduction
description: |
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
If you have code snippets, error messages, stack traces please provide them here as well.
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
placeholder: |
Steps to reproduce the behavior:
1.
2.
3.
- type: textarea
id: expected-behavior
validations:
required: true
attributes:
label: Expected behavior
description: "A clear and concise description of what you would expect to happen."
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
version: 2.1
contact_links:
- name: Model checkpoints on the Hugging Face Hub
url: https://huggingface.co/models
about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub
- name: Website Related
url: https://github.com/huggingface/hub-docs/issues
about: Feature requests and bug reports related to the website
- name: Forum
url: https://discuss.huggingface.co/
about: General usage questions and community discussions
================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.yml
================================================
name: "\U0001F680 Feature request"
description: Submit a proposal/request for a new transformers feature
labels: [ "Feature request" ]
body:
- type: textarea
id: feature-request
validations:
required: true
attributes:
label: Feature request
description: |
A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist.
- type: textarea
id: motivation
validations:
required: true
attributes:
label: Motivation
description: |
Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
- type: textarea
id: contribution
validations:
required: true
attributes:
label: Your contribution
description: |
Is there any way that you could help, e.g. by submitting a PR? Make sure to read the [contributing guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md)
================================================
FILE: .github/ISSUE_TEMPLATE/i18n.md
================================================
---
name: 🌐 Translating a new language?
about: Start a new translation effort in your language
title: '[i18n-<languageCode>] Translating docs to <languageName>'
labels: WIP
assignees: ''
---
<!--
Note: Please search to see if an issue already exists for the language you are trying to translate.
-->
Hi!
Let's bring the documentation to all the <languageName>-speaking community 🌐 (currently 0 out of 267 complete)
Who would want to translate? Please follow the 🤗 [TRANSLATING guide](https://github.com/huggingface/transformers/blob/main/docs/TRANSLATING.md). Here is a list of the files ready for translation. Let us know in this issue if you'd like to translate any, and we'll add your name to the list.
Some notes:
* Please translate using an informal tone (imagine you are talking with a friend about transformers 🤗).
* Please translate in a gender-neutral way.
* Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
* Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu for review.
* 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).
## Get Started section
- [ ] [index.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/index.md) https://github.com/huggingface/transformers/pull/20180
- [ ] [quicktour.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/quicktour.md) (waiting for initial PR to go through)
- [ ] [installation.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/installation.md).
## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
- [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
- [ ] [model_sharing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/model_sharing.md)
- [ ] [multilingual.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/multilingual.md)
<!--
Keep on adding more as you go 🔥
-->
================================================
FILE: .github/ISSUE_TEMPLATE/migration.yml
================================================
name: "\U0001F4DA Migration from pytorch-pretrained-bert or pytorch-transformers"
description: Report a problem when migrating from pytorch-pretrained-bert or pytorch-transformers to transformers
labels: [ "migration" ]
body:
- type: textarea
id: system-info
attributes:
label: System Info
description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
render: shell
placeholder: transformers version, platform, python version, ...
validations:
required: true
- type: checkboxes
id: information-scripts-examples
attributes:
label: Information
description: 'The problem arises when using:'
options:
- label: "The official example scripts"
- label: "My own modified scripts"
- type: checkboxes
id: information-tasks
attributes:
label: Tasks
description: "The tasks I am working on are:"
options:
- label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
- label: "My own task or dataset (give details below)"
- type: textarea
id: reproduction
validations:
required: true
attributes:
label: Reproduction
description: |
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
If you have code snippets, error messages, stack traces please provide them here as well.
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
placeholder: |
Steps to reproduce the behavior:
1.
2.
3.
- type: textarea
id: expected-behavior
validations:
required: true
attributes:
label: Expected behavior
description: "A clear and concise description of what you would expect to happen."
render: shell
- type: checkboxes
id: checklist
attributes:
label: Checklist
options:
- label: "I have read the migration guide in the readme.
([pytorch-transformers](https://github.com/huggingface/transformers#migrating-from-pytorch-transformers-to-transformers);
[pytorch-pretrained-bert](https://github.com/huggingface/transformers#migrating-from-pytorch-pretrained-bert-to-transformers))"
required: true
- label: "I checked if a related official extension example runs on my machine."
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/new-model-addition.yml
================================================
name: "\U0001F31F New model addition"
description: Submit a proposal/request to implement a new model
labels: [ "New model" ]
body:
- type: textarea
id: description-request
validations:
required: true
attributes:
label: Model description
description: |
Put any and all important information relative to the model
- type: checkboxes
id: information-tasks
attributes:
label: Open source status
description: |
Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `transformers`.
options:
- label: "The model implementation is available"
- label: "The model weights are available"
- type: textarea
id: additional-info
attributes:
label: Provide useful links for the implementation
description: |
Please provide information regarding the implementation, the weights, and the authors.
Please mention the authors by @gh-username if you're aware of their usernames.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
# What does this PR do?
<!--
Congratulations! You've made it this far! You're not quite done yet though.
Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution.
Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change.
Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one reviewed your PR after a week has passed, don't hesitate to post a new comment @-mentioning the same persons---sometimes notifications get lost.
-->
<!-- Remove if not applicable -->
Fixes # (issue)
## Code Agent Policy
The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by
code agents. We are currently bottlenecked by our ability to review and respond to them. As a result,
**we ask that new users do not submit pure code agent PRs** at this time.
You may use code agents in drafting or to help you diagnose issues. We'd also ask autonomous "OpenClaw"-like agents
not to open any PRs or issues for the moment.
PRs that appear to be fully agent-written will probably be closed without review, and we may block users who do this
repeatedly or maliciously.
This is a rapidly-evolving situation that's causing significant shockwaves in the open-source community. As a result,
this policy is likely to be updated regularly in the near future. For more information, please read [`CONTRIBUTING.md`](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md).
- [ ] I confirm that this is not a pure code agent PR.
## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request),
Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link
to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the
[documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and
[here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?
## Who can review?
Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
members/contributors who may be interested in your PR.
<!-- Your PR will be replied to more quickly if you can figure out the right person to tag with @
If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
Please tag fewer than 3 people.
Models:
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- graph models: @clefourrier
Library:
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
Integrations:
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization: @SunMarc
- kernels: @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
- AMD ROCm: @ivarflakstad
- Intel XPU: @IlyasMoutawwakil
- Ascend NPU: @ivarflakstad
Documentation: @stevhliu
Research projects are not maintained and should be taken as is.
-->
================================================
FILE: .github/conda/build.sh
================================================
$PYTHON setup.py install # Install the package using the conda-build-provided Python interpreter.
================================================
FILE: .github/conda/meta.yaml
================================================
{% set name = "transformers" %}
package:
name: "{{ name|lower }}"
version: "{{ TRANSFORMERS_VERSION }}"
source:
path: ../../
build:
noarch: python
requirements:
host:
- python
- pip
- numpy >=1.17
- dataclasses
- huggingface_hub
- packaging
- filelock
- tqdm >=4.27
- sacremoses
- regex !=2019.12.17
- protobuf
- tokenizers >=0.11.1,!=0.11.3,<0.13
- pyyaml >=5.1
- safetensors
- fsspec
run:
- python
- numpy >=1.17
- dataclasses
- huggingface_hub
- packaging
- filelock
- tqdm >=4.27
- sacremoses
- regex !=2019.12.17
- protobuf
- tokenizers >=0.11.1,!=0.11.3,<0.13
- pyyaml >=5.1
- safetensors
- fsspec
test:
imports:
- transformers
about:
home: https://huggingface.co
license: Apache License 2.0
license_file: LICENSE
summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0."
================================================
FILE: .github/copilot-instructions.md
================================================
# copilot-instructions.md Guide for Hugging Face Transformers
This copilot-instructions.md file provides guidance for code agents working with this codebase.
## Core Project Structure
- `/src/transformers`: This contains the core source code for the library
- `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
- `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
## Coding Conventions for Hugging Face Transformers
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
## Copying and inheritance
Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
We use two mechanisms to keep this code in sync:
- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
When adding new models, you should prefer `modular` style and inherit as many classes as possible from existing models.
## Testing
After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
================================================
FILE: .github/scripts/assign_reviewers.py
================================================
# coding=utf-8
# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import re
from collections import Counter
from pathlib import Path
import github
from github import Github
def pattern_to_regex(pattern):
    """Translate a CODEOWNERS-style glob into a regex fragment.

    A leading "/" anchors the pattern to the start of the path (an optional
    leading slash in the tested path is tolerated); "*" matches any run of
    non-slash characters; all other characters are matched literally.
    """
    anchored = pattern.startswith("/")
    body = re.escape(pattern[1:] if anchored else pattern)
    # A glob `*` means "any number of non-slash characters".
    body = body.replace(r"\*", "[^/]*")
    if anchored:
        # Allow an optional leading slash after the start of the string.
        body = r"^\/?" + body
    return body
def get_file_owners(file_path, codeowners_lines):
    """Return the owners matching ``file_path`` under the CODEOWNERS rules.

    Lines are scanned bottom-up so the last matching pattern takes
    precedence, mirroring GitHub's CODEOWNERS semantics. The returned list
    may be empty: a rule can deliberately declare no owners.
    """
    for raw_rule in reversed(codeowners_lines):
        # Strip inline comments and skip blank or comment-only lines.
        rule = raw_rule.split('#')[0].strip()
        if not rule:
            continue
        pattern, *owner_tokens = rule.split()
        # Can be empty, e.g. for dummy files with explicitly no owner!
        owners = [token.removeprefix("@") for token in owner_tokens]
        if re.search(pattern_to_regex(pattern), file_path) is not None:
            return owners  # Remember, can still be empty!
    return []  # No rule matched; should not happen with a catch-all rule.
def pr_author_is_in_hf(pr_author, codeowners_lines):
    """Return True when ``pr_author`` is listed as an owner on any CODEOWNERS line."""
    for raw_line in codeowners_lines:
        # Drop inline comments; ignore blank or comment-only lines.
        entry = raw_line.split('#')[0].strip()
        if not entry:
            continue
        # Everything after the pattern is an owner handle (with or without "@").
        owner_names = [token.removeprefix("@") for token in entry.split()[1:]]
        if pr_author in owner_names:
            return True
    return False
def main():
    # Entry point for the review-assignment GitHub Action: reads the
    # CODEOWNERS-style file shipped next to this script and requests reviews
    # on the triggering PR from the owners of the most-changed files.
    script_dir = Path(__file__).parent.absolute()
    with open(script_dir / "codeowners_for_review_action") as f:
        codeowners_lines = f.readlines()
    # GITHUB_TOKEN and GITHUB_EVENT_PATH are provided by the Actions runner.
    g = Github(os.environ['GITHUB_TOKEN'])
    repo = g.get_repo("huggingface/transformers")
    with open(os.environ['GITHUB_EVENT_PATH']) as f:
        event = json.load(f)
    # The PR number is available in the event payload
    pr_number = event['pull_request']['number']
    pr = repo.get_pull(pr_number)
    pr_author = pr.user.login
    # Authors who appear in the codeowners file get no auto-assigned reviewers.
    if pr_author_is_in_hf(pr_author, codeowners_lines):
        print(f"PR author {pr_author} is in codeowners, skipping review request.")
        return
    # Do nothing if the PR already has reviews or pending review requests.
    existing_reviews = list(pr.get_reviews())
    if existing_reviews:
        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
        return
    users_requested, teams_requested = pr.get_review_requests()
    users_requested = list(users_requested)
    if users_requested:
        print(f"Reviewers already requested: {users_requested}")
        return
    # Weight each owner by the number of changed lines in files they own.
    locs_per_owner = Counter()
    for file in pr.get_files():
        owners = get_file_owners(file.filename, codeowners_lines)
        for owner in owners:
            locs_per_owner[owner] += file.changes
    # Assign the top 2 based on locs changed as reviewers, but skip the owner if present
    locs_per_owner.pop(pr_author, None)
    top_owners = locs_per_owner.most_common(2)
    print("Top owners", top_owners)
    top_owners = [owner[0] for owner in top_owners]
    try:
        pr.create_review_request(top_owners)
    except github.GithubException as e:
        # Requests can fail (e.g. a listed user lacks repo access); log and continue.
        print(f"Failed to request review for {top_owners}: {e}")


if __name__ == "__main__":
    main()
================================================
FILE: .github/scripts/codeowners_for_review_action
================================================
# Top-level rules are matched only if nothing else matches
* @Rocketknight1 @ArthurZucker # if no one is pinged based on the other rules, they will do the dispatch
*.md @stevhliu
*tokenization* @ArthurZucker
docs/ @stevhliu
/benchmark/ @McPatate
/docker/ @ydshieh @ArthurZucker
# More high-level globs catch cases when specific rules later don't apply
/src/transformers/models/*/processing* @molbap @yonigozlan
/src/transformers/models/*/image_processing* @yonigozlan
/src/transformers/models/*/image_processing_*_fast* @yonigozlan
# Owners of subsections of the library
/src/transformers/generation/ @gante
/src/transformers/pipelines/ @Rocketknight1 @yonigozlan
/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface
/src/transformers/quantizers/ @SunMarc @MekkCyber
tests/ @ydshieh
tests/generation/ @gante
/src/transformers/models/auto/ @ArthurZucker
/src/transformers/utils/ @ArthurZucker @Rocketknight1
/src/transformers/loss/ @ArthurZucker
# Specific files come after the sections/globs, so they take priority
/.circleci/config.yml @ArthurZucker @ydshieh
/utils/tests_fetcher.py @ydshieh
trainer.py @zach-huggingface @SunMarc
trainer_utils.py @zach-huggingface @SunMarc
/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker
# Owners of individual models are specific / high priority, and so they come last
# mod* captures modeling and modular files
# Text models
/src/transformers/models/albert/mod*_albert* @ArthurZucker
/src/transformers/models/bamba/mod*_bamba* @ArthurZucker
/src/transformers/models/bart/mod*_bart* @ArthurZucker
/src/transformers/models/barthez/mod*_barthez* @ArthurZucker
/src/transformers/models/bartpho/mod*_bartpho* @ArthurZucker
/src/transformers/models/bert/mod*_bert* @ArthurZucker
/src/transformers/models/bert_generation/mod*_bert_generation* @ArthurZucker
/src/transformers/models/bert_japanese/mod*_bert_japanese* @ArthurZucker
/src/transformers/models/bertweet/mod*_bertweet* @ArthurZucker
/src/transformers/models/big_bird/mod*_big_bird* @ArthurZucker
/src/transformers/models/bigbird_pegasus/mod*_bigbird_pegasus* @ArthurZucker
/src/transformers/models/biogpt/mod*_biogpt* @ArthurZucker
/src/transformers/models/blenderbot/mod*_blenderbot* @ArthurZucker
/src/transformers/models/blenderbot_small/mod*_blenderbot_small* @ArthurZucker
/src/transformers/models/bloom/mod*_bloom* @ArthurZucker
/src/transformers/models/bort/mod*_bort* @ArthurZucker
/src/transformers/models/byt5/mod*_byt5* @ArthurZucker
/src/transformers/models/camembert/mod*_camembert* @ArthurZucker
/src/transformers/models/canine/mod*_canine* @ArthurZucker
/src/transformers/models/codegen/mod*_codegen* @ArthurZucker
/src/transformers/models/code_llama/mod*_code_llama* @ArthurZucker
/src/transformers/models/cohere/mod*_cohere* @ArthurZucker
/src/transformers/models/cohere2/mod*_cohere2* @ArthurZucker
/src/transformers/models/convbert/mod*_convbert* @ArthurZucker
/src/transformers/models/cpm/mod*_cpm* @ArthurZucker
/src/transformers/models/cpmant/mod*_cpmant* @ArthurZucker
/src/transformers/models/ctrl/mod*_ctrl* @ArthurZucker
/src/transformers/models/dbrx/mod*_dbrx* @ArthurZucker
/src/transformers/models/deberta/mod*_deberta* @ArthurZucker
/src/transformers/models/deberta_v2/mod*_deberta_v2* @ArthurZucker
/src/transformers/models/dialogpt/mod*_dialogpt* @ArthurZucker
/src/transformers/models/diffllama/mod*_diffllama* @ArthurZucker
/src/transformers/models/distilbert/mod*_distilbert* @ArthurZucker
/src/transformers/models/dpr/mod*_dpr* @ArthurZucker
/src/transformers/models/electra/mod*_electra* @ArthurZucker
/src/transformers/models/encoder_decoder/mod*_encoder_decoder* @ArthurZucker
/src/transformers/models/ernie/mod*_ernie* @ArthurZucker
/src/transformers/models/ernie_m/mod*_ernie_m* @ArthurZucker
/src/transformers/models/esm/mod*_esm* @ArthurZucker
/src/transformers/models/falcon/mod*_falcon* @ArthurZucker
/src/transformers/models/falcon3/mod*_falcon3* @ArthurZucker
/src/transformers/models/falcon_mamba/mod*_falcon_mamba* @ArthurZucker
/src/transformers/models/fastspeech2_conformer/mod*_fastspeech2_conformer* @ArthurZucker
/src/transformers/models/flan_t5/mod*_flan_t5* @ArthurZucker
/src/transformers/models/flan_ul2/mod*_flan_ul2* @ArthurZucker
/src/transformers/models/flaubert/mod*_flaubert* @ArthurZucker
/src/transformers/models/fnet/mod*_fnet* @ArthurZucker
/src/transformers/models/fsmt/mod*_fsmt* @ArthurZucker
/src/transformers/models/funnel/mod*_funnel* @ArthurZucker
/src/transformers/models/fuyu/mod*_fuyu* @ArthurZucker
/src/transformers/models/gemma/mod*_gemma* @ArthurZucker
/src/transformers/models/gemma2/mod*_gemma2* @ArthurZucker
/src/transformers/models/glm/mod*_glm* @ArthurZucker
/src/transformers/models/openai_gpt/mod*_openai_gpt* @ArthurZucker
/src/transformers/models/gpt_neo/mod*_gpt_neo* @ArthurZucker
/src/transformers/models/gpt_neox/mod*_gpt_neox* @ArthurZucker
/src/transformers/models/gpt_neox_japanese/mod*_gpt_neox_japanese* @ArthurZucker
/src/transformers/models/gptj/mod*_gptj* @ArthurZucker
/src/transformers/models/gpt2/mod*_gpt2* @ArthurZucker
/src/transformers/models/gpt_bigcode/mod*_gpt_bigcode* @ArthurZucker
/src/transformers/models/gptsan_japanese/mod*_gptsan_japanese* @ArthurZucker
/src/transformers/models/gpt_sw3/mod*_gpt_sw3* @ArthurZucker
/src/transformers/models/granite/mod*_granite* @ArthurZucker
/src/transformers/models/granitemoe/mod*_granitemoe* @ArthurZucker
/src/transformers/models/herbert/mod*_herbert* @ArthurZucker
/src/transformers/models/ibert/mod*_ibert* @ArthurZucker
/src/transformers/models/jamba/mod*_jamba* @ArthurZucker
/src/transformers/models/jetmoe/mod*_jetmoe* @ArthurZucker
/src/transformers/models/jukebox/mod*_jukebox* @ArthurZucker
/src/transformers/models/led/mod*_led* @ArthurZucker
/src/transformers/models/llama/mod*_llama* @ArthurZucker @Cyrilvallez
/src/transformers/models/longformer/mod*_longformer* @ArthurZucker
/src/transformers/models/longt5/mod*_longt5* @ArthurZucker
/src/transformers/models/luke/mod*_luke* @ArthurZucker
/src/transformers/models/m2m_100/mod*_m2m_100* @ArthurZucker
/src/transformers/models/madlad_400/mod*_madlad_400* @ArthurZucker
/src/transformers/models/mamba/mod*_mamba* @ArthurZucker
/src/transformers/models/mamba2/mod*_mamba2* @ArthurZucker
/src/transformers/models/marian/mod*_marian* @ArthurZucker
/src/transformers/models/markuplm/mod*_markuplm* @ArthurZucker
/src/transformers/models/mbart/mod*_mbart* @ArthurZucker
/src/transformers/models/mega/mod*_mega* @ArthurZucker
/src/transformers/models/megatron_bert/mod*_megatron_bert* @ArthurZucker
/src/transformers/models/megatron_gpt2/mod*_megatron_gpt2* @ArthurZucker
/src/transformers/models/mistral/mod*_mistral* @ArthurZucker
/src/transformers/models/mixtral/mod*_mixtral* @ArthurZucker
/src/transformers/models/mluke/mod*_mluke* @ArthurZucker
/src/transformers/models/mobilebert/mod*_mobilebert* @ArthurZucker
/src/transformers/models/modernbert/mod*_modernbert* @ArthurZucker
/src/transformers/models/mpnet/mod*_mpnet* @ArthurZucker
/src/transformers/models/mpt/mod*_mpt* @ArthurZucker
/src/transformers/models/mra/mod*_mra* @ArthurZucker
/src/transformers/models/mt5/mod*_mt5* @ArthurZucker
/src/transformers/models/mvp/mod*_mvp* @ArthurZucker
/src/transformers/models/myt5/mod*_myt5* @ArthurZucker
/src/transformers/models/nemotron/mod*_nemotron* @ArthurZucker
/src/transformers/models/nezha/mod*_nezha* @ArthurZucker
/src/transformers/models/nllb/mod*_nllb* @ArthurZucker
/src/transformers/models/nllb_moe/mod*_nllb_moe* @ArthurZucker
/src/transformers/models/nystromformer/mod*_nystromformer* @ArthurZucker
/src/transformers/models/olmo/mod*_olmo* @ArthurZucker
/src/transformers/models/olmo2/mod*_olmo2* @ArthurZucker
/src/transformers/models/olmoe/mod*_olmoe* @ArthurZucker
/src/transformers/models/open_llama/mod*_open_llama* @ArthurZucker
/src/transformers/models/opt/mod*_opt* @ArthurZucker
/src/transformers/models/pegasus/mod*_pegasus* @ArthurZucker
/src/transformers/models/pegasus_x/mod*_pegasus_x* @ArthurZucker
/src/transformers/models/persimmon/mod*_persimmon* @ArthurZucker
/src/transformers/models/phi/mod*_phi* @ArthurZucker
/src/transformers/models/phi3/mod*_phi3* @ArthurZucker
/src/transformers/models/phimoe/mod*_phimoe* @ArthurZucker
/src/transformers/models/phobert/mod*_phobert* @ArthurZucker
/src/transformers/models/plbart/mod*_plbart* @ArthurZucker
/src/transformers/models/prophetnet/mod*_prophetnet* @ArthurZucker
/src/transformers/models/qdqbert/mod*_qdqbert* @ArthurZucker
/src/transformers/models/qwen2/mod*_qwen2* @ArthurZucker
/src/transformers/models/qwen2_moe/mod*_qwen2_moe* @ArthurZucker
/src/transformers/models/rag/mod*_rag* @ArthurZucker
/src/transformers/models/realm/mod*_realm* @ArthurZucker
/src/transformers/models/recurrent_gemma/mod*_recurrent_gemma* @ArthurZucker
/src/transformers/models/reformer/mod*_reformer* @ArthurZucker
/src/transformers/models/rembert/mod*_rembert* @ArthurZucker
/src/transformers/models/retribert/mod*_retribert* @ArthurZucker
/src/transformers/models/roberta/mod*_roberta* @ArthurZucker
/src/transformers/models/roberta_prelayernorm/mod*_roberta_prelayernorm* @ArthurZucker
/src/transformers/models/roc_bert/mod*_roc_bert* @ArthurZucker
/src/transformers/models/roformer/mod*_roformer* @ArthurZucker
/src/transformers/models/rwkv/mod*_rwkv* @ArthurZucker
/src/transformers/models/splinter/mod*_splinter* @ArthurZucker
/src/transformers/models/squeezebert/mod*_squeezebert* @ArthurZucker
/src/transformers/models/stablelm/mod*_stablelm* @ArthurZucker
/src/transformers/models/starcoder2/mod*_starcoder2* @ArthurZucker
/src/transformers/models/switch_transformers/mod*_switch_transformers* @ArthurZucker
/src/transformers/models/t5/mod*_t5* @ArthurZucker
/src/transformers/models/t5v1.1/mod*_t5v1.1* @ArthurZucker
/src/transformers/models/tapex/mod*_tapex* @ArthurZucker
/src/transformers/models/transfo_xl/mod*_transfo_xl* @ArthurZucker
/src/transformers/models/ul2/mod*_ul2* @ArthurZucker
/src/transformers/models/umt5/mod*_umt5* @ArthurZucker
/src/transformers/models/xmod/mod*_xmod* @ArthurZucker
/src/transformers/models/xglm/mod*_xglm* @ArthurZucker
/src/transformers/models/xlm/mod*_xlm* @ArthurZucker
/src/transformers/models/xlm_prophetnet/mod*_xlm_prophetnet* @ArthurZucker
/src/transformers/models/xlm_roberta/mod*_xlm_roberta* @ArthurZucker
/src/transformers/models/xlm_roberta_xl/mod*_xlm_roberta_xl* @ArthurZucker
/src/transformers/models/xlm_v/mod*_xlm_v* @ArthurZucker
/src/transformers/models/xlnet/mod*_xlnet* @ArthurZucker
/src/transformers/models/yoso/mod*_yoso* @ArthurZucker
/src/transformers/models/zamba/mod*_zamba* @ArthurZucker
# Vision models
/src/transformers/models/beit/mod*_beit* @yonigozlan @molbap
/src/transformers/models/bit/mod*_bit* @yonigozlan @molbap
/src/transformers/models/conditional_detr/mod*_conditional_detr* @yonigozlan @molbap
/src/transformers/models/convnext/mod*_convnext* @yonigozlan @molbap
/src/transformers/models/convnextv2/mod*_convnextv2* @yonigozlan @molbap
/src/transformers/models/cvt/mod*_cvt* @yonigozlan @molbap
/src/transformers/models/deformable_detr/mod*_deformable_detr* @yonigozlan @molbap
/src/transformers/models/deit/mod*_deit* @yonigozlan @molbap
/src/transformers/models/depth_anything/mod*_depth_anything* @yonigozlan @molbap
/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @yonigozlan @molbap
/src/transformers/models/deta/mod*_deta* @yonigozlan @molbap
/src/transformers/models/detr/mod*_detr* @yonigozlan @molbap
/src/transformers/models/dinat/mod*_dinat* @yonigozlan @molbap
/src/transformers/models/dinov2/mod*_dinov2* @yonigozlan @molbap
/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @yonigozlan @molbap
/src/transformers/models/dit/mod*_dit* @yonigozlan @molbap
/src/transformers/models/dpt/mod*_dpt* @yonigozlan @molbap
/src/transformers/models/efficientformer/mod*_efficientformer* @yonigozlan @molbap
/src/transformers/models/efficientnet/mod*_efficientnet* @yonigozlan @molbap
/src/transformers/models/focalnet/mod*_focalnet* @yonigozlan @molbap
/src/transformers/models/glpn/mod*_glpn* @yonigozlan @molbap
/src/transformers/models/hiera/mod*_hiera* @yonigozlan @molbap
/src/transformers/models/ijepa/mod*_ijepa* @yonigozlan @molbap
/src/transformers/models/imagegpt/mod*_imagegpt* @yonigozlan @molbap
/src/transformers/models/levit/mod*_levit* @yonigozlan @molbap
/src/transformers/models/mask2former/mod*_mask2former* @yonigozlan @molbap
/src/transformers/models/maskformer/mod*_maskformer* @yonigozlan @molbap
/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @yonigozlan @molbap
/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @yonigozlan @molbap
/src/transformers/models/mobilevit/mod*_mobilevit* @yonigozlan @molbap
/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @yonigozlan @molbap
/src/transformers/models/nat/mod*_nat* @yonigozlan @molbap
/src/transformers/models/poolformer/mod*_poolformer* @yonigozlan @molbap
/src/transformers/models/pvt/mod*_pvt* @yonigozlan @molbap
/src/transformers/models/pvt_v2/mod*_pvt_v2* @yonigozlan @molbap
/src/transformers/models/regnet/mod*_regnet* @yonigozlan @molbap
/src/transformers/models/resnet/mod*_resnet* @yonigozlan @molbap
/src/transformers/models/rt_detr/mod*_rt_detr* @yonigozlan @molbap
/src/transformers/models/segformer/mod*_segformer* @yonigozlan @molbap
/src/transformers/models/seggpt/mod*_seggpt* @yonigozlan @molbap
/src/transformers/models/superpoint/mod*_superpoint* @yonigozlan @molbap
/src/transformers/models/swiftformer/mod*_swiftformer* @yonigozlan @molbap
/src/transformers/models/swin/mod*_swin* @yonigozlan @molbap
/src/transformers/models/swinv2/mod*_swinv2* @yonigozlan @molbap
/src/transformers/models/swin2sr/mod*_swin2sr* @yonigozlan @molbap
/src/transformers/models/table_transformer/mod*_table_transformer* @yonigozlan @molbap
/src/transformers/models/textnet/mod*_textnet* @yonigozlan @molbap
/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @yonigozlan @molbap
/src/transformers/models/upernet/mod*_upernet* @yonigozlan @molbap
/src/transformers/models/van/mod*_van* @yonigozlan @molbap
/src/transformers/models/vit/mod*_vit* @yonigozlan @molbap
/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @yonigozlan @molbap
/src/transformers/models/vitdet/mod*_vitdet* @yonigozlan @molbap
/src/transformers/models/vit_mae/mod*_vit_mae* @yonigozlan @molbap
/src/transformers/models/vitmatte/mod*_vitmatte* @yonigozlan @molbap
/src/transformers/models/vit_msn/mod*_vit_msn* @yonigozlan @molbap
/src/transformers/models/vitpose/mod*_vitpose* @yonigozlan @molbap
/src/transformers/models/yolos/mod*_yolos* @yonigozlan @molbap
/src/transformers/models/zoedepth/mod*_zoedepth* @yonigozlan @molbap
# Audio models
/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb
/src/transformers/models/bark/mod*_bark* @eustlb
/src/transformers/models/clap/mod*_clap* @eustlb
/src/transformers/models/dac/mod*_dac* @eustlb
/src/transformers/models/encodec/mod*_encodec* @eustlb
/src/transformers/models/hubert/mod*_hubert* @eustlb
/src/transformers/models/mctct/mod*_mctct* @eustlb
/src/transformers/models/mimi/mod*_mimi* @eustlb
/src/transformers/models/mms/mod*_mms* @eustlb
/src/transformers/models/moshi/mod*_moshi* @eustlb
/src/transformers/models/musicgen/mod*_musicgen* @eustlb
/src/transformers/models/musicgen_melody/mod*_musicgen_melody* @eustlb
/src/transformers/models/pop2piano/mod*_pop2piano* @eustlb
/src/transformers/models/seamless_m4t/mod*_seamless_m4t* @eustlb
/src/transformers/models/seamless_m4t_v2/mod*_seamless_m4t_v2* @eustlb
/src/transformers/models/sew/mod*_sew* @eustlb
/src/transformers/models/sew_d/mod*_sew_d* @eustlb
/src/transformers/models/speech_to_text/mod*_speech_to_text* @eustlb
/src/transformers/models/speech_to_text_2/mod*_speech_to_text_2* @eustlb
/src/transformers/models/speecht5/mod*_speecht5* @eustlb
/src/transformers/models/unispeech/mod*_unispeech* @eustlb
/src/transformers/models/unispeech_sat/mod*_unispeech_sat* @eustlb
/src/transformers/models/univnet/mod*_univnet* @eustlb
/src/transformers/models/vits/mod*_vits* @eustlb
/src/transformers/models/wav2vec2/mod*_wav2vec2* @eustlb
/src/transformers/models/wav2vec2_bert/mod*_wav2vec2_bert* @eustlb
/src/transformers/models/wav2vec2_conformer/mod*_wav2vec2_conformer* @eustlb
/src/transformers/models/wav2vec2_phoneme/mod*_wav2vec2_phoneme* @eustlb
/src/transformers/models/wavlm/mod*_wavlm* @eustlb
/src/transformers/models/whisper/mod*_whisper* @eustlb
/src/transformers/models/xls_r/mod*_xls_r* @eustlb
/src/transformers/models/xlsr_wav2vec2/mod*_xlsr_wav2vec2* @eustlb
# Video models
/src/transformers/models/timesformer/mod*_timesformer* @Rocketknight1
/src/transformers/models/videomae/mod*_videomae* @Rocketknight1
/src/transformers/models/vivit/mod*_vivit* @Rocketknight1
# Multimodal models
/src/transformers/models/align/mod*_align* @zucchini-nlp
/src/transformers/models/altclip/mod*_altclip* @zucchini-nlp
/src/transformers/models/aria/mod*_aria* @zucchini-nlp
/src/transformers/models/blip/mod*_blip* @zucchini-nlp
/src/transformers/models/blip_2/mod*_blip_2* @zucchini-nlp
/src/transformers/models/bridgetower/mod*_bridgetower* @zucchini-nlp
/src/transformers/models/bros/mod*_bros* @zucchini-nlp
/src/transformers/models/chameleon/mod*_chameleon* @zucchini-nlp
/src/transformers/models/chinese_clip/mod*_chinese_clip* @zucchini-nlp
/src/transformers/models/clip/mod*_clip* @zucchini-nlp
/src/transformers/models/clipseg/mod*_clipseg* @zucchini-nlp
/src/transformers/models/clvp/mod*_clvp* @zucchini-nlp
/src/transformers/models/colpali/mod*_colpali* @zucchini-nlp @yonigozlan
/src/transformers/models/data2vec/mod*_data2vec* @zucchini-nlp
/src/transformers/models/deplot/mod*_deplot* @zucchini-nlp
/src/transformers/models/donut/mod*_donut* @zucchini-nlp
/src/transformers/models/flava/mod*_flava* @zucchini-nlp
/src/transformers/models/git/mod*_git* @zucchini-nlp
/src/transformers/models/grounding_dino/mod*_grounding_dino* @yonigozlan
/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp
/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp
/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp
/src/transformers/models/idefics3/mod*_idefics3* @zucchini-nlp
/src/transformers/models/instructblip/mod*_instructblip* @zucchini-nlp
/src/transformers/models/instructblipvideo/mod*_instructblipvideo* @zucchini-nlp
/src/transformers/models/kosmos_2/mod*_kosmos_2* @zucchini-nlp
/src/transformers/models/layoutlm/mod*_layoutlm* @NielsRogge
/src/transformers/models/layoutlmv2/mod*_layoutlmv2* @NielsRogge
/src/transformers/models/layoutlmv3/mod*_layoutlmv3* @NielsRogge
/src/transformers/models/layoutxlm/mod*_layoutxlm* @NielsRogge
/src/transformers/models/lilt/mod*_lilt* @zucchini-nlp
/src/transformers/models/llava/mod*_llava* @zucchini-nlp @ArthurZucker
/src/transformers/models/llava_next/mod*_llava_next* @zucchini-nlp
/src/transformers/models/llava_next_video/mod*_llava_next_video* @zucchini-nlp
/src/transformers/models/llava_onevision/mod*_llava_onevision* @zucchini-nlp
/src/transformers/models/lxmert/mod*_lxmert* @zucchini-nlp
/src/transformers/models/matcha/mod*_matcha* @zucchini-nlp
/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp
/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp
/src/transformers/models/nougat/mod*_nougat* @NielsRogge
/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @yonigozlan
/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp
/src/transformers/models/owlvit/mod*_owlvit* @yonigozlan
/src/transformers/models/owlv2/mod*_owlv2* @yonigozlan
/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap
/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp
/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp
/src/transformers/models/pixtral/mod*_pixtral* @zucchini-nlp @ArthurZucker
/src/transformers/models/qwen2_audio/mod*_qwen2_audio* @zucchini-nlp @ArthurZucker
/src/transformers/models/qwen2_vl/mod*_qwen2_vl* @zucchini-nlp @ArthurZucker
/src/transformers/models/sam/mod*_sam* @zucchini-nlp @ArthurZucker
/src/transformers/models/siglip/mod*_siglip* @zucchini-nlp
/src/transformers/models/speech_encoder_decoder/mod*_speech_encoder_decoder* @zucchini-nlp
/src/transformers/models/tapas/mod*_tapas* @NielsRogge
/src/transformers/models/trocr/mod*_trocr* @zucchini-nlp
/src/transformers/models/tvlt/mod*_tvlt* @zucchini-nlp
/src/transformers/models/tvp/mod*_tvp* @zucchini-nlp
/src/transformers/models/udop/mod*_udop* @zucchini-nlp
/src/transformers/models/video_llava/mod*_video_llava* @zucchini-nlp
/src/transformers/models/vilt/mod*_vilt* @zucchini-nlp
/src/transformers/models/vipllava/mod*_vipllava* @zucchini-nlp
/src/transformers/models/vision_encoder_decoder/mod*_vision_encoder_decoder* @Rocketknight1
/src/transformers/models/vision_text_dual_encoder/mod*_vision_text_dual_encoder* @Rocketknight1
/src/transformers/models/visual_bert/mod*_visual_bert* @zucchini-nlp
/src/transformers/models/xclip/mod*_xclip* @zucchini-nlp
# Reinforcement learning models
/src/transformers/models/decision_transformer/mod*_decision_transformer* @Rocketknight1
/src/transformers/models/trajectory_transformer/mod*_trajectory_transformer* @Rocketknight1
# Time series models
/src/transformers/models/autoformer/mod*_autoformer* @Rocketknight1
/src/transformers/models/informer/mod*_informer* @Rocketknight1
/src/transformers/models/patchtsmixer/mod*_patchtsmixer* @Rocketknight1
/src/transformers/models/patchtst/mod*_patchtst* @Rocketknight1
/src/transformers/models/time_series_transformer/mod*_time_series_transformer* @Rocketknight1
# Graph models
/src/transformers/models/graphormer/mod*_graphormer* @clefourrier
# Finally, files with no owners that shouldn't generate pings, usually automatically generated and checked in the CI
utils/dummy*
================================================
FILE: .github/workflows/TROUBLESHOOT.md
================================================
# Troubleshooting
This is a document explaining how to deal with various issues on GitHub Actions self-hosted CI. The entries may include actual solutions or pointers to Issues that cover those.
## GitHub Actions (self-hosted CI)
* Deepspeed
- if jit build hangs, clear out `rm -rf ~/.cache/torch_extensions/` reference: https://github.com/huggingface/transformers/pull/12723
================================================
FILE: .github/workflows/add-model-like.yml
================================================
name: Add model like runner
on:
push:
branches:
- none # put main here when this is fixed
#pull_request:
# paths:
# - "src/**"
# - "tests/**"
# - ".github/**"
# types: [opened, synchronize, reopened]
jobs:
run_tests_templates_like:
name: "Add new model like template tests"
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt -y update && sudo apt install -y libsndfile1-dev
- name: Load cached virtual environment
uses: actions/cache@v4
id: cache
with:
path: ~/venv/
key: v4-tests_model_like-${{ hashFiles('setup.py') }}
- name: Create virtual environment on cache miss
if: steps.cache.outputs.cache-hit != 'true'
run: |
python -m venv ~/venv && . ~/venv/bin/activate
pip install --upgrade pip!=21.3
pip install -e .[dev]
- name: Check transformers location
# make `transformers` available as package (required since we use `-e` flag) and check it's indeed from the repo.
run: |
. ~/venv/bin/activate
python setup.py develop
transformers_install=$(pip list -e | grep transformers)
transformers_install_array=($transformers_install)
transformers_loc=${transformers_install_array[-1]}
transformers_repo_loc=$(pwd .)
if [ "$transformers_loc" != "$transformers_repo_loc" ]; then
echo "transformers is from $transformers_loc but it should be from $transformers_repo_loc/src."
echo "A fix is required. Stop testing."
exit 1
fi
- name: Create model files
run: |
. ~/venv/bin/activate
transformers add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
make style
make fix-copies
- name: Run all PyTorch modeling test
run: |
. ~/venv/bin/activate
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_new_models tests/bert_new/test_modeling_bert_new.py
- name: Run style changes
run: |
. ~/venv/bin/activate
make style && make quality && make repo-consistency
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_new_models/failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: run_all_tests_new_models_test_reports
path: reports/tests_new_models
================================================
FILE: .github/workflows/anti-slop.yml
================================================
name: Anti-Slop
permissions:
contents: read
issues: read
pull-requests: write
on:
pull_request_target:
types: [opened, reopened]
jobs:
anti-slop:
runs-on: ubuntu-latest
steps:
- uses: peakoss/anti-slop@v0.2.1
with:
# -- Failure threshold --
# Require both enabled checks to fail before labeling while we validate the signals
max-failures: 2
# -- Do NOT close or lock, just label --
close-pr: false
lock-pr: false
failure-add-pr-labels: "Code agent slop"
failure-pr-message: |
This PR was flagged by our automated quality checks. If you're a genuine
contributor, please reply here and a maintainer will review your PR.
Common reasons for flagging:
- New GitHub account
- Unusually high number of repository forks in a 24-hour window
We appreciate your contribution and apologize if this is a false positive!
# -- Account checks --
# Start with two conservative, high-signal checks and iterate from there
min-account-age: 30
max-daily-forks: 7
# -- Disabled checks (keep minimal) --
blocked-source-branches: ""
blocked-paths: ""
detect-spam-usernames: false
min-profile-completeness: 0
require-description: false
require-linked-issue: false
require-conventional-title: false
require-pr-template: false
strict-pr-template-sections: ""
optional-pr-template-sections: ""
max-additional-pr-template-sections: 0
max-description-length: 0
require-conventional-commits: false
require-commit-author-match: false
require-maintainer-can-modify: false
require-final-newline: false
max-added-comments: 0
max-emoji-count: 0
max-code-references: 0
max-commit-message-length: 0
min-repo-merged-prs: 0
min-repo-merge-ratio: 0
min-global-merge-ratio: 0
# -- Exemptions --
exempt-author-association: "OWNER,MEMBER,COLLABORATOR"
exempt-label: "exempt"
================================================
FILE: .github/workflows/assign-reviewers.yml
================================================
name: Assign PR Reviewers
on:
pull_request_target:
branches:
- main
types: [ready_for_review]
jobs:
assign_reviewers:
permissions:
pull-requests: write
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install PyGithub
- name: Run assignment script
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: python .github/scripts/assign_reviewers.py
================================================
FILE: .github/workflows/benchmark.yml
================================================
name: Self-hosted runner (benchmark)
on:
push:
branches: [main]
pull_request:
types: [ opened, labeled, reopened, synchronize ]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
HF_HOME: /mnt/cache
DATASET_ID: hf-benchmarks/transformers
MODEL_ID: meta-llama/Llama-3.1-8B-Instruct
jobs:
benchmark:
name: Benchmark
strategy:
matrix:
# group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
group: [aws-g5-4xlarge-cache]
runs-on:
group: ${{ matrix.group }}
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host
steps:
- name: Get repo
uses: actions/checkout@v5
with:
fetch-depth: 1
- name: Install benchmark script dependencies
run: python3 -m pip install -r benchmark_v2/requirements.txt kernels
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"
- name: Run benchmark
run: |
git config --global --add safe.directory /__w/transformers/transformers
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
commit_id=$(echo "${{ github.event.pull_request.head.sha }}")
elif [ "$GITHUB_EVENT_NAME" = "push" ]; then
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --level 2 --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
PUSH_TO_HUB_TOKEN: ${{ secrets.PUSH_TO_HUB_TOKEN }}
# Enable this to see debug logs
# HF_HUB_VERBOSITY: debug
# TRANSFORMERS_VERBOSITY: debug
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
================================================
FILE: .github/workflows/benchmark_v2.yml
================================================
name: Benchmark v2 Framework
on:
workflow_dispatch:
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
jobs:
benchmark-v2:
name: Benchmark v2
runs-on: ${{ inputs.runner }}
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) ||
(github.event_name == 'schedule')
container:
image: ${{ inputs.container_image }}
options: ${{ inputs.container_options }}
steps:
- name: Get repo
uses: actions/checkout@v4
with:
ref: ${{ inputs.commit_sha || github.sha }}
- name: Install benchmark dependencies
run: |
python3 -m pip install -r benchmark_v2/requirements.txt
- name: Reinstall transformers in edit mode
run: |
python3 -m pip uninstall -y transformers
python3 -m pip install -e ".[torch]"
- name: Show installed libraries and their versions
run: |
python3 -m pip list
python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true
nvidia-smi || true
- name: Run benchmark v2
working-directory: benchmark_v2
run: |
echo "Running benchmarks"
python3 run_benchmarks.py \
--commit-id '${{ inputs.commit_sha || github.sha }}' \
--run-id '${{ inputs.run_id }}' \
--push-to-hub '${{ inputs.benchmark_repo_id}}' \
--token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \
--log-level INFO
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
================================================
FILE: .github/workflows/benchmark_v2_a10_caller.yml
================================================
name: Benchmark v2 Scheduled Runner - A10 Single-GPU
on:
workflow_dispatch:
jobs:
benchmark-v2-default:
name: Benchmark v2 - Default Models
uses: ./.github/workflows/benchmark_v2.yml
with:
runner: aws-g5-4xlarge-cache-use1-public-80
container_image: huggingface/transformers-all-latest-gpu
container_options: --gpus all --privileged --ipc host --shm-size "16gb"
commit_sha: ${{ github.sha }}
run_id: ${{ github.run_id }}
benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
secrets: inherit
================================================
FILE: .github/workflows/benchmark_v2_mi325_caller.yml
================================================
name: Benchmark v2 Scheduled Runner - MI325 Single-GPU
on:
workflow_dispatch:
jobs:
benchmark-v2-default:
name: Benchmark v2 - Default Models
uses: ./.github/workflows/benchmark_v2.yml
with:
runner: amd-mi325-ci-1gpu
container_image: huggingface/transformers-pytorch-amd-gpu
container_options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache
commit_sha: ${{ github.sha }}
run_id: ${{ github.run_id }}
benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
secrets: inherit
================================================
FILE: .github/workflows/build-ci-docker-images.yml
================================================
name: Build pr ci-docker
on:
push:
branches:
- push-ci-image # for now let's only build on this branch
repository_dispatch:
workflow_call:
inputs:
image_postfix:
required: true
type: string
schedule:
- cron: "6 0 * * *"
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "exotic-models", "examples-torch"]
continue-on-error: true
steps:
-
name: Set tag
run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
echo "setting it to DEV!"
else
echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
fi
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build ${{ matrix.file }}.dockerfile
uses: docker/build-push-action@v5
with:
context: ./docker
build-args: |
REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile"
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }}
notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
================================================
FILE: .github/workflows/build-docker-images.yml
================================================
name: Build docker images (scheduled)
on:
push:
branches:
- build_ci_docker_image*
repository_dispatch:
workflow_dispatch:
workflow_call:
inputs:
image_postfix:
required: true
type: string
schedule:
- cron: "17 0 * * *"
concurrency:
group: docker-images-builds
cancel-in-progress: false
jobs:
latest-docker:
name: "Latest PyTorch [dev]"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-all-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-all-latest-gpu docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Same base image as `latest-docker`, but pinned torch + FlashAttention,
# pushed under the extra `:flash-attn` tag.
flash-attn-ci-image:
  name: "PyTorch with Flash Attn [dev]"
  runs-on:
    group: aws-general-8-plus
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-all-latest-gpu
        build-args: |
          REF=main
          PYTORCH=2.8.0
          TORCHCODEC=0.7.0
          FLASH_ATTN=yes
        push: true
        tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}:flash-attn
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the transformers-all-latest-gpu docker build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Build and push huggingface/transformers-pytorch-deepspeed-latest-gpu.
latest-torch-deepspeed-docker:
  name: "Latest PyTorch + DeepSpeed"
  runs-on:
    group: aws-general-8-plus
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-pytorch-deepspeed-latest-gpu
        build-args: |
          REF=main
        push: true
        tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        # Fixed: was `${{ secrets.CI_SLACK_CHANNEL_DOCKER}}` (missing space
        # before `}}`), inconsistent with every other job in this workflow.
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Build and push the documentation-builder image (no image_postfix, no build-args).
doc-builder:
  name: "Doc builder"
  runs-on:
    group: aws-general-8-plus
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-doc-builder
        push: true
        tags: huggingface/transformers-doc-builder
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the huggingface/transformers-doc-builder docker build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Build and push the AMD (ROCm) CI image: huggingface/transformers-pytorch-amd-gpu.
latest-pytorch-amd:
  name: "Latest PyTorch (AMD) [dev]"
  runs-on:
    group: aws-highcpu-32-priv
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-pytorch-amd-gpu
        build-args: |
          REF=main
        push: true
        tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# After the AMD image is (re)built, pull it on an MI325 runner and save it as a
# tar under /mnt/image-cache so later CI jobs can load it without a registry pull.
cache-latest-pytorch-amd:
  name: "Cache Latest Pytorch (AMD) Image"
  needs: latest-pytorch-amd
  runs-on:
    group: amd-mi325-1gpu
  steps:
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Pull and save docker image to cache
      run: |
        image="huggingface/transformers-pytorch-amd-gpu"
        final_path="/mnt/image-cache/transformers-pytorch-amd-gpu.tar"
        # Write to a temp file first, then atomically rename, so a concurrent
        # reader never sees a half-written tar at the final path.
        tmp_path="${final_path}.tmp"
        echo "Pulling image: ${image}"
        docker pull "${image}"
        echo "Saving to temp file: ${tmp_path}"
        docker save "${image}" -o "${tmp_path}"
        echo "Moving to final path: ${final_path}"
        mv -f "${tmp_path}" "${final_path}"
        echo "Cache populated successfully at ${final_path}"
# Build and push huggingface/transformers-pytorch-deepspeed-amd-gpu (ROCm + DeepSpeed).
latest-pytorch-deepspeed-amd:
  name: "PyTorch + DeepSpeed (AMD) [dev]"
  runs-on:
    group: aws-general-8-plus
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-pytorch-deepspeed-amd-gpu
        build-args: |
          REF=main
        push: true
        tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Build and push huggingface/transformers-quantization-latest-gpu
# (image with the quantization backends installed).
latest-quantization-torch-docker:
  name: "Latest Pytorch + Quantization [dev]"
  runs-on:
    group: aws-general-8-plus
  steps:
    -
      name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    -
      name: Check out code
      uses: actions/checkout@v4
    -
      name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}
    -
      name: Build and push
      uses: docker/build-push-action@v5
      with:
        context: ./docker/transformers-quantization-latest-gpu
        build-args: |
          REF=main
        push: true
        tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
    # Report the build result to Slack, even when the build failed.
    - name: Post to Slack
      if: always()
      uses: huggingface/hf-workflows/.github/actions/post-slack@main
      with:
        slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
        title: 🤗 Results of the transformers-quantization-latest-gpu build
        status: ${{ job.status }}
        slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
================================================
FILE: .github/workflows/build-nightly-ci-docker-images.yml
================================================
# Builds of the nightly-CI docker images (PyTorch nightly / "pre" wheels).
name: Build docker images (Nightly CI)

on:
  workflow_call:
    inputs:
      # Name of the single job to run; an empty string runs every job.
      job:
        required: true
        type: string
  push:
    branches:
      - build_nightly_ci_docker_image*

# Shares the same concurrency group as the scheduled image builds.
concurrency:
  group: docker-images-builds
  cancel-in-progress: false

jobs:
  latest-with-torch-nightly-docker:
    name: "Nightly PyTorch"
    if: inputs.job == 'latest-with-torch-nightly-docker' || inputs.job == ''
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        # Bumped v2 -> v3 to match build-docker-images.yml (v2 targets the
        # deprecated node16 Actions runtime).
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        # Bumped v2 -> v3 for the same reason as above.
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        # Bumped v3 -> v5 to match the scheduled build workflow.
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-all-latest-gpu
          build-args: |
            REF=main
            PYTORCH=pre
          push: true
          tags: huggingface/transformers-all-latest-torch-nightly-gpu
  nightly-torch-deepspeed-docker:
    name: "Nightly PyTorch + DeepSpeed"
    if: inputs.job == 'nightly-torch-deepspeed-docker' || inputs.job == ''
    runs-on:
      group: aws-g4dn-2xlarge-cache
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-nightly-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
================================================
FILE: .github/workflows/build-past-ci-docker-images.yml
================================================
# Builds of the "Past CI" images: one image per old framework version, used to
# test transformers against historical torch/TF releases.
# NOTE(review): these jobs still use docker/setup-buildx-action@v2,
# docker/login-action@v2 and docker/build-push-action@v3, which run on the
# deprecated node16 runtime — confirm whether this legacy workflow should be
# bumped to the v3/v3/v5 versions used by build-docker-images.yml.
name: Build docker images (Past CI)

on:
  push:
    branches:
      - build_past_ci_docker_image*

concurrency:
  group: docker-images-builds
  cancel-in-progress: false

jobs:
  past-pytorch-docker:
    name: "Past PyTorch Docker"
    strategy:
      fail-fast: false
      matrix:
        version: ["1.13", "1.12", "1.11"]
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        # Resolve the base docker image for this framework version from
        # utils/past_ci_versions.py in the checked-out repo.
        id: get-base-image
        name: Get Base Image
        env:
          framework_version: ${{ matrix.version }}
        run: |
          echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["pytorch"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
      -
        name: Print Base Image
        run: |
          echo ${{ steps.get-base-image.outputs.base_image }}
      -
        name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v3
        with:
          context: ./docker/transformers-past-gpu
          build-args: |
            REF=main
            BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
            FRAMEWORK=pytorch
            VERSION=${{ matrix.version }}
          push: true
          tags: huggingface/transformers-pytorch-past-${{ matrix.version }}-gpu
  past-tensorflow-docker:
    name: "Past TensorFlow Docker"
    strategy:
      fail-fast: false
      matrix:
        version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
    runs-on:
      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      -
        name: Check out code
        uses: actions/checkout@v4
      -
        # Same base-image lookup as above, for the TensorFlow matrix.
        id: get-base-image
        name: Get Base Image
        env:
          framework_version: ${{ matrix.version }}
        run: |
          echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["tensorflow"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
      -
        name: Print Base Image
        run: |
          echo ${{ steps.get-base-image.outputs.base_image }}
      -
        name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v3
        with:
          context: ./docker/transformers-past-gpu
          build-args: |
            REF=main
            BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
            FRAMEWORK=tensorflow
            VERSION=${{ matrix.version }}
          push: true
          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
================================================
FILE: .github/workflows/build_documentation.yml
================================================
# Builds and publishes the main documentation via the shared doc-builder
# reusable workflow; English in one job, all other languages in a second.
name: Build documentation

on:
  workflow_dispatch:
  push:
    branches:
      - main
      - doc-builder*
      - v*-release
      - use_templates

jobs:
  # English docs (also exports notebooks to the transformers_doc folder).
  build:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    with:
      commit_sha: ${{ github.sha }}
      package: transformers
      notebook_folder: transformers_doc
      languages: en
      custom_container: huggingface/transformers-doc-builder
    secrets:
      token: ${{ secrets.HUGGINGFACE_PUSH }}
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
  # All translated docs, built with the same settings.
  build_other_lang:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    with:
      commit_sha: ${{ github.sha }}
      package: transformers
      notebook_folder: transformers_doc
      languages: ar de es fr hi it ja ko pt zh
      custom_container: huggingface/transformers-doc-builder
    secrets:
      token: ${{ secrets.HUGGINGFACE_PUSH }}
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
================================================
FILE: .github/workflows/build_pr_documentation.yml
================================================
# Builds the documentation preview for pull requests. In the merge queue the
# build is skipped but a stand-in job keeps the required status check green.
name: Build PR Documentation

on:
  pull_request:
  merge_group:

# One build per branch; new pushes cancel the in-flight build.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    if: github.event_name == 'pull_request'
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
      package: transformers
      languages: en
  # Satisfy required check in merge queue without actually building docs
  skip_merge_queue:
    if: github.event_name == 'merge_group'
    runs-on: ubuntu-latest
    steps:
      - run: echo "Skipping doc build in merge queue"
  # Aggregates the two mutually-exclusive jobs above into the single status
  # check that branch protection requires: pass if each job either succeeded
  # or was skipped, fail otherwise.
  doc_build_status_check:
    needs: [build, skip_merge_queue]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - run: |
          if [[ "${{ needs.build.result }}" == "success" || "${{ needs.build.result }}" == "skipped" ]] && \
             [[ "${{ needs.skip_merge_queue.result }}" == "success" || "${{ needs.skip_merge_queue.result }}" == "skipped" ]]; then
            echo "OK"
          else
            exit 1
          fi
================================================
FILE: .github/workflows/check-workflow-permissions.yml
================================================
---
# Manually-triggered advisor that analyzes recent runs of a given workflow and
# suggests a minimal `permissions:` block for it.
name: Check Permissions Advisor

on:
  workflow_dispatch:
    inputs:
      workflow_name:
        description: 'Workflow file name'
        type: string
      # Passed as a string from the dispatch form; converted to a number
      # below with fromJSON because the reusable workflow expects a number.
      run_count:
        description: 'Number of runs to analyze'
        type: string
        default: "10"

jobs:
  advisor:
    uses: huggingface/security-workflows/.github/workflows/permissions-advisor-reusable.yml@main
    permissions:
      actions: read
      contents: read
    with:
      workflow_name: ${{ inputs.workflow_name }}
      run_count: ${{ fromJSON(inputs.run_count) }}
================================================
FILE: .github/workflows/check_failed_tests.yml
================================================
# Reusable workflow: given a CI run's `new_failures.json` report, bisect the
# commit range since the previous run to find the commit responsible for each
# newly-failing test, then process and publish the resulting report.
name: Process failed tests

on:
  workflow_call:
    inputs:
      # Docker image the GPU jobs run inside.
      docker:
        required: true
        type: string
      # CI job name; used to locate the `ci_results_<job>` artifact.
      job:
        required: true
        type: string
      slack_report_channel:
        required: true
        type: string
      # Human-readable event name used in the Slack report title.
      ci_event:
        required: true
        type: string
      # Hub dataset repo the processed report is uploaded to.
      report_repo_id:
        required: true
        type: string
      # Commit to check out; falls back to github.sha when empty.
      commit_sha:
        required: false
        type: string
      # Set when triggered from a PR comment; changes how END_SHA is computed.
      pr_number:
        required: false
        type: string
      # Upper bound on the number of bisection shards.
      max_num_runners:
        required: false
        type: number
        default: 4
    outputs:
      is_check_failures_ok:
        description: "Whether the failure checking infrastructure succeeded"
        value: ${{ jobs.check_new_failures.result != 'failure' && jobs.process_new_failures_with_commit_info.result != 'failure' }}

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
# Compute how many runners to shard the bisection over, based on the number of
# newly-failing tests listed in the `new_failures.json` artifact.
setup_check_new_failures:
  name: "Setup matrix for finding commits"
  runs-on: ubuntu-22.04
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}        # JSON list of shard indices
    n_runners: ${{ steps.set-matrix.outputs.n_runners }}
    process: ${{ steps.set-matrix.outputs.process }}      # "false" when there is nothing to bisect
  steps:
    - uses: actions/download-artifact@v4
      # Artifact may be missing (no new failures); the python step handles that.
      continue-on-error: true
      with:
        name: ci_results_${{ inputs.job }}
        path: ci_results_${{ inputs.job }}
    - name: Set matrix
      id: set-matrix
      env:
        job: ${{ inputs.job }}
        max_num_runners: ${{ inputs.max_num_runners }}
      run: |
        python3 - << 'EOF'
        import json, os, math
        print("Script started")
        job = os.environ["job"]
        filepath = f"ci_results_{job}/new_failures.json"
        print(f"Looking for file: {filepath}")
        print(f"File exists: {os.path.isfile(filepath)}")
        if not os.path.isfile(filepath):
            # No report artifact -> nothing to process; downstream jobs are skipped.
            print("File not found, setting process=false")
            with open(os.environ["GITHUB_OUTPUT"], "a") as f:
                f.write("process=false\n")
            exit(0)
        with open(filepath) as f:
            reports = json.load(f)
        print(f"Loaded reports with {len(reports)} models")
        # NOTE(review): only "single-gpu" failures are counted for sizing the
        # shard matrix — confirm multi-gpu failures are intentionally excluded.
        n_tests = sum(
            len(model_data.get("failures", model_data).get("single-gpu", []))
            for model_data in reports.values()
        )
        print(f"n_tests: {n_tests}")
        max_num_runners = int(os.environ["max_num_runners"])
        TESTS_PER_RUNNER = 10  # target number of tests handled per shard
        n_runners = max(1, min(max_num_runners, math.ceil(n_tests / TESTS_PER_RUNNER)))
        print(f"n_runners: {n_runners}")
        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
            f.write(f"matrix={json.dumps(list(range(n_runners)))}\n")
            f.write(f"n_runners={n_runners}\n")
            f.write("process=true\n")
        print("Done")
        EOF
# For each shard, bisect the commit range [END_SHA, START_SHA] with
# utils/check_bad_commit.py to find the first bad commit per failing test.
check_new_failures:
  name: "Find commits for new failing tests"
  needs: setup_check_new_failures
  if: needs.setup_check_new_failures.outputs.process == 'true'
  strategy:
    matrix:
      run_idx: ${{ fromJson(needs.setup_check_new_failures.outputs.matrix) }}
  runs-on:
    group: aws-g5-4xlarge-cache
  outputs:
    # Re-exported so the downstream processing job can gate on it.
    process: ${{ needs.setup_check_new_failures.outputs.process }}
  container:
    image: ${{ inputs.docker }}
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    - uses: actions/download-artifact@v4
      with:
        name: ci_results_${{ inputs.job }}
        path: /transformers/ci_results_${{ inputs.job }}
    - uses: actions/download-artifact@v4
      with:
        pattern: setup_values*
        path: setup_values
        merge-multiple: true
    # Export the previous workflow run id (if recorded) for the END_SHA lookup.
    - name: Prepare some setup values
      run: |
        if [ -f setup_values/prev_workflow_run_id.txt ]; then
          echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
        else
          echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
        fi
    - name: Update clone
      working-directory: /transformers
      env:
        commit_sha: ${{ inputs.commit_sha || github.sha }}
      run: |
        git fetch origin "$commit_sha" && git checkout "$commit_sha"
    # START_SHA is the newest end of the bisection range (the commit under test).
    - name: Get `START_SHA`
      working-directory: /transformers/utils
      env:
        commit_sha: ${{ inputs.commit_sha || github.sha }}
      run: |
        echo "START_SHA=$commit_sha" >> $GITHUB_ENV
    # This is used if the CI is triggered from a pull request `self-comment-ci.yml` (after security check is verified)
    - name: Extract the base commit on `main` (of the merge commit created by Github) if it is a PR
      id: pr_info
      if: ${{ inputs.pr_number != '' }}
      uses: actions/github-script@v6
      with:
        script: |
          const { data: pr } = await github.rest.pulls.get({
            owner: context.repo.owner,
            repo: context.repo.repo,
            pull_number: ${{ inputs.pr_number }}
          });
          const { data: merge_commit } = await github.rest.repos.getCommit({
            owner: pr.base.repo.owner.login,
            repo: pr.base.repo.name,
            ref: '${{ inputs.commit_sha }}',
          });
          core.setOutput('merge_commit_base_sha', merge_commit.parents[0].sha);
    # Usually, `END_SHA` should be the commit of the last previous workflow run of the **SAME** (scheduled) workflow.
    # (This is why we don't need to specify `workflow_id` which would be fetched automatically in the python script.)
    - name: Get `END_SHA` from previous CI runs of the same workflow
      working-directory: /transformers/utils
      if: ${{ inputs.pr_number == '' }}
      env:
        ACCESS_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
      run: |
        echo "END_SHA=$(TOKEN="$ACCESS_TOKEN" python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
    # However, for workflow runs triggered by `issue_comment` (for pull requests), we want to check against the
    # parent commit (on `main`) of the `merge_commit` (dynamically created by GitHub). In this case, the goal is to
    # see if a reported failing test is actually ONLY failing on the `merge_commit`.
    - name: Set `END_SHA`
      if: ${{ inputs.pr_number != '' }}
      env:
        merge_commit_base_sha: ${{ steps.pr_info.outputs.merge_commit_base_sha }}
      run: |
        echo "END_SHA=$merge_commit_base_sha" >> $GITHUB_ENV
    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
    - name: NVIDIA-SMI
      run: |
        nvidia-smi
    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py
    - name: Install pytest-flakefinder
      run: python3 -m pip install pytest-flakefinder
    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze
    # The bisection itself; writes one per-shard JSON report.
    - name: Check failed tests
      working-directory: /transformers
      env:
        job: ${{ inputs.job }}
        n_runners: ${{ needs.setup_check_new_failures.outputs.n_runners }}
        run_idx: ${{ matrix.run_idx }}
        pr_number: ${{ inputs.pr_number }}
      run: python3 utils/check_bad_commit.py --start_commit "$START_SHA" --end_commit "$END_SHA" --file "ci_results_${job}/new_failures.json" --output_file "new_failures_with_bad_commit_${job}_${run_idx}.json"
    - name: Show results
      working-directory: /transformers
      env:
        job: ${{ inputs.job }}
        run_idx: ${{ matrix.run_idx }}
      run: |
        ls -l "new_failures_with_bad_commit_${job}_${run_idx}.json"
        cat "new_failures_with_bad_commit_${job}_${run_idx}.json"
    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
        path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
# Merge the per-shard bisection reports, upload the processed report, and send
# a Slack message when there is anything to report.
process_new_failures_with_commit_info:
  name: "process bad commit reports"
  needs: check_new_failures
  if: needs.check_new_failures.outputs.process == 'true'
  runs-on:
    group: aws-g5-4xlarge-cache
  container:
    image: ${{ inputs.docker }}
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    - uses: actions/download-artifact@v4
      with:
        name: ci_results_${{ inputs.job }}
        path: /transformers/ci_results_${{ inputs.job }}
    - uses: actions/download-artifact@v4
      with:
        pattern: new_failures_with_bad_commit_${{ inputs.job }}*
        path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
        merge-multiple: true
    - name: Check files
      working-directory: /transformers
      env:
        job: ${{ inputs.job }}
      run: |
        ls -la /transformers
        ls -la "/transformers/new_failures_with_bad_commit_${job}"
    # Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
    # to further reduce the false positive caused by flaky tests, which requires further processing to merge reports.
    - name: Merge files
      shell: bash
      working-directory: /transformers
      env:
        job: ${{ inputs.job }}
      run: |
        python3 - << 'EOF'
        import json
        import glob
        import os
        job = os.environ["job"]
        pattern = f"/transformers/new_failures_with_bad_commit_{job}/new_failures_with_bad_commit_{job}_*.json"
        files = sorted(glob.glob(pattern))
        if not files:
            print(f"No files found matching: {pattern}")
            exit(1)
        print(f"Found {len(files)} file(s) to merge: {files}")
        # Merge shards: model -> gpu_type -> concatenated failure lists.
        merged = {}
        for filepath in files:
            with open(filepath) as f:
                data = json.load(f)
            for model, model_results in data.items():
                if model not in merged:
                    merged[model] = {}
                for gpu_type, failures in model_results.items():
                    if gpu_type not in merged[model]:
                        merged[model][gpu_type] = []
                    merged[model][gpu_type].extend(failures)
            print(f"filepath: {filepath}")
            print(len(data))
        output_path = "/transformers/new_failures_with_bad_commit.json"
        with open(output_path, "w") as f:
            json.dump(merged, f, indent=4)
        print(f"Merged {len(files)} file(s) into {output_path}")
        print(f"n_items: {len(merged)}")
        print(merged)
        EOF
    - name: Update clone
      working-directory: /transformers
      env:
        commit_sha: ${{ inputs.commit_sha || github.sha }}
      run: |
        git fetch origin "$commit_sha" && git checkout "$commit_sha"
    # Runs the report processor and captures its stdout into the multi-line
    # env var REPORT_TEXT for the Slack step below.
    - name: Process report
      shell: bash
      working-directory: /transformers
      env:
        ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        JOB_NAME: ${{ inputs.job }}
        REPORT_REPO_ID: ${{ inputs.report_repo_id }}
      run: |
        {
          echo 'REPORT_TEXT<<EOF'
          python3 utils/process_bad_commit_report.py
          echo EOF
        } >> "$GITHUB_ENV"
    - name: Show results
      working-directory: /transformers
      run: |
        ls -l new_failures_with_bad_commit.json
        cat new_failures_with_bad_commit.json
    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: new_failures_with_bad_commit_${{ inputs.job }}
        path: |
          /transformers/new_failures_with_bad_commit.json
          /transformers/new_failures_with_bad_commit_url.txt
    - name: Prepare Slack report title
      working-directory: /transformers
      env:
        ci_event: ${{ inputs.ci_event }}
        job: ${{ inputs.job }}
      run: |
        pip install slack_sdk
        echo "title=$(python3 -c 'import sys; import os; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = os.environ["ci_event"]; job = os.environ["job"]; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
    # Skipped when the processed report is empty (REPORT_TEXT ends with "{}").
    - name: Send processed report
      if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
      uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
      with:
        # Slack channel id, channel name, or user id to post message.
        # See also: https://api.slack.com/methods/chat.postMessage#channels
        channel-id: '#${{ inputs.slack_report_channel }}'
        # For posting a rich message using Block Kit
        payload: |
          {
            "blocks": [
              {
                "type": "header",
                "text": {
                  "type": "plain_text",
                  "text": "${{ env.title }}"
                }
              },
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "${{ env.REPORT_TEXT }}"
                }
              }
            ]
          }
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
================================================
FILE: .github/workflows/check_tiny_models.yml
================================================
# Daily job that (re)creates the tiny test models, runs the pipeline tests
# against them, and uploads creation/test reports.
name: Check Tiny Models

on:
  push:
    branches:
      - check_tiny_models*
  repository_dispatch:
  schedule:
    - cron: "0 2 * * *"  # daily at 02:00 UTC

env:
  TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}

jobs:
  check_tiny_models:
    name: Check tiny models
    runs-on: ubuntu-22.04
    steps:
      # Single checkout with history depth 2. A second, bare actions/checkout
      # step used to follow this one; it re-cloned the repo and silently
      # discarded the fetch-depth setting, so it was removed.
      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          # Semantic version range syntax or exact version of a Python version
          python-version: '3.8'
          # Optional - x64 or x86 architecture, defaults to x64
          architecture: 'x64'
      - name: Install
        run: |
          sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng cmake
          pip install --upgrade pip
          python -m pip install -U .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video,tf-cpu]
          pip install tensorflow_probability
          python -m pip install -U 'natten<0.15.0'
      - name: Create all tiny models (locally)
        run: |
          python utils/create_dummy_models.py tiny_local_models --all --num_workers 2
      - name: Local tiny model reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: tiny_local_model_creation_reports
          path: tiny_local_models/reports
      # GitHub-hosted runners have 2-core CPUs
      - name: Run pipeline tests against all new (local) tiny models
        run: |
          OMP_NUM_THREADS=1 TRANSFORMERS_TINY_MODEL_PATH=tiny_local_models python -m pytest --max-worker-restart=0 -n 2 --dist=loadfile -s -rA --make-reports=tests_pipelines tests/models -m is_pipeline_test -k "test_pipeline_" | tee tests_output.txt
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          # Renamed: this previously reused `tiny_local_model_creation_reports`,
          # which upload-artifact@v4 rejects as a duplicate artifact name.
          name: tiny_local_model_tests_pipelines_reports
          path: reports/tests_pipelines
      - name: Create + Upload tiny models for new model architecture(s)
        run: |
          python utils/update_tiny_models.py --num_workers 2
      - name: Full report
        run: cat tiny_models/reports/tiny_model_creation_report.json
      - name: Failure report
        run: cat tiny_models/reports/simple_failed_report.txt
      - name: Summary report
        run: cat tiny_models/reports/tiny_model_summary.json
      - name: New tiny model creation reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: tiny_model_creation_reports
          path: tiny_models/reports
================================================
FILE: .github/workflows/circleci-failure-summary-comment.yml
================================================
name: CircleCI Failure Summary Comment
on:
pull_request_target:
types: [opened, synchronize, reopened]
jobs:
comment:
runs-on: ubuntu-22.04
permissions:
pull-requests: write
env:
TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
TARGET_SHA: ${{ github.event.pull_request.head.sha }}
PR_NUMBER: ${{ github.event.pull_request.number }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.13"
- name: Install dependencies
run: python -m pip install huggingface_hub
- name: Wait for CircleCI check suite completion
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
GITHUB_REPOSITORY: ${{ github.repository }}
run: |
# Exit on error, undefined variables, or pipe failures
set -euo pipefail
echo "Waiting for CircleCI check suite to complete..."
# Timeout after 30 minutes (1800 seconds)
end=$((SECONDS + 1800))
while [ $SECONDS -lt $end ]; do
# Query GitHub API for check suites associated with this commit
# || echo "" allows retry on transient API failures instead of exiting
suite_json=$(gh api "repos/${GITHUB_REPOSITORY}/commits/${COMMIT_SHA}/check-suites" \
--jq '.check_suites[] | select(.app.slug == "circleci-checks")' || echo "")
if [ -z "$suite_json" ]; then
echo "CircleCI check suite not found yet, retrying..."
else
status=$(echo "$suite_json" | jq -r '.status')
conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty')
echo "CircleCI status: $status, conclusion: $conclusion"
# Check suite is done when status is "completed" AND conclusion is set
if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then
echo "Check suite completed successfully"
exit 0
fi
fi
# Poll every 20 seconds
sleep 20
done
echo "ERROR: Timed out waiting for CircleCI check suite"
exit 1
- name: Get CircleCI run's artifacts and upload them to Hub
id: circleci
env:
COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
REPO: ${{ github.repository }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Step 1: Get CircleCI check suite ID
echo "Getting check suites for commit ${COMMIT_SHA}..."
check_suites=$(curl -s -H "Authorization: token ${GITHUB_TOKEN}" \
"https://api.github.com/repos/${REPO}/commits/${COMMIT_SHA}/check-suites")
circleci_suite_id=$(echo "$check_suites" | jq -r '.check_suites[] | select(.app.slug == "circleci-checks") | .id' | head -n 1)
echo "CircleCI check suite ID: ${circleci_suite_id}"
# Step 2: Get check runs from the CircleCI suite
echo "Getting check runs for suite ${circleci_suite_id}..."
check_runs=$(curl -s -H "Authorization: token ${GITHUB_TOKEN}" \
"https://api.github.com/repos/${REPO}/check-suites/${circleci_suite_id}/check-runs")
# Step 3: Extract workflow ID from the "run_tests" check run
workflow_id=$(echo "$check_runs" | jq -r '.check_runs[] | select(.name == "run_tests") | .details_url' | grep -oP 'workflows/\K[a-f0-9-]+')
echo "CircleCI Workflow ID: ${workflow_id}"
# Step 4: Get all jobs in the workflow
echo "Getting jobs for workflow ${workflow_id}..."
jobs=$(curl -s \
"https://circleci.com/api/v2/workflow/${workflow_id}/job")
# Step 5: Extract collection_job details
collection_job_number=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .job_number')
collection_job_id=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .id')
echo "CircleCI Collection job number: ${collection_job_number}"
echo "CircleCI Collection job ID: ${collection_job_id}"
# Step 6: Get artifacts list
echo "Getting artifacts for job ${collection_job_number}..."
artifacts=$(curl -s \
"https://circleci.com/api/v2/project/gh/${REPO}/${collection_job_number}/artifacts")
echo "$artifacts" | jq '.'
# Step 7: Download failure_summary.json specifically
failure_summary_url=$(echo "$artifacts" | jq -r '.items[] | select(.path == "outputs/failure_summary.json") | .url')
if [ -z "$failure_summary_url" ]; then
echo "failure_summary.json not found in artifacts - PR may not have latest main merged. Skipping."
echo "artifact_found=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Downloading failure_summary.json from: ${failure_summary_url}"
mkdir -p outputs
curl -s -L "${failure_summary_url}" -o outputs/failure_summary.json
ls -la outputs
echo "Downloaded failure_summary.json successfully"
# Verify the file was downloaded
if [ ! -f outputs/failure_summary.json ]; then
echo "Failed to download failure_summary.json - skipping."
echo "artifact_found=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "File size: $(wc -c < outputs/failure_summary.json) bytes"
# Export variables for next steps
echo "artifact_found=true" >> $GITHUB_OUTPUT
echo "workflow_id=${workflow_id}" >> $GITHUB_OUTPUT
echo "collection_job_number=${collection_job_number}" >> $GITHUB_OUTPUT
      # Push the downloaded CircleCI failure summary to a Hub dataset so the
      # visualization Space can read it. Only runs when the previous step found
      # the artifact (steps.circleci.outputs.artifact_found).
      - name: Upload summaries to Hub
        if: steps.circleci.outputs.artifact_found == 'true'
        env:
          HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }}
          CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
          PR_NUMBER: ${{ github.event.pull_request.number }}
          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          python << 'EOF'
          import os
          from pathlib import Path
          from huggingface_hub import HfApi
          # Dataset layout: pr-<number>/sha-<first 12 chars of head sha>/
          pr_number = os.environ["PR_NUMBER"]
          commit_short = os.environ["COMMIT_SHA"][:12]
          folder_path = f"pr-{pr_number}/sha-{commit_short}"
          # Move the file downloaded to outputs/ into that layout locally.
          Path(folder_path).mkdir(parents=True, exist_ok=True)
          Path("outputs/failure_summary.json").rename(f"{folder_path}/failure_summary.json")
          # Upload the folder to the results dataset under the same path.
          dataset_id = os.environ["CIRCLECI_RESULTS_DATASET_ID"]
          api = HfApi(token=os.environ["HF_TOKEN"])
          api.upload_folder(
              commit_message=f"Update CircleCI artifacts for PR {pr_number} ({commit_short})",
              folder_path=folder_path,
              path_in_repo=folder_path,
              repo_id=dataset_id,
              repo_type="dataset",
          )
          print(f"Uploaded {folder_path} to {dataset_id}")
          EOF
- name: Delete existing CircleCI summary comments
if: steps.circleci.outputs.artifact_found == 'true'
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
uses: actions/github-script@v7
with:
script: |
const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);
// Get all comments on the PR
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: PR_NUMBER
});
// Find existing bot comments that start with "View the CircleCI Test Summary for this PR:"
const existingComments = comments.filter(comment =>
comment.user.login === 'github-actions[bot]' &&
comment.body.startsWith('View the CircleCI Test Summary for this PR:')
);
// Delete all matching comments
for (const comment of existingComments) {
console.log(`Deleting comment #${comment.id}`);
await github.rest.issues.deleteComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: comment.id
});
}
console.log(`Deleted ${existingComments.length} old CircleCI summary comment(s)`);
- name: Post comment with helper link
if: steps.circleci.outputs.artifact_found == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PR_SHA: ${{ github.event.pull_request.head.sha }}
run: |
COMMIT_SHORT="${PR_SHA:0:12}"
SUMMARY_FILE="pr-${PR_NUMBER}/sha-${COMMIT_SHORT}/failure_summary.json"
if [ ! -f "$SUMMARY_FILE" ]; then
echo "failure_summary.json missing, skipping comment."
exit 0
fi
failures=$(jq '.failures | length' "$SUMMARY_FILE")
if [ "$failures" -eq 0 ]; then
echo "No failures detected, skipping PR comment."
exit 0
fi
# Build Space URL with encoded parameters
repo_enc=$(jq -rn --arg v "$GITHUB_REPOSITORY" '$v|@uri')
pr_enc=$(jq -rn --arg v "$PR_NUMBER" '$v|@uri')
sha_short="${PR_SHA:0:6}"
sha_enc=$(jq -rn --arg v "$sha_short" '$v|@uri')
SPACE_URL="https://huggingface.co/spaces/transformers-community/circle-ci-viz?pr=${pr_enc}&sha=${sha_enc}"
# Post comment (using printf for proper newlines)
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
-f body="$(printf "View the CircleCI Test Summary for this PR:\n\n%s" "$SPACE_URL")"
================================================
FILE: .github/workflows/codeql.yml
================================================
---
# Runs CodeQL static analysis over this repository's GitHub Actions workflows
# by delegating to Hugging Face's shared reusable security workflow.
name: CodeQL Security Analysis
on:
  push:
    branches: ["main", "fix_security_issue_*"]
  # pull_request:
  #   branches: ["main"]
  workflow_dispatch:
jobs:
  codeql:
    name: CodeQL Analysis
    uses: huggingface/security-workflows/.github/workflows/codeql-reusable.yml@main
    # Minimal permissions required by the reusable CodeQL workflow.
    permissions:
      security-events: write
      packages: read
      actions: read
      contents: read
    with:
      # Only workflow files ("actions" language) are scanned here.
      languages: '["actions"]'
      queries: 'security-extended,security-and-quality'
      runner: 'ubuntu-latest'
================================================
FILE: .github/workflows/collated-reports.yml
================================================
# Reusable workflow: downloads the calling workflow's artifacts and uploads a
# collated report via utils/collated_reports.py.
name: CI collated reports
on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      gpu_name:
        description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
        required: true
        type: string
jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
    if: always()
    steps:
      - uses: actions/checkout@v4
      # Pull down every artifact produced by the calling workflow's jobs.
      - uses: actions/download-artifact@v4
      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
          # Workflow inputs are passed through the environment and quoted below
          # instead of interpolating `${{ ... }}` directly into the script:
          # direct interpolation word-splits on spaces (e.g. a gpu_name
          # containing a space) and is the classic script-injection pattern.
          MACHINE_TYPE: ${{ inputs.machine_type }}
          JOB: ${{ inputs.job }}
          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
          GPU_NAME: ${{ inputs.gpu_name }}
        run: |
          pip install huggingface_hub
          python3 utils/collated_reports.py \
            --path . \
            --machine-type "$MACHINE_TYPE" \
            --commit-hash "$CI_SHA" \
            --job "$JOB" \
            --report-repo-id "$REPORT_REPO_ID" \
            --gpu-name "$GPU_NAME"
================================================
FILE: .github/workflows/doctest_job.yml
================================================
# Reusable workflow: runs one slice of the documentation doctests on a GPU
# runner. Called by doctests.yml with a mapping of split keys to file lists.
name: Doctest job
on:
  workflow_call:
    inputs:
      job_splits:
        required: true
        type: string
      split_keys:
        required: true
        type: string
env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  RUN_SLOW: yes
  OMP_NUM_THREADS: 16
  MKL_NUM_THREADS: 16
  TF_FORCE_GPU_ALLOW_GROWTH: true
jobs:
  run_doctests:
    name: " "
    strategy:
      max-parallel: 8  # 8 jobs at a time
      fail-fast: false
      matrix:
        split_keys: ${{ fromJson(inputs.split_keys) }}
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[flax]
      - name: GPU visibility
        working-directory: /transformers
        run: |
          python3 utils/print_env.py
      - name: Show installed libraries and their versions
        run: pip freeze
      - name: Get doctest files
        working-directory: /transformers
        run: |
          echo "${{ toJson(fromJson(inputs.job_splits)[matrix.split_keys]) }}" > doc_tests.txt
          cat doc_tests.txt
      # Sanitize the matrix key (e.g. `models/bert`) into a name usable in
      # report/artifact names (`models_bert`): `/` is replaced with `_`.
      # The raw value is passed through `env` (not interpolated into the
      # script) to avoid shell-injection/word-splitting on matrix values.
      - name: Set `split_keys`
        shell: bash
        env:
          raw_split_keys: ${{ matrix.split_keys }}
        run: |
          echo "$raw_split_keys"
          split_keys=${raw_split_keys//'/'/'_'}
          # Print the sanitized value. (The previous version echoed the literal
          # string "split_keys" here instead of the variable's value.)
          echo "$split_keys"
          echo "split_keys=$split_keys" >> $GITHUB_ENV
      - name: Run doctests
        working-directory: /transformers
        run: |
          cat doc_tests.txt
          python3 -m pytest -v --make-reports doc_tests_gpu_${{ env.split_keys }} --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/doc_tests_gpu_${{ env.split_keys }}/failures_short.txt
      - name: "Test suite reports artifacts: doc_tests_gpu_test_reports_${{ env.split_keys }}"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: doc_tests_gpu_test_reports_${{ env.split_keys }}
          path: /transformers/reports/doc_tests_gpu_${{ env.split_keys }}
================================================
FILE: .github/workflows/doctests.yml
================================================
# Nightly (and on-demand) runner for the documentation doctests: computes the
# split layout, dispatches slices to doctest_job.yml, then reports to Slack.
name: Doctests
on:
  push:
    branches:
      - run_doctest*
  repository_dispatch:
  schedule:
    - cron: "17 2 * * *"
env:
  # Number of slices the doctest files are partitioned into; passed as
  # --num_splits to utils/split_doctest_jobs.py below.
  NUM_SLICES: 3
jobs:
  # Computes the job_splits mapping and the split_keys matrix for the slices.
  setup:
    name: Setup
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      job_splits: ${{ steps.set-matrix.outputs.job_splits }}
      split_keys: ${{ steps.set-matrix.outputs.split_keys }}
    steps:
      - name: Update clone
        working-directory: /transformers
        run: |
          git fetch && git checkout ${{ github.sha }}
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
      # Print the split layout to the log before exporting it as step outputs.
      - name: Check values for matrix
        working-directory: /transformers
        run: |
          python3 utils/split_doctest_jobs.py
          python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }}
      - id: set-matrix
        working-directory: /transformers
        name: Set values for matrix
        run: |
          echo "job_splits=$(python3 utils/split_doctest_jobs.py)" >> $GITHUB_OUTPUT
          echo "split_keys=$(python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
  # Dispatches one reusable doctest_job.yml run per slice.
  call_doctest_job:
    name: "Call doctest jobs"
    needs: setup
    strategy:
      max-parallel: 1  # 1 split at a time (in `doctest_job.yml`, we set `8` to run 8 jobs at the same time)
      fail-fast: false
      matrix:
        split_keys: ${{ fromJson(needs.setup.outputs.split_keys) }}
    uses: ./.github/workflows/doctest_job.yml
    with:
      job_splits: ${{ needs.setup.outputs.job_splits }}
      split_keys: ${{ toJson(matrix.split_keys) }}
    secrets: inherit
  # Collects the uploaded reports and posts the nightly summary to Slack.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs: [call_doctest_job]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          # Use `CI_SLACK_CHANNEL_DUMMY_TESTS` when doing experimentation
          SLACK_REPORT_CHANNEL: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
        run: |
          pip install slack_sdk
          python utils/notification_service_doc_tests.py
      - name: "Upload results"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: doc_test_results
          path: doc_test_results
================================================
FILE: .github/workflows/extras-smoke-test.yml
================================================
name: Extras Smoke Test
on:
schedule:
# Run every night at 3 AM UTC
- cron: "0 3 * * *"
env:
SLACK_CHANNEL_ID: '#transformers-gh-ci-central'
jobs:
  # Reads the supported Python versions out of the package metadata so the
  # test matrix below always tracks setup.py.
  get-python-versions:
    name: Get supported Python versions
    runs-on: ubuntu-latest
    outputs:
      versions: ${{ steps.extract-versions.outputs.versions }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install setuptools
        run: |
          python -m pip install --upgrade pip
          pip install setuptools
      # utils/extract_metadata.py prints a JSON list consumed via fromJson()
      # by the test-extras matrix.
      - name: Extract Python versions from setup.py
        id: extract-versions
        run: |
          VERSIONS=$(python utils/extract_metadata.py python-versions)
          echo "Supported Python versions: $VERSIONS"
          echo "versions=$VERSIONS" >> $GITHUB_OUTPUT
  # For each supported Python version, try `pip install -e .[extra]` for every
  # declared extra; each failing extra is recorded as a small JSON report that
  # is uploaded as an artifact for the notify-failures job to aggregate.
  test-extras:
    name: Test extras on Python ${{ matrix.python-version }}
    needs: get-python-versions
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ${{ fromJson(needs.get-python-versions.outputs.versions) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          allow-prereleases: true
      - name: Install base dependencies
        run: |
          python -m pip install --upgrade pip
          pip install setuptools
      - name: Extract extras for this Python version
        id: get-extras
        run: |
          python utils/extract_metadata.py extras > extras_list.txt
          echo "Found $(wc -l < extras_list.txt) extras for Python ${{ matrix.python-version }}"
          cat extras_list.txt
      - name: Install base package
        run: |
          echo "Installing base package..."
          pip install -e .
      - name: Test all extras
        id: test-extras
        run: |
          mkdir -p failure_reports
          failed=0
          # Install each extra in turn; a failure writes a JSON report consumed
          # later by utils/aggregate_failure_reports.py.
          while IFS= read -r extra; do
            echo "=== Testing extra: $extra on Python ${{ matrix.python-version }} ==="
            if ! pip install -e .[$extra]; then
              echo "❌ Failed to install extra: $extra"
              cat > failure_reports/failure-${{ matrix.python-version }}-${extra}.json << EOF
          {
            "python_version": "${{ matrix.python-version }}",
            "extra": "${extra}",
            "job_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          }
          EOF
              failed=$((failed + 1))
            else
              echo "✓ Successfully installed extra: $extra"
            fi
          done < extras_list.txt
          # Fail the step (and thus the job) if any extra did not install.
          if [ $failed -gt 0 ]; then
            echo "❌ $failed extra(s) failed to install"
            exit 1
          fi
      - name: Verify installation
        run: |
          python -c "import transformers; print(f'Transformers version: {transformers.__version__}')"
          python -c "from transformers import pipeline; print('Successfully imported pipeline')"
      - name: Upload failure report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: failure-report-${{ matrix.python-version }}
          path: failure_reports/
          retention-days: 1
          if-no-files-found: ignore
  # Exposes whether the Slack bot token secret is configured as a job output,
  # so notify-failures below only runs when it can actually post to Slack.
  precheck-slack:
    name: Check Slack token availability
    runs-on: ubuntu-latest
    outputs:
      has_slack_token: ${{ steps.chk.outputs.has_token }}
    steps:
      - id: chk
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
        run: |
          if [ -n "$SLACK_BOT_TOKEN" ]; then
            echo "has_token=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_token=false" >> "$GITHUB_OUTPUT"
          fi
  # Aggregates the per-version failure reports and posts a summary to Slack.
  # Runs whenever test-extras did not fully succeed and a Slack token is set.
  notify-failures:
    name: Notify failures to Slack
    needs: [test-extras, precheck-slack]
    runs-on: ubuntu-latest
    if: always() && needs.precheck-slack.outputs.has_slack_token == 'true' && needs.test-extras.result != 'success'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Download all failure reports
        uses: actions/download-artifact@v4
        with:
          pattern: failure-report-*
          path: failure_reports/
          merge-multiple: true
        continue-on-error: true
      - name: Aggregate failures
        run: |
          python utils/aggregate_failure_reports.py \
            --input-dir failure_reports \
            --output all_failures.json
      # NOTE(review): this step presumably exports SLACK_MESSAGE / SLACK_TITLE /
      # SLACK_WORKFLOW_URL via $GITHUB_ENV for the step below — confirm in
      # utils/format_extras_slack_message.py.
      - name: Format Slack message
        env:
          FAILURES_FILE: all_failures.json
          WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          python utils/format_extras_slack_message.py \
            --failures "$FAILURES_FILE" \
            --workflow-url "$WORKFLOW_URL"
      - name: Send Slack notification
        if: env.SLACK_MESSAGE != ''
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          channel-id: ${{ env.SLACK_CHANNEL_ID }}
          # NOTE(review): env.SLACK_TITLE / env.SLACK_MESSAGE are interpolated
          # straight into this JSON payload; values containing quotes or raw
          # newlines would break it — presumably the formatter escapes them;
          # verify in utils/format_extras_slack_message.py.
          payload: |
            {
              "blocks": [
                {
                  "type": "header",
                  "text": {
                    "type": "plain_text",
                    "text": "${{ env.SLACK_TITLE }}"
                  }
                },
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "${{ env.SLACK_MESSAGE }}"
                  }
                },
                {
                  "type": "divider"
                },
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "<${{ env.SLACK_WORKFLOW_URL }}|View workflow run>"
                  }
                }
              ]
            }
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
================================================
FILE: .github/workflows/get-pr-info.yml
================================================
name: Get PR commit SHA
on:
workflow_call:
inputs:
pr_number:
required: true
type: string
outputs:
PR_HEAD_REPO_FULL_NAME:
description: "The full name of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
PR_BASE_REPO_FULL_NAME:
description: "The full name of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
PR_HEAD_REPO_OWNER:
description: "The owner of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
PR_BASE_REPO_OWNER:
description: "The owner of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
PR_HEAD_REPO_NAME:
description: "The name of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
PR_BASE_REPO_NAME:
description: "The name of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
PR_HEAD_REF:
description: "The branch name of the pull request in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
PR_BASE_REF:
description: "The branch name in the base repository (to merge into)"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
PR_HEAD_SHA:
description: "The head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
PR_BASE_SHA:
description: "The head sha of the target branch in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
PR_MERGE_COMMIT_SHA:
description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
PR_MERGE_COMMIT_BASE_SHA:
description: "The sha of the parent commit of the merge commit on the target branch in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_BASE_SHA }}
PR_HEAD_COMMIT_DATE:
description: "The date of the head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
PR_MERGE_COMMIT_DATE:
description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
PR_HEAD_COMMIT_TIMESTAMP:
description: "The timestamp of the head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
PR_MERGE_COMMIT_TIMESTAMP:
description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
PR:
description: "The PR"
value: ${{ jobs.get-pr-info.outputs.PR }}
PR_FILES:
description: "The files touched in the PR"
value: ${{ jobs.get-pr-info.outputs.PR_FILES }}
jobs:
  # Fetches PR metadata (repos, refs, shas, commit dates, files) and re-exports
  # it as the workflow_call outputs declared above. Skipped when no PR number
  # is supplied.
  get-pr-info:
    runs-on: ubuntu-22.04
    name: Get PR commit SHA better
    outputs:
      PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
      PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
      PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
      PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
      PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
      PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
      PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
      PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
      PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
      PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
      PR_MERGE_COMMIT_BASE_SHA: ${{ steps.pr_info.outputs.merge_commit_base_sha }}
      PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
      PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
      PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
      PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
      PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
      PR: ${{ steps.pr_info.outputs.pr }}
      PR_FILES: ${{ steps.pr_info.outputs.files }}
    if: ${{ inputs.pr_number != '' }}
    steps:
      - name: Extract PR details
        id: pr_info
        # Bumped from v6: v7 runs on Node 20 (v6's Node 16 runtime is
        # deprecated) and matches the github-script version used by the other
        # workflows in this repository.
        uses: actions/github-script@v7
        env:
          # The PR number is read from the environment rather than interpolating
          # `${{ inputs.pr_number }}` into the script source (script-injection
          # hardening; same pattern as the other github-script steps).
          PR_NUMBER: ${{ inputs.pr_number }}
        with:
          script: |
            const pull_number = Number(process.env.PR_NUMBER);
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: pull_number
            });
            // Head commit of the PR branch (in the head repo, which may be a fork).
            const { data: head_commit } = await github.rest.repos.getCommit({
              owner: pr.head.repo.owner.login,
              repo: pr.head.repo.name,
              ref: pr.head.ref
            });
            // GitHub-created merge commit of the PR in the base repository.
            const { data: merge_commit } = await github.rest.repos.getCommit({
              owner: pr.base.repo.owner.login,
              repo: pr.base.repo.name,
              ref: pr.merge_commit_sha,
            });
            const { data: files } = await github.rest.pulls.listFiles({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: pull_number
            });
            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
            core.setOutput('base_repo_full_name', pr.base.repo.full_name);
            core.setOutput('head_repo_owner', pr.head.repo.owner.login);
            core.setOutput('base_repo_owner', pr.base.repo.owner.login);
            core.setOutput('head_repo_name', pr.head.repo.name);
            core.setOutput('base_repo_name', pr.base.repo.name);
            core.setOutput('head_ref', pr.head.ref);
            core.setOutput('base_ref', pr.base.ref);
            core.setOutput('head_sha', pr.head.sha);
            core.setOutput('base_sha', pr.base.sha);
            core.setOutput('merge_commit_base_sha', merge_commit.parents[0].sha);
            core.setOutput('merge_commit_sha', pr.merge_commit_sha);
            core.setOutput('pr', pr);
            core.setOutput('head_commit_date', head_commit.commit.committer.date);
            core.setOutput('merge_commit_date', merge_commit.commit.committer.date);
            core.setOutput('files', files);
            console.log('PR head commit:', {
              head_commit: head_commit,
              commit: head_commit.commit,
              date: head_commit.commit.committer.date
            });
            console.log('PR merge commit:', {
              merge_commit: merge_commit,
              commit: merge_commit.commit,
              date: merge_commit.commit.committer.date
            });
            console.log('PR Info:', {
              pr_info: pr
            });
      - name: Convert dates to timestamps
        id: get_timestamps
        env:
          head_commit_date: ${{ steps.pr_info.outputs.head_commit_date }}
          merge_commit_date: ${{ steps.pr_info.outputs.merge_commit_date }}
        run: |
          echo "$head_commit_date"
          echo "$merge_commit_date"
          # Convert the ISO-8601 commit dates to Unix epoch seconds.
          head_commit_timestamp=$(date -d "$head_commit_date" +%s)
          merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
          echo "$head_commit_timestamp"
          echo "$merge_commit_timestamp"
          echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
          echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
================================================
FILE: .github/workflows/get-pr-number.yml
================================================
name: Get PR number
on:
workflow_call:
outputs:
PR_NUMBER:
description: "The extracted PR number"
value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}
jobs:
  get-pr-number:
    runs-on: ubuntu-22.04
    name: Get PR number
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
      # Resolve the PR number from whichever event triggered the caller:
      # an issue comment on a PR, a pull_request event, or a generic event
      # carrying `github.event.number`. Left empty when no PR is involved.
      - name: Get PR number
        shell: bash
        env:
          issue_number: ${{ github.event.issue.number }}
          is_pull_request_issue: ${{ github.event.issue.pull_request != null }}
          pr_number: ${{ github.event.pull_request.number }}
          is_pull_request: ${{ github.event.pull_request != null }}
          event_number: ${{ github.event.number }}
        run: |
          if [[ "$issue_number" != "" && "$is_pull_request_issue" == "true" ]]; then
            echo "PR_NUMBER=$issue_number" >> $GITHUB_ENV
          elif [[ "$pr_number" != "" ]]; then
            echo "PR_NUMBER=$pr_number" >> $GITHUB_ENV
          elif [[ "$is_pull_request" == "true" ]]; then
            echo "PR_NUMBER=$event_number" >> $GITHUB_ENV
          else
            echo "PR_NUMBER=" >> $GITHUB_ENV
          fi
      - name: Check PR number
        shell: bash
        run: |
          echo "$PR_NUMBER"
      # Re-export the env value as a step output so it can surface as the
      # PR_NUMBER job (and workflow_call) output above.
      - name: Set PR number
        id: set_pr_number
        run: echo "PR_NUMBER=$PR_NUMBER" >> "$GITHUB_OUTPUT"
================================================
FILE: .github/workflows/model_jobs.yml
================================================
name: model jobs
on:
workflow_call:
inputs:
folder_slices:
required: true
type: string
machine_type:
required: true
type: string
slice_id:
required: true
type: number
docker:
required: true
type: string
commit_sha:
required: false
type: string
report_name_prefix:
required: false
default: run_models_gpu
type: string
runner_type:
required: false
type: string
report_repo_id:
required: false
type: string
pytest_marker:
required: false
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
jobs:
  # Runs one slice of the model test suite on GPU runners. The matrix folders
  # come from the caller-provided `folder_slices` JSON indexed by `slice_id`.
  run_models_gpu:
    name: " "
    strategy:
      max-parallel: 8
      fail-fast: false
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on:
      group: '${{ inputs.machine_type }}'
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      machine_type: ${{ steps.set_machine_type.outputs.machine_type }}
    steps:
      # Matrix/input values are passed through `env` and quoted in the scripts
      # below, rather than interpolated directly into `run`.
      - name: Echo input and matrix info
        shell: bash
        env:
          folder_slices: ${{ inputs.folder_slices }}
          matrix_folders: ${{ matrix.folders }}
          slice_data: ${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}
        run: |
          echo "$folder_slices"
          echo "$matrix_folders"
          echo "$slice_data"
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        env:
          matrix_folders_raw: ${{ matrix.folders }}
        run: |
          echo "$matrix_folders_raw"
          matrix_folders="${matrix_folders_raw/'models/'/'models_'}"
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
      - name: Update clone
        working-directory: /transformers
        env:
          commit_sha: ${{ inputs.commit_sha || github.sha }}
        run: |
          git fetch origin "$commit_sha" && git checkout "$commit_sha"
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install -U datasets
      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
      # Map the AWS runner group names to the short names used in report and
      # artifact names; other machine types pass through unchanged.
      - name: Set `machine_type` for report and artifact names
        id: set_machine_type
        working-directory: /transformers
        shell: bash
        env:
          input_machine_type: ${{ inputs.machine_type }}
        run: |
          echo "$input_machine_type"
          if [ "$input_machine_type" = "aws-g5-4xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "$input_machine_type" = "aws-g5-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type="$input_machine_type"
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV
          echo "machine_type=$machine_type" >> $GITHUB_OUTPUT
      # Guarantees the report directory exists so the later `cat`/upload steps
      # have something to work with even if pytest crashes before writing reports.
      - name: Create report directory if it doesn't exist
        shell: bash
        env:
          report_name_prefix: ${{ inputs.report_name_prefix }}
        run: |
          mkdir -p "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
          echo "dummy" > "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/dummy.txt"
          ls -la "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
      - name: Run all tests on GPU
        working-directory: /transformers
        env:
          report_name_prefix: ${{ inputs.report_name_prefix }}
          pytest_marker: ${{ inputs.pytest_marker }}
          model: ${{ matrix.folders }}
        run: |
          # Map short names to actual test paths for trainer/distributed tests
          test_path="tests/${model}"
          if [ "$model" = "fsdp" ]; then
            test_path="tests/trainer/distributed/test_trainer_distributed_fsdp.py"
          elif [ "$model" = "ddp" ]; then
            test_path="tests/trainer/distributed/test_trainer_distributed_ddp.py"
          elif [ "$model" = "trainer" ] && [ "$report_name_prefix" = "run_trainer_and_fsdp_gpu" ]; then
            test_path="tests/trainer --ignore=tests/trainer/distributed"
          fi
          # `script` tees the pytest run into test_outputs.txt; the real pytest
          # exit code is recovered from that file below.
          script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports python3 -m pytest -rsfE -v -m '${pytest_marker}' --make-reports=${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports ${test_path}" test_outputs.txt
          ls -la
          # Extract the exit code from the output file
          EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
          exit ${EXIT_CODE:-1}
      - name: Failure short reports
        if: ${{ failure() }}
        # This step is only to show information on Github Actions log.
        # Always mark this step as successful, even if the report directory or the file `failures_short.txt` in it doesn't exist
        continue-on-error: true
        env:
          report_name_prefix: ${{ inputs.report_name_prefix }}
        run: cat "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/failures_short.txt"
      - name: Captured information
        if: ${{ failure() }}
        continue-on-error: true
        env:
          report_name_prefix: ${{ inputs.report_name_prefix }}
        run: |
          cat "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/captured_info.txt"
      - name: Copy test_outputs.txt
        if: ${{ always() }}
        continue-on-error: true
        env:
          report_name_prefix: ${{ inputs.report_name_prefix }}
        run: |
          cp /transformers/test_outputs.txt "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
  # Hand the run's reports over to the reusable collated-reports workflow.
  # Only runs when the caller supplied a runner type (used as the GPU name).
  collated_reports:
    name: Collated Reports
    if: ${{ always() && inputs.runner_type != '' }}
    needs: run_models_gpu
    uses: huggingface/transformers/.github/workflows/collated-reports.yml@main
    with:
      job: run_models_gpu
      report_repo_id: ${{ inputs.report_repo_id }}
      gpu_name: ${{ inputs.runner_type }}
      machine_type: ${{ needs.run_models_gpu.outputs.machine_type }}
    secrets: inherit
================================================
FILE: .github/workflows/model_jobs_intel_gaudi.yml
================================================
name: model jobs
on:
workflow_call:
inputs:
folder_slices:
required: true
type: string
slice_id:
required: true
type: number
runner:
required: true
type: string
machine_type:
required: true
type: string
report_name_prefix:
required: false
default: run_models_gpu
type: string
env:
RUN_SLOW: yes
PT_HPU_LAZY_MODE: 0
TRANSFORMERS_IS_CI: yes
PT_ENABLE_INT64_SUPPORT: 1
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
HF_HOME: /mnt/cache/.cache/huggingface
jobs:
run_models_gpu:
name: " "
strategy:
max-parallel: 8
fail-fast: false
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on:
group: ${{ inputs.runner }}
container:
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana
-v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
--env OMPI_MCA_btl_vader_single_copy_mechanism=none
--env HABANA_VISIBLE_DEVICES
--env HABANA_VISIBLE_MODULES
--cap-add=sys_nice
--shm-size=64G
    # Test-execution steps for one matrix folder slice on a Gaudi (HPU) runner.
    steps:
      # Debug visibility: print the raw folder-slices input and the matrix
      # entry selected for this job instance.
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ inputs.folder_slices }}"
          echo "${{ matrix.folders }}"
          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"

      # Derive an artifact-safe folder name: replace the "models/" path prefix
      # with "models_" (slashes are not allowed in artifact names) and export
      # it as matrix_folders for the upload step below.
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Fetch full history (depth 0), not a shallow clone.
          fetch-depth: 0

      - name: Install dependencies
        run: |
          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn

      # hl-smi is Habana's device-status tool (analogous to nvidia-smi); also
      # echo which HPU devices/modules are visible inside the container.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"

      - name: Environment
        run: python3 utils/print_env.py

      - name: Show installed libraries and their versions
        run: pip freeze

      # Map the Gaudi machine-type input onto the single-gpu/multi-gpu names
      # used in report/artifact names; any other value passes through as-is.
      - name: Set `machine_type` for report and artifact names
        shell: bash
        run: |
          if [ "${{ inputs.machine_type }}" = "1gaudi" ]; then
            machine_type=single-gpu
          elif [ "${{ inputs.machine_type }}" = "2gaudi" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ inputs.machine_type }}
          fi
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Run all tests on Gaudi
        run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      # On failure, dump the short failure summary that --make-reports wrote.
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt

      # Creates the report directory and writes a placeholder hello.txt, which
      # guarantees the upload step below always has something to collect.
      # NOTE(review): looks like a leftover debugging/smoke step — confirm it
      # is still intentional.
      - name: Run test
        shell: bash
        run: |
          mkdir -p reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
          echo "hello" > reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"

      # Upload reports even if earlier steps failed. The artifact NAME uses
      # env.matrix_folders (slash-free, set above); the PATH uses the raw
      # matrix.folders value, which is the actual on-disk directory.
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
          path: reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
================================================
FILE: .github/workflows/new_model_pr_merged_notification.yml
================================================
# Used to notify core maintainers about new model PR being merged
#
# Fires on pushes to main that touch a model's modeling_* file, detects a
# newly added model via utils.pr_slow_ci_models.get_new_model, and posts a
# Slack notification when one is found.
name: New model PR merged notification

on:
  push:
    branches:
      - main
    paths:
      # Only trigger when a modeling file under src/transformers/models/ changes.
      - 'src/transformers/models/*/modeling_*'

jobs:
  notify_new_model:
    name: Notify new model
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed so get_new_model can diff against the
          # previous commit.
          fetch-depth: 0

      # Detect whether this push added a new model: get_new_model prints the
      # model name (empty when none). The last stdout line is exported as
      # NEW_MODEL and the head commit SHA as COMMIT_SHA, for later steps.
      - name: Check new model
        shell: bash
        run: |
          python -m pip install gitpython
          python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt
          echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV
          echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV

      # The remaining steps run only when a new model was detected
      # (NEW_MODEL is non-empty).
      - name: print commit sha
        if: ${{ env.NEW_MODEL != ''}}
        shell: bash
        run: |
          echo "$COMMIT_SHA"

      - name: print new model
        if: ${{ env.NEW_MODEL != ''}}
        shell: bash
        run: |
          echo "$NEW_MODEL"

      # Post a Block Kit message to Slack; the action is pinned to a commit SHA.
      - name: Notify
        if: ${{ env.NEW_MODEL != ''}}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
          # See also: https://api.slack.com/methods/chat.postMessage#channels
          channel-id: transformers-new-model-notification
          # For posting a rich message using Block Kit
          payload: |
            {
              "blocks": [
                {
                  "type": "header",
                  "text": {
                    "type": "plain_text",
                    "text": "New model!",
                    "emoji": true
                  }
                },
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh\ncommit SHA: ${{ env.COMMIT_SHA }}"
                  }
                }
              ]
            }
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
================================================
FILE: .github/workflows/pr-repo-consistency-bot.yml
================================================
name: PR Repo. Consistency Bot
on:
issue_comment:
types:
- created
branches-ignore:
- main
concurrency:
group: ${{ github.workflow }}-${{
Showing preview only (325K chars total). Download the full file or copy to clipboard to get everything.
gitextract_jggdbnd_/ ├── .ai/ │ ├── AGENTS.md │ └── skills/ │ └── add-or-fix-type-checking/ │ └── SKILL.md ├── .circleci/ │ ├── TROUBLESHOOT.md │ ├── config.yml │ ├── create_circleci_config.py │ └── parse_test_outputs.py ├── .git-blame-ignore-revs ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report.yml │ │ ├── config.yml │ │ ├── feature-request.yml │ │ ├── i18n.md │ │ ├── migration.yml │ │ └── new-model-addition.yml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── conda/ │ │ ├── build.sh │ │ └── meta.yaml │ ├── copilot-instructions.md │ ├── scripts/ │ │ ├── assign_reviewers.py │ │ └── codeowners_for_review_action │ └── workflows/ │ ├── TROUBLESHOOT.md │ ├── add-model-like.yml │ ├── anti-slop.yml │ ├── assign-reviewers.yml │ ├── benchmark.yml │ ├── benchmark_v2.yml │ ├── benchmark_v2_a10_caller.yml │ ├── benchmark_v2_mi325_caller.yml │ ├── build-ci-docker-images.yml │ ├── build-docker-images.yml │ ├── build-nightly-ci-docker-images.yml │ ├── build-past-ci-docker-images.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── check-workflow-permissions.yml │ ├── check_failed_tests.yml │ ├── check_tiny_models.yml │ ├── circleci-failure-summary-comment.yml │ ├── codeql.yml │ ├── collated-reports.yml │ ├── doctest_job.yml │ ├── doctests.yml │ ├── extras-smoke-test.yml │ ├── get-pr-info.yml │ ├── get-pr-number.yml │ ├── model_jobs.yml │ ├── model_jobs_intel_gaudi.yml │ ├── new_model_pr_merged_notification.yml │ ├── pr-repo-consistency-bot.yml │ ├── pr_build_doc_with_comment.yml │ ├── pr_slow_ci_suggestion.yml │ ├── push-important-models.yml │ ├── release-conda.yml │ ├── release.yml │ ├── self-comment-ci.yml │ ├── self-nightly-caller.yml │ ├── self-nightly-past-ci-caller.yml │ ├── self-past-caller.yml │ ├── self-scheduled-amd-caller.yml │ ├── self-scheduled-amd-mi250-caller.yml │ ├── self-scheduled-amd-mi325-caller.yml │ ├── self-scheduled-amd-mi355-caller.yml │ ├── self-scheduled-caller.yml │ ├── self-scheduled-flash-attn-caller.yml │ ├── 
self-scheduled-intel-gaudi.yml │ ├── self-scheduled-intel-gaudi3-caller.yml │ ├── self-scheduled.yml │ ├── slack-report.yml │ ├── ssh-runner.yml │ ├── stale.yml │ ├── trl-ci-bot.yml │ ├── trufflehog.yml │ ├── update_metdata.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUES.md ├── LICENSE ├── MIGRATION_GUIDE_V5.md ├── Makefile ├── README.md ├── SECURITY.md ├── awesome-transformers.md ├── benchmark/ │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── benches/ │ │ └── llama.py │ ├── benchmark.py │ ├── benchmarks_entrypoint.py │ ├── config/ │ │ └── generation.yaml │ ├── default.yml │ ├── grafana_dashboard.json │ ├── grafana_datasource.yaml │ ├── optimum_benchmark_wrapper.py │ ├── requirements.txt │ └── utils/ │ └── init_db.sql ├── benchmark_v2/ │ ├── .gitignore │ ├── README.md │ ├── benchmark_scripts/ │ │ └── continuous_batching_overall.py │ ├── framework/ │ │ ├── benchmark_config.py │ │ ├── benchmark_runner.py │ │ ├── data_classes.py │ │ └── hardware_metrics.py │ ├── requirements.txt │ └── run_benchmarks.py ├── conftest.py ├── docker/ │ ├── README.md │ ├── consistency.dockerfile │ ├── custom-tokenizers.dockerfile │ ├── examples-torch.dockerfile │ ├── exotic-models.dockerfile │ ├── pipeline-torch.dockerfile │ ├── quality.dockerfile │ ├── torch-light.dockerfile │ ├── transformers-all-latest-gpu/ │ │ └── Dockerfile │ ├── transformers-doc-builder/ │ │ └── Dockerfile │ ├── transformers-gpu/ │ │ └── Dockerfile │ ├── transformers-intel-cpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-amd-gpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-deepspeed-amd-gpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-deepspeed-latest-gpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-deepspeed-nightly-gpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-gpu/ │ │ └── Dockerfile │ ├── transformers-pytorch-tpu/ │ │ ├── Dockerfile │ │ ├── bert-base-cased.jsonnet │ │ ├── dataset.yaml │ │ └── docker-entrypoint.sh │ ├── 
transformers-pytorch-xpu/ │ │ └── Dockerfile │ └── transformers-quantization-latest-gpu/ │ └── Dockerfile ├── docs/ │ ├── README.md │ ├── TRANSLATING.md │ └── source/ │ ├── _config.py │ ├── ar/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── attention.md │ │ ├── autoclass_tutorial.md │ │ ├── bertology.md │ │ ├── chat_templating.md │ │ ├── community.md │ │ ├── conversations.md │ │ ├── create_a_model.md │ │ ├── custom_models.md │ │ ├── fast_tokenizers.md │ │ ├── gguf.md │ │ ├── glossary.md │ │ ├── how_to_hack_models.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── llm_tutorial.md │ │ ├── llm_tutorial_optimization.md │ │ ├── model_memory_anatomy.md │ │ ├── model_sharing.md │ │ ├── model_summary.md │ │ ├── modular_transformers.md │ │ ├── multilingual.md │ │ ├── notebooks.md │ │ ├── pad_truncation.md │ │ ├── peft.md │ │ ├── perplexity.md │ │ ├── philosophy.md │ │ ├── pipeline_tutorial.md │ │ ├── pipeline_webserver.md │ │ ├── preprocessing.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── sagemaker.md │ │ ├── serialization.md │ │ ├── task_summary.md │ │ ├── tasks/ │ │ │ ├── language_modeling.md │ │ │ ├── masked_language_modeling.md │ │ │ ├── multiple_choice.md │ │ │ ├── question_answering.md │ │ │ ├── sequence_classification.md │ │ │ ├── summarization.md │ │ │ ├── token_classification.md │ │ │ └── translation.md │ │ ├── tasks_explained.md │ │ ├── tiktoken.md │ │ ├── tokenizer_summary.md │ │ ├── trainer.md │ │ ├── training.md │ │ └── troubleshooting.md │ ├── de/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── add_new_model.md │ │ ├── add_new_pipeline.md │ │ ├── autoclass_tutorial.md │ │ ├── contributing.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── llm_tutorial.md │ │ ├── model_sharing.md │ │ ├── peft.md │ │ ├── pipeline_tutorial.md │ │ ├── pr_checks.md │ │ ├── preprocessing.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── testing.md │ │ └── training.md │ ├── en/ │ │ ├── _config.py │ │ ├── _redirects.yml │ │ 
├── _toctree.yml │ │ ├── accelerate.md │ │ ├── accelerator_selection.md │ │ ├── add_new_model.md │ │ ├── add_new_pipeline.md │ │ ├── assisted_decoding.md │ │ ├── attention_interface.md │ │ ├── auto_docstring.md │ │ ├── backbones.md │ │ ├── cache_explanation.md │ │ ├── chat_content_patterns.md │ │ ├── chat_extras.md │ │ ├── chat_response_parsing.md │ │ ├── chat_templating.md │ │ ├── chat_templating_multimodal.md │ │ ├── chat_templating_writing.md │ │ ├── community.md │ │ ├── community_integrations/ │ │ │ ├── axolotl.md │ │ │ ├── candle.md │ │ │ ├── executorch.md │ │ │ ├── llama_cpp.md │ │ │ ├── mlx.md │ │ │ ├── nanotron.md │ │ │ ├── nemo_automodel_finetuning.md │ │ │ ├── nemo_automodel_pretraining.md │ │ │ ├── sglang.md │ │ │ ├── tensorrt-llm.md │ │ │ ├── torchtitan.md │ │ │ ├── transformers_as_backend.md │ │ │ ├── trl.md │ │ │ ├── unsloth.md │ │ │ └── vllm.md │ │ ├── continuous_batching.md │ │ ├── continuous_batching_architecture.md │ │ ├── conversations.md │ │ ├── custom_models.md │ │ ├── custom_tokenizers.md │ │ ├── data_collators.md │ │ ├── debugging.md │ │ ├── deepspeed.md │ │ ├── expert_parallelism.md │ │ ├── experts_interface.md │ │ ├── fast_tokenizers.md │ │ ├── feature_extractors.md │ │ ├── fsdp.md │ │ ├── generation_features.md │ │ ├── generation_strategies.md │ │ ├── gguf.md │ │ ├── glossary.md │ │ ├── how_to_hack_models.md │ │ ├── hpo_train.md │ │ ├── image_processors.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── internal/ │ │ │ ├── audio_utils.md │ │ │ ├── file_utils.md │ │ │ ├── generation_utils.md │ │ │ ├── image_processing_utils.md │ │ │ ├── import_utils.md │ │ │ ├── model_debugging_utils.md │ │ │ ├── modeling_utils.md │ │ │ ├── pipelines_utils.md │ │ │ ├── rope_utils.md │ │ │ ├── time_series_utils.md │ │ │ ├── tokenization_utils.md │ │ │ └── trainer_utils.md │ │ ├── kernel_doc/ │ │ │ ├── loading_kernels.md │ │ │ └── overview.md │ │ ├── kv_cache.md │ │ ├── llm_tutorial.md │ │ ├── llm_tutorial_optimization.md │ │ ├── main_classes/ │ │ │ ├── 
backbones.md │ │ │ ├── callback.md │ │ │ ├── configuration.md │ │ │ ├── continuous_batching.md │ │ │ ├── data_collator.md │ │ │ ├── deepspeed.md │ │ │ ├── executorch.md │ │ │ ├── feature_extractor.md │ │ │ ├── image_processor.md │ │ │ ├── kernels.md │ │ │ ├── logging.md │ │ │ ├── model.md │ │ │ ├── optimizer_schedules.md │ │ │ ├── output.md │ │ │ ├── peft.md │ │ │ ├── pipelines.md │ │ │ ├── processors.md │ │ │ ├── quantization.md │ │ │ ├── text_generation.md │ │ │ ├── tokenizer.md │ │ │ ├── trainer.md │ │ │ └── video_processor.md │ │ ├── model_doc/ │ │ │ ├── afmoe.md │ │ │ ├── aimv2.md │ │ │ ├── albert.md │ │ │ ├── align.md │ │ │ ├── altclip.md │ │ │ ├── apertus.md │ │ │ ├── arcee.md │ │ │ ├── aria.md │ │ │ ├── audio-spectrogram-transformer.md │ │ │ ├── audioflamingo3.md │ │ │ ├── auto.md │ │ │ ├── autoformer.md │ │ │ ├── aya_vision.md │ │ │ ├── bamba.md │ │ │ ├── bark.md │ │ │ ├── bart.md │ │ │ ├── barthez.md │ │ │ ├── bartpho.md │ │ │ ├── beit.md │ │ │ ├── bert-generation.md │ │ │ ├── bert-japanese.md │ │ │ ├── bert.md │ │ │ ├── bertweet.md │ │ │ ├── big_bird.md │ │ │ ├── bigbird_pegasus.md │ │ │ ├── biogpt.md │ │ │ ├── bit.md │ │ │ ├── bitnet.md │ │ │ ├── blenderbot-small.md │ │ │ ├── blenderbot.md │ │ │ ├── blip-2.md │ │ │ ├── blip.md │ │ │ ├── bloom.md │ │ │ ├── blt.md │ │ │ ├── bridgetower.md │ │ │ ├── bros.md │ │ │ ├── byt5.md │ │ │ ├── camembert.md │ │ │ ├── canine.md │ │ │ ├── chameleon.md │ │ │ ├── chinese_clip.md │ │ │ ├── chmv2.md │ │ │ ├── clap.md │ │ │ ├── clip.md │ │ │ ├── clipseg.md │ │ │ ├── clvp.md │ │ │ ├── code_llama.md │ │ │ ├── codegen.md │ │ │ ├── cohere.md │ │ │ ├── cohere2.md │ │ │ ├── cohere2_vision.md │ │ │ ├── cohere_asr.md │ │ │ ├── colmodernvbert.md │ │ │ ├── colpali.md │ │ │ ├── colqwen2.md │ │ │ ├── conditional_detr.md │ │ │ ├── convbert.md │ │ │ ├── convnext.md │ │ │ ├── convnextv2.md │ │ │ ├── cpm.md │ │ │ ├── cpmant.md │ │ │ ├── csm.md │ │ │ ├── ctrl.md │ │ │ ├── cvt.md │ │ │ ├── cwm.md │ │ │ ├── d_fine.md │ │ │ ├── dab-detr.md │ 
│ │ ├── dac.md │ │ │ ├── data2vec.md │ │ │ ├── dbrx.md │ │ │ ├── deberta-v2.md │ │ │ ├── deberta.md │ │ │ ├── decision_transformer.md │ │ │ ├── deepseek_v2.md │ │ │ ├── deepseek_v3.md │ │ │ ├── deepseek_vl.md │ │ │ ├── deepseek_vl_hybrid.md │ │ │ ├── deformable_detr.md │ │ │ ├── deit.md │ │ │ ├── deplot.md │ │ │ ├── depth_anything.md │ │ │ ├── depth_anything_v2.md │ │ │ ├── depth_pro.md │ │ │ ├── detr.md │ │ │ ├── dia.md │ │ │ ├── dialogpt.md │ │ │ ├── diffllama.md │ │ │ ├── dinat.md │ │ │ ├── dinov2.md │ │ │ ├── dinov2_with_registers.md │ │ │ ├── dinov3.md │ │ │ ├── distilbert.md │ │ │ ├── dit.md │ │ │ ├── doge.md │ │ │ ├── donut.md │ │ │ ├── dots1.md │ │ │ ├── dpr.md │ │ │ ├── dpt.md │ │ │ ├── edgetam.md │ │ │ ├── edgetam_video.md │ │ │ ├── efficientloftr.md │ │ │ ├── efficientnet.md │ │ │ ├── electra.md │ │ │ ├── emu3.md │ │ │ ├── encodec.md │ │ │ ├── encoder-decoder.md │ │ │ ├── eomt.md │ │ │ ├── eomt_dinov3.md │ │ │ ├── ernie.md │ │ │ ├── ernie4_5.md │ │ │ ├── ernie4_5_moe.md │ │ │ ├── ernie4_5_vl_moe.md │ │ │ ├── esm.md │ │ │ ├── eurobert.md │ │ │ ├── evolla.md │ │ │ ├── exaone4.md │ │ │ ├── exaone_moe.md │ │ │ ├── falcon.md │ │ │ ├── falcon3.md │ │ │ ├── falcon_h1.md │ │ │ ├── falcon_mamba.md │ │ │ ├── fast_vlm.md │ │ │ ├── fastspeech2_conformer.md │ │ │ ├── flan-t5.md │ │ │ ├── flan-ul2.md │ │ │ ├── flaubert.md │ │ │ ├── flava.md │ │ │ ├── flex_olmo.md │ │ │ ├── florence2.md │ │ │ ├── fnet.md │ │ │ ├── focalnet.md │ │ │ ├── fsmt.md │ │ │ ├── funnel.md │ │ │ ├── fuyu.md │ │ │ ├── gemma.md │ │ │ ├── gemma2.md │ │ │ ├── gemma3.md │ │ │ ├── gemma3n.md │ │ │ ├── git.md │ │ │ ├── glm.md │ │ │ ├── glm4.md │ │ │ ├── glm46v.md │ │ │ ├── glm4_moe.md │ │ │ ├── glm4_moe_lite.md │ │ │ ├── glm4v.md │ │ │ ├── glm4v_moe.md │ │ │ ├── glm_image.md │ │ │ ├── glm_moe_dsa.md │ │ │ ├── glm_ocr.md │ │ │ ├── glmasr.md │ │ │ ├── glpn.md │ │ │ ├── got_ocr2.md │ │ │ ├── gpt-sw3.md │ │ │ ├── gpt2.md │ │ │ ├── gpt_bigcode.md │ │ │ ├── gpt_neo.md │ │ │ ├── gpt_neox.md │ │ │ ├── 
gpt_neox_japanese.md │ │ │ ├── gpt_oss.md │ │ │ ├── gptj.md │ │ │ ├── granite.md │ │ │ ├── granite_speech.md │ │ │ ├── granitemoe.md │ │ │ ├── granitemoehybrid.md │ │ │ ├── granitemoeshared.md │ │ │ ├── granitevision.md │ │ │ ├── grounding-dino.md │ │ │ ├── groupvit.md │ │ │ ├── helium.md │ │ │ ├── herbert.md │ │ │ ├── hgnet_v2.md │ │ │ ├── hiera.md │ │ │ ├── higgs_audio_v2.md │ │ │ ├── higgs_audio_v2_tokenizer.md │ │ │ ├── hubert.md │ │ │ ├── hunyuan_v1_dense.md │ │ │ ├── hunyuan_v1_moe.md │ │ │ ├── ibert.md │ │ │ ├── idefics.md │ │ │ ├── idefics2.md │ │ │ ├── idefics3.md │ │ │ ├── ijepa.md │ │ │ ├── imagegpt.md │ │ │ ├── informer.md │ │ │ ├── instructblip.md │ │ │ ├── instructblipvideo.md │ │ │ ├── internvl.md │ │ │ ├── jais2.md │ │ │ ├── jamba.md │ │ │ ├── janus.md │ │ │ ├── jetmoe.md │ │ │ ├── jina_embeddings_v3.md │ │ │ ├── kosmos-2.md │ │ │ ├── kosmos2_5.md │ │ │ ├── kyutai_speech_to_text.md │ │ │ ├── lasr.md │ │ │ ├── layoutlm.md │ │ │ ├── layoutlmv2.md │ │ │ ├── layoutlmv3.md │ │ │ ├── layoutxlm.md │ │ │ ├── led.md │ │ │ ├── levit.md │ │ │ ├── lfm2.md │ │ │ ├── lfm2_moe.md │ │ │ ├── lfm2_vl.md │ │ │ ├── lightglue.md │ │ │ ├── lighton_ocr.md │ │ │ ├── lilt.md │ │ │ ├── llama.md │ │ │ ├── llama2.md │ │ │ ├── llama3.md │ │ │ ├── llama4.md │ │ │ ├── llava.md │ │ │ ├── llava_next.md │ │ │ ├── llava_next_video.md │ │ │ ├── llava_onevision.md │ │ │ ├── longcat_flash.md │ │ │ ├── longformer.md │ │ │ ├── longt5.md │ │ │ ├── luke.md │ │ │ ├── lw_detr.md │ │ │ ├── lxmert.md │ │ │ ├── m2m_100.md │ │ │ ├── madlad-400.md │ │ │ ├── mamba.md │ │ │ ├── mamba2.md │ │ │ ├── marian.md │ │ │ ├── markuplm.md │ │ │ ├── mask2former.md │ │ │ ├── maskformer.md │ │ │ ├── matcha.md │ │ │ ├── mbart.md │ │ │ ├── megatron-bert.md │ │ │ ├── megatron_gpt2.md │ │ │ ├── metaclip_2.md │ │ │ ├── mgp-str.md │ │ │ ├── mimi.md │ │ │ ├── minimax.md │ │ │ ├── minimax_m2.md │ │ │ ├── ministral.md │ │ │ ├── ministral3.md │ │ │ ├── mistral.md │ │ │ ├── mistral3.md │ │ │ ├── mistral4.md │ │ │ ├── 
mixtral.md │ │ │ ├── mlcd.md │ │ │ ├── mllama.md │ │ │ ├── mluke.md │ │ │ ├── mm-grounding-dino.md │ │ │ ├── mms.md │ │ │ ├── mobilebert.md │ │ │ ├── mobilenet_v1.md │ │ │ ├── mobilenet_v2.md │ │ │ ├── mobilevit.md │ │ │ ├── mobilevitv2.md │ │ │ ├── modernbert-decoder.md │ │ │ ├── modernbert.md │ │ │ ├── modernvbert.md │ │ │ ├── moonshine.md │ │ │ ├── moonshine_streaming.md │ │ │ ├── moshi.md │ │ │ ├── mpnet.md │ │ │ ├── mpt.md │ │ │ ├── mra.md │ │ │ ├── mt5.md │ │ │ ├── musicflamingo.md │ │ │ ├── musicgen.md │ │ │ ├── musicgen_melody.md │ │ │ ├── mvp.md │ │ │ ├── myt5.md │ │ │ ├── nanochat.md │ │ │ ├── nemotron.md │ │ │ ├── nemotron_h.md │ │ │ ├── nllb-moe.md │ │ │ ├── nllb.md │ │ │ ├── nougat.md │ │ │ ├── nystromformer.md │ │ │ ├── olmo.md │ │ │ ├── olmo2.md │ │ │ ├── olmo3.md │ │ │ ├── olmo_hybrid.md │ │ │ ├── olmoe.md │ │ │ ├── omdet-turbo.md │ │ │ ├── oneformer.md │ │ │ ├── openai-gpt.md │ │ │ ├── opt.md │ │ │ ├── ovis2.md │ │ │ ├── owlv2.md │ │ │ ├── owlvit.md │ │ │ ├── paddleocr_vl.md │ │ │ ├── paligemma.md │ │ │ ├── parakeet.md │ │ │ ├── patchtsmixer.md │ │ │ ├── patchtst.md │ │ │ ├── pe_audio.md │ │ │ ├── pe_audio_video.md │ │ │ ├── pe_video.md │ │ │ ├── pegasus.md │ │ │ ├── pegasus_x.md │ │ │ ├── perceiver.md │ │ │ ├── perception_lm.md │ │ │ ├── persimmon.md │ │ │ ├── phi.md │ │ │ ├── phi3.md │ │ │ ├── phi4_multimodal.md │ │ │ ├── phimoe.md │ │ │ ├── phobert.md │ │ │ ├── pi0.md │ │ │ ├── pix2struct.md │ │ │ ├── pixio.md │ │ │ ├── pixtral.md │ │ │ ├── plbart.md │ │ │ ├── poolformer.md │ │ │ ├── pop2piano.md │ │ │ ├── pp_chart2table.md │ │ │ ├── pp_doclayout_v2.md │ │ │ ├── pp_doclayout_v3.md │ │ │ ├── pp_lcnet.md │ │ │ ├── pp_lcnet_v3.md │ │ │ ├── pp_ocrv5_mobile_det.md │ │ │ ├── pp_ocrv5_mobile_rec.md │ │ │ ├── pp_ocrv5_server_det.md │ │ │ ├── pp_ocrv5_server_rec.md │ │ │ ├── prompt_depth_anything.md │ │ │ ├── prophetnet.md │ │ │ ├── pvt.md │ │ │ ├── pvt_v2.md │ │ │ ├── qwen2.md │ │ │ ├── qwen2_5_omni.md │ │ │ ├── qwen2_5_vl.md │ │ │ ├── qwen2_audio.md │ 
│ │ ├── qwen2_moe.md │ │ │ ├── qwen2_vl.md │ │ │ ├── qwen3.md │ │ │ ├── qwen3_5.md │ │ │ ├── qwen3_5_moe.md │ │ │ ├── qwen3_moe.md │ │ │ ├── qwen3_next.md │ │ │ ├── qwen3_omni_moe.md │ │ │ ├── qwen3_vl.md │ │ │ ├── qwen3_vl_moe.md │ │ │ ├── rag.md │ │ │ ├── recurrent_gemma.md │ │ │ ├── reformer.md │ │ │ ├── regnet.md │ │ │ ├── rembert.md │ │ │ ├── resnet.md │ │ │ ├── roberta-prelayernorm.md │ │ │ ├── roberta.md │ │ │ ├── roc_bert.md │ │ │ ├── roformer.md │ │ │ ├── rt_detr.md │ │ │ ├── rt_detr_v2.md │ │ │ ├── rwkv.md │ │ │ ├── sam.md │ │ │ ├── sam2.md │ │ │ ├── sam2_video.md │ │ │ ├── sam3.md │ │ │ ├── sam3_tracker.md │ │ │ ├── sam3_tracker_video.md │ │ │ ├── sam3_video.md │ │ │ ├── sam_hq.md │ │ │ ├── seamless_m4t.md │ │ │ ├── seamless_m4t_v2.md │ │ │ ├── seed_oss.md │ │ │ ├── segformer.md │ │ │ ├── seggpt.md │ │ │ ├── sew-d.md │ │ │ ├── sew.md │ │ │ ├── shieldgemma2.md │ │ │ ├── siglip.md │ │ │ ├── siglip2.md │ │ │ ├── slanext.md │ │ │ ├── smollm3.md │ │ │ ├── smolvlm.md │ │ │ ├── solar_open.md │ │ │ ├── speech-encoder-decoder.md │ │ │ ├── speech_to_text.md │ │ │ ├── speecht5.md │ │ │ ├── splinter.md │ │ │ ├── squeezebert.md │ │ │ ├── stablelm.md │ │ │ ├── starcoder2.md │ │ │ ├── superglue.md │ │ │ ├── superpoint.md │ │ │ ├── swiftformer.md │ │ │ ├── swin.md │ │ │ ├── swin2sr.md │ │ │ ├── swinv2.md │ │ │ ├── switch_transformers.md │ │ │ ├── t5.md │ │ │ ├── t5gemma.md │ │ │ ├── t5gemma2.md │ │ │ ├── t5v1.1.md │ │ │ ├── table-transformer.md │ │ │ ├── tapas.md │ │ │ ├── textnet.md │ │ │ ├── time_series_transformer.md │ │ │ ├── timesfm.md │ │ │ ├── timesfm2_5.md │ │ │ ├── timesformer.md │ │ │ ├── timm_wrapper.md │ │ │ ├── trocr.md │ │ │ ├── tvp.md │ │ │ ├── udop.md │ │ │ ├── ul2.md │ │ │ ├── umt5.md │ │ │ ├── unispeech-sat.md │ │ │ ├── unispeech.md │ │ │ ├── univnet.md │ │ │ ├── upernet.md │ │ │ ├── uvdoc.md │ │ │ ├── vaultgemma.md │ │ │ ├── vibevoice_acoustic_tokenizer.md │ │ │ ├── vibevoice_asr.md │ │ │ ├── video_llama_3.md │ │ │ ├── video_llava.md │ │ │ ├── 
videomae.md │ │ │ ├── videomt.md │ │ │ ├── vilt.md │ │ │ ├── vipllava.md │ │ │ ├── vision-encoder-decoder.md │ │ │ ├── vision-text-dual-encoder.md │ │ │ ├── visual_bert.md │ │ │ ├── vit.md │ │ │ ├── vit_mae.md │ │ │ ├── vit_msn.md │ │ │ ├── vitdet.md │ │ │ ├── vitmatte.md │ │ │ ├── vitpose.md │ │ │ ├── vits.md │ │ │ ├── vivit.md │ │ │ ├── vjepa2.md │ │ │ ├── voxtral.md │ │ │ ├── voxtral_realtime.md │ │ │ ├── wav2vec2-bert.md │ │ │ ├── wav2vec2-conformer.md │ │ │ ├── wav2vec2.md │ │ │ ├── wav2vec2_phoneme.md │ │ │ ├── wavlm.md │ │ │ ├── whisper.md │ │ │ ├── xclip.md │ │ │ ├── xcodec.md │ │ │ ├── xglm.md │ │ │ ├── xlm-roberta-xl.md │ │ │ ├── xlm-roberta.md │ │ │ ├── xlm-v.md │ │ │ ├── xlm.md │ │ │ ├── xlnet.md │ │ │ ├── xls_r.md │ │ │ ├── xlsr_wav2vec2.md │ │ │ ├── xlstm.md │ │ │ ├── xmod.md │ │ │ ├── yolos.md │ │ │ ├── yoso.md │ │ │ ├── youtu.md │ │ │ ├── zamba.md │ │ │ ├── zamba2.md │ │ │ └── zoedepth.md │ │ ├── model_memory_anatomy.md │ │ ├── model_output_tracing.md │ │ ├── model_sharing.md │ │ ├── models.md │ │ ├── models_timeline.md │ │ ├── modular_transformers.md │ │ ├── monkey_patching.md │ │ ├── optimization_overview.md │ │ ├── optimizers.md │ │ ├── paged_attention.md │ │ ├── peft.md │ │ ├── perf_hardware.md │ │ ├── perf_infer_gpu_multi.md │ │ ├── perf_torch_compile.md │ │ ├── perf_train_cpu.md │ │ ├── perf_train_cpu_many.md │ │ ├── perf_train_gaudi.md │ │ ├── perf_train_gpu_many.md │ │ ├── perf_train_gpu_one.md │ │ ├── perf_train_special.md │ │ ├── perplexity.md │ │ ├── philosophy.md │ │ ├── pipeline_gradio.md │ │ ├── pipeline_tutorial.md │ │ ├── pipeline_webserver.md │ │ ├── pr_checks.md │ │ ├── processors.md │ │ ├── quantization/ │ │ │ ├── aqlm.md │ │ │ ├── auto_round.md │ │ │ ├── awq.md │ │ │ ├── bitnet.md │ │ │ ├── bitsandbytes.md │ │ │ ├── compressed_tensors.md │ │ │ ├── concept_guide.md │ │ │ ├── contribute.md │ │ │ ├── eetq.md │ │ │ ├── fbgemm_fp8.md │ │ │ ├── finegrained_fp8.md │ │ │ ├── fouroversix.md │ │ │ ├── fp_quant.md │ │ │ ├── gptq.md │ │ │ 
├── higgs.md │ │ │ ├── hqq.md │ │ │ ├── metal.md │ │ │ ├── mxfp4.md │ │ │ ├── optimum.md │ │ │ ├── overview.md │ │ │ ├── quanto.md │ │ │ ├── quark.md │ │ │ ├── selecting.md │ │ │ ├── sinq.md │ │ │ ├── spqr.md │ │ │ ├── torchao.md │ │ │ └── vptq.md │ │ ├── quicktour.md │ │ ├── reference/ │ │ │ └── environment_variables.md │ │ ├── run_scripts.md │ │ ├── serialization.md │ │ ├── serve-cli/ │ │ │ ├── cursor.md │ │ │ ├── jan.md │ │ │ ├── openweb_ui.md │ │ │ ├── serving.md │ │ │ ├── serving_optims.md │ │ │ └── tiny_agents.md │ │ ├── tasks/ │ │ │ ├── any_to_any.md │ │ │ ├── asr.md │ │ │ ├── audio_classification.md │ │ │ ├── audio_text_to_text.md │ │ │ ├── document_question_answering.md │ │ │ ├── idefics.md │ │ │ ├── image_captioning.md │ │ │ ├── image_classification.md │ │ │ ├── image_feature_extraction.md │ │ │ ├── image_text_to_text.md │ │ │ ├── keypoint_detection.md │ │ │ ├── keypoint_matching.md │ │ │ ├── knowledge_distillation_for_image_classification.md │ │ │ ├── language_modeling.md │ │ │ ├── mask_generation.md │ │ │ ├── masked_language_modeling.md │ │ │ ├── monocular_depth_estimation.md │ │ │ ├── multiple_choice.md │ │ │ ├── object_detection.md │ │ │ ├── prompting.md │ │ │ ├── question_answering.md │ │ │ ├── semantic_segmentation.md │ │ │ ├── sequence_classification.md │ │ │ ├── summarization.md │ │ │ ├── text-to-speech.md │ │ │ ├── token_classification.md │ │ │ ├── training_vision_backbone.md │ │ │ ├── translation.md │ │ │ ├── video_classification.md │ │ │ ├── video_text_to_text.md │ │ │ ├── visual_document_retrieval.md │ │ │ ├── visual_question_answering.md │ │ │ ├── zero_shot_image_classification.md │ │ │ └── zero_shot_object_detection.md │ │ ├── testing.md │ │ ├── tokenizer_summary.md │ │ ├── trainer.md │ │ ├── trainer_callbacks.md │ │ ├── trainer_customize.md │ │ ├── training.md │ │ ├── troubleshooting.md │ │ ├── video_processors.md │ │ └── weightconverter.md │ ├── es/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── add_new_pipeline.md │ 
│ ├── attention.md │ │ ├── autoclass_tutorial.md │ │ ├── bertology.md │ │ ├── chat_templating.md │ │ ├── community.md │ │ ├── conversations.md │ │ ├── create_a_model.md │ │ ├── custom_models.md │ │ ├── debugging.md │ │ ├── fast_tokenizers.md │ │ ├── glossary.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── model_memory_anatomy.md │ │ ├── model_sharing.md │ │ ├── multilingual.md │ │ ├── pad_truncation.md │ │ ├── performance.md │ │ ├── perplexity.md │ │ ├── philosophy.md │ │ ├── pipeline_tutorial.md │ │ ├── pipeline_webserver.md │ │ ├── pr_checks.md │ │ ├── preprocessing.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── sagemaker.md │ │ ├── task_summary.md │ │ ├── tasks/ │ │ │ ├── asr.md │ │ │ ├── audio_classification.md │ │ │ ├── image_captioning.md │ │ │ ├── image_classification.md │ │ │ ├── language_modeling.md │ │ │ ├── multiple_choice.md │ │ │ ├── question_answering.md │ │ │ └── summarization.md │ │ ├── tasks_explained.md │ │ ├── tokenizer_summary.md │ │ ├── trainer.md │ │ └── training.md │ ├── fr/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── autoclass_tutorial.md │ │ ├── in_translation.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── quicktour.md │ │ ├── run_scripts_fr.md │ │ ├── task_summary.md │ │ ├── tasks_explained.md │ │ └── tutoriel_pipeline.md │ ├── hi/ │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ └── pipeline_tutorial.md │ ├── it/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── add_new_model.md │ │ ├── add_new_pipeline.md │ │ ├── autoclass_tutorial.md │ │ ├── big_models.md │ │ ├── community.md │ │ ├── create_a_model.md │ │ ├── custom_models.md │ │ ├── debugging.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── migration.md │ │ ├── model_sharing.md │ │ ├── multilingual.md │ │ ├── perf_hardware.md │ │ ├── perf_infer_cpu.md │ │ ├── perf_infer_gpu_many.md │ │ ├── perf_infer_gpu_one.md │ │ ├── perf_infer_special.md │ │ ├── perf_train_cpu.md │ │ ├── perf_train_cpu_many.md │ │ ├── perf_train_special.md │ │ ├── 
perf_train_tpu.md │ │ ├── pipeline_tutorial.md │ │ ├── pr_checks.md │ │ ├── preprocessing.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ └── training.md │ ├── ja/ │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── add_new_model.md │ │ ├── attention.md │ │ ├── autoclass_tutorial.md │ │ ├── bertology.md │ │ ├── big_models.md │ │ ├── chat_templating.md │ │ ├── community.md │ │ ├── create_a_model.md │ │ ├── custom_models.md │ │ ├── fast_tokenizers.md │ │ ├── generation_strategies.md │ │ ├── glossary.md │ │ ├── hpo_train.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── internal/ │ │ │ ├── audio_utils.md │ │ │ ├── file_utils.md │ │ │ ├── generation_utils.md │ │ │ ├── image_processing_utils.md │ │ │ ├── modeling_utils.md │ │ │ ├── pipelines_utils.md │ │ │ ├── time_series_utils.md │ │ │ ├── tokenization_utils.md │ │ │ └── trainer_utils.md │ │ ├── llm_tutorial.md │ │ ├── main_classes/ │ │ │ ├── callback.md │ │ │ ├── configuration.md │ │ │ ├── data_collator.md │ │ │ ├── deepspeed.md │ │ │ ├── feature_extractor.md │ │ │ ├── image_processor.md │ │ │ ├── logging.md │ │ │ ├── model.md │ │ │ ├── optimizer_schedules.md │ │ │ ├── output.md │ │ │ ├── pipelines.md │ │ │ ├── processors.md │ │ │ ├── quantization.md │ │ │ ├── text_generation.md │ │ │ ├── tokenizer.md │ │ │ └── trainer.md │ │ ├── model_doc/ │ │ │ ├── albert.md │ │ │ ├── align.md │ │ │ ├── altclip.md │ │ │ ├── audio-spectrogram-transformer.md │ │ │ ├── auto.md │ │ │ ├── autoformer.md │ │ │ ├── bark.md │ │ │ ├── bart.md │ │ │ ├── barthez.md │ │ │ ├── bartpho.md │ │ │ ├── beit.md │ │ │ ├── bert-generation.md │ │ │ ├── bert-japanese.md │ │ │ ├── bert.md │ │ │ ├── bertweet.md │ │ │ ├── big_bird.md │ │ │ ├── bigbird_pegasus.md │ │ │ ├── biogpt.md │ │ │ ├── bit.md │ │ │ ├── blenderbot-small.md │ │ │ ├── blenderbot.md │ │ │ ├── blip-2.md │ │ │ ├── blip.md │ │ │ ├── bloom.md │ │ │ ├── bridgetower.md │ │ │ ├── bros.md │ │ │ ├── byt5.md │ │ │ ├── camembert.md │ │ │ ├── canine.md │ │ │ ├── chinese_clip.md │ │ │ ├── clap.md │ 
│ │ ├── clip.md │ │ │ ├── clipseg.md │ │ │ ├── clvp.md │ │ │ ├── code_llama.md │ │ │ ├── codegen.md │ │ │ ├── conditional_detr.md │ │ │ ├── convbert.md │ │ │ ├── convnext.md │ │ │ ├── convnextv2.md │ │ │ ├── cpm.md │ │ │ ├── cpmant.md │ │ │ ├── ctrl.md │ │ │ ├── cvt.md │ │ │ ├── data2vec.md │ │ │ ├── deberta-v2.md │ │ │ ├── deberta.md │ │ │ ├── decision_transformer.md │ │ │ ├── deformable_detr.md │ │ │ ├── deit.md │ │ │ ├── deplot.md │ │ │ ├── detr.md │ │ │ ├── dialogpt.md │ │ │ └── dinat.md │ │ ├── model_memory_anatomy.md │ │ ├── model_sharing.md │ │ ├── model_summary.md │ │ ├── multilingual.md │ │ ├── pad_truncation.md │ │ ├── peft.md │ │ ├── perf_hardware.md │ │ ├── perf_infer_cpu.md │ │ ├── perf_infer_gpu_many.md │ │ ├── perf_infer_gpu_one.md │ │ ├── perf_infer_special.md │ │ ├── perf_torch_compile.md │ │ ├── perf_train_cpu.md │ │ ├── perf_train_cpu_many.md │ │ ├── perf_train_gpu_many.md │ │ ├── perf_train_gpu_one.md │ │ ├── perf_train_special.md │ │ ├── perf_train_tpu.md │ │ ├── performance.md │ │ ├── perplexity.md │ │ ├── philosophy.md │ │ ├── pipeline_tutorial.md │ │ ├── pipeline_webserver.md │ │ ├── pr_checks.md │ │ ├── preprocessing.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── serialization.md │ │ ├── task_summary.md │ │ ├── tasks/ │ │ │ ├── asr.md │ │ │ ├── audio_classification.md │ │ │ ├── document_question_answering.md │ │ │ ├── idefics.md │ │ │ ├── image_captioning.md │ │ │ ├── image_classification.md │ │ │ ├── knowledge_distillation_for_image_classification.md │ │ │ ├── language_modeling.md │ │ │ ├── masked_language_modeling.md │ │ │ ├── monocular_depth_estimation.md │ │ │ ├── multiple_choice.md │ │ │ ├── object_detection.md │ │ │ ├── prompting.md │ │ │ ├── question_answering.md │ │ │ ├── semantic_segmentation.md │ │ │ ├── summarization.md │ │ │ ├── text-to-speech.md │ │ │ ├── token_classification.md │ │ │ ├── translation.md │ │ │ ├── video_classification.md │ │ │ ├── visual_question_answering.md │ │ │ ├── zero_shot_image_classification.md │ 
│ │ └── zero_shot_object_detection.md │ │ ├── tasks_explained.md │ │ ├── testing.md │ │ ├── tokenizer_summary.md │ │ ├── training.md │ │ └── troubleshooting.md │ ├── ko/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── accelerator_selection.md │ │ ├── add_new_model.md │ │ ├── add_new_pipeline.md │ │ ├── cache_explanation.md │ │ ├── chat_extras.md │ │ ├── chat_templating.md │ │ ├── community.md │ │ ├── contributing.md │ │ ├── conversations.md │ │ ├── custom_models.md │ │ ├── debugging.md │ │ ├── deepspeed.md │ │ ├── executorch.md │ │ ├── fast_tokenizers.md │ │ ├── fsdp.md │ │ ├── generation_strategies.md │ │ ├── gguf.md │ │ ├── glossary.md │ │ ├── how_to_hack_models.md │ │ ├── hpo_train.md │ │ ├── image_processors.md │ │ ├── in_translation.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── internal/ │ │ │ ├── audio_utils.md │ │ │ ├── file_utils.md │ │ │ ├── generation_utils.md │ │ │ ├── image_processing_utils.md │ │ │ ├── modeling_utils.md │ │ │ ├── pipelines_utils.md │ │ │ ├── time_series_utils.md │ │ │ ├── tokenization_utils.md │ │ │ └── trainer_utils.md │ │ ├── llm_optims.md │ │ ├── llm_tutorial.md │ │ ├── llm_tutorial_optimization.md │ │ ├── main_classes/ │ │ │ ├── callback.md │ │ │ ├── configuration.md │ │ │ ├── data_collator.md │ │ │ ├── feature_extractor.md │ │ │ ├── logging.md │ │ │ ├── model.md │ │ │ ├── optimizer_schedules.md │ │ │ ├── output.md │ │ │ ├── peft.md │ │ │ ├── pipelines.md │ │ │ ├── processors.md │ │ │ ├── quantization.md │ │ │ ├── text_generation.md │ │ │ ├── tokenizer.md │ │ │ └── trainer.md │ │ ├── model_doc/ │ │ │ ├── albert.md │ │ │ ├── altclip.md │ │ │ ├── auto.md │ │ │ ├── autoformer.md │ │ │ ├── bart.md │ │ │ ├── barthez.md │ │ │ ├── bartpho.md │ │ │ ├── bert-japanese.md │ │ │ ├── bert.md │ │ │ ├── bertweet.md │ │ │ ├── big_bird.md │ │ │ ├── biogpt.md │ │ │ ├── blip-2.md │ │ │ ├── blip.md │ │ │ ├── chameleon.md │ │ │ ├── clip.md │ │ │ ├── clipseg.md │ │ │ ├── code_llama.md │ │ │ ├── codegen.md │ │ │ ├── cohere.md 
│ │ │ ├── convbert.md │ │ │ ├── dbrx.md │ │ │ ├── deberta-v2.md │ │ │ ├── deberta.md │ │ │ ├── deepseek_v3.md │ │ │ ├── electra.md │ │ │ ├── encoder-decoder.md │ │ │ ├── esm.md │ │ │ ├── exaone4.md │ │ │ ├── exaone_moe.md │ │ │ ├── gemma.md │ │ │ ├── gemma2.md │ │ │ ├── gemma3.md │ │ │ ├── gemma3n.md │ │ │ ├── gpt2.md │ │ │ ├── gpt_neox_japanese.md │ │ │ ├── grounding-dino.md │ │ │ ├── informer.md │ │ │ ├── jamba.md │ │ │ ├── lfm2.md │ │ │ ├── llama.md │ │ │ ├── llama2.md │ │ │ ├── llama3.md │ │ │ ├── llama4.md │ │ │ ├── mamba.md │ │ │ ├── mamba2.md │ │ │ ├── marian.md │ │ │ ├── mistral.md │ │ │ ├── openai-gpt.md │ │ │ ├── paligemma.md │ │ │ ├── patchtsmixer.md │ │ │ ├── patchtst.md │ │ │ ├── qwen2_vl.md │ │ │ ├── rag.md │ │ │ ├── roberta.md │ │ │ ├── sam_hq.md │ │ │ ├── siglip.md │ │ │ ├── smolvlm.md │ │ │ ├── swin.md │ │ │ ├── swin2sr.md │ │ │ ├── swinv2.md │ │ │ ├── time_series_transformer.md │ │ │ ├── timesformer.md │ │ │ ├── tvp.md │ │ │ ├── vit.md │ │ │ ├── vivit.md │ │ │ ├── whisper.md │ │ │ └── xclip.md │ │ ├── model_memory_anatomy.md │ │ ├── model_sharing.md │ │ ├── models.md │ │ ├── modular_transformers.md │ │ ├── optimizers.md │ │ ├── pad_truncation.md │ │ ├── peft.md │ │ ├── perf_hardware.md │ │ ├── perf_infer_cpu.md │ │ ├── perf_infer_gpu_multi.md │ │ ├── perf_infer_gpu_one.md │ │ ├── perf_train_cpu.md │ │ ├── perf_train_cpu_many.md │ │ ├── perf_train_gpu_many.md │ │ ├── perf_train_gpu_one.md │ │ ├── perf_train_special.md │ │ ├── perplexity.md │ │ ├── philosophy.md │ │ ├── pipeline_gradio.md │ │ ├── pipeline_tutorial.md │ │ ├── pipeline_webserver.md │ │ ├── pr_checks.md │ │ ├── quantization/ │ │ │ ├── awq.md │ │ │ ├── bitsandbytes.md │ │ │ ├── eetq.md │ │ │ ├── gptq.md │ │ │ ├── quanto.md │ │ │ └── quark.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── serialization.md │ │ ├── serving.md │ │ ├── tasks/ │ │ │ ├── asr.md │ │ │ ├── audio_classification.md │ │ │ ├── document_question_answering.md │ │ │ ├── idefics.md │ │ │ ├── image_captioning.md │ │ 
│ ├── image_classification.md │ │ │ ├── image_feature_extraction.md │ │ │ ├── keypoint_detection.md │ │ │ ├── knowledge_distillation_for_image_classification.md │ │ │ ├── language_modeling.md │ │ │ ├── mask_generation.md │ │ │ ├── masked_language_modeling.md │ │ │ ├── monocular_depth_estimation.md │ │ │ ├── multiple_choice.md │ │ │ ├── object_detection.md │ │ │ ├── prompting.md │ │ │ ├── question_answering.md │ │ │ ├── semantic_segmentation.md │ │ │ ├── sequence_classification.md │ │ │ ├── summarization.md │ │ │ ├── token_classification.md │ │ │ ├── translation.md │ │ │ ├── video_classification.md │ │ │ ├── visual_question_answering.md │ │ │ ├── zero_shot_image_classification.md │ │ │ └── zero_shot_object_detection.md │ │ ├── testing.md │ │ ├── tiny_agents.md │ │ ├── tokenizer_summary.md │ │ ├── trainer.md │ │ ├── training.md │ │ └── troubleshooting.md │ ├── pt/ │ │ ├── _config.py │ │ ├── _toctree.yml │ │ ├── accelerate.md │ │ ├── create_a_model.md │ │ ├── custom_models.md │ │ ├── fast_tokenizers.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── multilingual.md │ │ ├── pipeline_tutorial.md │ │ ├── quicktour.md │ │ ├── run_scripts.md │ │ ├── tasks/ │ │ │ ├── sequence_classification.md │ │ │ └── token_classification.md │ │ └── training.md │ └── zh/ │ ├── _toctree.yml │ ├── accelerate.md │ ├── add_new_pipeline.md │ ├── attention.md │ ├── autoclass_tutorial.md │ ├── bertology.md │ ├── big_models.md │ ├── chat_templating.md │ ├── community.md │ ├── contributing.md │ ├── create_a_model.md │ ├── custom_models.md │ ├── debugging.md │ ├── fast_tokenizers.md │ ├── fsdp.md │ ├── generation_strategies.md │ ├── gguf.md │ ├── hpo_train.md │ ├── index.md │ ├── installation.md │ ├── internal/ │ │ ├── audio_utils.md │ │ ├── file_utils.md │ │ ├── generation_utils.md │ │ ├── image_processing_utils.md │ │ ├── modeling_utils.md │ │ ├── pipelines_utils.md │ │ ├── time_series_utils.md │ │ ├── tokenization_utils.md │ │ └── trainer_utils.md │ ├── llm_tutorial.md │ ├── main_classes/ │ │ 
├── callback.md │ │ ├── configuration.md │ │ ├── data_collator.md │ │ ├── deepspeed.md │ │ ├── feature_extractor.md │ │ ├── image_processor.md │ │ ├── logging.md │ │ ├── model.md │ │ ├── optimizer_schedules.md │ │ ├── output.md │ │ ├── pipelines.md │ │ ├── processors.md │ │ ├── quantization.md │ │ ├── text_generation.md │ │ ├── tokenizer.md │ │ └── trainer.md │ ├── model_doc/ │ │ └── bert.md │ ├── model_sharing.md │ ├── multilingual.md │ ├── peft.md │ ├── perf_hardware.md │ ├── perf_infer_gpu_multi.md │ ├── perf_torch_compile.md │ ├── perf_train_cpu.md │ ├── perf_train_special.md │ ├── performance.md │ ├── philosophy.md │ ├── pipeline_tutorial.md │ ├── preprocessing.md │ ├── quicktour.md │ ├── run_scripts.md │ ├── serialization.md │ ├── task_summary.md │ ├── tasks/ │ │ ├── asr.md │ │ ├── question_answering.md │ │ ├── sequence_classification.md │ │ ├── summarization.md │ │ ├── token_classification.md │ │ └── translation.md │ ├── tiktoken.md │ ├── tokenizer_summary.md │ └── training.md ├── doctest_list.txt ├── examples/ │ ├── 3D_parallel.py │ ├── README.md │ ├── metrics-monitoring/ │ │ ├── README.md │ │ ├── continuous-batching-dashboard.json │ │ ├── docker-compose.yml │ │ ├── grafana-dashboard.yaml │ │ ├── grafana-datasources.yaml │ │ ├── metrics_example.py │ │ ├── prometheus.yml │ │ └── tempo.yaml │ ├── modular-transformers/ │ │ ├── README.md │ │ ├── configuration_dummy.py │ │ ├── configuration_duplicated_method.py │ │ ├── configuration_my_new_model.py │ │ ├── configuration_my_new_model2.py │ │ ├── configuration_new_model.py │ │ ├── configuration_super.py │ │ ├── convert_examples.sh │ │ ├── image_processing_new_imgproc_model.py │ │ ├── modeling_add_function.py │ │ ├── modeling_dummy_bert.py │ │ ├── modeling_from_uppercase_model.py │ │ ├── modeling_global_indexing.py │ │ ├── modeling_multimodal2.py │ │ ├── modeling_my_new_model2.py │ │ ├── modeling_new_task_model.py │ │ ├── modeling_roberta.py │ │ ├── modeling_super.py │ │ ├── modeling_switch_function.py │ │ ├── 
modeling_test_detr.py │ │ ├── modeling_test_suffix.py │ │ ├── modular_add_function.py │ │ ├── modular_dummy_bert.py │ │ ├── modular_duplicated_method.py │ │ ├── modular_from_uppercase_model.py │ │ ├── modular_global_indexing.py │ │ ├── modular_multimodal2.py │ │ ├── modular_my_new_model.py │ │ ├── modular_my_new_model2.py │ │ ├── modular_new_imgproc_model.py │ │ ├── modular_new_model.py │ │ ├── modular_new_task_model.py │ │ ├── modular_roberta.py │ │ ├── modular_super.py │ │ ├── modular_switch_function.py │ │ ├── modular_test_detr.py │ │ └── modular_test_suffix.py │ ├── pytorch/ │ │ ├── 3d_parallel_checks.py │ │ ├── README.md │ │ ├── _tests_requirements.txt │ │ ├── audio-classification/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── run_audio_classification.py │ │ ├── conftest.py │ │ ├── context_parallel.py │ │ ├── continuous_batching.py │ │ ├── continuous_batching_simple.py │ │ ├── contrastive-image-text/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── run_clip.py │ │ ├── image-classification/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_image_classification.py │ │ │ └── run_image_classification_no_trainer.py │ │ ├── image-pretraining/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_mae.py │ │ │ ├── run_mim.py │ │ │ └── run_mim_no_trainer.py │ │ ├── instance-segmentation/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_instance_segmentation.py │ │ │ └── run_instance_segmentation_no_trainer.py │ │ ├── language-modeling/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_clm.py │ │ │ ├── run_clm_no_trainer.py │ │ │ ├── run_fim.py │ │ │ ├── run_fim_no_trainer.py │ │ │ ├── run_mlm.py │ │ │ ├── run_mlm_no_trainer.py │ │ │ └── run_plm.py │ │ ├── multiple-choice/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_no_trainer.sh │ │ │ ├── run_swag.py │ │ │ └── run_swag_no_trainer.py │ │ ├── object-detection/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_object_detection.py │ │ │ 
└── run_object_detection_no_trainer.py │ │ ├── old_test_xla_examples.py │ │ ├── question-answering/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_qa.py │ │ │ ├── run_qa_beam_search.py │ │ │ ├── run_qa_beam_search_no_trainer.py │ │ │ ├── run_qa_no_trainer.py │ │ │ ├── run_seq2seq_qa.py │ │ │ ├── trainer_qa.py │ │ │ ├── trainer_seq2seq_qa.py │ │ │ └── utils_qa.py │ │ ├── semantic-segmentation/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_semantic_segmentation.py │ │ │ └── run_semantic_segmentation_no_trainer.py │ │ ├── speech-pretraining/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── run_wav2vec2_pretraining_no_trainer.py │ │ ├── speech-recognition/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_speech_recognition_ctc.py │ │ │ ├── run_speech_recognition_ctc_adapter.py │ │ │ └── run_speech_recognition_seq2seq.py │ │ ├── summarization/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_summarization.py │ │ │ └── run_summarization_no_trainer.py │ │ ├── test_accelerate_examples.py │ │ ├── test_pytorch_examples.py │ │ ├── text-classification/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_classification.py │ │ │ ├── run_glue.py │ │ │ ├── run_glue_no_trainer.py │ │ │ └── run_xnli.py │ │ ├── text-generation/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── run_generation.py │ │ ├── token-classification/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run.sh │ │ │ ├── run_ner.py │ │ │ ├── run_ner_no_trainer.py │ │ │ └── run_no_trainer.sh │ │ ├── transformers_serve_cb_eval_job.py │ │ ├── translation/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── run_translation.py │ │ │ └── run_translation_no_trainer.py │ │ └── xla_spawn.py │ ├── quantization/ │ │ ├── custom_quantization.py │ │ └── custom_quantization_int8_example.py │ ├── research_projects/ │ │ └── README.md │ ├── run_on_remote.py │ ├── scheduler/ │ │ ├── README.md │ │ └── run_greedy.py │ └── training/ │ └── 
distributed_training.py ├── i18n/ │ ├── README_ar.md │ ├── README_bn.md │ ├── README_de.md │ ├── README_es.md │ ├── README_fr.md │ ├── README_hd.md │ ├── README_it.md │ ├── README_ja.md │ ├── README_ko.md │ ├── README_pt-br.md │ ├── README_ru.md │ ├── README_te.md │ ├── README_ur.md │ ├── README_vi.md │ ├── README_zh-hans.md │ └── README_zh-hant.md ├── notebooks/ │ └── README.md ├── pyproject.toml ├── scripts/ │ ├── check_tokenizers.py │ ├── distributed/ │ │ └── torch-distributed-gpu-test.py │ └── stale.py ├── setup.py ├── src/ │ └── transformers/ │ ├── __init__.py │ ├── _typing.py │ ├── activations.py │ ├── audio_utils.py │ ├── backbone_utils.py │ ├── cache_utils.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── add_new_model_like.py │ │ ├── chat.py │ │ ├── download.py │ │ ├── serve.py │ │ ├── serving/ │ │ │ ├── __init__.py │ │ │ ├── chat_completion.py │ │ │ ├── model_manager.py │ │ │ ├── response.py │ │ │ ├── server.py │ │ │ ├── transcription.py │ │ │ └── utils.py │ │ ├── system.py │ │ └── transformers.py │ ├── configuration_utils.py │ ├── conversion_mapping.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── core_model_loading.py │ ├── data/ │ │ ├── __init__.py │ │ ├── data_collator.py │ │ ├── datasets/ │ │ │ ├── __init__.py │ │ │ ├── glue.py │ │ │ └── squad.py │ │ ├── metrics/ │ │ │ ├── __init__.py │ │ │ └── squad_metrics.py │ │ └── processors/ │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ ├── debug_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── distributed/ │ │ ├── __init__.py │ │ └── configuration_utils.py │ ├── dynamic_module_utils.py │ ├── feature_extraction_sequence_utils.py │ ├── feature_extraction_utils.py │ ├── file_utils.py │ ├── generation/ │ │ ├── __init__.py │ │ ├── candidate_generator.py │ │ ├── configuration_utils.py │ │ ├── continuous_batching/ │ │ │ ├── __init__.py │ │ │ ├── cache.py │ │ │ ├── cache_manager.py │ │ │ ├── 
continuous_api.py │ │ │ ├── input_outputs.py │ │ │ ├── requests.py │ │ │ ├── scheduler.py │ │ │ └── utils.py │ │ ├── logits_process.py │ │ ├── stopping_criteria.py │ │ ├── streamers.py │ │ ├── utils.py │ │ └── watermarking.py │ ├── hf_argparser.py │ ├── hyperparameter_search.py │ ├── image_processing_backends.py │ ├── image_processing_base.py │ ├── image_processing_utils.py │ ├── image_transforms.py │ ├── image_utils.py │ ├── initialization.py │ ├── integrations/ │ │ ├── __init__.py │ │ ├── accelerate.py │ │ ├── aqlm.py │ │ ├── awq.py │ │ ├── bitnet.py │ │ ├── bitsandbytes.py │ │ ├── deepspeed.py │ │ ├── eager_paged.py │ │ ├── eetq.py │ │ ├── executorch.py │ │ ├── fbgemm_fp8.py │ │ ├── finegrained_fp8.py │ │ ├── flash_attention.py │ │ ├── flash_paged.py │ │ ├── flex_attention.py │ │ ├── fouroversix.py │ │ ├── fp_quant.py │ │ ├── fsdp.py │ │ ├── ggml.py │ │ ├── higgs.py │ │ ├── hqq.py │ │ ├── hub_kernels.py │ │ ├── integration_utils.py │ │ ├── liger.py │ │ ├── metal_quantization.py │ │ ├── mistral.py │ │ ├── moe.py │ │ ├── mxfp4.py │ │ ├── neftune.py │ │ ├── npu_flash_attention.py │ │ ├── peft.py │ │ ├── quanto.py │ │ ├── quark.py │ │ ├── sdpa_attention.py │ │ ├── sdpa_paged.py │ │ ├── sinq.py │ │ ├── spqr.py │ │ ├── tensor_parallel.py │ │ ├── tiktoken.py │ │ ├── torchao.py │ │ ├── tpu.py │ │ └── vptq.py │ ├── loss/ │ │ ├── __init__.py │ │ ├── loss_d_fine.py │ │ ├── loss_deformable_detr.py │ │ ├── loss_for_object_detection.py │ │ ├── loss_grounding_dino.py │ │ ├── loss_lw_detr.py │ │ ├── loss_rt_detr.py │ │ └── loss_utils.py │ ├── masking_utils.py │ ├── model_debugging_utils.py │ ├── modelcard.py │ ├── modeling_attn_mask_utils.py │ ├── modeling_flash_attention_utils.py │ ├── modeling_gguf_pytorch_utils.py │ ├── modeling_layers.py │ ├── modeling_outputs.py │ ├── modeling_rope_utils.py │ ├── modeling_utils.py │ ├── models/ │ │ ├── __init__.py │ │ ├── afmoe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_afmoe.py │ │ │ ├── modeling_afmoe.py │ │ │ └── modular_afmoe.py │ 
│ ├── aimv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_aimv2.py │ │ │ ├── convert_aimv2_original_pytorch_to_hf.py │ │ │ ├── modeling_aimv2.py │ │ │ └── modular_aimv2.py │ │ ├── albert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_albert.py │ │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_albert.py │ │ │ └── tokenization_albert.py │ │ ├── align/ │ │ │ ├── __init__.py │ │ │ ├── configuration_align.py │ │ │ ├── convert_align_tf_to_hf.py │ │ │ ├── modeling_align.py │ │ │ └── processing_align.py │ │ ├── altclip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_altclip.py │ │ │ ├── modeling_altclip.py │ │ │ └── processing_altclip.py │ │ ├── apertus/ │ │ │ ├── __init__.py │ │ │ ├── configuration_apertus.py │ │ │ ├── modeling_apertus.py │ │ │ └── modular_apertus.py │ │ ├── arcee/ │ │ │ ├── __init__.py │ │ │ ├── configuration_arcee.py │ │ │ ├── modeling_arcee.py │ │ │ └── modular_arcee.py │ │ ├── aria/ │ │ │ ├── __init__.py │ │ │ ├── configuration_aria.py │ │ │ ├── convert_aria_weights_to_hf.py │ │ │ ├── image_processing_aria.py │ │ │ ├── image_processing_pil_aria.py │ │ │ ├── modeling_aria.py │ │ │ ├── modular_aria.py │ │ │ └── processing_aria.py │ │ ├── audio_spectrogram_transformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_audio_spectrogram_transformer.py │ │ │ ├── convert_audio_spectrogram_transformer_original_to_pytorch.py │ │ │ ├── feature_extraction_audio_spectrogram_transformer.py │ │ │ └── modeling_audio_spectrogram_transformer.py │ │ ├── audioflamingo3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_audioflamingo3.py │ │ │ ├── convert_audioflamingo3_to_hf.py │ │ │ ├── modeling_audioflamingo3.py │ │ │ ├── modular_audioflamingo3.py │ │ │ └── processing_audioflamingo3.py │ │ ├── auto/ │ │ │ ├── __init__.py │ │ │ ├── auto_factory.py │ │ │ ├── configuration_auto.py │ │ │ ├── feature_extraction_auto.py │ │ │ ├── image_processing_auto.py │ │ │ ├── modeling_auto.py │ │ │ ├── processing_auto.py │ │ │ ├── tokenization_auto.py │ │ │ └── 
video_processing_auto.py │ │ ├── autoformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_autoformer.py │ │ │ └── modeling_autoformer.py │ │ ├── aya_vision/ │ │ │ ├── __init__.py │ │ │ ├── configuration_aya_vision.py │ │ │ ├── modeling_aya_vision.py │ │ │ ├── modular_aya_vision.py │ │ │ └── processing_aya_vision.py │ │ ├── bamba/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bamba.py │ │ │ ├── convert_mamba_ssm_checkpoint.py │ │ │ ├── modeling_bamba.py │ │ │ └── modular_bamba.py │ │ ├── bark/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bark.py │ │ │ ├── convert_suno_to_hf.py │ │ │ ├── generation_configuration_bark.py │ │ │ ├── modeling_bark.py │ │ │ └── processing_bark.py │ │ ├── bart/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bart.py │ │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_bart.py │ │ │ └── tokenization_bart.py │ │ ├── barthez/ │ │ │ ├── __init__.py │ │ │ └── tokenization_barthez.py │ │ ├── bartpho/ │ │ │ ├── __init__.py │ │ │ └── tokenization_bartpho.py │ │ ├── beit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_beit.py │ │ │ ├── convert_beit_unilm_to_pytorch.py │ │ │ ├── image_processing_beit.py │ │ │ ├── image_processing_pil_beit.py │ │ │ └── modeling_beit.py │ │ ├── bert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bert.py │ │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py │ │ │ ├── modeling_bert.py │ │ │ ├── tokenization_bert.py │ │ │ └── tokenization_bert_legacy.py │ │ ├── bert_generation/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bert_generation.py │ │ │ ├── modeling_bert_generation.py │ │ │ └── tokenization_bert_generation.py │ │ ├── bert_japanese/ │ │ │ ├── __init__.py │ │ │ └── tokenization_bert_japanese.py │ │ ├── bertweet/ │ │ │ ├── __init__.py │ │ │ └── tokenization_bertweet.py │ │ ├── big_bird/ │ │ │ ├── __init__.py │ │ │ ├── configuration_big_bird.py 
│ │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_big_bird.py │ │ │ └── tokenization_big_bird.py │ │ ├── bigbird_pegasus/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bigbird_pegasus.py │ │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py │ │ │ └── modeling_bigbird_pegasus.py │ │ ├── biogpt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_biogpt.py │ │ │ ├── convert_biogpt_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_biogpt.py │ │ │ ├── modular_biogpt.py │ │ │ └── tokenization_biogpt.py │ │ ├── bit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bit.py │ │ │ ├── convert_bit_to_pytorch.py │ │ │ ├── image_processing_bit.py │ │ │ ├── image_processing_pil_bit.py │ │ │ └── modeling_bit.py │ │ ├── bitnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bitnet.py │ │ │ ├── modeling_bitnet.py │ │ │ └── modular_bitnet.py │ │ ├── blenderbot/ │ │ │ ├── __init__.py │ │ │ ├── configuration_blenderbot.py │ │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_blenderbot.py │ │ │ └── tokenization_blenderbot.py │ │ ├── blenderbot_small/ │ │ │ ├── __init__.py │ │ │ ├── configuration_blenderbot_small.py │ │ │ ├── modeling_blenderbot_small.py │ │ │ └── tokenization_blenderbot_small.py │ │ ├── blip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_blip.py │ │ │ ├── convert_blip_original_pytorch_to_hf.py │ │ │ ├── image_processing_blip.py │ │ │ ├── image_processing_pil_blip.py │ │ │ ├── modeling_blip.py │ │ │ ├── modeling_blip_text.py │ │ │ └── processing_blip.py │ │ ├── blip_2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_blip_2.py │ │ │ ├── convert_blip_2_original_to_pytorch.py │ │ │ ├── modeling_blip_2.py │ │ │ └── processing_blip_2.py │ │ ├── bloom/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bloom.py │ │ │ ├── convert_bloom_original_checkpoint_to_pytorch.py │ │ │ └── modeling_bloom.py │ │ ├── blt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_blt.py │ │ │ ├── convert_blt_weights_to_hf.py │ │ │ ├── 
modeling_blt.py │ │ │ └── modular_blt.py │ │ ├── bridgetower/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bridgetower.py │ │ │ ├── image_processing_bridgetower.py │ │ │ ├── image_processing_pil_bridgetower.py │ │ │ ├── modeling_bridgetower.py │ │ │ └── processing_bridgetower.py │ │ ├── bros/ │ │ │ ├── __init__.py │ │ │ ├── configuration_bros.py │ │ │ ├── convert_bros_to_pytorch.py │ │ │ ├── modeling_bros.py │ │ │ └── processing_bros.py │ │ ├── byt5/ │ │ │ ├── __init__.py │ │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py │ │ │ └── tokenization_byt5.py │ │ ├── camembert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_camembert.py │ │ │ ├── modeling_camembert.py │ │ │ ├── modular_camembert.py │ │ │ └── tokenization_camembert.py │ │ ├── canine/ │ │ │ ├── __init__.py │ │ │ ├── configuration_canine.py │ │ │ ├── convert_canine_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_canine.py │ │ │ └── tokenization_canine.py │ │ ├── chameleon/ │ │ │ ├── __init__.py │ │ │ ├── configuration_chameleon.py │ │ │ ├── convert_chameleon_weights_to_hf.py │ │ │ ├── image_processing_chameleon.py │ │ │ ├── image_processing_pil_chameleon.py │ │ │ ├── modeling_chameleon.py │ │ │ └── processing_chameleon.py │ │ ├── chinese_clip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_chinese_clip.py │ │ │ ├── convert_chinese_clip_original_pytorch_to_hf.py │ │ │ ├── image_processing_chinese_clip.py │ │ │ ├── image_processing_chinese_pil_clip.py │ │ │ ├── modeling_chinese_clip.py │ │ │ └── processing_chinese_clip.py │ │ ├── chmv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_chmv2.py │ │ │ ├── convert_chmv2_to_hf.py │ │ │ ├── image_processing_chmv2.py │ │ │ ├── modeling_chmv2.py │ │ │ └── modular_chmv2.py │ │ ├── clap/ │ │ │ ├── __init__.py │ │ │ ├── configuration_clap.py │ │ │ ├── convert_clap_original_pytorch_to_hf.py │ │ │ ├── feature_extraction_clap.py │ │ │ ├── modeling_clap.py │ │ │ └── processing_clap.py │ │ ├── clip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_clip.py │ │ │ ├── 
convert_clip_original_pytorch_to_hf.py │ │ │ ├── image_processing_clip.py │ │ │ ├── image_processing_pil_clip.py │ │ │ ├── modeling_clip.py │ │ │ ├── processing_clip.py │ │ │ └── tokenization_clip.py │ │ ├── clipseg/ │ │ │ ├── __init__.py │ │ │ ├── configuration_clipseg.py │ │ │ ├── convert_clipseg_original_pytorch_to_hf.py │ │ │ ├── modeling_clipseg.py │ │ │ └── processing_clipseg.py │ │ ├── clvp/ │ │ │ ├── __init__.py │ │ │ ├── configuration_clvp.py │ │ │ ├── convert_clvp_to_hf.py │ │ │ ├── feature_extraction_clvp.py │ │ │ ├── modeling_clvp.py │ │ │ ├── number_normalizer.py │ │ │ ├── processing_clvp.py │ │ │ └── tokenization_clvp.py │ │ ├── code_llama/ │ │ │ ├── __init__.py │ │ │ └── tokenization_code_llama.py │ │ ├── codegen/ │ │ │ ├── __init__.py │ │ │ ├── configuration_codegen.py │ │ │ ├── modeling_codegen.py │ │ │ └── tokenization_codegen.py │ │ ├── cohere/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cohere.py │ │ │ ├── modeling_cohere.py │ │ │ ├── modular_cohere.py │ │ │ └── tokenization_cohere.py │ │ ├── cohere2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cohere2.py │ │ │ ├── modeling_cohere2.py │ │ │ └── modular_cohere2.py │ │ ├── cohere2_vision/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cohere2_vision.py │ │ │ ├── image_processing_cohere2_vision.py │ │ │ ├── modeling_cohere2_vision.py │ │ │ ├── modular_cohere2_vision.py │ │ │ └── processing_cohere2_vision.py │ │ ├── cohere_asr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cohere_asr.py │ │ │ ├── feature_extraction_cohere_asr.py │ │ │ ├── modeling_cohere_asr.py │ │ │ ├── modular_cohere_asr.py │ │ │ └── processing_cohere_asr.py │ │ ├── colmodernvbert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_colmodernvbert.py │ │ │ ├── modeling_colmodernvbert.py │ │ │ ├── modular_colmodernvbert.py │ │ │ └── processing_colmodernvbert.py │ │ ├── colpali/ │ │ │ ├── __init__.py │ │ │ ├── configuration_colpali.py │ │ │ ├── convert_colpali_weights_to_hf.py │ │ │ ├── modeling_colpali.py │ │ │ ├── modular_colpali.py 
│ │ │ └── processing_colpali.py │ │ ├── colqwen2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_colqwen2.py │ │ │ ├── convert_colqwen2_weights_to_hf.py │ │ │ ├── modeling_colqwen2.py │ │ │ ├── modular_colqwen2.py │ │ │ └── processing_colqwen2.py │ │ ├── conditional_detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_conditional_detr.py │ │ │ ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── image_processing_conditional_detr.py │ │ │ ├── image_processing_pil_conditional_detr.py │ │ │ ├── modeling_conditional_detr.py │ │ │ └── modular_conditional_detr.py │ │ ├── convbert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_convbert.py │ │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch.py │ │ │ ├── modeling_convbert.py │ │ │ └── tokenization_convbert.py │ │ ├── convnext/ │ │ │ ├── __init__.py │ │ │ ├── configuration_convnext.py │ │ │ ├── convert_convnext_to_pytorch.py │ │ │ ├── image_processing_convnext.py │ │ │ ├── image_processing_pil_convnext.py │ │ │ └── modeling_convnext.py │ │ ├── convnextv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_convnextv2.py │ │ │ ├── convert_convnextv2_to_pytorch.py │ │ │ └── modeling_convnextv2.py │ │ ├── cpm/ │ │ │ ├── __init__.py │ │ │ ├── tokenization_cpm.py │ │ │ └── tokenization_cpm_fast.py │ │ ├── cpmant/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cpmant.py │ │ │ ├── modeling_cpmant.py │ │ │ └── tokenization_cpmant.py │ │ ├── csm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_csm.py │ │ │ ├── convert_csm.py │ │ │ ├── generation_csm.py │ │ │ ├── modeling_csm.py │ │ │ ├── modular_csm.py │ │ │ └── processing_csm.py │ │ ├── ctrl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ctrl.py │ │ │ ├── modeling_ctrl.py │ │ │ └── tokenization_ctrl.py │ │ ├── cvt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cvt.py │ │ │ ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_cvt.py │ │ ├── cwm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_cwm.py │ │ │ ├── modeling_cwm.py │ │ │ └── 
modular_cwm.py │ │ ├── d_fine/ │ │ │ ├── __init__.py │ │ │ ├── configuration_d_fine.py │ │ │ ├── convert_d_fine_original_pytorch_checkpoint_to_hf.py │ │ │ ├── modeling_d_fine.py │ │ │ └── modular_d_fine.py │ │ ├── dab_detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dab_detr.py │ │ │ ├── convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_dab_detr.py │ │ ├── dac/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dac.py │ │ │ ├── convert_dac_checkpoint.py │ │ │ ├── feature_extraction_dac.py │ │ │ └── modeling_dac.py │ │ ├── data2vec/ │ │ │ ├── __init__.py │ │ │ ├── configuration_data2vec_audio.py │ │ │ ├── configuration_data2vec_text.py │ │ │ ├── configuration_data2vec_vision.py │ │ │ ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_data2vec_audio.py │ │ │ ├── modeling_data2vec_text.py │ │ │ ├── modeling_data2vec_vision.py │ │ │ ├── modular_data2vec_audio.py │ │ │ └── modular_data2vec_text.py │ │ ├── dbrx/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dbrx.py │ │ │ ├── modeling_dbrx.py │ │ │ └── modular_dbrx.py │ │ ├── deberta/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deberta.py │ │ │ ├── modeling_deberta.py │ │ │ └── tokenization_deberta.py │ │ ├── deberta_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deberta_v2.py │ │ │ ├── modeling_deberta_v2.py │ │ │ └── tokenization_deberta_v2.py │ │ ├── decision_transformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_decision_transformer.py │ │ │ └── modeling_decision_transformer.py │ │ ├── deepseek_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deepseek_v2.py │ │ │ ├── modeling_deepseek_v2.py │ │ │ └── modular_deepseek_v2.py │ │ ├── deepseek_v3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deepseek_v3.py │ │ │ ├── modeling_deepseek_v3.py │ │ │ └── modular_deepseek_v3.py │ │ ├── deepseek_vl/ │ │ │ ├── 
__init__.py │ │ │ ├── configuration_deepseek_vl.py │ │ │ ├── convert_deepseek_vl_weights_to_hf.py │ │ │ ├── image_processing_deepseek_vl.py │ │ │ ├── image_processing_pil_deepseek_vl.py │ │ │ ├── modeling_deepseek_vl.py │ │ │ ├── modular_deepseek_vl.py │ │ │ └── processing_deepseek_vl.py │ │ ├── deepseek_vl_hybrid/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deepseek_vl_hybrid.py │ │ │ ├── convert_deepseek_vl_hybrid_weights_to_hf.py │ │ │ ├── image_processing_deepseek_vl_hybrid.py │ │ │ ├── image_processing_pil_deepseek_vl_hybrid.py │ │ │ ├── modeling_deepseek_vl_hybrid.py │ │ │ ├── modular_deepseek_vl_hybrid.py │ │ │ └── processing_deepseek_vl_hybrid.py │ │ ├── deformable_detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deformable_detr.py │ │ │ ├── convert_deformable_detr_to_pytorch.py │ │ │ ├── image_processing_deformable_detr.py │ │ │ ├── image_processing_pil_deformable_detr.py │ │ │ ├── modeling_deformable_detr.py │ │ │ └── modular_deformable_detr.py │ │ ├── deit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_deit.py │ │ │ ├── convert_deit_timm_to_pytorch.py │ │ │ ├── image_processing_deit.py │ │ │ ├── image_processing_pil_deit.py │ │ │ └── modeling_deit.py │ │ ├── deprecated/ │ │ │ └── __init__.py │ │ ├── depth_anything/ │ │ │ ├── __init__.py │ │ │ ├── configuration_depth_anything.py │ │ │ ├── convert_depth_anything_to_hf.py │ │ │ ├── convert_distill_any_depth_to_hf.py │ │ │ └── modeling_depth_anything.py │ │ ├── depth_pro/ │ │ │ ├── __init__.py │ │ │ ├── configuration_depth_pro.py │ │ │ ├── convert_depth_pro_weights_to_hf.py │ │ │ ├── image_processing_depth_pro.py │ │ │ └── modeling_depth_pro.py │ │ ├── detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_detr.py │ │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_detr_to_pytorch.py │ │ │ ├── image_processing_detr.py │ │ │ ├── image_processing_pil_detr.py │ │ │ └── modeling_detr.py │ │ ├── dia/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dia.py │ │ │ ├── 
convert_dia_to_hf.py │ │ │ ├── feature_extraction_dia.py │ │ │ ├── generation_dia.py │ │ │ ├── modeling_dia.py │ │ │ ├── modular_dia.py │ │ │ ├── processing_dia.py │ │ │ └── tokenization_dia.py │ │ ├── dialogpt/ │ │ │ ├── __init__.py │ │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── diffllama/ │ │ │ ├── __init__.py │ │ │ ├── configuration_diffllama.py │ │ │ ├── modeling_diffllama.py │ │ │ └── modular_diffllama.py │ │ ├── dinat/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dinat.py │ │ │ └── modeling_dinat.py │ │ ├── dinov2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dinov2.py │ │ │ ├── convert_dinov2_to_hf.py │ │ │ └── modeling_dinov2.py │ │ ├── dinov2_with_registers/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dinov2_with_registers.py │ │ │ ├── convert_dinov2_with_registers_to_hf.py │ │ │ ├── modeling_dinov2_with_registers.py │ │ │ └── modular_dinov2_with_registers.py │ │ ├── dinov3_convnext/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dinov3_convnext.py │ │ │ ├── convert_dinov3_convnext_to_hf.py │ │ │ └── modeling_dinov3_convnext.py │ │ ├── dinov3_vit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dinov3_vit.py │ │ │ ├── convert_dinov3_vit_to_hf.py │ │ │ ├── image_processing_dinov3_vit.py │ │ │ ├── modeling_dinov3_vit.py │ │ │ └── modular_dinov3_vit.py │ │ ├── distilbert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_distilbert.py │ │ │ ├── modeling_distilbert.py │ │ │ └── tokenization_distilbert.py │ │ ├── dit/ │ │ │ ├── __init__.py │ │ │ └── convert_dit_unilm_to_pytorch.py │ │ ├── doge/ │ │ │ ├── __init__.py │ │ │ ├── configuration_doge.py │ │ │ ├── convert_doge_weights_to_hf.py │ │ │ ├── modeling_doge.py │ │ │ └── modular_doge.py │ │ ├── donut/ │ │ │ ├── __init__.py │ │ │ ├── configuration_donut_swin.py │ │ │ ├── convert_donut_to_pytorch.py │ │ │ ├── image_processing_donut.py │ │ │ ├── image_processing_pil_donut.py │ │ │ ├── modeling_donut_swin.py │ │ │ └── processing_donut.py │ │ ├── dots1/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_dots1.py │ │ │ ├── modeling_dots1.py │ │ │ └── modular_dots1.py │ │ ├── dpr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dpr.py │ │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_dpr.py │ │ │ ├── tokenization_dpr.py │ │ │ └── tokenization_dpr_fast.py │ │ ├── dpt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_dpt.py │ │ │ ├── convert_dinov2_depth_to_hf.py │ │ │ ├── convert_dpt_beit_to_hf.py │ │ │ ├── convert_dpt_hybrid_to_pytorch.py │ │ │ ├── convert_dpt_swinv2_to_hf.py │ │ │ ├── convert_dpt_to_pytorch.py │ │ │ ├── image_processing_dpt.py │ │ │ ├── image_processing_pil_dpt.py │ │ │ ├── modeling_dpt.py │ │ │ └── modular_dpt.py │ │ ├── edgetam/ │ │ │ ├── __init__.py │ │ │ ├── configuration_edgetam.py │ │ │ ├── convert_edgetam_to_hf.py │ │ │ ├── modeling_edgetam.py │ │ │ └── modular_edgetam.py │ │ ├── edgetam_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_edgetam_video.py │ │ │ ├── convert_edgetam_video_to_hf.py │ │ │ ├── modeling_edgetam_video.py │ │ │ └── modular_edgetam_video.py │ │ ├── efficientloftr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_efficientloftr.py │ │ │ ├── convert_efficientloftr_to_hf.py │ │ │ ├── image_processing_efficientloftr.py │ │ │ ├── image_processing_pil_efficientloftr.py │ │ │ ├── modeling_efficientloftr.py │ │ │ └── modular_efficientloftr.py │ │ ├── efficientnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_efficientnet.py │ │ │ ├── convert_efficientnet_to_pytorch.py │ │ │ ├── image_processing_efficientnet.py │ │ │ ├── image_processing_pil_efficientnet.py │ │ │ └── modeling_efficientnet.py │ │ ├── electra/ │ │ │ ├── __init__.py │ │ │ ├── configuration_electra.py │ │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ │ └── modeling_electra.py │ │ ├── emu3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_emu3.py │ │ │ ├── convert_emu3_weights_to_hf.py │ │ │ ├── image_processing_emu3.py │ │ │ ├── modeling_emu3.py │ │ │ ├── modular_emu3.py │ │ │ └── processing_emu3.py │ │ ├── encodec/ 
│ │ │ ├── __init__.py │ │ │ ├── configuration_encodec.py │ │ │ ├── convert_encodec_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_encodec.py │ │ │ └── modeling_encodec.py │ │ ├── encoder_decoder/ │ │ │ ├── __init__.py │ │ │ ├── configuration_encoder_decoder.py │ │ │ └── modeling_encoder_decoder.py │ │ ├── eomt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_eomt.py │ │ │ ├── convert_eomt_to_hf.py │ │ │ ├── image_processing_eomt.py │ │ │ ├── image_processing_pil_eomt.py │ │ │ ├── modeling_eomt.py │ │ │ └── modular_eomt.py │ │ ├── eomt_dinov3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_eomt_dinov3.py │ │ │ ├── convert_eomt_dinov3_to_hf.py │ │ │ ├── modeling_eomt_dinov3.py │ │ │ └── modular_eomt_dinov3.py │ │ ├── ernie/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ernie.py │ │ │ ├── modeling_ernie.py │ │ │ └── modular_ernie.py │ │ ├── ernie4_5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ernie4_5.py │ │ │ ├── convert_ernie4_5_tokenizer.py │ │ │ ├── modeling_ernie4_5.py │ │ │ └── modular_ernie4_5.py │ │ ├── ernie4_5_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ernie4_5_moe.py │ │ │ ├── modeling_ernie4_5_moe.py │ │ │ └── modular_ernie4_5_moe.py │ │ ├── ernie4_5_vl_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ernie4_5_vl_moe.py │ │ │ ├── convert_ernie4_5_vl_moe_to_hf.py │ │ │ ├── image_processing_ernie4_5_vl_moe.py │ │ │ ├── image_processing_pil_ernie4_5_vl_moe.py │ │ │ ├── modeling_ernie4_5_vl_moe.py │ │ │ ├── modular_ernie4_5_vl_moe.py │ │ │ ├── processing_ernie4_5_vl_moe.py │ │ │ └── video_processing_ernie4_5_vl_moe.py │ │ ├── esm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_esm.py │ │ │ ├── convert_esm.py │ │ │ ├── modeling_esm.py │ │ │ ├── modeling_esmfold.py │ │ │ ├── openfold_utils/ │ │ │ │ ├── __init__.py │ │ │ │ ├── chunk_utils.py │ │ │ │ ├── data_transforms.py │ │ │ │ ├── feats.py │ │ │ │ ├── loss.py │ │ │ │ ├── protein.py │ │ │ │ ├── residue_constants.py │ │ │ │ ├── rigid_utils.py │ │ │ │ └── tensor_utils.py │ │ │ └── 
tokenization_esm.py │ │ ├── eurobert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_eurobert.py │ │ │ ├── modeling_eurobert.py │ │ │ └── modular_eurobert.py │ │ ├── evolla/ │ │ │ ├── __init__.py │ │ │ ├── configuration_evolla.py │ │ │ ├── modeling_evolla.py │ │ │ ├── modular_evolla.py │ │ │ └── processing_evolla.py │ │ ├── exaone4/ │ │ │ ├── __init__.py │ │ │ ├── configuration_exaone4.py │ │ │ ├── modeling_exaone4.py │ │ │ └── modular_exaone4.py │ │ ├── exaone_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_exaone_moe.py │ │ │ ├── modeling_exaone_moe.py │ │ │ └── modular_exaone_moe.py │ │ ├── falcon/ │ │ │ ├── __init__.py │ │ │ ├── configuration_falcon.py │ │ │ ├── convert_custom_code_checkpoint.py │ │ │ └── modeling_falcon.py │ │ ├── falcon_h1/ │ │ │ ├── __init__.py │ │ │ ├── configuration_falcon_h1.py │ │ │ ├── convert_mamba_ssm_checkpoint.py │ │ │ ├── modeling_falcon_h1.py │ │ │ └── modular_falcon_h1.py │ │ ├── falcon_mamba/ │ │ │ ├── __init__.py │ │ │ ├── configuration_falcon_mamba.py │ │ │ ├── modeling_falcon_mamba.py │ │ │ └── modular_falcon_mamba.py │ │ ├── fast_vlm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_fast_vlm.py │ │ │ ├── convert_fastvlm_weights_to_hf.py │ │ │ ├── modeling_fast_vlm.py │ │ │ └── modular_fast_vlm.py │ │ ├── fastspeech2_conformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_fastspeech2_conformer.py │ │ │ ├── convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_hifigan.py │ │ │ ├── convert_model_with_hifigan.py │ │ │ ├── modeling_fastspeech2_conformer.py │ │ │ └── tokenization_fastspeech2_conformer.py │ │ ├── flaubert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_flaubert.py │ │ │ ├── modeling_flaubert.py │ │ │ └── tokenization_flaubert.py │ │ ├── flava/ │ │ │ ├── __init__.py │ │ │ ├── configuration_flava.py │ │ │ ├── convert_dalle_to_flava_codebook.py │ │ │ ├── convert_flava_original_pytorch_to_hf.py │ │ │ ├── image_processing_flava.py │ │ │ ├── image_processing_pil_flava.py │ │ │ ├── 
modeling_flava.py │ │ │ └── processing_flava.py │ │ ├── flex_olmo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_flex_olmo.py │ │ │ ├── modeling_flex_olmo.py │ │ │ └── modular_flex_olmo.py │ │ ├── florence2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_florence2.py │ │ │ ├── convert_florence2_original_pytorch_to_hf.py │ │ │ ├── modeling_florence2.py │ │ │ ├── modular_florence2.py │ │ │ └── processing_florence2.py │ │ ├── fnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_fnet.py │ │ │ ├── convert_fnet_original_flax_checkpoint_to_pytorch.py │ │ │ ├── modeling_fnet.py │ │ │ └── tokenization_fnet.py │ │ ├── focalnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_focalnet.py │ │ │ ├── convert_focalnet_to_hf_format.py │ │ │ └── modeling_focalnet.py │ │ ├── fsmt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_fsmt.py │ │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_fsmt.py │ │ │ └── tokenization_fsmt.py │ │ ├── funnel/ │ │ │ ├── __init__.py │ │ │ ├── configuration_funnel.py │ │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_funnel.py │ │ │ └── tokenization_funnel.py │ │ ├── fuyu/ │ │ │ ├── __init__.py │ │ │ ├── configuration_fuyu.py │ │ │ ├── convert_fuyu_model_weights_to_hf.py │ │ │ ├── image_processing_fuyu.py │ │ │ ├── image_processing_pil_fuyu.py │ │ │ ├── modeling_fuyu.py │ │ │ └── processing_fuyu.py │ │ ├── gemma/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gemma.py │ │ │ ├── convert_gemma_weights_to_hf.py │ │ │ ├── modeling_gemma.py │ │ │ ├── modular_gemma.py │ │ │ └── tokenization_gemma.py │ │ ├── gemma2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gemma2.py │ │ │ ├── convert_gemma2_weights_to_hf.py │ │ │ ├── modeling_gemma2.py │ │ │ └── modular_gemma2.py │ │ ├── gemma3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gemma3.py │ │ │ ├── convert_gemma3_weights.py │ │ │ ├── image_processing_gemma3.py │ │ │ ├── image_processing_pil_gemma3.py │ │ │ ├── modeling_gemma3.py │ │ │ ├── modular_gemma3.py 
│ │ │ └── processing_gemma3.py │ │ ├── gemma3n/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gemma3n.py │ │ │ ├── convert_gemma3n_weights.py │ │ │ ├── feature_extraction_gemma3n.py │ │ │ ├── modeling_gemma3n.py │ │ │ ├── modular_gemma3n.py │ │ │ └── processing_gemma3n.py │ │ ├── git/ │ │ │ ├── __init__.py │ │ │ ├── configuration_git.py │ │ │ ├── convert_git_to_pytorch.py │ │ │ ├── modeling_git.py │ │ │ └── processing_git.py │ │ ├── glm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm.py │ │ │ ├── convert_glm_weights_to_hf.py │ │ │ ├── modeling_glm.py │ │ │ └── modular_glm.py │ │ ├── glm4/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm4.py │ │ │ ├── convert_glm4_weights_to_hf.py │ │ │ ├── modeling_glm4.py │ │ │ └── modular_glm4.py │ │ ├── glm46v/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm46v.py │ │ │ ├── image_processing_glm46v.py │ │ │ ├── image_processing_pil_glm46v.py │ │ │ ├── modeling_glm46v.py │ │ │ ├── modular_glm46v.py │ │ │ ├── processing_glm46v.py │ │ │ └── video_processing_glm46v.py │ │ ├── glm4_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm4_moe.py │ │ │ ├── modeling_glm4_moe.py │ │ │ └── modular_glm4_moe.py │ │ ├── glm4_moe_lite/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm4_moe_lite.py │ │ │ ├── modeling_glm4_moe_lite.py │ │ │ └── modular_glm4_moe_lite.py │ │ ├── glm4v/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm4v.py │ │ │ ├── convert_glm4v_mgt_weights_to_hf.py │ │ │ ├── image_processing_glm4v.py │ │ │ ├── image_processing_pil_glm4v.py │ │ │ ├── modeling_glm4v.py │ │ │ ├── modular_glm4v.py │ │ │ ├── processing_glm4v.py │ │ │ └── video_processing_glm4v.py │ │ ├── glm4v_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm4v_moe.py │ │ │ ├── convert_glm4v_moe_mgt_weights_to_hf.py │ │ │ ├── modeling_glm4v_moe.py │ │ │ └── modular_glm4v_moe.py │ │ ├── glm_image/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm_image.py │ │ │ ├── image_processing_glm_image.py │ │ │ ├── image_processing_pil_glm_image.py │ │ │ ├── 
modeling_glm_image.py │ │ │ ├── modular_glm_image.py │ │ │ └── processing_glm_image.py │ │ ├── glm_moe_dsa/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm_moe_dsa.py │ │ │ ├── modeling_glm_moe_dsa.py │ │ │ └── modular_glm_moe_dsa.py │ │ ├── glm_ocr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glm_ocr.py │ │ │ ├── modeling_glm_ocr.py │ │ │ └── modular_glm_ocr.py │ │ ├── glmasr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glmasr.py │ │ │ ├── convert_glmasr_weights_to_hf.py │ │ │ ├── modeling_glmasr.py │ │ │ ├── modular_glmasr.py │ │ │ └── processing_glmasr.py │ │ ├── glpn/ │ │ │ ├── __init__.py │ │ │ ├── configuration_glpn.py │ │ │ ├── convert_glpn_to_pytorch.py │ │ │ ├── image_processing_glpn.py │ │ │ ├── image_processing_pil_glpn.py │ │ │ └── modeling_glpn.py │ │ ├── got_ocr2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_got_ocr2.py │ │ │ ├── convert_got_ocr2_weights_to_hf.py │ │ │ ├── image_processing_got_ocr2.py │ │ │ ├── image_processing_pil_got_ocr2.py │ │ │ ├── modeling_got_ocr2.py │ │ │ ├── modular_got_ocr2.py │ │ │ └── processing_got_ocr2.py │ │ ├── gpt2/ │ │ │ ├── CONVERSION.md │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt2.py │ │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_gpt2.py │ │ │ └── tokenization_gpt2.py │ │ ├── gpt_bigcode/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_bigcode.py │ │ │ └── modeling_gpt_bigcode.py │ │ ├── gpt_neo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_neo.py │ │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py │ │ │ └── modeling_gpt_neo.py │ │ ├── gpt_neox/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_neox.py │ │ │ ├── modeling_gpt_neox.py │ │ │ ├── modular_gpt_neox.py │ │ │ └── tokenization_gpt_neox.py │ │ ├── gpt_neox_japanese/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_neox_japanese.py │ │ │ ├── modeling_gpt_neox_japanese.py │ │ │ └── tokenization_gpt_neox_japanese.py │ │ ├── gpt_oss/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_oss.py │ │ │ ├── 
convert_gpt_oss_weights_to_hf.py │ │ │ ├── modeling_gpt_oss.py │ │ │ └── modular_gpt_oss.py │ │ ├── gpt_sw3/ │ │ │ ├── __init__.py │ │ │ ├── convert_megatron_to_pytorch.py │ │ │ └── tokenization_gpt_sw3.py │ │ ├── gptj/ │ │ │ ├── __init__.py │ │ │ ├── configuration_gptj.py │ │ │ └── modeling_gptj.py │ │ ├── granite/ │ │ │ ├── __init__.py │ │ │ ├── configuration_granite.py │ │ │ ├── modeling_granite.py │ │ │ └── modular_granite.py │ │ ├── granite_speech/ │ │ │ ├── __init__.py │ │ │ ├── configuration_granite_speech.py │ │ │ ├── feature_extraction_granite_speech.py │ │ │ ├── modeling_granite_speech.py │ │ │ └── processing_granite_speech.py │ │ ├── granitemoe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_granitemoe.py │ │ │ ├── modeling_granitemoe.py │ │ │ └── modular_granitemoe.py │ │ ├── granitemoehybrid/ │ │ │ ├── __init__.py │ │ │ ├── configuration_granitemoehybrid.py │ │ │ ├── modeling_granitemoehybrid.py │ │ │ └── modular_granitemoehybrid.py │ │ ├── granitemoeshared/ │ │ │ ├── __init__.py │ │ │ ├── configuration_granitemoeshared.py │ │ │ ├── modeling_granitemoeshared.py │ │ │ └── modular_granitemoeshared.py │ │ ├── grounding_dino/ │ │ │ ├── __init__.py │ │ │ ├── configuration_grounding_dino.py │ │ │ ├── convert_grounding_dino_to_hf.py │ │ │ ├── image_processing_grounding_dino.py │ │ │ ├── image_processing_pil_grounding_dino.py │ │ │ ├── modeling_grounding_dino.py │ │ │ ├── modular_grounding_dino.py │ │ │ └── processing_grounding_dino.py │ │ ├── groupvit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_groupvit.py │ │ │ ├── convert_groupvit_nvlab_to_hf.py │ │ │ └── modeling_groupvit.py │ │ ├── helium/ │ │ │ ├── __init__.py │ │ │ ├── configuration_helium.py │ │ │ ├── modeling_helium.py │ │ │ └── modular_helium.py │ │ ├── herbert/ │ │ │ ├── __init__.py │ │ │ └── tokenization_herbert.py │ │ ├── hgnet_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_hgnet_v2.py │ │ │ ├── modeling_hgnet_v2.py │ │ │ └── modular_hgnet_v2.py │ │ ├── hiera/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_hiera.py │ │ │ ├── convert_hiera_to_hf.py │ │ │ └── modeling_hiera.py │ │ ├── higgs_audio_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_higgs_audio_v2.py │ │ │ ├── convert_higgs_audio_v2_to_hf.py │ │ │ ├── generation_higgs_audio_v2.py │ │ │ ├── modeling_higgs_audio_v2.py │ │ │ ├── modular_higgs_audio_v2.py │ │ │ └── processing_higgs_audio_v2.py │ │ ├── higgs_audio_v2_tokenizer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_higgs_audio_v2_tokenizer.py │ │ │ ├── convert_higgs_audio_v2_tokenizer_to_hf.py │ │ │ ├── modeling_higgs_audio_v2_tokenizer.py │ │ │ └── modular_higgs_audio_v2_tokenizer.py │ │ ├── hubert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_hubert.py │ │ │ ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── modeling_hubert.py │ │ │ └── modular_hubert.py │ │ ├── hunyuan_v1_dense/ │ │ │ ├── __init__.py │ │ │ ├── configuration_hunyuan_v1_dense.py │ │ │ ├── modeling_hunyuan_v1_dense.py │ │ │ └── modular_hunyuan_v1_dense.py │ │ ├── hunyuan_v1_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_hunyuan_v1_moe.py │ │ │ ├── modeling_hunyuan_v1_moe.py │ │ │ └── modular_hunyuan_v1_moe.py │ │ ├── ibert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ibert.py │ │ │ ├── modeling_ibert.py │ │ │ └── quant_modules.py │ │ ├── idefics/ │ │ │ ├── __init__.py │ │ │ ├── configuration_idefics.py │ │ │ ├── image_processing_idefics.py │ │ │ ├── image_processing_pil_idefics.py │ │ │ ├── modeling_idefics.py │ │ │ ├── perceiver.py │ │ │ ├── processing_idefics.py │ │ │ └── vision.py │ │ ├── idefics2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_idefics2.py │ │ │ ├── convert_idefics2_weights_to_hf.py │ │ │ ├── image_processing_idefics2.py │ │ │ ├── image_processing_pil_idefics2.py │ │ │ ├── modeling_idefics2.py │ │ │ └── processing_idefics2.py │ │ ├── idefics3/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_idefics3.py │ │ │ ├── convert_idefics3_weights_to_hf.py │ │ │ ├── image_processing_idefics3.py │ │ │ ├── image_processing_pil_idefics3.py │ │ │ ├── modeling_idefics3.py │ │ │ └── processing_idefics3.py │ │ ├── ijepa/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ijepa.py │ │ │ ├── convert_ijepa_to_hf.py │ │ │ ├── modeling_ijepa.py │ │ │ └── modular_ijepa.py │ │ ├── imagegpt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_imagegpt.py │ │ │ ├── convert_imagegpt_original_tf2_to_pytorch.py │ │ │ ├── image_processing_imagegpt.py │ │ │ ├── image_processing_pil_imagegpt.py │ │ │ └── modeling_imagegpt.py │ │ ├── informer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_informer.py │ │ │ ├── modeling_informer.py │ │ │ └── modular_informer.py │ │ ├── instructblip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_instructblip.py │ │ │ ├── convert_instructblip_original_to_pytorch.py │ │ │ ├── modeling_instructblip.py │ │ │ └── processing_instructblip.py │ │ ├── instructblipvideo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_instructblipvideo.py │ │ │ ├── convert_instructblipvideo_original_to_pytorch.py │ │ │ ├── modeling_instructblipvideo.py │ │ │ ├── modular_instructblipvideo.py │ │ │ ├── processing_instructblipvideo.py │ │ │ └── video_processing_instructblipvideo.py │ │ ├── internvl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_internvl.py │ │ │ ├── convert_internvl_weights_to_hf.py │ │ │ ├── modeling_internvl.py │ │ │ ├── modular_internvl.py │ │ │ ├── processing_internvl.py │ │ │ └── video_processing_internvl.py │ │ ├── jais2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_jais2.py │ │ │ ├── modeling_jais2.py │ │ │ └── modular_jais2.py │ │ ├── jamba/ │ │ │ ├── __init__.py │ │ │ ├── configuration_jamba.py │ │ │ ├── modeling_jamba.py │ │ │ └── modular_jamba.py │ │ ├── janus/ │ │ │ ├── __init__.py │ │ │ ├── configuration_janus.py │ │ │ ├── convert_janus_weights_to_hf.py │ │ │ ├── image_processing_janus.py │ │ │ ├── image_processing_pil_janus.py │ │ │ ├── 
modeling_janus.py │ │ │ ├── modular_janus.py │ │ │ └── processing_janus.py │ │ ├── jetmoe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_jetmoe.py │ │ │ ├── modeling_jetmoe.py │ │ │ └── modular_jetmoe.py │ │ ├── jina_embeddings_v3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_jina_embeddings_v3.py │ │ │ ├── modeling_jina_embeddings_v3.py │ │ │ └── modular_jina_embeddings_v3.py │ │ ├── kosmos2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_kosmos2.py │ │ │ ├── convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_kosmos2.py │ │ │ └── processing_kosmos2.py │ │ ├── kosmos2_5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_kosmos2_5.py │ │ │ ├── convert_kosmos2_5.py │ │ │ ├── image_processing_kosmos2_5.py │ │ │ ├── image_processing_pil_kosmos2_5.py │ │ │ ├── modeling_kosmos2_5.py │ │ │ └── processing_kosmos2_5.py │ │ ├── kyutai_speech_to_text/ │ │ │ ├── __init__.py │ │ │ ├── configuration_kyutai_speech_to_text.py │ │ │ ├── convert_kyutai_speech_to_text_to_hf.py │ │ │ ├── feature_extraction_kyutai_speech_to_text.py │ │ │ ├── modeling_kyutai_speech_to_text.py │ │ │ ├── modular_kyutai_speech_to_text.py │ │ │ └── processing_kyutai_speech_to_text.py │ │ ├── lasr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lasr.py │ │ │ ├── feature_extraction_lasr.py │ │ │ ├── modeling_lasr.py │ │ │ ├── modular_lasr.py │ │ │ ├── processing_lasr.py │ │ │ └── tokenization_lasr.py │ │ ├── layoutlm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutlm.py │ │ │ └── modeling_layoutlm.py │ │ ├── layoutlmv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutlmv2.py │ │ │ ├── image_processing_layoutlmv2.py │ │ │ ├── image_processing_pil_layoutlmv2.py │ │ │ ├── modeling_layoutlmv2.py │ │ │ ├── processing_layoutlmv2.py │ │ │ └── tokenization_layoutlmv2.py │ │ ├── layoutlmv3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutlmv3.py │ │ │ ├── image_processing_layoutlmv3.py │ │ │ ├── image_processing_pil_layoutlmv3.py │ │ │ ├── modeling_layoutlmv3.py │ │ │ ├── 
processing_layoutlmv3.py │ │ │ └── tokenization_layoutlmv3.py │ │ ├── layoutxlm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutxlm.py │ │ │ ├── modular_layoutxlm.py │ │ │ ├── processing_layoutxlm.py │ │ │ └── tokenization_layoutxlm.py │ │ ├── led/ │ │ │ ├── __init__.py │ │ │ ├── configuration_led.py │ │ │ └── modeling_led.py │ │ ├── levit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_levit.py │ │ │ ├── convert_levit_timm_to_pytorch.py │ │ │ ├── image_processing_levit.py │ │ │ ├── image_processing_pil_levit.py │ │ │ └── modeling_levit.py │ │ ├── lfm2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lfm2.py │ │ │ ├── modeling_lfm2.py │ │ │ └── modular_lfm2.py │ │ ├── lfm2_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lfm2_moe.py │ │ │ ├── modeling_lfm2_moe.py │ │ │ └── modular_lfm2_moe.py │ │ ├── lfm2_vl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lfm2_vl.py │ │ │ ├── image_processing_lfm2_vl.py │ │ │ ├── modeling_lfm2_vl.py │ │ │ ├── modular_lfm2_vl.py │ │ │ └── processing_lfm2_vl.py │ │ ├── lightglue/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lightglue.py │ │ │ ├── convert_lightglue_to_hf.py │ │ │ ├── image_processing_lightglue.py │ │ │ ├── image_processing_pil_lightglue.py │ │ │ ├── modeling_lightglue.py │ │ │ └── modular_lightglue.py │ │ ├── lighton_ocr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lighton_ocr.py │ │ │ ├── modeling_lighton_ocr.py │ │ │ ├── modular_lighton_ocr.py │ │ │ └── processing_lighton_ocr.py │ │ ├── lilt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lilt.py │ │ │ └── modeling_lilt.py │ │ ├── llama/ │ │ │ ├── __init__.py │ │ │ ├── configuration_llama.py │ │ │ ├── convert_llama_weights_to_hf.py │ │ │ ├── modeling_llama.py │ │ │ └── tokenization_llama.py │ │ ├── llama4/ │ │ │ ├── __init__.py │ │ │ ├── configuration_llama4.py │ │ │ ├── convert_llama4_weights_to_hf.py │ │ │ ├── image_processing_llama4.py │ │ │ ├── modeling_llama4.py │ │ │ └── processing_llama4.py │ │ ├── llava/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_llava.py │ │ │ ├── convert_llava_weights_to_hf.py │ │ │ ├── image_processing_llava.py │ │ │ ├── image_processing_pil_llava.py │ │ │ ├── modeling_llava.py │ │ │ └── processing_llava.py │ │ ├── llava_next/ │ │ │ ├── __init__.py │ │ │ ├── configuration_llava_next.py │ │ │ ├── convert_llava_next_weights_to_hf.py │ │ │ ├── image_processing_llava_next.py │ │ │ ├── image_processing_pil_llava_next.py │ │ │ ├── modeling_llava_next.py │ │ │ └── processing_llava_next.py │ │ ├── llava_next_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_llava_next_video.py │ │ │ ├── convert_llava_next_video_weights_to_hf.py │ │ │ ├── modeling_llava_next_video.py │ │ │ ├── modular_llava_next_video.py │ │ │ ├── processing_llava_next_video.py │ │ │ └── video_processing_llava_next_video.py │ │ ├── llava_onevision/ │ │ │ ├── __init__.py │ │ │ ├── configuration_llava_onevision.py │ │ │ ├── convert_llava_onevision_weights_to_hf.py │ │ │ ├── image_processing_llava_onevision.py │ │ │ ├── image_processing_pil_llava_onevision.py │ │ │ ├── modeling_llava_onevision.py │ │ │ ├── modular_llava_onevision.py │ │ │ ├── processing_llava_onevision.py │ │ │ └── video_processing_llava_onevision.py │ │ ├── longcat_flash/ │ │ │ ├── __init__.py │ │ │ ├── configuration_longcat_flash.py │ │ │ ├── modeling_longcat_flash.py │ │ │ └── modular_longcat_flash.py │ │ ├── longformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_longformer.py │ │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ │ └── modeling_longformer.py │ │ ├── longt5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_longt5.py │ │ │ └── modeling_longt5.py │ │ ├── luke/ │ │ │ ├── __init__.py │ │ │ ├── configuration_luke.py │ │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_luke.py │ │ │ └── tokenization_luke.py │ │ ├── lw_detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lw_detr.py │ │ │ ├── convert_lw_detr_to_hf.py │ │ │ ├── modeling_lw_detr.py │ │ │ └── modular_lw_detr.py │ │ ├── 
lxmert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_lxmert.py │ │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ │ └── modeling_lxmert.py │ │ ├── m2m_100/ │ │ │ ├── __init__.py │ │ │ ├── configuration_m2m_100.py │ │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_m2m_100.py │ │ │ └── tokenization_m2m_100.py │ │ ├── mamba/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mamba.py │ │ │ ├── convert_mamba_ssm_checkpoint_to_pytorch.py │ │ │ └── modeling_mamba.py │ │ ├── mamba2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mamba2.py │ │ │ ├── convert_mamba2_ssm_checkpoint_to_pytorch.py │ │ │ └── modeling_mamba2.py │ │ ├── marian/ │ │ │ ├── __init__.py │ │ │ ├── configuration_marian.py │ │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ │ ├── convert_marian_to_pytorch.py │ │ │ ├── modeling_marian.py │ │ │ └── tokenization_marian.py │ │ ├── markuplm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_markuplm.py │ │ │ ├── feature_extraction_markuplm.py │ │ │ ├── modeling_markuplm.py │ │ │ ├── processing_markuplm.py │ │ │ └── tokenization_markuplm.py │ │ ├── mask2former/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mask2former.py │ │ │ ├── convert_mask2former_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── image_processing_mask2former.py │ │ │ ├── image_processing_pil_mask2former.py │ │ │ ├── modeling_mask2former.py │ │ │ └── modular_mask2former.py │ │ ├── maskformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_maskformer.py │ │ │ ├── configuration_maskformer_swin.py │ │ │ ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_maskformer_resnet_to_pytorch.py │ │ │ ├── convert_maskformer_swin_to_pytorch.py │ │ │ ├── image_processing_maskformer.py │ │ │ ├── image_processing_pil_maskformer.py │ │ │ ├── modeling_maskformer.py │ │ │ ├── modeling_maskformer_swin.py │ │ │ └── modular_maskformer.py │ │ ├── mbart/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mbart.py │ │ │ ├── 
convert_mbart_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_mbart.py │ │ │ └── tokenization_mbart.py │ │ ├── mbart50/ │ │ │ ├── __init__.py │ │ │ └── tokenization_mbart50.py │ │ ├── megatron_bert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_megatron_bert.py │ │ │ ├── convert_megatron_bert_checkpoint.py │ │ │ └── modeling_megatron_bert.py │ │ ├── megatron_gpt2/ │ │ │ ├── __init__.py │ │ │ ├── checkpoint_reshaping_and_interoperability.py │ │ │ └── convert_megatron_gpt2_checkpoint.py │ │ ├── metaclip_2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_metaclip_2.py │ │ │ ├── convert_metaclip_2_to_hf.py │ │ │ ├── modeling_metaclip_2.py │ │ │ └── modular_metaclip_2.py │ │ ├── mgp_str/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mgp_str.py │ │ │ ├── modeling_mgp_str.py │ │ │ ├── processing_mgp_str.py │ │ │ └── tokenization_mgp_str.py │ │ ├── mimi/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mimi.py │ │ │ ├── convert_mimi_checkpoint_to_pytorch.py │ │ │ └── modeling_mimi.py │ │ ├── minimax/ │ │ │ ├── __init__.py │ │ │ ├── configuration_minimax.py │ │ │ ├── modeling_minimax.py │ │ │ └── modular_minimax.py │ │ ├── minimax_m2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_minimax_m2.py │ │ │ ├── modeling_minimax_m2.py │ │ │ └── modular_minimax_m2.py │ │ ├── ministral/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ministral.py │ │ │ ├── modeling_ministral.py │ │ │ └── modular_ministral.py │ │ ├── ministral3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ministral3.py │ │ │ ├── convert_ministral3_weights_to_hf.py │ │ │ ├── modeling_ministral3.py │ │ │ └── modular_ministral3.py │ │ ├── mistral/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mistral.py │ │ │ ├── convert_mistral_weights_to_hf.py │ │ │ ├── modeling_mistral.py │ │ │ └── modular_mistral.py │ │ ├── mistral3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mistral3.py │ │ │ ├── convert_mistral3_weights_to_hf.py │ │ │ ├── modeling_mistral3.py │ │ │ └── modular_mistral3.py │ │ ├── mistral4/ │ │ │ ├── __init__.py │ 
│ │ ├── configuration_mistral4.py │ │ │ ├── convert_mistral4_weight_to_hf.py │ │ │ ├── modeling_mistral4.py │ │ │ └── modular_mistral4.py │ │ ├── mixtral/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mixtral.py │ │ │ ├── convert_mixtral_weights_to_hf.py │ │ │ ├── modeling_mixtral.py │ │ │ └── modular_mixtral.py │ │ ├── mlcd/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mlcd.py │ │ │ ├── convert_mlcd_weights_to_hf.py │ │ │ ├── modeling_mlcd.py │ │ │ └── modular_mlcd.py │ │ ├── mllama/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mllama.py │ │ │ ├── convert_mllama_weights_to_hf.py │ │ │ ├── image_processing_mllama.py │ │ │ ├── image_processing_pil_mllama.py │ │ │ ├── modeling_mllama.py │ │ │ └── processing_mllama.py │ │ ├── mluke/ │ │ │ ├── __init__.py │ │ │ ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── tokenization_mluke.py │ │ ├── mm_grounding_dino/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mm_grounding_dino.py │ │ │ ├── convert_mm_grounding_dino_to_hf.py │ │ │ ├── modeling_mm_grounding_dino.py │ │ │ └── modular_mm_grounding_dino.py │ │ ├── mobilebert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilebert.py │ │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_mobilebert.py │ │ │ └── tokenization_mobilebert.py │ │ ├── mobilenet_v1/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilenet_v1.py │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── image_processing_mobilenet_pil_v1.py │ │ │ ├── image_processing_mobilenet_v1.py │ │ │ └── modeling_mobilenet_v1.py │ │ ├── mobilenet_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilenet_v2.py │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── image_processing_mobilenet_v2.py │ │ │ ├── image_processing_pil_mobilenet_v2.py │ │ │ └── modeling_mobilenet_v2.py │ │ ├── mobilevit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilevit.py │ │ │ ├── convert_mlcvnets_to_pytorch.py │ │ │ ├── image_processing_mobilevit.py │ │ │ ├── 
image_processing_pil_mobilevit.py │ │ │ └── modeling_mobilevit.py │ │ ├── mobilevitv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilevitv2.py │ │ │ ├── convert_mlcvnets_to_pytorch.py │ │ │ └── modeling_mobilevitv2.py │ │ ├── modernbert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_modernbert.py │ │ │ ├── modeling_modernbert.py │ │ │ └── modular_modernbert.py │ │ ├── modernbert_decoder/ │ │ │ ├── __init__.py │ │ │ ├── configuration_modernbert_decoder.py │ │ │ ├── modeling_modernbert_decoder.py │ │ │ └── modular_modernbert_decoder.py │ │ ├── modernvbert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_modernvbert.py │ │ │ ├── modeling_modernvbert.py │ │ │ └── modular_modernvbert.py │ │ ├── moonshine/ │ │ │ ├── __init__.py │ │ │ ├── configuration_moonshine.py │ │ │ ├── convert_usefulsensors_to_hf.py │ │ │ ├── modeling_moonshine.py │ │ │ └── modular_moonshine.py │ │ ├── moonshine_streaming/ │ │ │ ├── __init__.py │ │ │ ├── configuration_moonshine_streaming.py │ │ │ ├── modeling_moonshine_streaming.py │ │ │ ├── modular_moonshine_streaming.py │ │ │ └── processing_moonshine_streaming.py │ │ ├── moshi/ │ │ │ ├── __init__.py │ │ │ ├── configuration_moshi.py │ │ │ ├── convert_moshi_transformers.py │ │ │ └── modeling_moshi.py │ │ ├── mpnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mpnet.py │ │ │ ├── modeling_mpnet.py │ │ │ └── tokenization_mpnet.py │ │ ├── mpt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mpt.py │ │ │ └── modeling_mpt.py │ │ ├── mra/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mra.py │ │ │ ├── convert_mra_pytorch_to_pytorch.py │ │ │ └── modeling_mra.py │ │ ├── mt5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mt5.py │ │ │ └── modeling_mt5.py │ │ ├── musicflamingo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_musicflamingo.py │ │ │ ├── convert_musicflamingo_to_hf.py │ │ │ ├── modeling_musicflamingo.py │ │ │ ├── modular_musicflamingo.py │ │ │ └── processing_musicflamingo.py │ │ ├── musicgen/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_musicgen.py │ │ │ ├── convert_musicgen_transformers.py │ │ │ ├── modeling_musicgen.py │ │ │ └── processing_musicgen.py │ │ ├── musicgen_melody/ │ │ │ ├── __init__.py │ │ │ ├── configuration_musicgen_melody.py │ │ │ ├── convert_musicgen_melody_transformers.py │ │ │ ├── feature_extraction_musicgen_melody.py │ │ │ ├── modeling_musicgen_melody.py │ │ │ └── processing_musicgen_melody.py │ │ ├── mvp/ │ │ │ ├── __init__.py │ │ │ ├── configuration_mvp.py │ │ │ └── modeling_mvp.py │ │ ├── myt5/ │ │ │ ├── __init__.py │ │ │ ├── convert_myt5_original_tf_checkpoint_to_pytorch.py │ │ │ └── tokenization_myt5.py │ │ ├── nanochat/ │ │ │ ├── __init__.py │ │ │ ├── configuration_nanochat.py │ │ │ ├── convert_nanochat_checkpoints.py │ │ │ ├── modeling_nanochat.py │ │ │ └── modular_nanochat.py │ │ ├── nemotron/ │ │ │ ├── __init__.py │ │ │ ├── configuration_nemotron.py │ │ │ ├── convert_nemotron_nemo_to_hf.py │ │ │ └── modeling_nemotron.py │ │ ├── nemotron_h/ │ │ │ ├── __init__.py │ │ │ ├── configuration_nemotron_h.py │ │ │ ├── modeling_nemotron_h.py │ │ │ └── modular_nemotron_h.py │ │ ├── nllb/ │ │ │ ├── __init__.py │ │ │ └── tokenization_nllb.py │ │ ├── nllb_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_nllb_moe.py │ │ │ ├── convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py │ │ │ └── modeling_nllb_moe.py │ │ ├── nougat/ │ │ │ ├── __init__.py │ │ │ ├── convert_nougat_to_hf.py │ │ │ ├── image_processing_nougat.py │ │ │ ├── image_processing_pil_nougat.py │ │ │ ├── processing_nougat.py │ │ │ └── tokenization_nougat.py │ │ ├── nystromformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_nystromformer.py │ │ │ ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_nystromformer.py │ │ ├── olmo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_olmo.py │ │ │ ├── convert_olmo_weights_to_hf.py │ │ │ ├── modeling_olmo.py │ │ │ └── modular_olmo.py │ │ ├── olmo2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_olmo2.py │ │ │ ├── 
convert_olmo2_weights_to_hf.py │ │ │ ├── modeling_olmo2.py │ │ │ └── modular_olmo2.py │ │ ├── olmo3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_olmo3.py │ │ │ ├── convert_olmo3_weights_to_hf.py │ │ │ ├── modeling_olmo3.py │ │ │ └── modular_olmo3.py │ │ ├── olmo_hybrid/ │ │ │ ├── __init__.py │ │ │ ├── configuration_olmo_hybrid.py │ │ │ ├── convert_olmo_hybrid_weights_to_hf.py │ │ │ ├── modeling_olmo_hybrid.py │ │ │ └── modular_olmo_hybrid.py │ │ ├── olmoe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_olmoe.py │ │ │ ├── convert_olmoe_weights_to_hf.py │ │ │ ├── modeling_olmoe.py │ │ │ └── modular_olmoe.py │ │ ├── omdet_turbo/ │ │ │ ├── __init__.py │ │ │ ├── configuration_omdet_turbo.py │ │ │ ├── convert_omdet_turbo_to_hf.py │ │ │ ├── modeling_omdet_turbo.py │ │ │ └── processing_omdet_turbo.py │ │ ├── oneformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_oneformer.py │ │ │ ├── convert_to_hf_oneformer.py │ │ │ ├── image_processing_oneformer.py │ │ │ ├── image_processing_pil_oneformer.py │ │ │ ├── modeling_oneformer.py │ │ │ └── processing_oneformer.py │ │ ├── openai/ │ │ │ ├── __init__.py │ │ │ ├── configuration_openai.py │ │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_openai.py │ │ │ └── tokenization_openai.py │ │ ├── opt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_opt.py │ │ │ ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_opt.py │ │ ├── ovis2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_ovis2.py │ │ │ ├── convert_ovis2_weights_to_hf.py │ │ │ ├── image_processing_ovis2.py │ │ │ ├── image_processing_pil_ovis2.py │ │ │ ├── modeling_ovis2.py │ │ │ ├── modular_ovis2.py │ │ │ └── processing_ovis2.py │ │ ├── owlv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_owlv2.py │ │ │ ├── convert_owlv2_to_hf.py │ │ │ ├── image_processing_owlv2.py │ │ │ ├── image_processing_pil_owlv2.py │ │ │ ├── modeling_owlv2.py │ │ │ ├── modular_owlv2.py │ │ │ └── processing_owlv2.py │ │ ├── owlvit/ │ │ │ ├── __init__.py │ │ 
│ ├── configuration_owlvit.py │ │ │ ├── convert_owlvit_original_flax_to_hf.py │ │ │ ├── image_processing_owlvit.py │ │ │ ├── image_processing_pil_owlvit.py │ │ │ ├── modeling_owlvit.py │ │ │ └── processing_owlvit.py │ │ ├── paddleocr_vl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_paddleocr_vl.py │ │ │ ├── image_processing_paddleocr_vl.py │ │ │ ├── image_processing_pil_paddleocr_vl.py │ │ │ ├── modeling_paddleocr_vl.py │ │ │ ├── modular_paddleocr_vl.py │ │ │ └── processing_paddleocr_vl.py │ │ ├── paligemma/ │ │ │ ├── __init__.py │ │ │ ├── configuration_paligemma.py │ │ │ ├── convert_paligemma2_weights_to_hf.py │ │ │ ├── convert_paligemma_weights_to_hf.py │ │ │ ├── modeling_paligemma.py │ │ │ └── processing_paligemma.py │ │ ├── parakeet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_parakeet.py │ │ │ ├── convert_nemo_to_hf.py │ │ │ ├── feature_extraction_parakeet.py │ │ │ ├── modeling_parakeet.py │ │ │ ├── modular_parakeet.py │ │ │ ├── processing_parakeet.py │ │ │ └── tokenization_parakeet.py │ │ ├── patchtsmixer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_patchtsmixer.py │ │ │ └── modeling_patchtsmixer.py │ │ ├── patchtst/ │ │ │ ├── __init__.py │ │ │ ├── configuration_patchtst.py │ │ │ └── modeling_patchtst.py │ │ ├── pe_audio/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pe_audio.py │ │ │ ├── feature_extraction_pe_audio.py │ │ │ ├── modeling_pe_audio.py │ │ │ ├── modular_pe_audio.py │ │ │ └── processing_pe_audio.py │ │ ├── pe_audio_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pe_audio_video.py │ │ │ ├── convert_pe_audio_video_to_hf.py │ │ │ ├── modeling_pe_audio_video.py │ │ │ ├── modular_pe_audio_video.py │ │ │ └── processing_pe_audio_video.py │ │ ├── pe_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pe_video.py │ │ │ ├── modeling_pe_video.py │ │ │ ├── modular_pe_video.py │ │ │ ├── processing_pe_video.py │ │ │ └── video_processing_pe_video.py │ │ ├── pegasus/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pegasus.py │ │ │ ├── 
convert_pegasus_tf_to_pytorch.py │ │ │ ├── modeling_pegasus.py │ │ │ └── tokenization_pegasus.py │ │ ├── pegasus_x/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pegasus_x.py │ │ │ └── modeling_pegasus_x.py │ │ ├── perceiver/ │ │ │ ├── __init__.py │ │ │ ├── configuration_perceiver.py │ │ │ ├── convert_perceiver_haiku_to_pytorch.py │ │ │ ├── image_processing_perceiver.py │ │ │ ├── image_processing_pil_perceiver.py │ │ │ ├── modeling_perceiver.py │ │ │ └── tokenization_perceiver.py │ │ ├── perception_lm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_perception_lm.py │ │ │ ├── convert_perception_lm_weights_to_hf.py │ │ │ ├── image_processing_perception_lm.py │ │ │ ├── modeling_perception_lm.py │ │ │ ├── modular_perception_lm.py │ │ │ ├── processing_perception_lm.py │ │ │ └── video_processing_perception_lm.py │ │ ├── persimmon/ │ │ │ ├── __init__.py │ │ │ ├── configuration_persimmon.py │ │ │ ├── convert_persimmon_weights_to_hf.py │ │ │ └── modeling_persimmon.py │ │ ├── phi/ │ │ │ ├── __init__.py │ │ │ ├── configuration_phi.py │ │ │ ├── convert_phi_weights_to_hf.py │ │ │ ├── modeling_phi.py │ │ │ └── modular_phi.py │ │ ├── phi3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_phi3.py │ │ │ ├── modeling_phi3.py │ │ │ └── modular_phi3.py │ │ ├── phi4_multimodal/ │ │ │ ├── __init__.py │ │ │ ├── configuration_phi4_multimodal.py │ │ │ ├── convert_phi4_multimodal_weights_to_hf.py │ │ │ ├── feature_extraction_phi4_multimodal.py │ │ │ ├── image_processing_phi4_multimodal.py │ │ │ ├── modeling_phi4_multimodal.py │ │ │ ├── modular_phi4_multimodal.py │ │ │ └── processing_phi4_multimodal.py │ │ ├── phimoe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_phimoe.py │ │ │ ├── modeling_phimoe.py │ │ │ └── modular_phimoe.py │ │ ├── phobert/ │ │ │ ├── __init__.py │ │ │ └── tokenization_phobert.py │ │ ├── pi0/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pi0.py │ │ │ ├── image_processing_pi0.py │ │ │ ├── modeling_pi0.py │ │ │ ├── modular_pi0.py │ │ │ └── processing_pi0.py │ │ ├── pix2struct/ 
│ │ │ ├── __init__.py │ │ │ ├── configuration_pix2struct.py │ │ │ ├── convert_pix2struct_original_pytorch_to_hf.py │ │ │ ├── image_processing_pil_pix2struct.py │ │ │ ├── image_processing_pix2struct.py │ │ │ ├── modeling_pix2struct.py │ │ │ └── processing_pix2struct.py │ │ ├── pixio/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pixio.py │ │ │ ├── convert_pixio_to_pytorch.py │ │ │ ├── modeling_pixio.py │ │ │ └── modular_pixio.py │ │ ├── pixtral/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pixtral.py │ │ │ ├── convert_pixtral_weights_to_hf.py │ │ │ ├── image_processing_pil_pixtral.py │ │ │ ├── image_processing_pixtral.py │ │ │ ├── modeling_pixtral.py │ │ │ └── processing_pixtral.py │ │ ├── plbart/ │ │ │ ├── __init__.py │ │ │ ├── configuration_plbart.py │ │ │ ├── convert_plbart_original_checkpoint_to_torch.py │ │ │ ├── modeling_plbart.py │ │ │ ├── modular_plbart.py │ │ │ └── tokenization_plbart.py │ │ ├── poolformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_poolformer.py │ │ │ ├── convert_poolformer_original_to_pytorch.py │ │ │ ├── image_processing_pil_poolformer.py │ │ │ ├── image_processing_poolformer.py │ │ │ └── modeling_poolformer.py │ │ ├── pop2piano/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pop2piano.py │ │ │ ├── convert_pop2piano_weights_to_hf.py │ │ │ ├── feature_extraction_pop2piano.py │ │ │ ├── modeling_pop2piano.py │ │ │ ├── processing_pop2piano.py │ │ │ └── tokenization_pop2piano.py │ │ ├── pp_chart2table/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_chart2table.py │ │ │ ├── image_processing_pil_pp_chart2table.py │ │ │ ├── image_processing_pp_chart2table.py │ │ │ ├── modular_pp_chart2table.py │ │ │ └── processing_pp_chart2table.py │ │ ├── pp_doclayout_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_doclayout_v2.py │ │ │ ├── image_processing_pp_doclayout_v2.py │ │ │ ├── modeling_pp_doclayout_v2.py │ │ │ └── modular_pp_doclayout_v2.py │ │ ├── pp_doclayout_v3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_doclayout_v3.py │ │ │ ├── 
image_processing_pp_doclayout_v3.py │ │ │ ├── modeling_pp_doclayout_v3.py │ │ │ └── modular_pp_doclayout_v3.py │ │ ├── pp_lcnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_lcnet.py │ │ │ ├── image_processing_pp_lcnet.py │ │ │ ├── modeling_pp_lcnet.py │ │ │ └── modular_pp_lcnet.py │ │ ├── pp_lcnet_v3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_lcnet_v3.py │ │ │ ├── modeling_pp_lcnet_v3.py │ │ │ └── modular_pp_lcnet_v3.py │ │ ├── pp_ocrv5_mobile_det/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_ocrv5_mobile_det.py │ │ │ ├── modeling_pp_ocrv5_mobile_det.py │ │ │ └── modular_pp_ocrv5_mobile_det.py │ │ ├── pp_ocrv5_mobile_rec/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_ocrv5_mobile_rec.py │ │ │ ├── modeling_pp_ocrv5_mobile_rec.py │ │ │ └── modular_pp_ocrv5_mobile_rec.py │ │ ├── pp_ocrv5_server_det/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_ocrv5_server_det.py │ │ │ ├── image_processing_pp_ocrv5_server_det.py │ │ │ ├── modeling_pp_ocrv5_server_det.py │ │ │ └── modular_pp_ocrv5_server_det.py │ │ ├── pp_ocrv5_server_rec/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pp_ocrv5_server_rec.py │ │ │ ├── image_processing_pp_ocrv5_server_rec.py │ │ │ ├── modeling_pp_ocrv5_server_rec.py │ │ │ └── modular_pp_ocrv5_server_rec.py │ │ ├── prompt_depth_anything/ │ │ │ ├── __init__.py │ │ │ ├── configuration_prompt_depth_anything.py │ │ │ ├── convert_prompt_depth_anything_to_hf.py │ │ │ ├── image_processing_pil_prompt_depth_anything.py │ │ │ ├── image_processing_prompt_depth_anything.py │ │ │ ├── modeling_prompt_depth_anything.py │ │ │ └── modular_prompt_depth_anything.py │ │ ├── prophetnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_prophetnet.py │ │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_prophetnet.py │ │ │ └── tokenization_prophetnet.py │ │ ├── pvt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pvt.py │ │ │ ├── convert_pvt_to_pytorch.py │ │ │ ├── image_processing_pil_pvt.py │ │ │ ├── 
image_processing_pvt.py │ │ │ └── modeling_pvt.py │ │ ├── pvt_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_pvt_v2.py │ │ │ ├── convert_pvt_v2_to_pytorch.py │ │ │ └── modeling_pvt_v2.py │ │ ├── qwen2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2.py │ │ │ ├── modeling_qwen2.py │ │ │ ├── modular_qwen2.py │ │ │ └── tokenization_qwen2.py │ │ ├── qwen2_5_omni/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2_5_omni.py │ │ │ ├── modeling_qwen2_5_omni.py │ │ │ ├── modular_qwen2_5_omni.py │ │ │ └── processing_qwen2_5_omni.py │ │ ├── qwen2_5_vl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2_5_vl.py │ │ │ ├── modeling_qwen2_5_vl.py │ │ │ ├── modular_qwen2_5_vl.py │ │ │ └── processing_qwen2_5_vl.py │ │ ├── qwen2_audio/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2_audio.py │ │ │ ├── modeling_qwen2_audio.py │ │ │ └── processing_qwen2_audio.py │ │ ├── qwen2_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2_moe.py │ │ │ ├── modeling_qwen2_moe.py │ │ │ └── modular_qwen2_moe.py │ │ ├── qwen2_vl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen2_vl.py │ │ │ ├── image_processing_pil_qwen2_vl.py │ │ │ ├── image_processing_qwen2_vl.py │ │ │ ├── modeling_qwen2_vl.py │ │ │ ├── processing_qwen2_vl.py │ │ │ └── video_processing_qwen2_vl.py │ │ ├── qwen3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3.py │ │ │ ├── modeling_qwen3.py │ │ │ └── modular_qwen3.py │ │ ├── qwen3_5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_5.py │ │ │ ├── modeling_qwen3_5.py │ │ │ ├── modular_qwen3_5.py │ │ │ └── tokenization_qwen3_5.py │ │ ├── qwen3_5_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_5_moe.py │ │ │ ├── modeling_qwen3_5_moe.py │ │ │ └── modular_qwen3_5_moe.py │ │ ├── qwen3_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_moe.py │ │ │ ├── modeling_qwen3_moe.py │ │ │ └── modular_qwen3_moe.py │ │ ├── qwen3_next/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_next.py │ │ │ ├── modeling_qwen3_next.py │ │ │ └── modular_qwen3_next.py │ 
│ ├── qwen3_omni_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_omni_moe.py │ │ │ ├── modeling_qwen3_omni_moe.py │ │ │ ├── modular_qwen3_omni_moe.py │ │ │ └── processing_qwen3_omni_moe.py │ │ ├── qwen3_vl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_vl.py │ │ │ ├── modeling_qwen3_vl.py │ │ │ ├── modular_qwen3_vl.py │ │ │ ├── processing_qwen3_vl.py │ │ │ └── video_processing_qwen3_vl.py │ │ ├── qwen3_vl_moe/ │ │ │ ├── __init__.py │ │ │ ├── configuration_qwen3_vl_moe.py │ │ │ ├── modeling_qwen3_vl_moe.py │ │ │ └── modular_qwen3_vl_moe.py │ │ ├── rag/ │ │ │ ├── __init__.py │ │ │ ├── configuration_rag.py │ │ │ ├── modeling_rag.py │ │ │ ├── retrieval_rag.py │ │ │ └── tokenization_rag.py │ │ ├── recurrent_gemma/ │ │ │ ├── __init__.py │ │ │ ├── configuration_recurrent_gemma.py │ │ │ ├── convert_recurrent_gemma_to_hf.py │ │ │ └── modeling_recurrent_gemma.py │ │ ├── reformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_reformer.py │ │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ │ ├── modeling_reformer.py │ │ │ └── tokenization_reformer.py │ │ ├── regnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_regnet.py │ │ │ ├── convert_regnet_seer_10b_to_pytorch.py │ │ │ ├── convert_regnet_to_pytorch.py │ │ │ └── modeling_regnet.py │ │ ├── rembert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_rembert.py │ │ │ ├── convert_rembert_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_rembert.py │ │ │ └── tokenization_rembert.py │ │ ├── resnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_resnet.py │ │ │ ├── convert_resnet_to_pytorch.py │ │ │ └── modeling_resnet.py │ │ ├── roberta/ │ │ │ ├── __init__.py │ │ │ ├── configuration_roberta.py │ │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_roberta.py │ │ │ ├── modular_roberta.py │ │ │ ├── tokenization_roberta.py │ │ │ └── tokenization_roberta_old.py │ │ ├── roberta_prelayernorm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_roberta_prelayernorm.py │ │ │ ├── 
convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_roberta_prelayernorm.py │ │ ├── roc_bert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_roc_bert.py │ │ │ ├── modeling_roc_bert.py │ │ │ └── tokenization_roc_bert.py │ │ ├── roformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_roformer.py │ │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_roformer.py │ │ │ ├── tokenization_roformer.py │ │ │ └── tokenization_utils.py │ │ ├── rt_detr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_rt_detr.py │ │ │ ├── configuration_rt_detr_resnet.py │ │ │ ├── convert_rt_detr_original_pytorch_checkpoint_to_hf.py │ │ │ ├── image_processing_pil_rt_detr.py │ │ │ ├── image_processing_rt_detr.py │ │ │ ├── modeling_rt_detr.py │ │ │ ├── modeling_rt_detr_resnet.py │ │ │ └── modular_rt_detr.py │ │ ├── rt_detr_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_rt_detr_v2.py │ │ │ ├── convert_rt_detr_v2_weights_to_hf.py │ │ │ ├── modeling_rt_detr_v2.py │ │ │ └── modular_rt_detr_v2.py │ │ ├── rwkv/ │ │ │ ├── __init__.py │ │ │ ├── configuration_rwkv.py │ │ │ ├── convert_rwkv_checkpoint_to_hf.py │ │ │ └── modeling_rwkv.py │ │ ├── sam/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam.py │ │ │ ├── convert_sam_to_hf.py │ │ │ ├── image_processing_pil_sam.py │ │ │ ├── image_processing_sam.py │ │ │ ├── modeling_sam.py │ │ │ └── processing_sam.py │ │ ├── sam2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam2.py │ │ │ ├── convert_sam2_to_hf.py │ │ │ ├── image_processing_sam2.py │ │ │ ├── modeling_sam2.py │ │ │ ├── modular_sam2.py │ │ │ └── processing_sam2.py │ │ ├── sam2_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam2_video.py │ │ │ ├── convert_sam2_video_to_hf.py │ │ │ ├── modeling_sam2_video.py │ │ │ ├── modular_sam2_video.py │ │ │ ├── processing_sam2_video.py │ │ │ └── video_processing_sam2_video.py │ │ ├── sam3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam3.py │ │ │ ├── convert_sam3_to_hf.py │ │ │ ├── 
image_processing_sam3.py │ │ │ ├── modeling_sam3.py │ │ │ ├── modular_sam3.py │ │ │ └── processing_sam3.py │ │ ├── sam3_tracker/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam3_tracker.py │ │ │ ├── modeling_sam3_tracker.py │ │ │ ├── modular_sam3_tracker.py │ │ │ └── processing_sam3_tracker.py │ │ ├── sam3_tracker_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam3_tracker_video.py │ │ │ ├── modeling_sam3_tracker_video.py │ │ │ ├── modular_sam3_tracker_video.py │ │ │ └── processing_sam3_tracker_video.py │ │ ├── sam3_video/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam3_video.py │ │ │ ├── convert_sam3_video_to_hf.py │ │ │ ├── modeling_sam3_video.py │ │ │ └── processing_sam3_video.py │ │ ├── sam_hq/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sam_hq.py │ │ │ ├── convert_samhq_to_hf.py │ │ │ ├── modeling_sam_hq.py │ │ │ ├── modular_sam_hq.py │ │ │ └── processing_sam_hq.py │ │ ├── seamless_m4t/ │ │ │ ├── __init__.py │ │ │ ├── configuration_seamless_m4t.py │ │ │ ├── convert_fairseq2_to_hf.py │ │ │ ├── feature_extraction_seamless_m4t.py │ │ │ ├── modeling_seamless_m4t.py │ │ │ ├── processing_seamless_m4t.py │ │ │ └── tokenization_seamless_m4t.py │ │ ├── seamless_m4t_v2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_seamless_m4t_v2.py │ │ │ ├── convert_fairseq2_to_hf.py │ │ │ └── modeling_seamless_m4t_v2.py │ │ ├── seed_oss/ │ │ │ ├── __init__.py │ │ │ ├── configuration_seed_oss.py │ │ │ ├── modeling_seed_oss.py │ │ │ └── modular_seed_oss.py │ │ ├── segformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_segformer.py │ │ │ ├── convert_segformer_original_to_pytorch.py │ │ │ ├── image_processing_pil_segformer.py │ │ │ ├── image_processing_segformer.py │ │ │ ├── modeling_segformer.py │ │ │ └── modular_segformer.py │ │ ├── seggpt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_seggpt.py │ │ │ ├── convert_seggpt_to_hf.py │ │ │ ├── image_processing_pil_seggpt.py │ │ │ ├── image_processing_seggpt.py │ │ │ └── modeling_seggpt.py │ │ ├── sew/ │ │ │ ├── __init__.py │ 
│ │ ├── configuration_sew.py │ │ │ ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_sew.py │ │ │ └── modular_sew.py │ │ ├── sew_d/ │ │ │ ├── __init__.py │ │ │ ├── configuration_sew_d.py │ │ │ ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_sew_d.py │ │ ├── shieldgemma2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_shieldgemma2.py │ │ │ ├── convert_shieldgemma2_weights_orbax_to_hf.py │ │ │ ├── modeling_shieldgemma2.py │ │ │ └── processing_shieldgemma2.py │ │ ├── siglip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_siglip.py │ │ │ ├── convert_siglip_to_hf.py │ │ │ ├── image_processing_pil_siglip.py │ │ │ ├── image_processing_siglip.py │ │ │ ├── modeling_siglip.py │ │ │ ├── processing_siglip.py │ │ │ └── tokenization_siglip.py │ │ ├── siglip2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_siglip2.py │ │ │ ├── convert_siglip2_to_hf.py │ │ │ ├── image_processing_pil_siglip2.py │ │ │ ├── image_processing_siglip2.py │ │ │ ├── modeling_siglip2.py │ │ │ ├── modular_siglip2.py │ │ │ ├── processing_siglip2.py │ │ │ └── tokenization_siglip2.py │ │ ├── slanext/ │ │ │ ├── __init__.py │ │ │ ├── configuration_slanext.py │ │ │ ├── image_processing_slanext.py │ │ │ ├── modeling_slanext.py │ │ │ └── modular_slanext.py │ │ ├── smollm3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_smollm3.py │ │ │ ├── modeling_smollm3.py │ │ │ └── modular_smollm3.py │ │ ├── smolvlm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_smolvlm.py │ │ │ ├── image_processing_pil_smolvlm.py │ │ │ ├── image_processing_smolvlm.py │ │ │ ├── modeling_smolvlm.py │ │ │ ├── modular_smolvlm.py │ │ │ ├── processing_smolvlm.py │ │ │ └── video_processing_smolvlm.py │ │ ├── solar_open/ │ │ │ ├── __init__.py │ │ │ ├── configuration_solar_open.py │ │ │ ├── modeling_solar_open.py │ │ │ └── modular_solar_open.py │ │ ├── speech_encoder_decoder/ │ │ │ ├── __init__.py │ │ │ ├── configuration_speech_encoder_decoder.py │ │ │ ├── 
convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py │ │ │ ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py │ │ │ └── modeling_speech_encoder_decoder.py │ │ ├── speech_to_text/ │ │ │ ├── __init__.py │ │ │ ├── configuration_speech_to_text.py │ │ │ ├── convert_s2t_fairseq_to_tfms.py │ │ │ ├── feature_extraction_speech_to_text.py │ │ │ ├── modeling_speech_to_text.py │ │ │ ├── processing_speech_to_text.py │ │ │ └── tokenization_speech_to_text.py │ │ ├── speecht5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_speecht5.py │ │ │ ├── convert_hifigan.py │ │ │ ├── convert_speecht5_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_speecht5.py │ │ │ ├── modeling_speecht5.py │ │ │ ├── number_normalizer.py │ │ │ ├── processing_speecht5.py │ │ │ └── tokenization_speecht5.py │ │ ├── splinter/ │ │ │ ├── __init__.py │ │ │ ├── configuration_splinter.py │ │ │ ├── modeling_splinter.py │ │ │ └── tokenization_splinter.py │ │ ├── squeezebert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_squeezebert.py │ │ │ ├── modeling_squeezebert.py │ │ │ └── tokenization_squeezebert.py │ │ ├── stablelm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_stablelm.py │ │ │ └── modeling_stablelm.py │ │ ├── starcoder2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_starcoder2.py │ │ │ ├── modeling_starcoder2.py │ │ │ └── modular_starcoder2.py │ │ ├── superglue/ │ │ │ ├── __init__.py │ │ │ ├── configuration_superglue.py │ │ │ ├── convert_superglue_to_hf.py │ │ │ ├── image_processing_pil_superglue.py │ │ │ ├── image_processing_superglue.py │ │ │ └── modeling_superglue.py │ │ ├── superpoint/ │ │ │ ├── __init__.py │ │ │ ├── configuration_superpoint.py │ │ │ ├── convert_superpoint_to_pytorch.py │ │ │ ├── image_processing_pil_superpoint.py │ │ │ ├── image_processing_superpoint.py │ │ │ └── modeling_superpoint.py │ │ ├── swiftformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_swiftformer.py │ │ │ ├── convert_swiftformer_original_to_hf.py │ │ │ └── modeling_swiftformer.py │ │ 
├── swin/ │ │ │ ├── __init__.py │ │ │ ├── configuration_swin.py │ │ │ ├── convert_swin_simmim_to_pytorch.py │ │ │ ├── convert_swin_timm_to_pytorch.py │ │ │ └── modeling_swin.py │ │ ├── swin2sr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_swin2sr.py │ │ │ ├── convert_swin2sr_original_to_pytorch.py │ │ │ ├── image_processing_pil_swin2sr.py │ │ │ ├── image_processing_swin2sr.py │ │ │ └── modeling_swin2sr.py │ │ ├── swinv2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_swinv2.py │ │ │ ├── convert_swinv2_timm_to_pytorch.py │ │ │ └── modeling_swinv2.py │ │ ├── switch_transformers/ │ │ │ ├── __init__.py │ │ │ ├── configuration_switch_transformers.py │ │ │ ├── convert_big_switch.py │ │ │ ├── convert_switch_transformers_original_flax_checkpoint_to_pytorch.py │ │ │ ├── modeling_switch_transformers.py │ │ │ └── modular_switch_transformers.py │ │ ├── t5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_t5.py │ │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ │ ├── convert_t5x_checkpoint_to_pytorch.py │ │ │ ├── download_from_gcp.sh │ │ │ ├── modeling_t5.py │ │ │ └── tokenization_t5.py │ │ ├── t5gemma/ │ │ │ ├── __init__.py │ │ │ ├── configuration_t5gemma.py │ │ │ ├── modeling_t5gemma.py │ │ │ └── modular_t5gemma.py │ │ ├── t5gemma2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_t5gemma2.py │ │ │ ├── modeling_t5gemma2.py │ │ │ └── modular_t5gemma2.py │ │ ├── table_transformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_table_transformer.py │ │ │ ├── convert_table_transformer_to_hf.py │ │ │ ├── convert_table_transformer_to_hf_no_timm.py │ │ │ └── modeling_table_transformer.py │ │ ├── tapas/ │ │ │ ├── __init__.py │ │ │ ├── configuration_tapas.py │ │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_tapas.py │ │ │ └── tokenization_tapas.py │ │ ├── textnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_textnet.py │ │ │ ├── convert_textnet_to_hf.py │ │ │ ├── image_processing_pil_textnet.py │ │ │ ├── image_processing_textnet.py │ │ │ └── 
modeling_textnet.py │ │ ├── time_series_transformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_time_series_transformer.py │ │ │ └── modeling_time_series_transformer.py │ │ ├── timesfm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_timesfm.py │ │ │ ├── convert_timesfm_orignal_to_hf.py │ │ │ ├── modeling_timesfm.py │ │ │ └── modular_timesfm.py │ │ ├── timesfm2_5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_timesfm2_5.py │ │ │ ├── convert_timesfm2_5_original_to_hf.py │ │ │ ├── modeling_timesfm2_5.py │ │ │ └── modular_timesfm2_5.py │ │ ├── timesformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_timesformer.py │ │ │ ├── convert_timesformer_to_pytorch.py │ │ │ └── modeling_timesformer.py │ │ ├── timm_backbone/ │ │ │ ├── __init__.py │ │ │ ├── configuration_timm_backbone.py │ │ │ └── modeling_timm_backbone.py │ │ ├── timm_wrapper/ │ │ │ ├── __init__.py │ │ │ ├── configuration_timm_wrapper.py │ │ │ ├── image_processing_timm_wrapper.py │ │ │ └── modeling_timm_wrapper.py │ │ ├── trocr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_trocr.py │ │ │ ├── convert_trocr_unilm_to_pytorch.py │ │ │ ├── modeling_trocr.py │ │ │ └── processing_trocr.py │ │ ├── tvp/ │ │ │ ├── __init__.py │ │ │ ├── configuration_tvp.py │ │ │ ├── image_processing_pil_tvp.py │ │ │ ├── image_processing_tvp.py │ │ │ ├── modeling_tvp.py │ │ │ └── processing_tvp.py │ │ ├── udop/ │ │ │ ├── __init__.py │ │ │ ├── configuration_udop.py │ │ │ ├── convert_udop_to_hf.py │ │ │ ├── modeling_udop.py │ │ │ ├── processing_udop.py │ │ │ └── tokenization_udop.py │ │ ├── umt5/ │ │ │ ├── __init__.py │ │ │ ├── configuration_umt5.py │ │ │ ├── convert_umt5_checkpoint_to_pytorch.py │ │ │ └── modeling_umt5.py │ │ ├── unispeech/ │ │ │ ├── __init__.py │ │ │ ├── configuration_unispeech.py │ │ │ ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_unispeech.py │ │ │ └── modular_unispeech.py │ │ ├── unispeech_sat/ │ │ │ ├── __init__.py │ │ │ ├── configuration_unispeech_sat.py │ │ │ ├── 
convert_unispeech_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_unispeech_sat.py │ │ │ └── modular_unispeech_sat.py │ │ ├── univnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_univnet.py │ │ │ ├── convert_univnet.py │ │ │ ├── feature_extraction_univnet.py │ │ │ └── modeling_univnet.py │ │ ├── upernet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_upernet.py │ │ │ ├── convert_convnext_upernet_to_pytorch.py │ │ │ ├── convert_swin_upernet_to_pytorch.py │ │ │ └── modeling_upernet.py │ │ ├── uvdoc/ │ │ │ ├── __init__.py │ │ │ ├── configuration_uvdoc.py │ │ │ ├── image_processing_uvdoc.py │ │ │ ├── modeling_uvdoc.py │ │ │ └── modular_uvdoc.py │ │ ├── vaultgemma/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vaultgemma.py │ │ │ ├── modeling_vaultgemma.py │ │ │ └── modular_vaultgemma.py │ │ ├── vibevoice_acoustic_tokenizer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vibevoice_acoustic_tokenizer.py │ │ │ ├── convert_vibevoice_acoustic_tokenizer_to_hf.py │ │ │ ├── feature_extraction_vibevoice_acoustic_tokenizer.py │ │ │ ├── modeling_vibevoice_acoustic_tokenizer.py │ │ │ └── modular_vibevoice_acoustic_tokenizer.py │ │ ├── vibevoice_asr/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vibevoice_asr.py │ │ │ ├── convert_vibevoice_asr_to_hf.py │ │ │ ├── modeling_vibevoice_asr.py │ │ │ ├── modular_vibevoice_asr.py │ │ │ └── processing_vibevoice_asr.py │ │ ├── video_llama_3/ │ │ │ ├── __init__.py │ │ │ ├── configuration_video_llama_3.py │ │ │ ├── image_processing_pil_video_llama_3.py │ │ │ ├── image_processing_video_llama_3.py │ │ │ ├── modeling_video_llama_3.py │ │ │ ├── modular_video_llama_3.py │ │ │ ├── processing_video_llama_3.py │ │ │ └── video_processing_video_llama_3.py │ │ ├── video_llava/ │ │ │ ├── __init__.py │ │ │ ├── configuration_video_llava.py │ │ │ ├── convert_video_llava_weights_to_hf.py │ │ │ ├── image_processing_video_llava.py │ │ │ ├── modeling_video_llava.py │ │ │ ├── 
processing_video_llava.py │ │ │ └── video_processing_video_llava.py │ │ ├── videomae/ │ │ │ ├── __init__.py │ │ │ ├── configuration_videomae.py │ │ │ ├── convert_videomae_to_pytorch.py │ │ │ ├── image_processing_pil_videomae.py │ │ │ ├── image_processing_videomae.py │ │ │ ├── modeling_videomae.py │ │ │ └── video_processing_videomae.py │ │ ├── videomt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_videomt.py │ │ │ ├── convert_videomt_to_hf.py │ │ │ ├── modeling_videomt.py │ │ │ ├── modular_videomt.py │ │ │ └── video_processing_videomt.py │ │ ├── vilt/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vilt.py │ │ │ ├── convert_vilt_original_to_pytorch.py │ │ │ ├── image_processing_pil_vilt.py │ │ │ ├── image_processing_vilt.py │ │ │ ├── modeling_vilt.py │ │ │ └── processing_vilt.py │ │ ├── vipllava/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vipllava.py │ │ │ ├── convert_vipllava_weights_to_hf.py │ │ │ ├── modeling_vipllava.py │ │ │ └── modular_vipllava.py │ │ ├── vision_encoder_decoder/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vision_encoder_decoder.py │ │ │ └── modeling_vision_encoder_decoder.py │ │ ├── vision_text_dual_encoder/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vision_text_dual_encoder.py │ │ │ ├── modeling_vision_text_dual_encoder.py │ │ │ └── processing_vision_text_dual_encoder.py │ │ ├── visual_bert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_visual_bert.py │ │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_visual_bert.py │ │ ├── vit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vit.py │ │ │ ├── convert_dino_to_pytorch.py │ │ │ ├── convert_vit_timm_to_pytorch.py │ │ │ ├── image_processing_pil_vit.py │ │ │ ├── image_processing_vit.py │ │ │ └── modeling_vit.py │ │ ├── vit_mae/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vit_mae.py │ │ │ ├── convert_vit_mae_to_pytorch.py │ │ │ └── modeling_vit_mae.py │ │ ├── vit_msn/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vit_msn.py │ │ │ ├── 
convert_msn_to_pytorch.py │ │ │ └── modeling_vit_msn.py │ │ ├── vitdet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vitdet.py │ │ │ └── modeling_vitdet.py │ │ ├── vitmatte/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vitmatte.py │ │ │ ├── convert_vitmatte_to_hf.py │ │ │ ├── image_processing_pil_vitmatte.py │ │ │ ├── image_processing_vitmatte.py │ │ │ └── modeling_vitmatte.py │ │ ├── vitpose/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vitpose.py │ │ │ ├── convert_vitpose_to_hf.py │ │ │ ├── image_processing_pil_vitpose.py │ │ │ ├── image_processing_vitpose.py │ │ │ └── modeling_vitpose.py │ │ ├── vitpose_backbone/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vitpose_backbone.py │ │ │ └── modeling_vitpose_backbone.py │ │ ├── vits/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vits.py │ │ │ ├── convert_original_checkpoint.py │ │ │ ├── modeling_vits.py │ │ │ └── tokenization_vits.py │ │ ├── vivit/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vivit.py │ │ │ ├── convert_vivit_flax_to_pytorch.py │ │ │ ├── image_processing_vivit.py │ │ │ └── modeling_vivit.py │ │ ├── vjepa2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_vjepa2.py │ │ │ ├── convert_vjepa2_classifier_to_hf.py │ │ │ ├── convert_vjepa2_to_hf.py │ │ │ ├── modeling_vjepa2.py │ │ │ └── video_processing_vjepa2.py │ │ ├── voxtral/ │ │ │ ├── __init__.py │ │ │ ├── configuration_voxtral.py │ │ │ ├── convert_voxtral_weights_to_hf.py │ │ │ ├── modeling_voxtral.py │ │ │ ├── modular_voxtral.py │ │ │ └── processing_voxtral.py │ │ ├── voxtral_realtime/ │ │ │ ├── __init__.py │ │ │ ├── configuration_voxtral_realtime.py │ │ │ ├── convert_voxtral_realtime_weights_to_hf.py │ │ │ ├── feature_extraction_voxtral_realtime.py │ │ │ ├── modeling_voxtral_realtime.py │ │ │ ├── modular_voxtral_realtime.py │ │ │ └── processing_voxtral_realtime.py │ │ ├── wav2vec2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_wav2vec2.py │ │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── 
convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_wav2vec2.py │ │ │ ├── modeling_wav2vec2.py │ │ │ ├── processing_wav2vec2.py │ │ │ └── tokenization_wav2vec2.py │ │ ├── wav2vec2_bert/ │ │ │ ├── __init__.py │ │ │ ├── configuration_wav2vec2_bert.py │ │ │ ├── convert_wav2vec2_seamless_checkpoint.py │ │ │ ├── modeling_wav2vec2_bert.py │ │ │ ├── modular_wav2vec2_bert.py │ │ │ └── processing_wav2vec2_bert.py │ │ ├── wav2vec2_conformer/ │ │ │ ├── __init__.py │ │ │ ├── configuration_wav2vec2_conformer.py │ │ │ ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_wav2vec2_conformer.py │ │ │ └── modular_wav2vec2_conformer.py │ │ ├── wav2vec2_phoneme/ │ │ │ ├── __init__.py │ │ │ └── tokenization_wav2vec2_phoneme.py │ │ ├── wav2vec2_with_lm/ │ │ │ ├── __init__.py │ │ │ └── processing_wav2vec2_with_lm.py │ │ ├── wavlm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_wavlm.py │ │ │ ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── modeling_wavlm.py │ │ │ └── modular_wavlm.py │ │ ├── whisper/ │ │ │ ├── __init__.py │ │ │ ├── configuration_whisper.py │ │ │ ├── convert_openai_to_hf.py │ │ │ ├── english_normalizer.py │ │ │ ├── feature_extraction_whisper.py │ │ │ ├── generation_whisper.py │ │ │ ├── modeling_whisper.py │ │ │ ├── processing_whisper.py │ │ │ └── tokenization_whisper.py │ │ ├── x_clip/ │ │ │ ├── __init__.py │ │ │ ├── configuration_x_clip.py │ │ │ ├── convert_x_clip_original_pytorch_to_hf.py │ │ │ ├── modeling_x_clip.py │ │ │ └── processing_x_clip.py │ │ ├── xcodec/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xcodec.py │ │ │ ├── convert_xcodec_weights_to_hf.py │ │ │ └── modeling_xcodec.py │ │ ├── xglm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xglm.py │ │ │ ├── convert_xglm_original_ckpt_to_trfms.py │ │ │ ├── modeling_xglm.py │ │ │ └── tokenization_xglm.py │ │ ├── xlm/ │ │ │ ├── __init__.py │ │ │ ├── 
configuration_xlm.py │ │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_xlm.py │ │ │ └── tokenization_xlm.py │ │ ├── xlm_roberta/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm_roberta.py │ │ │ ├── modeling_xlm_roberta.py │ │ │ ├── modular_xlm_roberta.py │ │ │ └── tokenization_xlm_roberta.py │ │ ├── xlm_roberta_xl/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm_roberta_xl.py │ │ │ ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_xlm_roberta_xl.py │ │ │ └── modular_xlm_roberta_xl.py │ │ ├── xlnet/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xlnet.py │ │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_xlnet.py │ │ │ └── tokenization_xlnet.py │ │ ├── xlstm/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xlstm.py │ │ │ └── modeling_xlstm.py │ │ ├── xmod/ │ │ │ ├── __init__.py │ │ │ ├── configuration_xmod.py │ │ │ ├── convert_xmod_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_xmod.py │ │ ├── yolos/ │ │ │ ├── __init__.py │ │ │ ├── configuration_yolos.py │ │ │ ├── convert_yolos_to_pytorch.py │ │ │ ├── image_processing_pil_yolos.py │ │ │ ├── image_processing_yolos.py │ │ │ ├── modeling_yolos.py │ │ │ └── modular_yolos.py │ │ ├── yoso/ │ │ │ ├── __init__.py │ │ │ ├── configuration_yoso.py │ │ │ ├── convert_yoso_pytorch_to_pytorch.py │ │ │ └── modeling_yoso.py │ │ ├── youtu/ │ │ │ ├── __init__.py │ │ │ ├── configuration_youtu.py │ │ │ ├── modeling_youtu.py │ │ │ └── modular_youtu.py │ │ ├── zamba/ │ │ │ ├── __init__.py │ │ │ ├── configuration_zamba.py │ │ │ └── modeling_zamba.py │ │ ├── zamba2/ │ │ │ ├── __init__.py │ │ │ ├── configuration_zamba2.py │ │ │ ├── modeling_zamba2.py │ │ │ └── modular_zamba2.py │ │ └── zoedepth/ │ │ ├── __init__.py │ │ ├── configuration_zoedepth.py │ │ ├── convert_zoedepth_to_hf.py │ │ ├── image_processing_pil_zoedepth.py │ │ ├── image_processing_zoedepth.py │ │ └── modeling_zoedepth.py │ ├── monkey_patching.py │ ├── 
optimization.py │ ├── pipelines/ │ │ ├── __init__.py │ │ ├── any_to_any.py │ │ ├── audio_classification.py │ │ ├── audio_utils.py │ │ ├── automatic_speech_recognition.py │ │ ├── base.py │ │ ├── depth_estimation.py │ │ ├── document_question_answering.py │ │ ├── feature_extraction.py │ │ ├── fill_mask.py │ │ ├── image_classification.py │ │ ├── image_feature_extraction.py │ │ ├── image_segmentation.py │ │ ├── image_text_to_text.py │ │ ├── keypoint_matching.py │ │ ├── mask_generation.py │ │ ├── object_detection.py │ │ ├── pt_utils.py │ │ ├── table_question_answering.py │ │ ├── text_classification.py │ │ ├── text_generation.py │ │ ├── text_to_audio.py │ │ ├── token_classification.py │ │ ├── video_classification.py │ │ ├── zero_shot_audio_classification.py │ │ ├── zero_shot_classification.py │ │ ├── zero_shot_image_classification.py │ │ └── zero_shot_object_detection.py │ ├── processing_utils.py │ ├── py.typed │ ├── pytorch_utils.py │ ├── quantizers/ │ │ ├── __init__.py │ │ ├── auto.py │ │ ├── base.py │ │ ├── quantizer_aqlm.py │ │ ├── quantizer_auto_round.py │ │ ├── quantizer_awq.py │ │ ├── quantizer_bitnet.py │ │ ├── quantizer_bnb_4bit.py │ │ ├── quantizer_bnb_8bit.py │ │ ├── quantizer_compressed_tensors.py │ │ ├── quantizer_eetq.py │ │ ├── quantizer_fbgemm_fp8.py │ │ ├── quantizer_finegrained_fp8.py │ │ ├── quantizer_fouroversix.py │ │ ├── quantizer_fp_quant.py │ │ ├── quantizer_gptq.py │ │ ├── quantizer_higgs.py │ │ ├── quantizer_hqq.py │ │ ├── quantizer_metal.py │ │ ├── quantizer_mxfp4.py │ │ ├── quantizer_quanto.py │ │ ├── quantizer_quark.py │ │ ├── quantizer_sinq.py │ │ ├── quantizer_spqr.py │ │ ├── quantizer_torchao.py │ │ ├── quantizer_vptq.py │ │ └── quantizers_utils.py │ ├── safetensors_conversion.py │ ├── testing_utils.py │ ├── time_series_utils.py │ ├── tokenization_mistral_common.py │ ├── tokenization_python.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_sentencepiece.py │ ├── tokenization_utils_tokenizers.py │ ├── trainer.py │ ├── 
trainer_callback.py │ ├── trainer_jit_checkpoint.py │ ├── trainer_optimizer.py │ ├── trainer_pt_utils.py │ ├── trainer_seq2seq.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_seq2seq.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── attention_visualizer.py │ │ ├── auto_docstring.py │ │ ├── backbone_utils.py │ │ ├── chat_parsing_utils.py │ │ ├── chat_template_utils.py │ │ ├── constants.py │ │ ├── deprecation.py │ │ ├── doc.py │ │ ├── dummy_detectron2_objects.py │ │ ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py │ │ ├── dummy_mistral_common_objects.py │ │ ├── dummy_music_objects.py │ │ ├── dummy_pt_objects.py │ │ ├── dummy_sentencepiece_and_tokenizers_objects.py │ │ ├── dummy_speech_objects.py │ │ ├── dummy_timm_and_torchvision_objects.py │ │ ├── dummy_tokenizers_objects.py │ │ ├── dummy_torchaudio_objects.py │ │ ├── dummy_torchvision_objects.py │ │ ├── dummy_vision_objects.py │ │ ├── generic.py │ │ ├── hp_naming.py │ │ ├── hub.py │ │ ├── import_utils.py │ │ ├── kernel_config.py │ │ ├── loading_report.py │ │ ├── logging.py │ │ ├── metrics.py │ │ ├── network_logging.py │ │ ├── notebook.py │ │ ├── output_capturing.py │ │ ├── peft_utils.py │ │ ├── pytest_helpers.py │ │ ├── quantization_config.py │ │ ├── sentencepiece_model_pb2.py │ │ ├── sentencepiece_model_pb2_new.py │ │ ├── type_validators.py │ │ └── versions.py │ ├── video_processing_utils.py │ └── video_utils.py ├── tests/ │ ├── __init__.py │ ├── causal_lm_tester.py │ ├── cli/ │ │ ├── conftest.py │ │ ├── test_chat.py │ │ ├── test_download.py │ │ ├── test_serve.py │ │ └── test_system.py │ ├── fixtures/ │ │ ├── audioflamingo3/ │ │ │ ├── expected_results_batched.json │ │ │ └── expected_results_single.json │ │ ├── config.json │ │ ├── dummy-config.json │ │ ├── dummy_feature_extractor_config.json │ │ ├── empty.txt │ │ ├── gpt_oss/ │ │ │ └── integration_tests.json │ │ ├── input.txt │ │ ├── merges.txt │ │ ├── musicflamingo/ │ │ │ ├── expected_results_batched.json │ │ │ └── 
expected_results_single.json │ │ ├── parakeet/ │ │ │ ├── expected_results_batch.json │ │ │ └── expected_results_single.json │ │ ├── preprocessor_config.json │ │ ├── sample_text.txt │ │ ├── sample_text_no_unicode.txt │ │ ├── spiece.model │ │ ├── test_entity_vocab.json │ │ ├── test_sentencepiece.model │ │ ├── test_sentencepiece_bpe.model │ │ ├── test_sentencepiece_bpe_char.model │ │ ├── test_sentencepiece_no_bos.model │ │ ├── test_sentencepiece_with_bytefallback.model │ │ ├── tests_samples/ │ │ │ ├── .gitignore │ │ │ ├── COCO/ │ │ │ │ ├── coco_annotations.txt │ │ │ │ └── coco_panoptic_annotations.txt │ │ │ ├── GermEval/ │ │ │ │ ├── dev.txt │ │ │ │ ├── labels.txt │ │ │ │ └── train.txt │ │ │ ├── MRPC/ │ │ │ │ ├── dev.csv │ │ │ │ ├── dev.tsv │ │ │ │ ├── train.csv │ │ │ │ └── train.tsv │ │ │ ├── SQUAD/ │ │ │ │ └── sample.json │ │ │ ├── STS-B/ │ │ │ │ ├── dev.tsv │ │ │ │ └── train.tsv │ │ │ ├── conll/ │ │ │ │ └── sample.json │ │ │ ├── swag/ │ │ │ │ └── sample.json │ │ │ ├── wiki_text/ │ │ │ │ └── wiki_00 │ │ │ ├── wmt16/ │ │ │ │ └── sample.json │ │ │ ├── wmt_en_ro/ │ │ │ │ ├── test.json │ │ │ │ ├── train.json │ │ │ │ └── val.json │ │ │ └── xsum/ │ │ │ └── sample.json │ │ ├── vibevoice/ │ │ │ └── expected_acoustic_tokenizer_results.json │ │ ├── vibevoice_asr/ │ │ │ ├── expected_results_batch.json │ │ │ ├── expected_results_single.json │ │ │ └── expected_results_with_context.json │ │ ├── vocab.json │ │ ├── vocab.txt │ │ └── xcodec/ │ │ └── integration_tests.json │ ├── generation/ │ │ ├── __init__.py │ │ ├── test_candidate_generator.py │ │ ├── test_configuration_utils.py │ │ ├── test_continuous_batching.py │ │ ├── test_flash_attention_parity.py │ │ ├── test_logits_process.py │ │ ├── test_paged_attention.py │ │ ├── test_stopping_criteria.py │ │ ├── test_streamers.py │ │ └── test_utils.py │ ├── kernels/ │ │ └── test_kernels.py │ ├── models/ │ │ ├── __init__.py │ │ ├── afmoe/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_afmoe.py │ │ ├── aimv2/ │ │ │ ├── __init__.py │ │ │ └── 
test_modeling_aimv2.py │ │ ├── albert/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_albert.py │ │ │ └── test_tokenization_albert.py │ │ ├── align/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_align.py │ │ │ └── test_processing_align.py │ │ ├── altclip/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_altclip.py │ │ │ └── test_processing_altclip.py │ │ ├── apertus/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_apertus.py │ │ ├── arcee/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_arcee.py │ │ ├── aria/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_aria.py │ │ │ ├── test_modeling_aria.py │ │ │ └── test_processing_aria.py │ │ ├── audio_spectrogram_transformer/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_audio_spectrogram_transformer.py │ │ │ └── test_modeling_audio_spectrogram_transformer.py │ │ ├── audioflamingo3/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_audioflamingo3.py │ │ │ └── test_processing_audioflamingo3.py │ │ ├── auto/ │ │ │ ├── __init__.py │ │ │ ├── test_configuration_auto.py │ │ │ ├── test_feature_extraction_auto.py │ │ │ ├── test_image_processing_auto.py │ │ │ ├── test_modeling_auto.py │ │ │ ├── test_processor_auto.py │ │ │ ├── test_tokenization_auto.py │ │ │ └── test_video_processing_auto.py │ │ ├── autoformer/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_autoformer.py │ │ ├── aya_vision/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_aya_vision.py │ │ │ └── test_processing_aya_vision.py │ │ ├── bamba/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_bamba.py │ │ ├── bark/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_bark.py │ │ │ └── test_processing_bark.py │ │ ├── bart/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_bart.py │ │ ├── barthez/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_barthez.py │ │ ├── bartpho/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_bartpho.py │ │ ├── beit/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_beit.py │ │ │ └── test_modeling_beit.py │ │ ├── bert/ │ │ │ ├── __init__.py │ │ │ ├── 
test_modeling_bert.py │ │ │ └── test_tokenization_bert.py │ │ ├── bert_generation/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_bert_generation.py │ │ │ └── test_tokenization_bert_generation.py │ │ ├── bert_japanese/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_bert_japanese.py │ │ ├── bertweet/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_bertweet.py │ │ ├── big_bird/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_big_bird.py │ │ │ └── test_tokenization_big_bird.py │ │ ├── bigbird_pegasus/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_bigbird_pegasus.py │ │ ├── biogpt/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_biogpt.py │ │ │ └── test_tokenization_biogpt.py │ │ ├── bit/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_bit.py │ │ │ └── test_modeling_bit.py │ │ ├── bitnet/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_bitnet.py │ │ ├── blenderbot/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_blenderbot.py │ │ │ └── test_tokenization_blenderbot.py │ │ ├── blenderbot_small/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_blenderbot_small.py │ │ │ └── test_tokenization_blenderbot_small.py │ │ ├── blip/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_blip.py │ │ │ ├── test_modeling_blip.py │ │ │ ├── test_modeling_blip_text.py │ │ │ └── test_processing_blip.py │ │ ├── blip_2/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_blip_2.py │ │ │ └── test_processing_blip_2.py │ │ ├── bloom/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_bloom.py │ │ │ └── test_tokenization_bloom.py │ │ ├── blt/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_blt.py │ │ ├── bridgetower/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_bridgetower.py │ │ │ ├── test_modeling_bridgetower.py │ │ │ └── test_processing_bridgetower.py │ │ ├── bros/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_bros.py │ │ ├── byt5/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_byt5.py │ │ ├── camembert/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_camembert.py │ │ │ └── 
test_tokenization_camembert.py │ │ ├── canine/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_canine.py │ │ │ └── test_tokenization_canine.py │ │ ├── chameleon/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_chameleon.py │ │ │ ├── test_modeling_chameleon.py │ │ │ └── test_processing_chameleon.py │ │ ├── chinese_clip/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_chinese_clip.py │ │ │ ├── test_modeling_chinese_clip.py │ │ │ └── test_processing_chinese_clip.py │ │ ├── chmv2/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_chmv2.py │ │ │ └── test_modeling_chmv2.py │ │ ├── clap/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_clap.py │ │ │ ├── test_modeling_clap.py │ │ │ └── test_processing_clap.py │ │ ├── clip/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_clip.py │ │ │ ├── test_modeling_clip.py │ │ │ ├── test_processing_clip.py │ │ │ └── test_tokenization_clip.py │ │ ├── clipseg/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_clipseg.py │ │ │ └── test_processing_clipseg.py │ │ ├── clvp/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_clvp.py │ │ │ ├── test_modeling_clvp.py │ │ │ ├── test_processing_clvp.py │ │ │ └── test_tokenization_clvp.py │ │ ├── code_llama/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_code_llama.py │ │ ├── codegen/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_codegen.py │ │ │ └── test_tokenization_codegen.py │ │ ├── cohere/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_cohere.py │ │ │ └── test_tokenization_cohere.py │ │ ├── cohere2/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_cohere2.py │ │ ├── cohere2_vision/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_cohere2_vision.py │ │ │ ├── test_modeling_cohere2_vision.py │ │ │ └── test_processing_cohere2_vision.py │ │ ├── cohere_asr/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_cohere_asr.py │ │ ├── colmodernvbert/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_colmodernvbert.py │ │ │ └── test_processing_colmodernvbert.py │ │ ├── colpali/ │ │ │ 
├── __init__.py │ │ │ ├── test_modeling_colpali.py │ │ │ └── test_processing_colpali.py │ │ ├── colqwen2/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_colqwen2.py │ │ │ └── test_processing_colqwen2.py │ │ ├── conditional_detr/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_conditional_detr.py │ │ │ └── test_modeling_conditional_detr.py │ │ ├── convbert/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_convbert.py │ │ ├── convnext/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_convnext.py │ │ │ └── test_modeling_convnext.py │ │ ├── convnextv2/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_convnextv2.py │ │ ├── cpm/ │ │ │ ├── __init__.py │ │ │ └── test_tokenization_cpm.py │ │ ├── cpmant/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_cpmant.py │ │ │ └── test_tokenization_cpmant.py │ │ ├── csm/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_csm.py │ │ │ └── test_processing_csm.py │ │ ├── ctrl/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_ctrl.py │ │ │ └── test_tokenization_ctrl.py │ │ ├── cvt/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_cvt.py │ │ ├── cwm/ │ │ │ ├── __init__.py │ │ │ ├── test_configuration_cwm.py │ │ │ └── test_modeling_cwm.py │ │ ├── d_fine/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_d_fine.py │ │ ├── dab_detr/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dab_detr.py │ │ ├── dac/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_dac.py │ │ │ └── test_modeling_dac.py │ │ ├── data2vec/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_data2vec_audio.py │ │ │ ├── test_modeling_data2vec_text.py │ │ │ └── test_modeling_data2vec_vision.py │ │ ├── dbrx/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dbrx.py │ │ ├── deberta/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_deberta.py │ │ │ └── test_tokenization_deberta.py │ │ ├── deberta_v2/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_deberta_v2.py │ │ │ └── test_tokenization_deberta_v2.py │ │ ├── decision_transformer/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_decision_transformer.py │ 
│ ├── deepseek_v2/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_deepseek_v2.py │ │ ├── deepseek_v3/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_deepseek_v3.py │ │ ├── deepseek_vl/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_deepseek_vl.py │ │ │ ├── test_modeling_deepseek_vl.py │ │ │ └── test_processing_deepseek_vl.py │ │ ├── deepseek_vl_hybrid/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_deepseek_vl_hybrid.py │ │ │ ├── test_modeling_deepseek_vl_hybrid.py │ │ │ └── test_processing_deepseek_vl_hybrid.py │ │ ├── deformable_detr/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_deformable_detr.py │ │ │ └── test_modeling_deformable_detr.py │ │ ├── deit/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_deit.py │ │ │ └── test_modeling_deit.py │ │ ├── depth_anything/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_depth_anything.py │ │ ├── depth_pro/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_depth_pro.py │ │ │ └── test_modeling_depth_pro.py │ │ ├── detr/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_detr.py │ │ │ └── test_modeling_detr.py │ │ ├── dia/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_dia.py │ │ │ ├── test_modeling_dia.py │ │ │ ├── test_processing_dia.py │ │ │ └── test_tokenization_dia.py │ │ ├── diffllama/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_diffllama.py │ │ ├── dinat/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dinat.py │ │ ├── dinov2/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dinov2.py │ │ ├── dinov2_with_registers/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dinov2_with_registers.py │ │ ├── dinov3_convnext/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dinov3_convnext.py │ │ ├── dinov3_vit/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_dinov3_vit.py │ │ │ └── test_modeling_dinov3_vit.py │ │ ├── distilbert/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_distilbert.py │ │ │ └── test_tokenization_distilbert.py │ │ ├── dit/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dit.py 
│ │ ├── doge/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_doge.py │ │ ├── donut/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_donut.py │ │ │ ├── test_modeling_donut_swin.py │ │ │ └── test_processing_donut.py │ │ ├── dots1/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dots1.py │ │ ├── dpr/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_dpr.py │ │ ├── dpt/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_dpt.py │ │ │ ├── test_modeling_dpt.py │ │ │ ├── test_modeling_dpt_auto_backbone.py │ │ │ └── test_modeling_dpt_hybrid.py │ │ ├── edgetam/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_edgetam.py │ │ ├── edgetam_video/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_edgetam_video.py │ │ ├── efficientloftr/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_efficientloftr.py │ │ │ └── test_modeling_efficientloftr.py │ │ ├── efficientnet/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_efficientnet.py │ │ │ └── test_modeling_efficientnet.py │ │ ├── electra/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_electra.py │ │ ├── emu3/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_emu3.py │ │ │ └── test_processing_emu3.py │ │ ├── encodec/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_encodec.py │ │ │ └── test_modeling_encodec.py │ │ ├── encoder_decoder/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_encoder_decoder.py │ │ ├── eomt/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_eomt.py │ │ │ └── test_modeling_eomt.py │ │ ├── eomt_dinov3/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_eomt_dinov3.py │ │ ├── ernie/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_ernie.py │ │ ├── ernie4_5/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_ernie4_5.py │ │ ├── ernie4_5_moe/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_ernie4_5_moe.py │ │ ├── ernie4_5_vl_moe/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_ernie4_5_vl_moe.py │ │ │ ├── test_modeling_ernie4_5_vl_moe.py │ │ │ ├── test_processing_ernie4_5_vl_moe.py │ │ │ └── 
test_video_processing_ernie4_5_vl_moe.py │ │ ├── esm/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_esm.py │ │ │ ├── test_modeling_esmfold.py │ │ │ └── test_tokenization_esm.py │ │ ├── eurobert/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_eurobert.py │ │ ├── evolla/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_evolla.py │ │ │ └── test_processing_evolla.py │ │ ├── exaone4/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_exaone4.py │ │ ├── exaone_moe/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_exaone_moe.py │ │ ├── falcon/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_falcon.py │ │ ├── falcon_h1/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_falcon_h1.py │ │ ├── falcon_mamba/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_falcon_mamba.py │ │ ├── fast_vlm/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_fast_vlm.py │ │ ├── fastspeech2_conformer/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_fastspeech2_conformer.py │ │ │ └── test_tokenization_fastspeech2_conformer.py │ │ ├── flaubert/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_flaubert.py │ │ │ └── test_tokenization_flaubert.py │ │ ├── flava/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_flava.py │ │ │ ├── test_modeling_flava.py │ │ │ └── test_processing_flava.py │ │ ├── flex_olmo/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_flex_olmo.py │ │ ├── florence2/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_florence2.py │ │ │ └── test_processing_florence2.py │ │ ├── fnet/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_fnet.py │ │ ├── focalnet/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_focalnet.py │ │ ├── fsmt/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_fsmt.py │ │ │ └── test_tokenization_fsmt.py │ │ ├── funnel/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_funnel.py │ │ │ └── test_tokenization_funnel.py │ │ ├── fuyu/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_fuyu.py │ │ │ ├── test_modeling_fuyu.py │ │ │ └── test_processing_fuyu.py │ │ ├── gemma/ │ │ │ ├── __init__.py │ │ │ ├── 
test_modeling_gemma.py │ │ │ └── test_tokenization_gemma.py │ │ ├── gemma2/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_gemma2.py │ │ ├── gemma3/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_gemma3.py │ │ │ ├── test_modeling_gemma3.py │ │ │ └── test_processing_gemma3.py │ │ ├── gemma3n/ │ │ │ ├── __init__.py │ │ │ ├── test_feature_extraction_gemma3n.py │ │ │ ├── test_modeling_gemma3n.py │ │ │ └── test_processing_gemma3n.py │ │ ├── git/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_git.py │ │ │ └── test_processing_git.py │ │ ├── glm/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm.py │ │ ├── glm4/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm4.py │ │ ├── glm46v/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_glm46v.py │ │ │ ├── test_processor_glm46v.py │ │ │ └── test_video_processing_glm46v.py │ │ ├── glm4_moe/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm4_moe.py │ │ ├── glm4_moe_lite/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm4_moe_lite.py │ │ ├── glm4v/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_glm4v.py │ │ │ ├── test_modeling_glm4v.py │ │ │ ├── test_processor_glm4v.py │ │ │ └── test_video_processing_glm4v.py │ │ ├── glm4v_moe/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm4v_moe.py │ │ ├── glm_image/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_glm_image.py │ │ │ └── test_processor_glm_image.py │ │ ├── glm_moe_dsa/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm_moe_dsa.py │ │ ├── glm_ocr/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glm_ocr.py │ │ ├── glmasr/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_glmasr.py │ │ ├── glpn/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_glpn.py │ │ │ └── test_modeling_glpn.py │ │ ├── got_ocr2/ │ │ │ ├── __init__.py │ │ │ ├── test_image_processing_got_ocr2.py │ │ │ ├── test_modeling_got_ocr2.py │ │ │ └── test_processing_got_ocr2.py │ │ ├── gpt2/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_gpt2.py │ │ │ └── test_tokenization_gpt2.py │ │ ├── gpt_bigcode/ │ │ │ ├── 
__init__.py │ │ │ └── test_modeling_gpt_bigcode.py │ │ ├── gpt_neo/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_gpt_neo.py │ │ ├── gpt_neox/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_gpt_neox.py │ │ │ └── test_tokenization_gpt_neox.py │ │ ├── gpt_neox_japanese/ │ │ │ ├── __init__.py │ │ │ ├── test_modeling_gpt_neox_japanese.py │ │ │ └── test_tokenization_gpt_neox_japanese.py │ │ ├── gpt_oss/ │ │ │ ├── __init__.py │ │ │ └── test_modeling_gpt_oss.py │ │ ├── gpt_sw3/ │ │ │ ├── __init__.py │ │ │ └── test_tok
Showing preview only (5,754K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (65457 symbols across 3224 files)
FILE: .circleci/create_circleci_config.py
class EmptyJob (line 59) | class EmptyJob:
method to_dict (line 62) | def to_dict(self):
class CircleCIJob (line 84) | class CircleCIJob:
method __post_init__ (line 99) | def __post_init__(self):
method to_dict (line 134) | def to_dict(self):
method job_name (line 230) | def job_name(self):
function create_circleci_config (line 377) | def create_circleci_config(folder=None):
FILE: .circleci/parse_test_outputs.py
function parse_pytest_output (line 5) | def parse_pytest_output(file_path):
function parse_pytest_failure_output (line 19) | def parse_pytest_failure_output(file_path):
function parse_pytest_errors_output (line 35) | def parse_pytest_errors_output(file_path):
function main (line 52) | def main():
FILE: .github/scripts/assign_reviewers.py
function pattern_to_regex (line 26) | def pattern_to_regex(pattern):
function get_file_owners (line 39) | def get_file_owners(file_path, codeowners_lines):
function pr_author_is_in_hf (line 59) | def pr_author_is_in_hf(pr_author, codeowners_lines):
function main (line 74) | def main():
FILE: benchmark/benches/llama.py
function collect_metrics (line 52) | def collect_metrics(benchmark_id, continue_metric_collection, metrics_re...
function run_benchmark (line 67) | def run_benchmark(
FILE: benchmark/benchmark.py
function checkout_commit (line 43) | def checkout_commit(repo: Repo, commit_id: str):
function summarize (line 60) | def summarize(run_dir, metrics, expand_metrics=False):
function combine_summaries (line 149) | def combine_summaries(summaries):
function list_str (line 200) | def list_str(values):
FILE: benchmark/benchmarks_entrypoint.py
class ImportModuleException (line 36) | class ImportModuleException(Exception):
class MetricsRecorder (line 40) | class MetricsRecorder:
method __init__ (line 41) | def __init__(
method initialise_benchmark (line 103) | def initialise_benchmark(self, metadata: dict[str, str]) -> str:
method collect_device_measurements (line 146) | def collect_device_measurements(self, benchmark_id: str, cpu_util, mem...
method collect_model_measurements (line 179) | def collect_model_measurements(self, benchmark_id: str, measurements: ...
method export_to_csv (line 208) | def export_to_csv(self, output_dir: str = "benchmark_results"):
method _export_pandas_data (line 228) | def _export_pandas_data(self, output_dir: str, timestamp: str, files_c...
method _create_summary (line 255) | def _create_summary(self, summary_file: str):
method close (line 305) | def close(self):
function parse_arguments (line 320) | def parse_arguments() -> tuple[str, str, str, str, bool, str]:
function import_from_path (line 367) | def import_from_path(module_name, file_path):
function create_database_connection (line 378) | def create_database_connection():
function create_global_metrics_recorder (line 397) | def create_global_metrics_recorder(
FILE: benchmark/optimum_benchmark_wrapper.py
function main (line 5) | def main(config_dir, config_name, args):
FILE: benchmark_v2/benchmark_scripts/continuous_batching_overall.py
function run_and_parse_cb_example (line 16) | def run_and_parse_cb_example(args: str) -> dict:
function build_comparison_table (line 38) | def build_comparison_table(results: list[dict], main_results: list[dict]...
FILE: benchmark_v2/framework/benchmark_config.py
function is_fa2_or_kernel_available (line 27) | def is_fa2_or_kernel_available() -> bool:
class BenchmarkConfig (line 54) | class BenchmarkConfig:
method __init__ (line 60) | def __init__(
method check_validity (line 102) | def check_validity(self, skip_validity_check: bool = False) -> None:
method hash (line 141) | def hash(self) -> str:
method infer_name (line 144) | def infer_name(self, compact: bool = True) -> str:
method to_dict (line 182) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 199) | def from_dict(cls, data: dict[str, Any], skip_validity_check: bool = F...
function adapt_configs (line 217) | def adapt_configs(
function get_config_by_level (line 255) | def get_config_by_level(level: int) -> list[BenchmarkConfig]:
FILE: benchmark_v2/framework/benchmark_runner.py
function compact_json_numeric_arrays (line 59) | def compact_json_numeric_arrays(data: dict):
function get_git_revision (line 73) | def get_git_revision() -> str:
function flush_memory (line 82) | def flush_memory(flush_compile: bool = True) -> None:
class BenchmarkStreamer (line 112) | class BenchmarkStreamer(BaseStreamer):
method __init__ (line 113) | def __init__(self, **kwargs) -> None:
method put (line 119) | def put(self, value):
method end (line 124) | def end(self):
method __iter__ (line 128) | def __iter__(self):
method __next__ (line 131) | def __next__(self):
class BenchmarkRunner (line 139) | class BenchmarkRunner:
method __init__ (line 142) | def __init__(
method cleanup (line 167) | def cleanup(self) -> None:
method _is_primary_process (line 173) | def _is_primary_process() -> bool:
method setup_benchmark (line 178) | def setup_benchmark(self, model_id: str, config: BenchmarkConfig) -> N...
method run_benchmark (line 228) | def run_benchmark(self, config: BenchmarkConfig, num_tokens_to_profile...
method time_generate (line 263) | def time_generate(
method profile_generate (line 313) | def profile_generate(self, num_tokens_to_profile: int, config_name: st...
method run_benchmarks (line 336) | def run_benchmarks(
method save_results (line 413) | def save_results(self, model_name: str, results: dict, timestamp: str ...
method push_results_to_hub (line 443) | def push_results_to_hub(self, dataset_id: str, results: dict[Any, Any]...
FILE: benchmark_v2/framework/data_classes.py
function compute_basic_statistics (line 10) | def compute_basic_statistics(measurements: list[float]) -> dict[str, flo...
function add_unit_to_duration (line 21) | def add_unit_to_duration(stats: dict[str, float]) -> dict[str, str]:
function equalize_lengths_and_collate (line 39) | def equalize_lengths_and_collate(stats: dict[str, dict[str, str]]) -> di...
function pretty_print_dict (line 49) | def pretty_print_dict(data: dict[str, str], tabs: int = 0) -> None:
class BenchmarkMetadata (line 58) | class BenchmarkMetadata:
method __init__ (line 69) | def __init__(
method to_dict (line 80) | def to_dict(self) -> dict[str, Any]:
class BenchmarkResult (line 92) | class BenchmarkResult:
method __init__ (line 95) | def __init__(self) -> None:
method accumulate (line 103) | def accumulate(
method _accumulate_ttft_and_itl (line 116) | def _accumulate_ttft_and_itl(self, timestamps: list[float]) -> None:
method to_dict (line 123) | def to_dict(self, summarized: bool = False) -> dict[str, Any]:
method from_dict (line 139) | def from_dict(cls, data: dict[str, Any]) -> "BenchmarkResult":
method get_throughput (line 160) | def get_throughput(self, total_generated_tokens: int) -> list[float]:
method pprint (line 163) | def pprint(self, batch_size: int = 0, num_generated_tokens: int = 0, t...
FILE: benchmark_v2/framework/hardware_metrics.py
function get_device_name_and_memory_total (line 30) | def get_device_name_and_memory_total() -> tuple[str, float]:
class HardwareInfo (line 39) | class HardwareInfo:
method __init__ (line 42) | def __init__(self) -> None:
method to_dict (line 59) | def to_dict(self) -> dict[str, None | int | float | str]:
function get_amd_gpu_stats (line 69) | def get_amd_gpu_stats(device_handle) -> tuple[int, float]:
function get_intel_xpu_stats (line 76) | def get_intel_xpu_stats() -> tuple[int, float]:
function get_nvidia_gpu_stats (line 106) | def get_nvidia_gpu_stats(device_handle) -> tuple[int, float]:
class GPUMonitoringStatus (line 115) | class GPUMonitoringStatus(Enum):
class GPURawMetrics (line 125) | class GPURawMetrics:
method to_dict (line 134) | def to_dict(self) -> dict[str, None | int | float | str]:
method from_dict (line 144) | def from_dict(cls, data: dict[str, None | int | float | str]) -> "GPUR...
class GPUMonitor (line 156) | class GPUMonitor:
method __init__ (line 159) | def __init__(self, sample_interval_sec: float = 0.05, logger: Logger |...
method _monitor_worker (line 184) | def _monitor_worker(gpu_type: str, sample_interval_sec: float, connect...
method start (line 247) | def start(self):
method stop_and_collect (line 266) | def stop_and_collect(self) -> GPURawMetrics:
FILE: conftest.py
function pytest_configure (line 85) | def pytest_configure(config):
function pytest_collection_modifyitems (line 105) | def pytest_collection_modifyitems(items):
function pytest_addoption (line 111) | def pytest_addoption(parser):
function pytest_terminal_summary (line 117) | def pytest_terminal_summary(terminalreporter):
function pytest_sessionfinish (line 125) | def pytest_sessionfinish(session, exitstatus):
class CustomOutputChecker (line 137) | class CustomOutputChecker(OutputChecker):
method check_output (line 138) | def check_output(self, want, got, optionflags):
FILE: examples/3D_parallel.py
function main (line 74) | def main():
function all_reduce_grads (line 355) | def all_reduce_grads(model, world_mesh, use_ddp):
class AppState (line 383) | class AppState(Stateful):
method __init__ (line 386) | def __init__(self, model, optimizer=None):
method state_dict (line 390) | def state_dict(self):
method load_state_dict (line 394) | def load_state_dict(self, state_dict):
function clip_grad_norm_ (line 400) | def clip_grad_norm_(
FILE: examples/metrics-monitoring/metrics_example.py
class ExampleClass (line 7) | class ExampleClass:
method __init__ (line 8) | def __init__(self, name):
method process_data (line 13) | def process_data(self, data):
method special_operation (line 18) | def special_operation(self, value):
method operation_with_attributes (line 28) | def operation_with_attributes(self):
function standalone_function (line 35) | def standalone_function(arg1, arg2):
FILE: examples/modular-transformers/configuration_duplicated_method.py
class DuplicatedMethodConfig (line 18) | class DuplicatedMethodConfig(PreTrainedConfig):
method __post_init__ (line 73) | def __post_init__(self, **kwargs):
method validate_architecture (line 81) | def validate_architecture(self):
method vocab_size (line 90) | def vocab_size(self): # noqa: F811 -> we need this at we cannot delet...
method vocab_size (line 94) | def vocab_size(self, value):
FILE: examples/modular-transformers/configuration_my_new_model.py
class MyNewModelConfig (line 18) | class MyNewModelConfig(PreTrainedConfig):
method __post_init__ (line 178) | def __post_init__(self, **kwargs):
method validate_architecture (line 186) | def validate_architecture(self):
FILE: examples/modular-transformers/configuration_my_new_model2.py
class MyNewModel2Config (line 17) | class MyNewModel2Config(PreTrainedConfig):
method __post_init__ (line 79) | def __post_init__(self, **kwargs):
method validate_architecture (line 87) | def validate_architecture(self):
FILE: examples/modular-transformers/configuration_new_model.py
class NewModelConfig (line 17) | class NewModelConfig(PreTrainedConfig):
method num_heads (line 71) | def num_heads(self):
FILE: examples/modular-transformers/image_processing_new_imgproc_model.py
class ImgprocModelImageProcessor (line 35) | class ImgprocModelImageProcessor(BaseImageProcessor):
method __init__ (line 72) | def __init__(
method resize (line 99) | def resize(
method preprocess (line 148) | def preprocess(
method new_image_processing_method (line 279) | def new_image_processing_method(self, pixel_values: torch.FloatTensor):
FILE: examples/modular-transformers/modeling_add_function.py
function rotate_half (line 15) | def rotate_half(x):
function apply_rotary_pos_emb (line 23) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
class TestAttention (line 48) | class TestAttention(nn.Module):
method __init__ (line 62) | def __init__(self):
method forward (line 65) | def forward(self) -> tuple[torch.Tensor, torch.Tensor | None, tuple[to...
FILE: examples/modular-transformers/modeling_dummy_bert.py
class DummyBertEmbeddings (line 27) | class DummyBertEmbeddings(nn.Module):
method __init__ (line 30) | def __init__(self, config):
method forward (line 46) | def forward(
function eager_attention_forward (line 89) | def eager_attention_forward(
class DummyBertSelfAttention (line 117) | class DummyBertSelfAttention(nn.Module):
method __init__ (line 118) | def __init__(self, config, is_causal=False, layer_idx=None):
method forward (line 142) | def forward(
class DummyBertCrossAttention (line 184) | class DummyBertCrossAttention(nn.Module):
method __init__ (line 185) | def __init__(self, config, is_causal=False, layer_idx=None):
method forward (line 208) | def forward(
class DummyBertSelfOutput (line 261) | class DummyBertSelfOutput(nn.Module):
method __init__ (line 262) | def __init__(self, config):
method forward (line 268) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
class DummyBertAttention (line 275) | class DummyBertAttention(nn.Module):
method __init__ (line 276) | def __init__(self, config, is_causal=False, layer_idx=None, is_cross_a...
method forward (line 283) | def forward(
class DummyBertIntermediate (line 304) | class DummyBertIntermediate(nn.Module):
method __init__ (line 305) | def __init__(self, config):
method forward (line 313) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class DummyBertOutput (line 319) | class DummyBertOutput(nn.Module):
method __init__ (line 320) | def __init__(self, config):
method forward (line 326) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
class DummyBertLayer (line 333) | class DummyBertLayer(GradientCheckpointingLayer):
method __init__ (line 334) | def __init__(self, config, layer_idx=None):
method forward (line 353) | def forward(
method feed_forward_chunk (line 392) | def feed_forward_chunk(self, attention_output):
class DummyBertEncoder (line 398) | class DummyBertEncoder(nn.Module):
method __init__ (line 399) | def __init__(self, config):
method forward (line 404) | def forward(
class DummyBertPooler (line 430) | class DummyBertPooler(nn.Module):
method __init__ (line 431) | def __init__(self, config):
method forward (line 436) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class DummyBertPredictionHeadTransform (line 445) | class DummyBertPredictionHeadTransform(nn.Module):
method __init__ (line 446) | def __init__(self, config):
method forward (line 455) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class DummyBertLMPredictionHead (line 462) | class DummyBertLMPredictionHead(nn.Module):
method __init__ (line 463) | def __init__(self, config):
method forward (line 472) | def forward(self, hidden_states):
class DummyBertPreTrainedModel (line 479) | class DummyBertPreTrainedModel(PreTrainedModel):
method _init_weights (line 494) | def _init_weights(self, module):
class DummyBertModel (line 516) | class DummyBertModel(DummyBertPreTrainedModel):
method __init__ (line 519) | def __init__(self, config, add_pooling_layer=True):
method get_input_embeddings (line 536) | def get_input_embeddings(self):
method set_input_embeddings (line 539) | def set_input_embeddings(self, value):
method forward (line 545) | def forward(
method _create_attention_masks (line 613) | def _create_attention_masks(
FILE: examples/modular-transformers/modeling_from_uppercase_model.py
function eager_attention_forward (line 21) | def eager_attention_forward(
class FromUppercaseModelAttention (line 42) | class FromUppercaseModelAttention(nn.Module):
method __init__ (line 45) | def __init__(self, config: FromUppercaseModelVisionConfig | FromUpperc...
method forward (line 60) | def forward(
class FromUppercaseModelMLP (line 99) | class FromUppercaseModelMLP(nn.Module):
method __init__ (line 100) | def __init__(self, config):
method forward (line 107) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class FromUppercaseModelEncoderLayer (line 114) | class FromUppercaseModelEncoderLayer(GradientCheckpointingLayer):
method __init__ (line 115) | def __init__(self, config: FromUppercaseModelVisionConfig | FromUpperc...
method forward (line 123) | def forward(
FILE: examples/modular-transformers/modeling_global_indexing.py
function rotate_half (line 21) | def rotate_half(x):
function apply_rotary_pos_emb (line 29) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
function repeat_kv (line 54) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function eager_attention_forward (line 66) | def eager_attention_forward(
function custom_flex (line 91) | def custom_flex(x, **kwargs):
class GlobalIndexingAttention (line 102) | class GlobalIndexingAttention(nn.Module):
method __init__ (line 105) | def __init__(self, config: GlobalIndexingConfig, layer_idx: int):
method forward (line 128) | def forward(
FILE: examples/modular-transformers/modeling_multimodal2.py
function eager_attention_forward (line 24) | def eager_attention_forward(
class Multimodal2VisionAttention (line 45) | class Multimodal2VisionAttention(nn.Module):
method __init__ (line 48) | def __init__(self, config: Multimodal2VisionConfig | Multimodal2TextCo...
method forward (line 63) | def forward(
class Multimodal2VisionMLP (line 102) | class Multimodal2VisionMLP(nn.Module):
method __init__ (line 103) | def __init__(self, config):
method forward (line 110) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class Multimodal2VisionEncoderLayer (line 117) | class Multimodal2VisionEncoderLayer(GradientCheckpointingLayer):
method __init__ (line 118) | def __init__(self, config):
method forward (line 126) | def forward(
class Multimodal2VisionEncoder (line 150) | class Multimodal2VisionEncoder(nn.Module):
method __init__ (line 159) | def __init__(self, config):
method forward (line 165) | def forward(
class Multimodal2VisionPreTrainedModel (line 199) | class Multimodal2VisionPreTrainedModel(PreTrainedModel):
method _init_weights (line 214) | def _init_weights(self, module):
class Multimodal2VisionEmbeddings (line 220) | class Multimodal2VisionEmbeddings(nn.Module):
method __init__ (line 221) | def __init__(self, config: Multimodal2VisionConfig):
method interpolate_pos_encoding (line 243) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i...
method forward (line 284) | def forward(self, pixel_values: torch.FloatTensor, interpolate_pos_enc...
class Multimodal2VisionTransformer (line 303) | class Multimodal2VisionTransformer(Multimodal2VisionPreTrainedModel):
method __init__ (line 309) | def __init__(self, config):
method forward (line 323) | def forward(
class Multimodal2VisionModel (line 355) | class Multimodal2VisionModel(Multimodal2VisionPreTrainedModel):
method __init__ (line 361) | def __init__(self, config: Multimodal2VisionConfig):
method get_input_embeddings (line 367) | def get_input_embeddings(self) -> nn.Module:
method forward (line 371) | def forward(
FILE: examples/modular-transformers/modeling_my_new_model2.py
class MyNewModel2TextScaledWordEmbedding (line 23) | class MyNewModel2TextScaledWordEmbedding(nn.Embedding):
method __init__ (line 28) | def __init__(self, num_embeddings: int, embedding_dim: int, padding_id...
method forward (line 33) | def forward(self, input_ids: torch.Tensor):
class MyNewModel2RMSNorm (line 37) | class MyNewModel2RMSNorm(nn.Module):
method __init__ (line 38) | def __init__(self, dim: int, eps: float = 1e-6):
method _norm (line 43) | def _norm(self, x):
method forward (line 46) | def forward(self, x):
method extra_repr (line 53) | def extra_repr(self):
class MyNewModel2MLP (line 57) | class MyNewModel2MLP(nn.Module):
method __init__ (line 58) | def __init__(self, config):
method forward (line 68) | def forward(self, x):
function rotate_half (line 73) | def rotate_half(x):
function apply_rotary_pos_emb (line 81) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
function repeat_kv (line 106) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function eager_attention_forward (line 118) | def eager_attention_forward(
class MyNewModel2Attention (line 144) | class MyNewModel2Attention(nn.Module):
method __init__ (line 147) | def __init__(self, config: MyNewModel2Config, layer_idx: int):
method forward (line 170) | def forward(
class MyNewModel2DecoderLayer (line 211) | class MyNewModel2DecoderLayer(GradientCheckpointingLayer):
method __init__ (line 212) | def __init__(self, config: MyNewModel2Config, layer_idx: int):
method forward (line 222) | def forward(
class MyNewModel2PreTrainedModel (line 255) | class MyNewModel2PreTrainedModel(PreTrainedModel):
method _init_weights (line 273) | def _init_weights(self, module):
class MyNewModel2ForSequenceClassification (line 282) | class MyNewModel2ForSequenceClassification(GenericForSequenceClassificat...
FILE: examples/modular-transformers/modeling_new_task_model.py
class NewTaskModelModelOutputWithPast (line 37) | class NewTaskModelModelOutputWithPast(BaseModelOutputWithPast):
class NewTaskModelCausalLMOutputWithPast (line 53) | class NewTaskModelCausalLMOutputWithPast(ModelOutput):
class NewTaskModelMultiModalProjector (line 77) | class NewTaskModelMultiModalProjector(nn.Module):
method __init__ (line 78) | def __init__(self, config: NewTaskModelConfig):
method forward (line 82) | def forward(self, image_features):
class NewTaskModelPreTrainedModel (line 89) | class NewTaskModelPreTrainedModel(PreTrainedModel):
function token_type_ids_mask_function (line 103) | def token_type_ids_mask_function(
function create_causal_mask_mapping (line 144) | def create_causal_mask_mapping(
class NewTaskModelModel (line 221) | class NewTaskModelModel(NewTaskModelPreTrainedModel):
method __init__ (line 225) | def __init__(self, config: NewTaskModelConfig):
method get_input_embeddings (line 237) | def get_input_embeddings(self):
method set_input_embeddings (line 240) | def set_input_embeddings(self, value):
method get_image_features (line 247) | def get_image_features(
method get_placeholder_mask (line 257) | def get_placeholder_mask(
method forward (line 283) | def forward(
class NewTaskModelForNewTask (line 390) | class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
method __init__ (line 394) | def __init__(self, config):
method get_input_embeddings (line 403) | def get_input_embeddings(self):
method set_input_embeddings (line 406) | def set_input_embeddings(self, value):
method get_image_features (line 410) | def get_image_features(self, pixel_values: torch.FloatTensor, **kwargs...
method forward (line 415) | def forward(
method prepare_inputs_for_generation (line 460) | def prepare_inputs_for_generation(
method create_masks_for_generate (line 505) | def create_masks_for_generate(
method resize_token_embeddings (line 527) | def resize_token_embeddings(
FILE: examples/modular-transformers/modeling_roberta.py
class RobertaEmbeddings (line 27) | class RobertaEmbeddings(nn.Module):
method __init__ (line 30) | def __init__(self, config):
method forward (line 49) | def forward(
function eager_attention_forward (line 92) | def eager_attention_forward(
class RobertaSelfAttention (line 120) | class RobertaSelfAttention(nn.Module):
method __init__ (line 121) | def __init__(self, config, is_causal=False, layer_idx=None):
method forward (line 145) | def forward(
class RobertaCrossAttention (line 187) | class RobertaCrossAttention(nn.Module):
method __init__ (line 188) | def __init__(self, config, is_causal=False, layer_idx=None):
method forward (line 211) | def forward(
class RobertaSelfOutput (line 264) | class RobertaSelfOutput(nn.Module):
method __init__ (line 265) | def __init__(self, config):
method forward (line 271) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
class RobertaAttention (line 278) | class RobertaAttention(nn.Module):
method __init__ (line 279) | def __init__(self, config, is_causal=False, layer_idx=None, is_cross_a...
method forward (line 286) | def forward(
class RobertaIntermediate (line 307) | class RobertaIntermediate(nn.Module):
method __init__ (line 308) | def __init__(self, config):
method forward (line 316) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class RobertaOutput (line 322) | class RobertaOutput(nn.Module):
method __init__ (line 323) | def __init__(self, config):
method forward (line 329) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
class RobertaLayer (line 336) | class RobertaLayer(GradientCheckpointingLayer):
method __init__ (line 337) | def __init__(self, config, layer_idx=None):
method forward (line 356) | def forward(
method feed_forward_chunk (line 395) | def feed_forward_chunk(self, attention_output):
class RobertaEncoder (line 401) | class RobertaEncoder(nn.Module):
method __init__ (line 402) | def __init__(self, config):
method forward (line 407) | def forward(
class RobertaPooler (line 433) | class RobertaPooler(nn.Module):
method __init__ (line 434) | def __init__(self, config):
method forward (line 439) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class RobertaPredictionHeadTransform (line 448) | class RobertaPredictionHeadTransform(nn.Module):
method __init__ (line 449) | def __init__(self, config):
method forward (line 458) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class RobertaLMPredictionHead (line 465) | class RobertaLMPredictionHead(nn.Module):
method __init__ (line 466) | def __init__(self, config):
method forward (line 475) | def forward(self, hidden_states):
class RobertaPreTrainedModel (line 482) | class RobertaPreTrainedModel(PreTrainedModel):
method _init_weights (line 497) | def _init_weights(self, module):
class RobertaModel (line 519) | class RobertaModel(RobertaPreTrainedModel):
method __init__ (line 522) | def __init__(self, config, add_pooling_layer=True):
method get_input_embeddings (line 539) | def get_input_embeddings(self):
method set_input_embeddings (line 542) | def set_input_embeddings(self, value):
method forward (line 548) | def forward(
method _create_attention_masks (line 613) | def _create_attention_masks(
FILE: examples/modular-transformers/modeling_super.py
class SuperRMSNorm (line 29) | class SuperRMSNorm(nn.Module):
method __init__ (line 30) | def __init__(self, hidden_size, eps: float = 1e-6) -> None:
method forward (line 38) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
method extra_repr (line 45) | def extra_repr(self):
class SuperRotaryEmbedding (line 49) | class SuperRotaryEmbedding(nn.Module):
method __init__ (line 52) | def __init__(self, config: SuperConfig, device=None):
method compute_default_rope_parameters (line 69) | def compute_default_rope_parameters(
method forward (line 100) | def forward(self, x, position_ids):
class SuperMLP (line 114) | class SuperMLP(nn.Module):
method __init__ (line 115) | def __init__(self, config):
method forward (line 125) | def forward(self, x):
function rotate_half (line 130) | def rotate_half(x):
function apply_rotary_pos_emb (line 138) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
function repeat_kv (line 163) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function eager_attention_forward (line 175) | def eager_attention_forward(
class SuperAttention (line 201) | class SuperAttention(nn.Module):
method __init__ (line 204) | def __init__(self, config: SuperConfig, layer_idx: int):
method forward (line 227) | def forward(
class SuperDecoderLayer (line 268) | class SuperDecoderLayer(GradientCheckpointingLayer):
method __init__ (line 269) | def __init__(self, config: SuperConfig, layer_idx: int):
method forward (line 279) | def forward(
class SuperPreTrainedModel (line 312) | class SuperPreTrainedModel(PreTrainedModel):
class SuperModel (line 331) | class SuperModel(SuperPreTrainedModel):
method __init__ (line 332) | def __init__(self, config: SuperConfig):
method forward (line 351) | def forward(
FILE: examples/modular-transformers/modeling_switch_function.py
function rotate_half (line 21) | def rotate_half(x):
function apply_rotary_pos_emb (line 30) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
function repeat_kv (line 55) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function eager_attention_forward (line 67) | def eager_attention_forward(
class SwitchFunctionAttention (line 93) | class SwitchFunctionAttention(nn.Module):
method __init__ (line 96) | def __init__(self, config: SwitchFunctionConfig, layer_idx: int):
method forward (line 119) | def forward(
FILE: examples/modular-transformers/modeling_test_detr.py
class TestDetrDecoderOutput (line 40) | class TestDetrDecoderOutput(BaseModelOutputWithCrossAttentions):
class TestDetrModelOutput (line 64) | class TestDetrModelOutput(ModelOutput):
class MultiScaleDeformableAttention (line 97) | class MultiScaleDeformableAttention(nn.Module):
method forward (line 98) | def forward(
class TestDetrFrozenBatchNorm2d (line 151) | class TestDetrFrozenBatchNorm2d(nn.Module):
method __init__ (line 159) | def __init__(self, n):
method _load_from_state_dict (line 166) | def _load_from_state_dict(
method forward (line 177) | def forward(self, x):
function replace_batch_norm (line 190) | def replace_batch_norm(model):
class TestDetrConvEncoder (line 214) | class TestDetrConvEncoder(nn.Module):
method __init__ (line 222) | def __init__(self, config):
method forward (line 252) | def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor):
class TestDetrSinePositionEmbedding (line 266) | class TestDetrSinePositionEmbedding(nn.Module):
method __init__ (line 272) | def __init__(
method forward (line 288) | def forward(
class TestDetrLearnedPositionEmbedding (line 318) | class TestDetrLearnedPositionEmbedding(nn.Module):
method __init__ (line 323) | def __init__(self, embedding_dim=256):
method forward (line 329) | def forward(
function eager_attention_forward (line 351) | def eager_attention_forward(
class TestDetrSelfAttention (line 379) | class TestDetrSelfAttention(nn.Module):
method __init__ (line 386) | def __init__(
method forward (line 406) | def forward(
class TestDetrMultiscaleDeformableAttention (line 445) | class TestDetrMultiscaleDeformableAttention(nn.Module):
method __init__ (line 450) | def __init__(self, config: TestDetrConfig, num_heads: int, n_points: i...
method forward (line 482) | def forward(
class TestDetrMLP (line 552) | class TestDetrMLP(nn.Module):
method __init__ (line 553) | def __init__(self, config: TestDetrConfig, hidden_size: int, intermedi...
method forward (line 561) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class TestDetrEncoderLayer (line 569) | class TestDetrEncoderLayer(GradientCheckpointingLayer):
method __init__ (line 570) | def __init__(self, config: TestDetrConfig):
method forward (line 583) | def forward(
class TestDetrDecoderLayer (line 639) | class TestDetrDecoderLayer(GradientCheckpointingLayer):
method __init__ (line 640) | def __init__(self, config: TestDetrConfig):
method forward (line 662) | def forward(
class TestDetrPreTrainedModel (line 735) | class TestDetrPreTrainedModel(PreTrainedModel):
method _init_weights (line 755) | def _init_weights(self, module):
class TestDetrEncoder (line 799) | class TestDetrEncoder(TestDetrPreTrainedModel):
method __init__ (line 815) | def __init__(self, config: TestDetrConfig):
method forward (line 826) | def forward(
method get_reference_points (line 876) | def get_reference_points(spatial_shapes_list, valid_ratios, device):
function inverse_sigmoid (line 907) | def inverse_sigmoid(x, eps=1e-5):
class TestDetrDecoder (line 914) | class TestDetrDecoder(TestDetrPreTrainedModel):
method __init__ (line 935) | def __init__(self, config: TestDetrConfig):
method forward (line 950) | def forward(
class TestDetrModel (line 1054) | class TestDetrModel(TestDetrPreTrainedModel):
method __init__ (line 1055) | def __init__(self, config: TestDetrConfig):
method freeze_backbone (line 1128) | def freeze_backbone(self):
method unfreeze_backbone (line 1132) | def unfreeze_backbone(self):
method get_valid_ratio (line 1136) | def get_valid_ratio(self, mask, dtype=torch.float32):
method get_proposal_pos_embed (line 1147) | def get_proposal_pos_embed(self, proposals):
method gen_encoder_output_proposals (line 1167) | def gen_encoder_output_proposals(self, enc_output, padding_mask, spati...
method forward (line 1230) | def forward(
FILE: examples/modular-transformers/modeling_test_suffix.py
class TestSuffixDecoderLayer (line 22) | class TestSuffixDecoderLayer(nn.module):
class TestSuffixLlamaRMSNorm (line 27) | class TestSuffixLlamaRMSNorm(nn.Module):
method __init__ (line 28) | def __init__(self, hidden_size, eps: float = 1e-6) -> None:
method forward (line 36) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
method extra_repr (line 43) | def extra_repr(self):
class TestSuffixLlamaMLP (line 47) | class TestSuffixLlamaMLP(nn.Module):
method __init__ (line 48) | def __init__(self, config):
method forward (line 58) | def forward(self, x):
function rotate_half (line 63) | def rotate_half(x):
function apply_rotary_pos_emb (line 71) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
function repeat_kv (line 96) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function eager_attention_forward (line 108) | def eager_attention_forward(
class TestSuffixLlamaAttention (line 134) | class TestSuffixLlamaAttention(nn.Module):
method __init__ (line 137) | def __init__(self, config: TestSuffixLlamaConfig, layer_idx: int):
method forward (line 160) | def forward(
class TestSuffixLlamaDecoderLayer (line 201) | class TestSuffixLlamaDecoderLayer(GradientCheckpointingLayer):
method __init__ (line 202) | def __init__(self, config: TestSuffixLlamaConfig, layer_idx: int):
method forward (line 212) | def forward(
FILE: examples/modular-transformers/modular_add_function.py
class TestAttention (line 10) | class TestAttention(ZambaAttention):
method __init__ (line 11) | def __init__(self):
method forward (line 14) | def forward(self):
FILE: examples/modular-transformers/modular_dummy_bert.py
class DummyBertModel (line 10) | class DummyBertModel(BertModel):
method forward (line 11) | def forward(
FILE: examples/modular-transformers/modular_duplicated_method.py
class DuplicatedMethodConfig (line 4) | class DuplicatedMethodConfig(LlamaConfig):
method vocab_size (line 6) | def vocab_size(self): # noqa: F811 -> we need this at we cannot delet...
method vocab_size (line 10) | def vocab_size(self, value):
FILE: examples/modular-transformers/modular_from_uppercase_model.py
class FromUppercaseModelEncoderLayer (line 5) | class FromUppercaseModelEncoderLayer(CLIPEncoderLayer):
FILE: examples/modular-transformers/modular_global_indexing.py
function custom_flex (line 5) | def custom_flex(x, **kwargs):
class GlobalIndexingAttention (line 15) | class GlobalIndexingAttention(LlamaAttention):
FILE: examples/modular-transformers/modular_multimodal2.py
class Multimodal2VisionAttention (line 25) | class Multimodal2VisionAttention(CLIPAttention):
class Multimodal2VisionMLP (line 29) | class Multimodal2VisionMLP(CLIPMLP):
class Multimodal2VisionEncoderLayer (line 33) | class Multimodal2VisionEncoderLayer(CLIPEncoderLayer):
method __init__ (line 34) | def __init__(self, config):
class Multimodal2VisionEncoder (line 40) | class Multimodal2VisionEncoder(CLIPEncoder):
method __init__ (line 41) | def __init__(self, config):
class Multimodal2VisionPreTrainedModel (line 46) | class Multimodal2VisionPreTrainedModel(CLIPPreTrainedModel):
method _init_weights (line 52) | def _init_weights(self, module):
class Multimodal2VisionTransformer (line 59) | class Multimodal2VisionTransformer(CLIPVisionTransformer, Multimodal2Vis...
method __init__ (line 62) | def __init__(self, config):
class Multimodal2VisionModel (line 69) | class Multimodal2VisionModel(CLIPVisionModel, Multimodal2VisionPreTraine...
FILE: examples/modular-transformers/modular_my_new_model.py
class MyNewModelConfig (line 5) | class MyNewModelConfig(LlamaConfig):
FILE: examples/modular-transformers/modular_my_new_model2.py
class MyNewModel2Config (line 6) | class MyNewModel2Config(LlamaConfig):
class MyNewModel2ForSequenceClassification (line 30) | class MyNewModel2ForSequenceClassification(GemmaForSequenceClassification):
FILE: examples/modular-transformers/modular_new_imgproc_model.py
class ImgprocModelImageProcessor (line 7) | class ImgprocModelImageProcessor(BlipImageProcessor):
method new_image_processing_method (line 8) | def new_image_processing_method(self, pixel_values: torch.FloatTensor):
FILE: examples/modular-transformers/modular_new_model.py
class NewModelConfig (line 6) | class NewModelConfig(GemmaConfig):
method num_heads (line 30) | def num_heads(self):
FILE: examples/modular-transformers/modular_new_task_model.py
class NewTaskModelForNewTask (line 12) | class NewTaskModelForNewTask(PaliGemmaForConditionalGeneration):
method __init__ (line 15) | def __init__(self, config):
method forward (line 23) | def forward(
method resize_token_embeddings (line 68) | def resize_token_embeddings(
FILE: examples/modular-transformers/modular_roberta.py
class RobertaEmbeddings (line 6) | class RobertaEmbeddings(BertEmbeddings):
method __init__ (line 7) | def __init__(self, config):
class RobertaModel (line 15) | class RobertaModel(BertModel):
method __init__ (line 16) | def __init__(self, config, add_pooling_layer=True):
FILE: examples/modular-transformers/modular_super.py
class SuperModel (line 10) | class SuperModel(LlamaModel):
method forward (line 11) | def forward(
FILE: examples/modular-transformers/modular_switch_function.py
class SwitchFunctionAttention (line 9) | class SwitchFunctionAttention(LlamaAttention):
FILE: examples/modular-transformers/modular_test_detr.py
class TestDetrModel (line 6) | class TestDetrModel(DeformableDetrModel):
FILE: examples/modular-transformers/modular_test_suffix.py
class TestSuffixDecoderLayer (line 6) | class TestSuffixDecoderLayer(nn.module):
class TestSuffixLlamaDecoderLayer (line 11) | class TestSuffixLlamaDecoderLayer(LlamaDecoderLayer):
FILE: examples/pytorch/3d_parallel_checks.py
function main (line 75) | def main():
function all_reduce_grads (line 557) | def all_reduce_grads(model, world_mesh, use_ddp):
class ContextParallelCollator (line 585) | class ContextParallelCollator:
method __init__ (line 588) | def __init__(self, cp_mesh: DeviceMesh | None = None):
method __call__ (line 591) | def __call__(self, batch: dict[str, torch.Tensor]) -> dict[str, torch....
class AppState (line 612) | class AppState(Stateful):
method __init__ (line 615) | def __init__(self, model, optimizer=None):
method state_dict (line 619) | def state_dict(self):
method load_state_dict (line 623) | def load_state_dict(self, state_dict):
function sanity_check_tensor_sync (line 629) | def sanity_check_tensor_sync(
function clip_grad_norm_ (line 694) | def clip_grad_norm_(
function check_params_sync (line 727) | def check_params_sync(model_params, original_params):
function get_parameters (line 745) | def get_parameters(model: nn.Module) -> Iterable[torch.Tensor]:
function update_model_parameters (line 765) | def update_model_parameters(model: nn.Module) -> None:
FILE: examples/pytorch/audio-classification/run_audio_classification.py
function random_subsample (line 60) | def random_subsample(wav: np.ndarray, max_length: float, sample_rate: in...
class DataTrainingArguments (line 70) | class DataTrainingArguments:
class ModelArguments (line 134) | class ModelArguments:
function main (line 185) | def main():
FILE: examples/pytorch/conftest.py
function pytest_addoption (line 34) | def pytest_addoption(parser):
function pytest_terminal_summary (line 40) | def pytest_terminal_summary(terminalreporter):
FILE: examples/pytorch/continuous_batching.py
function generate_without_cb (line 33) | def generate_without_cb(
function maybe_setup_metrics (line 54) | def maybe_setup_metrics(use_metrics: bool) -> None:
function batch_generate (line 86) | def batch_generate(
FILE: examples/pytorch/contrastive-image-text/run_clip.py
class ModelArguments (line 70) | class ModelArguments:
class DataTrainingArguments (line 124) | class DataTrainingArguments:
method __post_init__ (line 184) | def __post_init__(self):
class Transform (line 203) | class Transform(torch.nn.Module):
method __init__ (line 204) | def __init__(self, image_size, mean, std):
method forward (line 213) | def forward(self, x) -> torch.Tensor:
function collate_fn (line 220) | def collate_fn(examples):
function main (line 232) | def main():
FILE: examples/pytorch/image-classification/run_image_classification.py
function pil_loader (line 77) | def pil_loader(path: str):
class DataTrainingArguments (line 84) | class DataTrainingArguments:
method __post_init__ (line 132) | def __post_init__(self):
class ModelArguments (line 140) | class ModelArguments:
function main (line 189) | def main():
FILE: examples/pytorch/image-classification/run_image_classification_no_trainer.py
function parse_args (line 73) | def parse_args():
function main (line 236) | def main():
FILE: examples/pytorch/image-pretraining/run_mae.py
class DataTrainingArguments (line 58) | class DataTrainingArguments:
method __post_init__ (line 109) | def __post_init__(self):
class ModelArguments (line 119) | class ModelArguments:
class CustomTrainingArguments (line 170) | class CustomTrainingArguments(TrainingArguments):
function collate_fn (line 176) | def collate_fn(examples):
function main (line 181) | def main():
function _mp_fn (line 387) | def _mp_fn(index):
FILE: examples/pytorch/image-pretraining/run_mim.py
class DataTrainingArguments (line 66) | class DataTrainingArguments:
method __post_init__ (line 112) | def __post_init__(self):
class ModelArguments (line 122) | class ModelArguments:
class MaskGenerator (line 203) | class MaskGenerator:
method __init__ (line 211) | def __init__(self, input_size=192, mask_patch_size=32, model_patch_siz...
method __call__ (line 228) | def __call__(self):
function collate_fn (line 239) | def collate_fn(examples):
function main (line 245) | def main():
FILE: examples/pytorch/image-pretraining/run_mim_no_trainer.py
function parse_args (line 74) | def parse_args():
class MaskGenerator (line 344) | class MaskGenerator:
method __init__ (line 352) | def __init__(self, input_size=192, mask_patch_size=32, model_patch_siz...
method __call__ (line 369) | def __call__(self):
function collate_fn (line 380) | def collate_fn(examples):
function main (line 386) | def main():
FILE: examples/pytorch/instance-segmentation/run_instance_segmentation.py
class Arguments (line 65) | class Arguments:
function augment_and_transform_batch (line 113) | def augment_and_transform_batch(
function collate_fn (line 154) | def collate_fn(examples):
class ModelOutput (line 165) | class ModelOutput:
function nested_cpu (line 170) | def nested_cpu(tensors):
class Evaluator (line 181) | class Evaluator:
method __init__ (line 186) | def __init__(
method get_metric (line 206) | def get_metric(self):
method reset_metric (line 210) | def reset_metric(self):
method postprocess_target_batch (line 213) | def postprocess_target_batch(self, target_batch) -> list[dict[str, tor...
method get_target_sizes (line 227) | def get_target_sizes(self, post_processed_targets) -> list[list[int]]:
method postprocess_prediction_batch (line 233) | def postprocess_prediction_batch(self, prediction_batch, target_sizes)...
method __call__ (line 264) | def __call__(self, evaluation_results: EvalPrediction, compute_result:...
function setup_logging (line 310) | def setup_logging(training_args: TrainingArguments) -> None:
function find_last_checkpoint (line 330) | def find_last_checkpoint(training_args: TrainingArguments) -> str | None:
function main (line 340) | def main():
FILE: examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py
function parse_args (line 71) | def parse_args():
function augment_and_transform_batch (line 232) | def augment_and_transform_batch(
function collate_fn (line 273) | def collate_fn(examples):
function nested_cpu (line 283) | def nested_cpu(tensors):
function evaluation_loop (line 294) | def evaluation_loop(model, image_processor, accelerator: Accelerator, da...
function setup_logging (line 369) | def setup_logging(accelerator: Accelerator) -> None:
function handle_repository_creation (line 387) | def handle_repository_creation(accelerator: Accelerator, args: argparse....
function main (line 413) | def main():
FILE: examples/pytorch/language-modeling/run_clm.py
class ModelArguments (line 82) | class ModelArguments:
method __post_init__ (line 156) | def __post_init__(self):
class DataTrainingArguments (line 164) | class DataTrainingArguments:
method __post_init__ (line 226) | def __post_init__(self):
function split_streaming_dataset (line 241) | def split_streaming_dataset(
function main (line 280) | def main():
function _mp_fn (line 704) | def _mp_fn(index):
FILE: examples/pytorch/language-modeling/run_clm_no_trainer.py
function parse_args (line 86) | def parse_args():
function main (line 270) | def main():
FILE: examples/pytorch/language-modeling/run_fim.py
class ModelArguments (line 85) | class ModelArguments:
method __post_init__ (line 171) | def __post_init__(self):
class DataTrainingArguments (line 179) | class DataTrainingArguments:
method __post_init__ (line 292) | def __post_init__(self):
function main (line 307) | def main():
function _mp_fn (line 838) | def _mp_fn(index):
FILE: examples/pytorch/language-modeling/run_fim_no_trainer.py
function parse_args (line 89) | def parse_args():
function main (line 330) | def main():
FILE: examples/pytorch/language-modeling/run_mlm.py
class ModelArguments (line 79) | class ModelArguments:
method __post_init__ (line 153) | def __post_init__(self):
class DataTrainingArguments (line 161) | class DataTrainingArguments:
method __post_init__ (line 235) | def __post_init__(self):
function main (line 252) | def main():
function _mp_fn (line 662) | def _mp_fn(index):
FILE: examples/pytorch/language-modeling/run_mlm_no_trainer.py
function parse_args (line 84) | def parse_args():
function main (line 277) | def main():
FILE: examples/pytorch/language-modeling/run_plm.py
class ModelArguments (line 70) | class ModelArguments:
method __post_init__ (line 120) | def __post_init__(self):
class DataTrainingArguments (line 128) | class DataTrainingArguments:
method __post_init__ (line 220) | def __post_init__(self):
function main (line 232) | def main():
function _mp_fn (line 559) | def _mp_fn(index):
FILE: examples/pytorch/multiple-choice/run_swag.py
class ModelArguments (line 64) | class ModelArguments:
class DataTrainingArguments (line 112) | class DataTrainingArguments:
method __post_init__ (line 167) | def __post_init__(self):
function main (line 176) | def main():
function _mp_fn (line 426) | def _mp_fn(index):
FILE: examples/pytorch/multiple-choice/run_swag_no_trainer.py
function parse_args (line 78) | def parse_args():
function main (line 240) | def main():
FILE: examples/pytorch/object-detection/run_object_detection.py
class ModelOutput (line 67) | class ModelOutput:
function format_image_annotations_as_coco (line 72) | def format_image_annotations_as_coco(
function convert_bbox_yolo_to_pascal (line 107) | def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[i...
function augment_and_transform_batch (line 129) | def augment_and_transform_batch(
function collate_fn (line 161) | def collate_fn(batch: list[BatchFeature]) -> Mapping[str, torch.Tensor |...
function compute_metrics (line 171) | def compute_metrics(
class DataTrainingArguments (line 243) | class DataTrainingArguments:
class ModelArguments (line 291) | class ModelArguments:
function main (line 338) | def main():
FILE: examples/pytorch/object-detection/run_object_detection_no_trainer.py
function format_image_annotations_as_coco (line 75) | def format_image_annotations_as_coco(
function convert_bbox_yolo_to_pascal (line 111) | def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: tuple[i...
function augment_and_transform_batch (line 134) | def augment_and_transform_batch(
function collate_fn (line 167) | def collate_fn(batch: list[BatchFeature]) -> Mapping[str, torch.Tensor |...
function nested_to_cpu (line 176) | def nested_to_cpu(objects):
function evaluation_loop (line 189) | def evaluation_loop(
function parse_args (line 240) | def parse_args():
function main (line 406) | def main():
FILE: examples/pytorch/old_test_xla_examples.py
function get_results (line 31) | def get_results(output_dir):
class TorchXLAExamplesTests (line 47) | class TorchXLAExamplesTests(TestCasePlus):
method test_run_glue (line 48) | def test_run_glue(self):
method test_trainer_tpu (line 83) | def test_trainer_tpu(self):
FILE: examples/pytorch/question-answering/run_qa.py
class ModelArguments (line 57) | class ModelArguments:
class DataTrainingArguments (line 101) | class DataTrainingArguments:
method __post_init__ (line 204) | def __post_init__(self):
function main (line 224) | def main():
function _mp_fn (line 685) | def _mp_fn(index):
FILE: examples/pytorch/question-answering/run_qa_beam_search.py
class ModelArguments (line 56) | class ModelArguments:
class DataTrainingArguments (line 90) | class DataTrainingArguments:
method __post_init__ (line 203) | def __post_init__(self):
function main (line 223) | def main():
function _mp_fn (line 712) | def _mp_fn(index):
FILE: examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
function save_prefixed_metrics (line 66) | def save_prefixed_metrics(results, output_dir, file_name: str = "all_res...
function parse_args (line 89) | def parse_args():
function main (line 301) | def main():
FILE: examples/pytorch/question-answering/run_qa_no_trainer.py
function save_prefixed_metrics (line 71) | def save_prefixed_metrics(results, output_dir, file_name: str = "all_res...
function parse_args (line 94) | def parse_args():
function main (line 340) | def main():
FILE: examples/pytorch/question-answering/run_seq2seq_qa.py
class ModelArguments (line 55) | class ModelArguments:
class DataTrainingArguments (line 103) | class DataTrainingArguments:
method __post_init__ (line 244) | def __post_init__(self):
function main (line 271) | def main():
function _mp_fn (line 714) | def _mp_fn(index):
FILE: examples/pytorch/question-answering/trainer_qa.py
class QuestionAnsweringTrainer (line 30) | class QuestionAnsweringTrainer(Trainer):
method __init__ (line 31) | def __init__(self, *args, eval_examples=None, post_process_function=No...
method evaluate (line 36) | def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=...
method predict (line 90) | def predict(self, predict_dataset, predict_examples, ignore_keys=None,...
FILE: examples/pytorch/question-answering/trainer_seq2seq_qa.py
class QuestionAnsweringSeq2SeqTrainer (line 32) | class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
method __init__ (line 33) | def __init__(self, *args, eval_examples=None, post_process_function=No...
method evaluate (line 39) | def evaluate(
method predict (line 112) | def predict(
FILE: examples/pytorch/question-answering/utils_qa.py
function postprocess_qa_predictions (line 30) | def postprocess_qa_predictions(
function postprocess_qa_predictions_with_beam_search (line 251) | def postprocess_qa_predictions_with_beam_search(
FILE: examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
function reduce_labels_transform (line 67) | def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
class DataTrainingArguments (line 83) | class DataTrainingArguments:
method __post_init__ (line 125) | def __post_init__(self):
class ModelArguments (line 133) | class ModelArguments:
function main (line 174) | def main():
FILE: examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
function reduce_labels_transform (line 71) | def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
function parse_args (line 86) | def parse_args():
function main (line 234) | def main():
FILE: examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
function parse_args (line 60) | def parse_args():
class DataCollatorForWav2Vec2Pretraining (line 297) | class DataCollatorForWav2Vec2Pretraining:
method __call__ (line 340) | def __call__(self, features: list[dict[str, list[int] | torch.Tensor]]...
function multiply_grads (line 385) | def multiply_grads(params, c):
function get_grad_norm (line 394) | def get_grad_norm(params, scale=1):
function main (line 405) | def main():
FILE: examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
function list_field (line 72) | def list_field(default=None, metadata=None):
class ModelArguments (line 77) | class ModelArguments:
class DataTrainingArguments (line 166) | class DataTrainingArguments:
class DataCollatorCTCWithPadding (line 310) | class DataCollatorCTCWithPadding:
method __call__ (line 341) | def __call__(self, features: list[dict[str, list[int] | torch.Tensor]]...
function create_vocabulary_from_data (line 373) | def create_vocabulary_from_data(
function main (line 416) | def main():
FILE: examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
function list_field (line 75) | def list_field(default=None, metadata=None):
class ModelArguments (line 80) | class ModelArguments:
class DataTrainingArguments (line 142) | class DataTrainingArguments:
class DataCollatorCTCWithPadding (line 290) | class DataCollatorCTCWithPadding:
method __call__ (line 320) | def __call__(self, features: list[dict[str, list[int] | torch.Tensor]]...
function create_vocabulary_from_data (line 350) | def create_vocabulary_from_data(
function main (line 393) | def main():
FILE: examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
class ModelArguments (line 72) | class ModelArguments:
class DataTrainingArguments (line 140) | class DataTrainingArguments:
class DataCollatorSpeechSeq2SeqWithPadding (line 244) | class DataCollatorSpeechSeq2SeqWithPadding:
method __call__ (line 260) | def __call__(self, features: list[dict[str, list[int] | torch.Tensor]]...
function main (line 287) | def main():
FILE: examples/pytorch/summarization/run_summarization.py
class ModelArguments (line 90) | class ModelArguments:
class DataTrainingArguments (line 147) | class DataTrainingArguments:
method __post_init__ (line 288) | def __post_init__(self):
function main (line 326) | def main():
function _mp_fn (line 760) | def _mp_fn(index):
FILE: examples/pytorch/summarization/run_summarization_no_trainer.py
function parse_args (line 108) | def parse_args():
function main (line 339) | def main():
FILE: examples/pytorch/test_accelerate_examples.py
function get_setup_file (line 42) | def get_setup_file():
function get_results (line 49) | def get_results(output_dir):
class ExamplesTestsNoTrainer (line 64) | class ExamplesTestsNoTrainer(TestCasePlus):
method setUpClass (line 66) | def setUpClass(cls):
method tearDownClass (line 74) | def tearDownClass(cls):
method test_run_glue_no_trainer (line 79) | def test_run_glue_no_trainer(self):
method test_run_clm_no_trainer (line 104) | def test_run_clm_no_trainer(self):
method test_run_mlm_no_trainer (line 132) | def test_run_mlm_no_trainer(self):
method test_run_ner_no_trainer (line 153) | def test_run_ner_no_trainer(self):
method test_run_squad_no_trainer (line 182) | def test_run_squad_no_trainer(self):
method test_run_swag_no_trainer (line 211) | def test_run_swag_no_trainer(self):
method test_run_summarization_no_trainer (line 234) | def test_run_summarization_no_trainer(self):
method test_run_translation_no_trainer (line 262) | def test_run_translation_no_trainer(self):
method test_run_semantic_segmentation_no_trainer (line 291) | def test_run_semantic_segmentation_no_trainer(self):
method test_run_image_classification_no_trainer (line 314) | def test_run_image_classification_no_trainer(self):
method test_run_object_detection_no_trainer (line 341) | def test_run_object_detection_no_trainer(self):
method test_run_instance_segmentation_no_trainer (line 365) | def test_run_instance_segmentation_no_trainer(self):
FILE: examples/pytorch/test_pytorch_examples.py
function get_results (line 85) | def get_results(output_dir):
class ExamplesTests (line 100) | class ExamplesTests(TestCasePlus):
method test_run_glue (line 101) | def test_run_glue(self):
method test_run_clm (line 128) | def test_run_clm(self):
method test_run_clm_config_overrides (line 156) | def test_run_clm_config_overrides(self):
method test_run_mlm (line 181) | def test_run_mlm(self):
method test_run_ner (line 203) | def test_run_ner(self):
method test_run_squad (line 233) | def test_run_squad(self):
method test_run_squad_seq2seq (line 257) | def test_run_squad_seq2seq(self):
method test_run_swag (line 285) | def test_run_swag(self):
method test_generation (line 307) | def test_generation(self):
method test_run_summarization (line 322) | def test_run_summarization(self):
method test_run_translation (line 349) | def test_run_translation(self):
method test_run_image_classification (line 378) | def test_run_image_classification(self):
method test_run_speech_recognition_ctc (line 408) | def test_run_speech_recognition_ctc(self):
method test_run_speech_recognition_ctc_adapter (line 437) | def test_run_speech_recognition_ctc_adapter(self):
method test_run_speech_recognition_seq2seq (line 468) | def test_run_speech_recognition_seq2seq(self):
method test_run_audio_classification (line 497) | def test_run_audio_classification(self):
method test_run_wav2vec2_pretraining (line 528) | def test_run_wav2vec2_pretraining(self):
method test_run_vit_mae_pretraining (line 551) | def test_run_vit_mae_pretraining(self):
method test_run_semantic_segmentation (line 579) | def test_run_semantic_segmentation(self):
method test_run_object_detection (line 605) | def test_run_object_detection(self):
method test_run_instance_segmentation (line 633) | def test_run_instance_segmentation(self):
FILE: examples/pytorch/text-classification/run_classification.py
class DataTrainingArguments (line 71) | class DataTrainingArguments:
method __post_init__ (line 206) | def __post_init__(self):
class ModelArguments (line 220) | class ModelArguments:
function get_label_list (line 271) | def get_label_list(raw_dataset, split="train") -> list[str]:
function main (line 284) | def main():
function _mp_fn (line 738) | def _mp_fn(index):
FILE: examples/pytorch/text-classification/run_glue.py
class DataTrainingArguments (line 84) | class DataTrainingArguments:
method __post_init__ (line 159) | def __post_init__(self):
class ModelArguments (line 178) | class ModelArguments:
function main (line 229) | def main():
function _mp_fn (line 638) | def _mp_fn(index):
FILE: examples/pytorch/text-classification/run_glue_no_trainer.py
function parse_args (line 87) | def parse_args():
function main (line 237) | def main():
FILE: examples/pytorch/text-classification/run_xnli.py
class DataTrainingArguments (line 71) | class DataTrainingArguments:
class ModelArguments (line 131) | class ModelArguments:
function main (line 192) | def main():
FILE: examples/pytorch/text-generation/run_generation.py
function prepare_ctrl_input (line 99) | def prepare_ctrl_input(args, _, tokenizer, prompt_text):
function prepare_xlm_input (line 109) | def prepare_xlm_input(args, model, tokenizer, prompt_text):
function prepare_xlnet_input (line 135) | def prepare_xlnet_input(args, _, tokenizer, prompt_text):
function prepare_transfoxl_input (line 141) | def prepare_transfoxl_input(args, _, tokenizer, prompt_text):
function adjust_length_to_model (line 155) | def adjust_length_to_model(length, max_sequence_length):
function sparse_model_config (line 165) | def sparse_model_config(model_config):
function generate_past_key_values (line 194) | def generate_past_key_values(model, batch_size, seq_len):
function prepare_jit_inputs (line 223) | def prepare_jit_inputs(inputs, model, tokenizer):
class _ModelFallbackWrapper (line 241) | class _ModelFallbackWrapper(GenerationMixin):
method __init__ (line 244) | def __init__(self, optimized, default):
method __call__ (line 248) | def __call__(self, *args, **kwargs):
method __getattr__ (line 267) | def __getattr__(self, item):
method prepare_inputs_for_generation (line 270) | def prepare_inputs_for_generation(
method _reorder_cache (line 277) | def _reorder_cache(
function main (line 288) | def main():
FILE: examples/pytorch/token-classification/run_ner.py
class ModelArguments (line 68) | class ModelArguments:
class DataTrainingArguments (line 116) | class DataTrainingArguments:
method __post_init__ (line 212) | def __post_init__(self):
function main (line 225) | def main():
function _mp_fn (line 635) | def _mp_fn(index):
FILE: examples/pytorch/token-classification/run_ner_no_trainer.py
function parse_args (line 82) | def parse_args():
function main (line 286) | def main():
FILE: examples/pytorch/transformers_serve_cb_eval_job.py
function wait_for_server_up (line 21) | def wait_for_server_up(server_process, port=8000, timeout=600):
function main (line 56) | def main():
FILE: examples/pytorch/translation/run_translation.py
class ModelArguments (line 79) | class ModelArguments:
class DataTrainingArguments (line 127) | class DataTrainingArguments:
method __post_init__ (line 254) | def __post_init__(self):
function main (line 274) | def main():
function _mp_fn (line 685) | def _mp_fn(index):
FILE: examples/pytorch/translation/run_translation_no_trainer.py
function parse_args (line 85) | def parse_args():
function main (line 329) | def main():
FILE: examples/pytorch/xla_spawn.py
function parse_args (line 34) | def parse_args():
function main (line 66) | def main():
FILE: examples/quantization/custom_quantization.py
class CustomConfig (line 12) | class CustomConfig(QuantizationConfigMixin):
method __init__ (line 13) | def __init__(self):
method to_dict (line 17) | def to_dict(self) -> dict[str, Any]:
method __repr__ (line 23) | def __repr__(self):
method to_diff_dict (line 27) | def to_diff_dict(self) -> dict[str, Any]:
class CustomQuantizer (line 42) | class CustomQuantizer(HfQuantizer):
method __init__ (line 43) | def __init__(self, quantization_config: QuantizationConfigMixin, **kwa...
method _process_model_before_weight_loading (line 50) | def _process_model_before_weight_loading(self, model, **kwargs):
method _process_model_after_weight_loading (line 53) | def _process_model_after_weight_loading(self, model, **kwargs):
method is_serializable (line 56) | def is_serializable(self) -> bool:
method is_trainable (line 59) | def is_trainable(self) -> bool:
FILE: examples/quantization/custom_quantization_int8_example.py
class Int8SymmetricLinear (line 16) | class Int8SymmetricLinear(torch.nn.Module):
method __init__ (line 17) | def __init__(self, in_features, out_features, bias, dtype=torch.float32):
method forward (line 30) | def forward(self, x):
function _replace_with_int8_symmetric_linear (line 39) | def _replace_with_int8_symmetric_linear(
function replace_with_int8_symmetric_linear (line 85) | def replace_with_int8_symmetric_linear(
class Int8SymmetricConfig (line 110) | class Int8SymmetricConfig(QuantizationConfigMixin):
method __init__ (line 115) | def __init__(self, modules_to_not_convert: list[str] | None = None, **...
method __repr__ (line 119) | def __repr__(self):
method to_diff_dict (line 123) | def to_diff_dict(self) -> dict[str, Any]:
class Int8SymmetricQuantizer (line 136) | class Int8SymmetricQuantizer(HfQuantizer):
method __init__ (line 145) | def __init__(self, quantization_config: QuantizationConfigMixin, **kwa...
method _process_model_before_weight_loading (line 149) | def _process_model_before_weight_loading(self, model, **kwargs):
method param_needs_quantization (line 162) | def param_needs_quantization(self, model, param_name: str, **kwargs) -...
method create_quantized_param (line 172) | def create_quantized_param(
method update_missing_keys (line 200) | def update_missing_keys(self, model, missing_keys: list[str], prefix: ...
method _process_model_after_weight_loading (line 213) | def _process_model_after_weight_loading(self, model, **kwargs):
method is_serializable (line 219) | def is_serializable(self):
method is_trainable (line 223) | def is_trainable(self) -> bool:
FILE: examples/scheduler/run_greedy.py
function parse_args (line 53) | def parse_args():
function main (line 89) | def main():
FILE: examples/training/distributed_training.py
function run (line 18) | def run(backend):
function init_processes (line 34) | def init_processes(backend):
FILE: scripts/check_tokenizers.py
function check_diff (line 25) | def check_diff(
function check_LTR_mark (line 48) | def check_LTR_mark(line: str, idx: int, fast: PreTrainedTokenizerBase) -...
function check_details (line 59) | def check_details(
function test_string (line 122) | def test_string(slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizer...
function test_tokenizer (line 154) | def test_tokenizer(slow: PreTrainedTokenizerBase, fast: PreTrainedTokeni...
FILE: scripts/distributed/torch-distributed-gpu-test.py
function printflock (line 53) | def printflock(*msgs):
FILE: scripts/stale.py
function main (line 36) | def main():
FILE: setup.py
function deps_list (line 172) | def deps_list(*pkgs):
class DepsTableUpdateCommand (line 274) | class DepsTableUpdateCommand(Command):
method initialize_options (line 286) | def initialize_options(self):
method finalize_options (line 289) | def finalize_options(self):
method run (line 292) | def run(self):
FILE: src/transformers/__init__.py
function _create_module_alias (line 803) | def _create_module_alias(alias: str, target: str) -> None:
function getattr_factory (line 833) | def getattr_factory(target):
FILE: src/transformers/_typing.py
class TransformersLogger (line 39) | class TransformersLogger(Protocol):
method setLevel (line 52) | def setLevel(self, level: Level) -> None: ...
method isEnabledFor (line 53) | def isEnabledFor(self, level: Level) -> bool: ...
method getEffectiveLevel (line 54) | def getEffectiveLevel(self) -> int: ...
method getChild (line 56) | def getChild(self, suffix: str) -> logging.Logger: ...
method addHandler (line 58) | def addHandler(self, hdlr: logging.Handler) -> None: ...
method removeHandler (line 59) | def removeHandler(self, hdlr: logging.Handler) -> None: ...
method hasHandlers (line 60) | def hasHandlers(self) -> bool: ...
method debug (line 63) | def debug(self, msg: object, *args: object, **kwargs: object) -> None:...
method info (line 64) | def info(self, msg: object, *args: object, **kwargs: object) -> None: ...
method warning (line 65) | def warning(self, msg: object, *args: object, **kwargs: object) -> Non...
method warn (line 66) | def warn(self, msg: object, *args: object, **kwargs: object) -> None: ...
method error (line 67) | def error(self, msg: object, *args: object, **kwargs: object) -> None:...
method exception (line 68) | def exception(self, msg: object, *args: object, exc_info: ExcInfo = Tr...
method critical (line 69) | def critical(self, msg: object, *args: object, **kwargs: object) -> No...
method fatal (line 70) | def fatal(self, msg: object, *args: object, **kwargs: object) -> None:...
method log (line 73) | def log(self, level: Level, msg: object, *args: object, **kwargs: obje...
method makeRecord (line 76) | def makeRecord(
method handle (line 90) | def handle(self, record: logging.LogRecord) -> None: ...
method findCaller (line 91) | def findCaller(
method callHandlers (line 97) | def callHandlers(self, record: logging.LogRecord) -> None: ...
method getMessage (line 98) | def getMessage(self) -> str: ... # NOTE: actually on LogRecord; inclu...
method _log (line 100) | def _log(
method addFilter (line 112) | def addFilter(self, filt: logging.Filter) -> None: ...
method removeFilter (line 113) | def removeFilter(self, filt: logging.Filter) -> None: ...
method filters (line 115) | def filters(self) -> list[logging.Filter]: ...
method filter (line 117) | def filter(self, record: logging.LogRecord) -> bool: ...
method setFormatter (line 120) | def setFormatter(self, fmt: logging.Formatter) -> None: ... # mostly ...
method debugStack (line 121) | def debugStack(self, msg: object, *args: object, **kwargs: object) -> ...
method warning_advice (line 129) | def warning_advice(self, msg: object, *args: object, **kwargs: object)...
method warning_once (line 130) | def warning_once(self, msg: object, *args: object, **kwargs: object) -...
method info_once (line 131) | def info_once(self, msg: object, *args: object, **kwargs: object) -> N...
class GenerativePreTrainedModel (line 134) | class GenerativePreTrainedModel(Protocol):
method __getattr__ (line 155) | def __getattr__(self, name: str) -> Any: ...
method forward (line 156) | def forward(self, *args: Any, **kwargs: Any) -> Any: ...
method __call__ (line 157) | def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
method can_generate (line 158) | def can_generate(self) -> bool: ...
method get_encoder (line 159) | def get_encoder(self) -> Any: ...
method get_output_embeddings (line 160) | def get_output_embeddings(self) -> Any: ...
method get_input_embeddings (line 161) | def get_input_embeddings(self) -> Any: ...
method set_output_embeddings (line 162) | def set_output_embeddings(self, value: Any) -> None: ...
method set_input_embeddings (line 163) | def set_input_embeddings(self, value: Any) -> None: ...
method get_compiled_call (line 164) | def get_compiled_call(self, compile_config: Any) -> Any: ...
method set_experts_implementation (line 165) | def set_experts_implementation(self, *args: Any, **kwargs: Any) -> Any...
method _supports_logits_to_keep (line 166) | def _supports_logits_to_keep(self) -> bool: ...
class WhisperGenerationConfigLike (line 169) | class WhisperGenerationConfigLike(Protocol):
FILE: src/transformers/activations.py
class GELUTanh (line 31) | class GELUTanh(nn.Module):
method __init__ (line 40) | def __init__(self, use_gelu_tanh_python: bool = False):
method _gelu_tanh_python (line 47) | def _gelu_tanh_python(self, input: Tensor) -> Tensor:
method forward (line 50) | def forward(self, input: Tensor) -> Tensor:
class NewGELUActivation (line 59) | class NewGELUActivation(nn.Module):
method forward (line 65) | def forward(self, input: Tensor) -> Tensor:
class GELUActivation (line 70) | class GELUActivation(nn.Module):
method __init__ (line 78) | def __init__(self, use_gelu_python: bool = False):
method _gelu_python (line 85) | def _gelu_python(self, input: Tensor) -> Tensor:
method forward (line 88) | def forward(self, input: Tensor) -> Tensor:
class SiLUActivation (line 93) | class SiLUActivation(nn.Module):
method forward (line 102) | def forward(self, input: Tensor) -> Tensor:
class FastGELUActivation (line 107) | class FastGELUActivation(nn.Module):
method forward (line 112) | def forward(self, input: Tensor) -> Tensor:
class QuickGELUActivation (line 117) | class QuickGELUActivation(nn.Module):
method forward (line 122) | def forward(self, input: Tensor) -> Tensor:
class ClippedGELUActivation (line 126) | class ClippedGELUActivation(nn.Module):
method __init__ (line 139) | def __init__(self, min: float, max: float):
method forward (line 147) | def forward(self, x: Tensor) -> Tensor:
class AccurateGELUActivation (line 151) | class AccurateGELUActivation(nn.Module):
method __init__ (line 159) | def __init__(self):
method forward (line 163) | def forward(self, input: Tensor) -> Tensor:
class MishActivation (line 167) | class MishActivation(nn.Module):
method __init__ (line 173) | def __init__(self):
method _mish_python (line 177) | def _mish_python(self, input: Tensor) -> Tensor:
method forward (line 180) | def forward(self, input: Tensor) -> Tensor:
class LinearActivation (line 184) | class LinearActivation(nn.Module):
method forward (line 189) | def forward(self, input: Tensor) -> Tensor:
class LaplaceActivation (line 193) | class LaplaceActivation(nn.Module):
method forward (line 201) | def forward(self, input, mu=0.707107, sigma=0.282095):
class ReLUSquaredActivation (line 206) | class ReLUSquaredActivation(nn.Module):
method forward (line 211) | def forward(self, input):
class ClassInstantier (line 217) | class ClassInstantier(OrderedDict):
method __getitem__ (line 218) | def __getitem__(self, key):
class XIELUActivation (line 224) | class XIELUActivation(nn.Module):
method __init__ (line 232) | def __init__(
method _xielu_python (line 274) | def _xielu_python(self, x: Tensor) -> Tensor:
method _xielu_cuda (line 283) | def _xielu_cuda(self, x: Tensor) -> Tensor:
method forward (line 308) | def forward(self, input: Tensor) -> Tensor:
function get_activation (line 345) | def get_activation(activation_string):
FILE: src/transformers/audio_utils.py
function load_audio (line 60) | def load_audio(audio: str | np.ndarray, sampling_rate=16000, timeout=Non...
function load_audio_torchcodec (line 91) | def load_audio_torchcodec(audio: str | np.ndarray, sampling_rate=16000) ...
function load_audio_librosa (line 115) | def load_audio_librosa(audio: str | np.ndarray, sampling_rate=16000, tim...
function load_audio_as (line 143) | def load_audio_as(
function conv1d_output_length (line 221) | def conv1d_output_length(module: "torch.nn.Conv1d", input_length: int) -...
function is_valid_audio (line 233) | def is_valid_audio(audio):
function is_valid_list_of_audio (line 237) | def is_valid_list_of_audio(audio):
function make_list_of_audio (line 241) | def make_list_of_audio(
function hertz_to_mel (line 263) | def hertz_to_mel(freq: float | np.ndarray, mel_scale: str = "htk") -> fl...
function mel_to_hertz (line 299) | def mel_to_hertz(mels: float | np.ndarray, mel_scale: str = "htk") -> fl...
function hertz_to_octave (line 335) | def hertz_to_octave(freq: float | np.ndarray, tuning: float = 0.0, bins_...
function _create_triangular_filter_bank (line 356) | def _create_triangular_filter_bank(fft_freqs: np.ndarray, filter_freqs: ...
function chroma_filter_bank (line 378) | def chroma_filter_bank(
function mel_filter_bank (line 453) | def mel_filter_bank(
function optimal_fft_length (line 547) | def optimal_fft_length(window_length: int) -> int:
function window_function (line 560) | def window_function(
function spectrogram (line 624) | def spectrogram(
function spectrogram_batch (line 835) | def spectrogram_batch(
function power_to_db (line 1046) | def power_to_db(
function power_to_db_batch (line 1097) | def power_to_db_batch(
function amplitude_to_db (line 1146) | def amplitude_to_db(
function amplitude_to_db_batch (line 1195) | def amplitude_to_db_batch(
FILE: src/transformers/backbone_utils.py
class BackboneType (line 30) | class BackboneType(enum.Enum):
class BackboneConfigMixin (line 35) | class BackboneConfigMixin:
method set_output_features_output_indices (line 40) | def set_output_features_output_indices(
method verify_out_features_out_indices (line 76) | def verify_out_features_out_indices(self):
method out_features (line 127) | def out_features(self):
method out_features (line 131) | def out_features(self, out_features: list[str]):
method out_indices (line 138) | def out_indices(self):
method out_indices (line 142) | def out_indices(self, out_indices: tuple[int, ...] | list[int]):
method to_dict (line 149) | def to_dict(self):
function filter_output_hidden_states (line 160) | def filter_output_hidden_states(forward_function):
class BackboneMixin (line 181) | class BackboneMixin:
method __init__ (line 188) | def __init__(self, *args, **kwargs) -> None:
method post_init (line 207) | def post_init(self):
method _init_timm_backbone (line 219) | def _init_timm_backbone(self, backbone) -> None:
method _init_transformers_backbone (line 252) | def _init_transformers_backbone(self) -> None:
method out_features (line 259) | def out_features(self):
method out_features (line 263) | def out_features(self, out_features: list[str]):
method out_indices (line 270) | def out_indices(self):
method out_indices (line 274) | def out_indices(self, out_indices: tuple[int] | list[int]):
method out_feature_channels (line 281) | def out_feature_channels(self):
method channels (line 287) | def channels(self):
method forward_with_filtered_kwargs (line 290) | def forward_with_filtered_kwargs(self, *args, **kwargs):
method forward (line 298) | def forward(
function consolidate_backbone_kwargs_to_config (line 308) | def consolidate_backbone_kwargs_to_config(
function load_backbone (line 358) | def load_backbone(config):
FILE: src/transformers/cache_utils.py
class CacheLayerMixin (line 26) | class CacheLayerMixin(ABC):
method __init__ (line 31) | def __init__(self):
method __repr__ (line 36) | def __repr__(self):
method lazy_initialization (line 40) | def lazy_initialization(self, key_states: torch.Tensor, value_states: ...
method update (line 43) | def update(
method get_mask_sizes (line 48) | def get_mask_sizes(self, query_length: int) -> tuple[int, int]: ...
method get_seq_length (line 51) | def get_seq_length(self) -> int: ...
method get_max_cache_shape (line 54) | def get_max_cache_shape(self) -> int: ...
method offload (line 56) | def offload(self):
method prefetch (line 62) | def prefetch(self):
method reset (line 68) | def reset(self) -> None:
method reorder_cache (line 81) | def reorder_cache(self, beam_idx: torch.LongTensor) -> None:
class DynamicLayer (line 88) | class DynamicLayer(CacheLayerMixin):
method lazy_initialization (line 96) | def lazy_initialization(self, key_states: torch.Tensor, value_states: ...
method update (line 102) | def update(
method get_mask_sizes (line 123) | def get_mask_sizes(self, query_length: int) -> tuple[int, int]:
method get_seq_length (line 129) | def get_seq_length(self) -> int:
method get_max_cache_shape (line 135) | def get_max_cache_shape(self) -> int:
method crop (line 139) | def crop(self, max_length: int) -> None:
method batch_repeat_interleave (line 153) | def batch_repeat_interleave(self, repeats: int) -> None:
method batch_select_indices (line 159) | def batch_select_indices(self, indices: torch.Tensor) -> None:
class DynamicSlidingWindowLayer (line 166) | class DynamicSlidingWindowLayer(DynamicLayer):
method __init__ (line 174) | def __init__(self, sliding_window: int):
method lazy_initialization (line 180) | def lazy_initialization(self, key_states: torch.Tensor, value_states: ...
method update (line 184) | def update(
method get_mask_sizes (line 213) | def get_mask_sizes(self, query_length: int) -> tuple[int, int]:
method get_seq_length (line 225) | def get_seq_length(self) -> int:
method get_max_cache_shape (line 229) | def get_max_cache_shape(self) -> int:
method crop (line 233) | def crop(self, max_length: int) -> None:
class StaticLayer (line 247) | class StaticLayer(CacheLayerMixin):
method __init__ (line 260) | def __init__(self, max_cache_len: int):
method lazy_initialization (line 266) | def lazy_initialization(self, key_states: torch.Tensor, value_states: ...
method update (line 308) | def update(
method get_mask_sizes (line 342) | def get_mask_sizes(self, query_length: int) -> tuple[int, int]:
method get_seq_length (line 348) | def get_seq_length(self) -> int:
method get_max_cache_shape (line 352) | def get_max_cache_shape(self) -> int:
class StaticSlidingWindowLayer (line 357) | class StaticSlidingWindowLayer(StaticLayer):
method __init__ (line 372) | def __init__(self, max_cache_len: int, sliding_window: int):
method update (line 378) | def update(
method get_mask_sizes (line 457) | def get_mask_sizes(self, query_length: int) -> tuple[int, int]:
method get_seq_length (line 475) | def get_seq_length(self) -> int:
method reset (line 479) | def reset(self):
class QuantizedLayer (line 484) | class QuantizedLayer(DynamicLayer):
method __init__ (line 496) | def __init__(
method update (line 512) | def update(
method _quantize (line 550) | def _quantize(self, tensor, axis): ...
method _dequantize (line 553) | def _dequantize(self, q_tensor): ...
method get_seq_length (line 555) | def get_seq_length(self) -> int:
class QuantoQuantizedLayer (line 560) | class QuantoQuantizedLayer(QuantizedLayer):
method __init__ (line 561) | def __init__(
method _quantize (line 604) | def _quantize(self, tensor, axis):
method _dequantize (line 611) | def _dequantize(self, qtensor):
class HQQQuantizedLayer (line 615) | class HQQQuantizedLayer(QuantizedLayer):
method __init__ (line 616) | def __init__(
method _quantize (line 651) | def _quantize(self, tensor, axis):
method _dequantize (line 666) | def _dequantize(self, qtensor):
class LinearAttentionCacheLayerMixin (line 672) | class LinearAttentionCacheLayerMixin(ABC):
method __init__ (line 678) | def __init__(self):
method __repr__ (line 685) | def __repr__(self):
method lazy_initialization (line 689) | def lazy_initialization(
method update_conv_state (line 694) | def update_conv_state(self, conv_states: torch.Tensor) -> torch.Tensor...
method update_recurrent_state (line 697) | def update_recurrent_state(self, recurrent_states: torch.Tensor) -> to...
method offload (line 699) | def offload(self):
method prefetch (line 706) | def prefetch(self):
method reset (line 713) | def reset(self) -> None:
method reorder_cache (line 721) | def reorder_cache(self, beam_idx: torch.LongTensor):
method crop (line 729) | def crop(self, max_length: int):
class LinearAttentionLayer (line 734) | class LinearAttentionLayer(LinearAttentionCacheLayerMixin):
method lazy_initialization (line 735) | def lazy_initialization(
method update_conv_state (line 757) | def update_conv_state(self, conv_states: torch.Tensor, **kwargs) -> to...
method update_recurrent_state (line 790) | def update_recurrent_state(self, recurrent_states: torch.Tensor, **kwa...
class LinearAttentionAndFullAttentionLayer (line 807) | class LinearAttentionAndFullAttentionLayer(LinearAttentionLayer, Dynamic...
method __init__ (line 811) | def __init__(self):
method lazy_initialization (line 815) | def lazy_initialization(self, *args, **kwargs) -> None:
method reset (line 824) | def reset(self) -> None:
method reorder_cache (line 828) | def reorder_cache(self, beam_idx: torch.LongTensor):
class Cache (line 834) | class Cache:
method __init__ (line 854) | def __init__(
method __repr__ (line 878) | def __repr__(self):
method prefetch (line 881) | def prefetch(self, layer_idx: int, only_non_sliding: bool = True):
method offload (line 901) | def offload(self, layer_idx: int, only_non_sliding: bool = True):
method update (line 910) | def update(
method update_conv_state (line 944) | def update_conv_state(self, conv_states: torch.Tensor, layer_idx: int,...
method update_recurrent_state (line 964) | def update_recurrent_state(self, recurrent_states: torch.Tensor, layer...
method early_initialization (line 984) | def early_initialization(
method get_seq_length (line 999) | def get_seq_length(self, layer_idx: int = 0) -> int:
method has_previous_state (line 1023) | def has_previous_state(self, layer_idx: int | None = None) -> bool:
method get_mask_sizes (line 1049) | def get_mask_sizes(self, query_length: int, layer_idx: int) -> tuple[i...
method get_max_cache_shape (line 1079) | def get_max_cache_shape(self, layer_idx: int = 0) -> int:
method reset (line 1087) | def reset(self):
method reorder_cache (line 1092) | def reorder_cache(self, beam_idx: torch.LongTensor):
method crop (line 1097) | def crop(self, max_length: int):
method batch_repeat_interleave (line 1102) | def batch_repeat_interleave(self, repeats: int):
method batch_select_indices (line 1107) | def batch_select_indices(self, indices: torch.Tensor):
method max_batch_size (line 1113) | def max_batch_size(self) -> int:
method max_cache_len (line 1121) | def max_cache_len(self) -> int:
method is_compileable (line 1127) | def is_compileable(self) -> bool:
method is_initialized (line 1135) | def is_initialized(self) -> bool:
method is_sliding (line 1140) | def is_sliding(self) -> list[bool]:
method __len__ (line 1144) | def __len__(self):
class DynamicCache (line 1153) | class DynamicCache(Cache):
method __init__ (line 1196) | def __init__(
method __iter__ (line 1267) | def __iter__(self):
class StaticCache (line 1272) | class StaticCache(Cache):
method __init__ (line 1312) | def __init__(
class QuantizedCache (line 1354) | class QuantizedCache(Cache):
method __init__ (line 1385) | def __init__(
class EncoderDecoderCache (line 1410) | class EncoderDecoderCache(Cache):
method __init__ (line 1443) | def __init__(self, *caches) -> None:
method __iter__ (line 1473) | def __iter__(self):
method __repr__ (line 1478) | def __repr__(self) -> str:
method __len__ (line 1484) | def __len__(self):
method get_seq_length (line 1491) | def get_seq_length(self, layer_idx: int = 0) -> int:
method reset (line 1495) | def reset(self):
method reorder_cache (line 1501) | def reorder_cache(self, beam_idx: torch.LongTensor):
method check_dynamic_cache (line 1506) | def check_dynamic_cache(self, method: str):
method crop (line 1517) | def crop(self, maximum_length: int):
method batch_repeat_interleave (line 1525) | def batch_repeat_interleave(self, repeats: int):
method batch_select_indices (line 1531) | def batch_select_indices(self, indices: torch.Tensor):
method get_max_cache_shape (line 1537) | def get_max_cache_shape(self) -> int:
method get_mask_sizes (line 1541) | def get_mask_sizes(self, query_length: int, layer_idx: int) -> tuple[i...
method is_sliding (line 1545) | def is_sliding(self):
method is_compileable (line 1549) | def is_compileable(self) -> bool:
FILE: src/transformers/cli/add_new_model_like.py
class ClassFinder (line 36) | class ClassFinder(CSTVisitor):
method __init__ (line 41) | def __init__(self):
method visit_ClassDef (line 46) | def visit_ClassDef(self, node: cst.ClassDef) -> None:
method leave_ClassDef (line 51) | def leave_ClassDef(self, node: cst.ClassDef):
method visit_SimpleStatementLine (line 54) | def visit_SimpleStatementLine(self, node: cst.SimpleStatementLine):
function add_new_model_like (line 89) | def add_new_model_like(
class ModelInfos (line 116) | class ModelInfos:
method __init__ (line 121) | def __init__(self, lowercase_name: str):
function add_content_to_file (line 156) | def add_content_to_file(file_name: str | os.PathLike, new_content: str, ...
function add_model_to_auto_mappings (line 178) | def add_model_to_auto_mappings(
function create_doc_file (line 245) | def create_doc_file(new_paper_name: str, public_classes: list[str]):
function insert_model_in_doc_toc (line 301) | def insert_model_in_doc_toc(
function create_init_file (line 326) | def create_init_file(old_lowercase_name: str, new_lowercase_name: str, f...
function find_all_classes_from_file (line 364) | def find_all_classes_from_file(module_name: str) -> set:
function find_modular_structure (line 380) | def find_modular_structure(
function create_modular_file (line 405) | def create_modular_file(
function create_test_files (line 458) | def create_test_files(
function _add_new_model_like_internal (line 510) | def _add_new_model_like_internal(
function get_user_field (line 605) | def get_user_field(
function convert_to_bool (line 654) | def convert_to_bool(x: str) -> bool:
function get_user_input (line 665) | def get_user_input():
FILE: src/transformers/cli/chat.py
class RichInterface (line 106) | class RichInterface:
method __init__ (line 107) | def __init__(self, model_id: str, user_id: str, base_url: str):
method stream_output (line 113) | async def stream_output(self, stream: AsyncIterator[ChatCompletionStre...
method input (line 172) | def input(self) -> str:
method clear (line 178) | def clear(self):
method print_user_message (line 182) | def print_user_message(self, text: str):
method print_color (line 187) | def print_color(self, text: str, color: str):
method confirm (line 192) | def confirm(self, message: str, default: bool = False) -> bool:
method print_help (line 204) | def print_help(self, minimal: bool = False):
method print_model_load (line 209) | def print_model_load(self, model: str):
method print_status (line 291) | def print_status(self, config: GenerationConfig):
class Chat (line 298) | class Chat:
method __init__ (line 303) | def __init__(
method check_health (line 374) | def check_health(url):
method handle_non_exit_user_commands (line 390) | def handle_non_exit_user_commands(
method _inner_run (line 464) | async def _inner_run(self):
function load_generation_config (line 583) | def load_generation_config(generation_config: str | None) -> GenerationC...
function parse_generate_flags (line 595) | def parse_generate_flags(generate_flags: list[str] | None) -> dict:
function new_chat_history (line 649) | def new_chat_history(system_prompt: str | None = None) -> list[dict]:
function save_chat (line 654) | def save_chat(filename: str, chat: list[dict], settings: dict) -> str:
function get_username (line 662) | def get_username() -> str:
FILE: src/transformers/cli/download.py
function download (line 19) | def download(
FILE: src/transformers/cli/serve.py
class Serve (line 33) | class Serve:
method __init__ (line 34) | def __init__(
method start_server (line 157) | def start_server(self):
method reset_loaded_models (line 166) | def reset_loaded_models(self):
method kill_server (line 170) | def kill_server(self):
FILE: src/transformers/cli/serving/chat_completion.py
class TransformersCompletionCreateParamsStreaming (line 51) | class TransformersCompletionCreateParamsStreaming(CompletionCreateParams...
class ChatCompletionHandler (line 86) | class ChatCompletionHandler(BaseHandler):
method handle_request (line 95) | async def handle_request(self, body: dict, request_id: str) -> Streami...
method _streaming (line 162) | def _streaming(
method _non_streaming (line 255) | async def _non_streaming(
method _build_generation_config (line 314) | def _build_generation_config(self, body: dict, model_generation_config...
method _build_completion (line 332) | def _build_completion(
method _build_chunk_sse (line 371) | def _build_chunk_sse(
FILE: src/transformers/cli/serving/model_manager.py
class TimedModel (line 43) | class TimedModel:
method __init__ (line 53) | def __init__(
method reset_timer (line 68) | def reset_timer(self) -> None:
method delete_model (line 74) | def delete_model(self) -> None:
method _timeout_reached (line 87) | def _timeout_reached(self) -> None:
class ModelManager (line 93) | class ModelManager:
method __init__ (line 109) | def __init__(
method _resolve_dtype (line 149) | def _resolve_dtype(dtype: str | None):
method _validate_args (line 161) | def _validate_args(self):
method process_model_name (line 181) | def process_model_name(model_id: str) -> str:
method get_quantization_config (line 187) | def get_quantization_config(self) -> BitsAndBytesConfig | None:
method _load_processor (line 199) | def _load_processor(self, model_id_and_revision: str) -> "ProcessorMix...
method _load_model (line 210) | def _load_model(
method load_model_and_processor (line 244) | def load_model_and_processor(
method load_model_streaming (line 287) | async def load_model_streaming(self, model_id_and_revision: str):
method shutdown (line 373) | def shutdown(self) -> None:
method get_model_modality (line 379) | def get_model_modality(
method get_gen_models (line 410) | def get_gen_models(cache_dir: str | None = None) -> list[dict]:
FILE: src/transformers/cli/serving/response.py
class TransformersResponseCreateParamsStreaming (line 70) | class TransformersResponseCreateParamsStreaming(ResponseCreateParamsStre...
class ResponseHandler (line 92) | class ResponseHandler(BaseHandler):
method handle_request (line 98) | async def handle_request(self, body: dict, request_id: str) -> Streami...
method _input_to_messages (line 168) | def _input_to_messages(body: dict) -> list[dict]:
method _streaming (line 205) | def _streaming(
method _non_streaming (line 475) | async def _non_streaming(
method _build_generation_config (line 538) | def _build_generation_config(self, body: dict, model_generation_config...
function compute_usage (line 548) | def compute_usage(input_tokens: int, output_tokens: int) -> ResponseUsage:
FILE: src/transformers/cli/serving/server.py
function build_server (line 40) | def build_server(
FILE: src/transformers/cli/serving/transcription.py
class TransformersTranscriptionCreateParams (line 41) | class TransformersTranscriptionCreateParams(TranscriptionCreateParamsBas...
class TranscriptionHandler (line 56) | class TranscriptionHandler:
method __init__ (line 67) | def __init__(self, model_manager: ModelManager, generation_state: Gene...
method _validate_request (line 76) | def _validate_request(self, form_keys: set[str]) -> None:
method handle_request (line 85) | async def handle_request(self, request: Request) -> JSONResponse | Str...
method _prepare_audio_inputs (line 120) | def _prepare_audio_inputs(
method _non_streaming (line 134) | async def _non_streaming(
method _streaming (line 150) | def _streaming(
FILE: src/transformers/cli/serving/utils.py
class Modality (line 61) | class Modality(enum.Enum):
class _StreamError (line 68) | class _StreamError:
method __init__ (line 71) | def __init__(self, msg: str):
class _GenerationCancelled (line 75) | class _GenerationCancelled(Exception):
function detect_tool_format (line 91) | def detect_tool_format(model: "PreTrainedModel") -> dict | None:
class ToolCallParser (line 108) | class ToolCallParser:
method __init__ (line 132) | def __init__(self, tool_format: dict):
method feed (line 140) | def feed(self, text: str) -> object | dict | None:
method _extract_name_and_args (line 166) | def _extract_name_and_args(block: str) -> tuple[str, str] | None:
method parse (line 178) | def parse(text: str, tool_format: dict) -> list[dict] | None:
method _parse_block (line 199) | def _parse_block(self, block: str) -> dict | None:
class DownloadAggregator (line 207) | class DownloadAggregator:
method __init__ (line 214) | def __init__(self, enqueue: Callable, model_id: str):
method register (line 220) | def register(self, bar_id: int, total: int | None) -> None:
method update (line 225) | def update(self, bar_id: int, current: int, total: int | None) -> None:
method close (line 230) | def close(self, bar_id: int) -> None:
method _emit (line 233) | def _emit(self) -> None:
function make_progress_tqdm_class (line 250) | def make_progress_tqdm_class(callback: Callable, model_id: str) -> type:
class DirectStreamer (line 321) | class DirectStreamer:
method __init__ (line 330) | def __init__(
method put (line 355) | def put(self, value: "torch.Tensor") -> None:
method end (line 369) | def end(self) -> None:
method cancel (line 373) | def cancel(self) -> None:
class CBStreamer (line 378) | class CBStreamer:
method __init__ (line 387) | def __init__(
method put (line 414) | def put(self, output: "GenerationOutput") -> None:
method end (line 424) | def end(self) -> None:
method cancel (line 428) | def cancel(self) -> None:
function set_torch_seed (line 433) | def set_torch_seed(seed: int) -> None:
function reset_torch_cache (line 440) | def reset_torch_cache() -> None:
class InferenceThread (line 448) | class InferenceThread:
method __init__ (line 455) | def __init__(self):
method _run (line 460) | def _run(self) -> None:
method submit (line 475) | def submit(self, fn, *args, **kwargs) -> Future:
method async_submit (line 481) | def async_submit(self, fn, *args, **kwargs) -> asyncio.Future:
class BaseGenerateManager (line 489) | class BaseGenerateManager(ABC):
method generate_streaming (line 498) | def generate_streaming(
method generate_non_streaming (line 522) | def generate_non_streaming(
method stop (line 544) | def stop(self) -> None:
class GenerateManager (line 548) | class GenerateManager(BaseGenerateManager):
method __init__ (line 551) | def __init__(self):
method generate_streaming (line 554) | def generate_streaming(
method generate_non_streaming (line 579) | async def generate_non_streaming(
method submit (line 596) | def submit(self, fn: Callable, *args, **kwargs) -> Future:
method async_submit (line 600) | def async_submit(self, fn: Callable, *args, **kwargs) -> asyncio.Future:
method stop (line 604) | def stop(self) -> None:
class CBGenerateManager (line 608) | class CBGenerateManager(BaseGenerateManager):
method __init__ (line 623) | def __init__(self, cb_config: "ContinuousBatchingConfig | None" = None):
method init_cb (line 627) | def init_cb(self, model: "PreTrainedModel", gen_config: "GenerationCon...
method generate_streaming (line 644) | def generate_streaming(
method generate_non_streaming (line 679) | async def generate_non_streaming(
method scheduler (line 716) | def scheduler(self) -> "Scheduler":
method stop (line 720) | def stop(self) -> None:
class GenerationState (line 725) | class GenerationState:
method __init__ (line 739) | def __init__(
method use_continuous_batching (line 752) | def use_continuous_batching(self, model: "PreTrainedModel", modality: ...
method get_manager (line 772) | def get_manager(self, model_id: str, use_cb: bool = False) -> BaseGene...
method shutdown (line 795) | def shutdown(self) -> None:
class BaseHandler (line 802) | class BaseHandler:
method __init__ (line 818) | def __init__(
method _validate_request (line 826) | def _validate_request(self, body: dict) -> None:
method chunk_to_sse (line 840) | def chunk_to_sse(chunk: "str | pydantic.BaseModel") -> str:
method _resolve_model (line 846) | def _resolve_model(self, body: dict) -> tuple[str, "PreTrainedModel", ...
method _build_generation_config (line 859) | def _build_generation_config(
method get_processor_inputs_from_messages (line 910) | def get_processor_inputs_from_messages(messages: list[dict], modality:...
FILE: src/transformers/cli/system.py
function env (line 41) | def env(
function version (line 131) | def version() -> None:
function _format_dict (line 136) | def _format_dict(d: dict) -> str:
FILE: src/transformers/cli/transformers.py
function main (line 35) | def main():
FILE: src/transformers/configuration_utils.py
function wrap_init_to_accept_kwargs (line 77) | def wrap_init_to_accept_kwargs(cls: dataclass):
class PreTrainedConfig (line 118) | class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
method __post_init__ (line 238) | def __post_init__(self, **kwargs):
method __init_subclass__ (line 298) | def __init_subclass__(cls, *args, **kwargs):
method name_or_path (line 311) | def name_or_path(self) -> str | None:
method name_or_path (line 315) | def name_or_path(self, value):
method num_labels (line 319) | def num_labels(self) -> int:
method num_labels (line 326) | def num_labels(self, num_labels: int):
method output_attentions (line 334) | def output_attentions(self):
method output_attentions (line 341) | def output_attentions(self, value: bool):
method _attn_implementation (line 353) | def _attn_implementation(self):
method _attn_implementation (line 357) | def _attn_implementation(self, value: str | dict | None):
method _experts_implementation (line 375) | def _experts_implementation(self):
method _experts_implementation (line 379) | def _experts_implementation(self, value: str | dict | None):
method torch_dtype (line 397) | def torch_dtype(self):
method use_return_dict (line 402) | def use_return_dict(self):
method torch_dtype (line 407) | def torch_dtype(self, value):
method __setattr__ (line 411) | def __setattr__(self, key, value):
method __getattribute__ (line 416) | def __getattribute__(self, key):
method validate_output_attentions (line 421) | def validate_output_attentions(self):
method validate_architecture (line 428) | def validate_architecture(self):
method validate_token_ids (line 441) | def validate_token_ids(self):
method validate_layer_type (line 456) | def validate_layer_type(self):
method rope_scaling (line 469) | def rope_scaling(self):
method rope_scaling (line 473) | def rope_scaling(self, value):
method save_pretrained (line 476) | def save_pretrained(self, save_directory: str | os.PathLike, push_to_h...
method from_pretrained (line 539) | def from_pretrained(
method get_config_dict (line 650) | def get_config_dict(
method _get_config_dict (line 683) | def _get_config_dict(
method from_dict (line 783) | def from_dict(
method from_json_file (line 848) | def from_json_file(
method _dict_from_json_file (line 866) | def _dict_from_json_file(cls, json_file: str | os.PathLike):
method _encode_special_floats (line 874) | def _encode_special_floats(cls, obj: Any) -> Any:
method _decode_special_floats (line 899) | def _decode_special_floats(cls, obj: Any) -> Any:
method __eq__ (line 920) | def __eq__(self, other):
method __repr__ (line 923) | def __repr__(self):
method __iter__ (line 926) | def __iter__(self):
method to_diff_dict (line 929) | def to_diff_dict(self) -> dict[str, Any]:
method to_dict (line 988) | def to_dict(self) -> dict[str, Any]:
method to_json_string (line 1035) | def to_json_string(self, use_diff: bool = True) -> str:
method to_json_file (line 1057) | def to_json_file(self, json_file_path: str | os.PathLike, use_diff: bo...
method update (line 1071) | def update(self, config_dict: dict[str, Any]):
method update_from_string (line 1081) | def update_from_string(self, update_str: str):
method dict_dtype_to_str (line 1119) | def dict_dtype_to_str(self, d: dict[str, Any]) -> None:
method _remove_keys_not_serialized (line 1136) | def _remove_keys_not_serialized(self, d: dict[str, Any]) -> None:
method register_for_auto_class (line 1162) | def register_for_auto_class(cls, auto_class="AutoConfig"):
method _get_generation_parameters (line 1183) | def _get_generation_parameters(self) -> dict[str, Any]:
method get_text_config (line 1199) | def get_text_config(self, decoder=None, encoder=None) -> "PreTrainedCo...
function get_configuration_file (line 1277) | def get_configuration_file(configuration_files: list[str]) -> str:
function recursive_diff_dict (line 1307) | def recursive_diff_dict(dict_a, dict_b, config_obj=None):
function layer_type_validation (line 1337) | def layer_type_validation(layer_types: list[str], num_hidden_layers: int...
FILE: src/transformers/conversion_mapping.py
function _build_checkpoint_conversion_mapping (line 85) | def _build_checkpoint_conversion_mapping():
function get_checkpoint_conversion_mapping (line 546) | def get_checkpoint_conversion_mapping(model_type):
function register_checkpoint_conversion_mapping (line 553) | def register_checkpoint_conversion_mapping(
function extract_weight_conversions_for_model (line 564) | def extract_weight_conversions_for_model(model: PreTrainedModel) -> list...
function get_model_conversion_mapping (line 572) | def get_model_conversion_mapping(
FILE: src/transformers/convert_slow_tokenizer.py
function import_protobuf (line 95) | def import_protobuf(error_message=""):
function _get_prepend_scheme (line 112) | def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
function generate_merges (line 122) | def generate_merges(vocab, vocab_scores, skip_tokens: Collection[str] | ...
class SentencePieceExtractor (line 146) | class SentencePieceExtractor:
method __init__ (line 151) | def __init__(self, model: str):
method extract (line 163) | def extract(self, model_type, **kwargs) -> tuple[dict[str, int], list[...
class GemmaSentencePieceExtractor (line 199) | class GemmaSentencePieceExtractor(SentencePieceExtractor):
method extract (line 200) | def extract(self, vocab_scores=None) -> tuple[dict[str, int], list[tup...
function check_number_comma (line 216) | def check_number_comma(piece: str) -> bool:
class Converter (line 220) | class Converter:
method __init__ (line 221) | def __init__(self, original_tokenizer):
method converted (line 224) | def converted(self) -> Tokenizer:
class BertConverter (line 228) | class BertConverter(Converter):
method converted (line 229) | def converted(self) -> Tokenizer:
class SplinterConverter (line 267) | class SplinterConverter(Converter):
method converted (line 268) | def converted(self) -> Tokenizer:
class FunnelConverter (line 317) | class FunnelConverter(Converter):
method converted (line 318) | def converted(self) -> Tokenizer:
class MPNetConverter (line 356) | class MPNetConverter(Converter):
method converted (line 357) | def converted(self) -> Tokenizer:
class OpenAIGPTConverter (line 395) | class OpenAIGPTConverter(Converter):
method converted (line 396) | def converted(self) -> Tokenizer:
class GPT2Converter (line 422) | class GPT2Converter(Converter):
method converted (line 423) | def converted(self, vocab: dict[str, int] | None = None, merges: list[...
class HerbertConverter (line 460) | class HerbertConverter(Converter):
method converted (line 461) | def converted(self) -> Tokenizer:
class Qwen2Converter (line 491) | class Qwen2Converter(Converter):
method converted (line 492) | def converted(self, vocab: dict[str, int] | None = None, merges: list[...
class RobertaConverter (line 535) | class RobertaConverter(Converter):
method converted (line 536) | def converted(self) -> Tokenizer:
class RoFormerConverter (line 564) | class RoFormerConverter(Converter):
method converted (line 565) | def converted(self) -> Tokenizer:
class DebertaConverter (line 603) | class DebertaConverter(Converter):
method converted (line 604) | def converted(self) -> Tokenizer:
class SpmConverter (line 634) | class SpmConverter(Converter):
method build_tokenizer_from_spm_proto (line 640) | def build_tokenizer_from_spm_proto(proto, vocab, merges=None):
method convert_from_spm (line 688) | def convert_from_spm(cls, vocab=None, **kwargs):
method __init__ (line 697) | def __init__(self, *args):
method vocab (line 718) | def vocab(self, proto):
method unk_id (line 721) | def unk_id(self, proto):
method tokenizer (line 724) | def tokenizer(self, proto):
method normalizer (line 774) | def normalizer(self, proto):
method pre_tokenizer (line 785) | def pre_tokenizer(self, replacement, add_prefix_space):
method post_processor (line 789) | def post_processor(self):
method decoder (line 792) | def decoder(self, replacement, add_prefix_space):
method converted (line 796) | def converted(self) -> Tokenizer:
class AlbertConverter (line 821) | class AlbertConverter(SpmConverter):
method vocab (line 822) | def vocab(self, proto):
method normalizer (line 828) | def normalizer(self, proto):
method post_processor (line 847) | def post_processor(self):
class BarthezConverter (line 858) | class BarthezConverter(SpmConverter):
method unk_id (line 859) | def unk_id(self, proto):
method post_processor (line 863) | def post_processor(self):
class CamembertConverter (line 874) | class CamembertConverter(SpmConverter):
method vocab (line 875) | def vocab(self, proto):
method unk_id (line 888) | def unk_id(self, proto):
method post_processor (line 892) | def post_processor(self):
method convert_from_spm (line 903) | def convert_from_spm(cls, vocab=None, **kwargs):
class DebertaV2Converter (line 922) | class DebertaV2Converter(SpmConverter):
method pre_tokenizer (line 923) | def pre_tokenizer(self, replacement, add_prefix_space):
method normalizer (line 931) | def normalizer(self, proto):
method post_processor (line 944) | def post_processor(self):
class MBartConverter (line 955) | class MBartConverter(SpmConverter):
method vocab (line 956) | def vocab(self, proto):
method unk_id (line 994) | def unk_id(self, proto):
method post_processor (line 997) | def post_processor(self):
method convert_from_spm (line 1008) | def convert_from_spm(cls, vocab=None, **kwargs):
class MBart50Converter (line 1029) | class MBart50Converter(SpmConverter):
method vocab (line 1030) | def vocab(self, proto):
method unk_id (line 1042) | def unk_id(self, proto):
method post_processor (line 1045) | def post_processor(self):
method convert_from_spm (line 1056) | def convert_from_spm(cls, vocab=None, **kwargs):
class NllbConverter (line 1077) | class NllbConverter(SpmConverter):
method vocab (line 1078) | def vocab(self, proto):
method unk_id (line 1088) | def unk_id(self, proto):
method post_processor (line 1091) | def post_processor(self):
method convert_from_spm (line 1102) | def convert_from_spm(cls, vocab=None, **kwargs):
class SeamlessM4TConverter (line 1124) | class SeamlessM4TConverter(SpmConverter):
method vocab (line 1125) | def vocab(self, proto):
method unk_id (line 1135) | def unk_id(self, proto):
method post_processor (line 1138) | def post_processor(self):
class XLMRobertaConverter (line 1149) | class XLMRobertaConverter(SpmConverter):
method vocab (line 1150) | def vocab(self, proto):
method unk_id (line 1161) | def unk_id(self, proto):
method post_processor (line 1165) | def post_processor(self):
method convert_from_spm (line 1176) | def convert_from_spm(cls, vocab=None, **kwargs):
class XLNetConverter (line 1196) | class XLNetConverter(SpmConverter):
method vocab (line 1197) | def vocab(self, proto):
method normalizer (line 1203) | def normalizer(self, proto):
method post_processor (line 1222) | def post_processor(self):
class ReformerConverter (line 1233) | class ReformerConverter(SpmConverter):
class RemBertConverter (line 1237) | class RemBertConverter(SpmConverter):
method normalizer (line 1239) | def normalizer(self, proto):
method post_processor (line 1258) | def post_processor(self):
class BertGenerationConverter (line 1269) | class BertGenerationConverter(SpmConverter):
class PegasusConverter (line 1273) | class PegasusConverter(SpmConverter):
method vocab (line 1274) | def vocab(self, proto):
method convert_from_spm (line 1294) | def convert_from_spm(cls, vocab=None, **kwargs):
method unk_id (line 1315) | def unk_id(self, proto):
method pre_tokenizer (line 1318) | def pre_tokenizer(self, replacement, add_prefix_space):
method post_processor (line 1327) | def post_processor(self):
class T5Converter (line 1335) | class T5Converter(SpmConverter):
method vocab (line 1336) | def vocab(self, proto):
method post_processor (line 1342) | def post_processor(self):
method convert_from_spm (line 1352) | def convert_from_spm(cls, vocab=None, **kwargs):
class UdopConverter (line 1363) | class UdopConverter(SpmConverter):
method post_processor (line 1364) | def post_processor(self):
class WhisperConverter (line 1374) | class WhisperConverter(Converter):
method converted (line 1375) | def converted(self) -> Tokenizer:
class BigBirdConverter (line 1410) | class BigBirdConverter(SpmConverter):
method post_processor (line 1411) | def post_processor(self):
class CLIPConverter (line 1422) | class CLIPConverter(Converter):
method converted (line 1423) | def converted(self) -> Tokenizer:
class LayoutLMv2Converter (line 1465) | class LayoutLMv2Converter(Converter):
method converted (line 1466) | def converted(self) -> Tokenizer:
class BlenderbotConverter (line 1504) | class BlenderbotConverter(Converter):
method converted (line 1505) | def converted(self) -> Tokenizer:
class XGLMConverter (line 1533) | class XGLMConverter(SpmConverter):
method vocab (line 1534) | def vocab(self, proto):
method unk_id (line 1545) | def unk_id(self, proto):
method post_processor (line 1549) | def post_processor(self):
class GemmaConverter (line 1560) | class GemmaConverter(SpmConverter):
method normalizer (line 1576) | def normalizer(self, proto):
method vocab (line 1579) | def vocab(self, proto):
method pre_tokenizer (line 1595) | def pre_tokenizer(self, replacement, add_prefix_space):
method unk_id (line 1598) | def unk_id(self, proto):
method decoder (line 1602) | def decoder(self, replacement, add_prefix_space):
class LlamaConverter (line 1612) | class LlamaConverter(SpmConverter):
method vocab (line 1615) | def vocab(self, proto):
method unk_id (line 1624) | def unk_id(self, proto):
method decoder (line 1628) | def decoder(self, replacement, add_prefix_space):
method normalizer (line 1638) | def normalizer(self, proto):
method pre_tokenizer (line 1647) | def pre_tokenizer(self, replacement, add_prefix_space):
method post_processor (line 1653) | def post_processor(self):
class MarkupLMConverter (line 1658) | class MarkupLMConverter(Converter):
method converted (line 1659) | def converted(self) -> Tokenizer:
class MoshiConverter (line 1696) | class MoshiConverter(SpmConverter):
method __init__ (line 1699) | def __init__(self, vocab_file, **kwargs):
method normalizer (line 1712) | def normalizer(self, proto):
method decoder (line 1722) | def decoder(self, replacement, add_prefix_space):
method pre_tokenizer (line 1732) | def pre_tokenizer(self, replacement, add_prefix_space):
class HeliumConverter (line 1737) | class HeliumConverter(SpmConverter):
method __init__ (line 1740) | def __init__(self, vocab_file=None, **kwargs):
method tokenizer (line 1752) | def tokenizer(self, proto):
method vocab (line 1780) | def vocab(self, proto):
method unk_id (line 1789) | def unk_id(self, proto):
method decoder (line 1793) | def decoder(self, replacement, add_prefix_space):
method normalizer (line 1802) | def normalizer(self, proto):
method pre_tokenizer (line 1805) | def pre_tokenizer(self, replacement, add_prefix_space):
method post_processor (line 1808) | def post_processor(self):
class ParakeetConverter (line 1826) | class ParakeetConverter(SpmConverter):
method __init__ (line 1829) | def __init__(self, vocab_file=None, *args):
method tokenizer (line 1842) | def tokenizer(self, proto):
function bytes_to_unicode (line 1874) | def bytes_to_unicode():
class TikTokenConverter (line 1898) | class TikTokenConverter:
method __init__ (line 1903) | def __init__(
method extract_vocab_merges_from_model (line 1918) | def extract_vocab_merges_from_model(self, tiktoken_url: str):
method tokenizer (line 1949) | def tokenizer(self):
method converted (line 1956) | def converted(self) -> Tokenizer:
class MistralConverter (line 1976) | class MistralConverter:
method __init__ (line 1977) | def __init__(
method extract_vocab_merges_from_model (line 1994) | def extract_vocab_merges_from_model(self, tiktoken_url: str):
method tokenizer (line 2033) | def tokenizer(self):
method converted (line 2040) | def converted(self) -> Tokenizer:
function convert_slow_tokenizer (line 2115) | def convert_slow_tokenizer(transformer_tokenizer, from_tiktoken=False) -...
FILE: src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py
function convert_slow_checkpoint_to_fast (line 48) | def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dum...
FILE: src/transformers/core_model_loading.py
function build_glob_alternation (line 52) | def build_glob_alternation(
class ConversionOps (line 84) | class ConversionOps:
method __repr__ (line 87) | def __repr__(self):
method convert (line 94) | def convert(
method reverse_op (line 100) | def reverse_op(self) -> ConversionOps:
class _IdentityOp (line 104) | class _IdentityOp(ConversionOps):
method convert (line 111) | def convert(self, input_dict: dict[str, Any], **kwargs) -> dict[str, A...
class Chunk (line 115) | class Chunk(ConversionOps):
method __init__ (line 118) | def __init__(self, dim: int = 0):
method convert (line 122) | def convert(
method get_target_patterns (line 132) | def get_target_patterns(self, input_dict: dict, target_patterns: list[...
method reverse_op (line 139) | def reverse_op(self) -> ConversionOps:
class Concatenate (line 143) | class Concatenate(ConversionOps):
method __init__ (line 146) | def __init__(self, dim: int = 0):
method convert (line 150) | def convert(
method get_target_pattern (line 169) | def get_target_pattern(self, target_patterns: list[str]) -> str:
method reverse_op (line 176) | def reverse_op(self) -> ConversionOps:
class MergeModulelist (line 180) | class MergeModulelist(ConversionOps):
method __init__ (line 187) | def __init__(self, dim: int = 0):
method convert (line 191) | def convert(
method get_target_pattern (line 204) | def get_target_pattern(self, input_dict: dict, source_pattern: str, ta...
method reverse_op (line 216) | def reverse_op(self) -> ConversionOps:
class SplitModulelist (line 220) | class SplitModulelist(ConversionOps):
method __init__ (line 223) | def __init__(self, dim: int = 0):
method convert (line 227) | def convert(
method get_target_patterns (line 241) | def get_target_patterns(
method reverse_op (line 255) | def reverse_op(self) -> ConversionOps:
class Transpose (line 259) | class Transpose(ConversionOps):
method __init__ (line 264) | def __init__(self, dim0: int = 0, dim1: int = 1, check_dims: bool = Fa...
method convert (line 270) | def convert(
method get_target_pattern (line 289) | def get_target_pattern(
method reverse_op (line 305) | def reverse_op(self) -> ConversionOps:
class PermuteForRope (line 309) | class PermuteForRope(ConversionOps):
method __init__ (line 314) | def __init__(self):
method _apply (line 317) | def _apply(self, tensor: torch.Tensor) -> torch.Tensor:
method convert (line 326) | def convert(
class ErnieFuseAndSplitTextVisionExperts (line 343) | class ErnieFuseAndSplitTextVisionExperts(ConversionOps):
method __init__ (line 363) | def __init__(self, stack_dim: int = 0, concat_dim: int = 1):
method split_list_into_chunks (line 367) | def split_list_into_chunks(self, tensor_list: list[torch.Tensor], chun...
method convert (line 372) | def convert(
method reverse_op (line 400) | def reverse_op(self) -> ConversionOps:
class ErnieSplitAndDecoupleTextVisionExperts (line 404) | class ErnieSplitAndDecoupleTextVisionExperts(ConversionOps):
method __init__ (line 425) | def __init__(self, stack_dim: int = 0, concat_dim: int = 1):
method convert (line 430) | def convert(
method reverse_op (line 462) | def reverse_op(self) -> ConversionOps:
function process_target_pattern (line 466) | def process_target_pattern(pattern: str) -> tuple[str, str | None]:
function process_source_pattern (line 500) | def process_source_pattern(source_pattern: str, target_pattern: str) -> ...
class WeightTransform (line 517) | class WeightTransform:
method __setattr__ (line 528) | def __setattr__(self, name, value):
method __post_init__ (line 539) | def __post_init__(self):
method add_tensor (line 587) | def add_tensor(self, target_key: str, source_key: str, source_pattern:...
method rename_source_key (line 591) | def rename_source_key(self, source_key: str) -> tuple[str, str | None]:
method reverse_transform (line 617) | def reverse_transform(self) -> WeightTransform:
method materialize_tensors (line 635) | def materialize_tensors(self) -> dict[str, list[torch.Tensor]]:
class WeightRenaming (line 663) | class WeightRenaming(WeightTransform):
method convert (line 666) | def convert(
class WeightConverter (line 708) | class WeightConverter(WeightTransform):
method __post_init__ (line 711) | def __post_init__(self):
method convert (line 722) | def convert(
function _materialize_copy (line 783) | def _materialize_copy(tensor: torch.Tensor, device=None, dtype=None) -> ...
function spawn_materialize (line 791) | def spawn_materialize(
function spawn_tp_materialize (line 811) | def spawn_tp_materialize(
function dot_natural_key (line 828) | def dot_natural_key(s: str):
function log_conversion_errors (line 843) | def log_conversion_errors(
function set_param_for_module (line 891) | def set_param_for_module(
function offload_and_maybe_resave_param (line 934) | def offload_and_maybe_resave_param(
class SkipParameters (line 953) | class SkipParameters(Exception):
function rename_source_key (line 960) | def rename_source_key(
function convert_and_load_state_dict_in_model (line 998) | def convert_and_load_state_dict_in_model(
function revert_weight_conversion (line 1277) | def revert_weight_conversion(model: PreTrainedModel, state_dict: dict[st...
FILE: src/transformers/data/data_collator.py
class DataCollatorMixin (line 37) | class DataCollatorMixin:
method __call__ (line 38) | def __call__(self, features, return_tensors: str | None = None):
function pad_without_fast_tokenizer_warning (line 49) | def pad_without_fast_tokenizer_warning(tokenizer, *pad_args, **pad_kwargs):
function default_data_collator (line 71) | def default_data_collator(features: list[InputDataClass], return_tensors...
class DefaultDataCollator (line 95) | class DefaultDataCollator(DataCollatorMixin):
method __call__ (line 116) | def __call__(self, features: list[dict[str, Any]], return_tensors=None...
function torch_default_data_collator (line 122) | def torch_default_data_collator(features: list[InputDataClass]) -> dict[...
function numpy_default_data_collator (line 158) | def numpy_default_data_collator(features: list[InputDataClass]) -> dict[...
class DataCollatorWithPadding (line 191) | class DataCollatorWithPadding:
method __call__ (line 224) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:
class DataCollatorForTokenClassification (line 243) | class DataCollatorForTokenClassification(DataCollatorMixin):
method torch_call (line 279) | def torch_call(self, features):
method numpy_call (line 319) | def numpy_call(self, features):
function _torch_collate_batch (line 350) | def _torch_collate_batch(examples, tokenizer, pad_to_multiple_of: int | ...
function _numpy_collate_batch (line 387) | def _numpy_collate_batch(examples, tokenizer, pad_to_multiple_of: int | ...
class DataCollatorForMultipleChoice (line 420) | class DataCollatorForMultipleChoice(DataCollatorMixin):
method torch_call (line 455) | def torch_call(self, examples: list[dict[str, Any]]): # Refactored im...
class DataCollatorForSeq2Seq (line 487) | class DataCollatorForSeq2Seq:
method __call__ (line 529) | def __call__(self, features, return_tensors=None):
class DataCollatorForLanguageModeling (line 619) | class DataCollatorForLanguageModeling(DataCollatorMixin):
method __post_init__ (line 692) | def __post_init__(self):
method get_generator (line 734) | def get_generator(self, seed):
method create_rng (line 742) | def create_rng(self):
method torch_call (line 765) | def torch_call(self, examples: list[list[int] | Any | dict[str, Any]])...
method torch_mask_tokens (line 796) | def torch_mask_tokens(
method numpy_call (line 859) | def numpy_call(self, examples: list[list[int] | Any | dict[str, Any]])...
method numpy_mask_tokens (line 890) | def numpy_mask_tokens(
method _calc_word_ids_and_prob_mask (line 973) | def _calc_word_ids_and_prob_mask(
method _whole_word_mask (line 1003) | def _whole_word_mask(word_ids: np.ndarray[np.ndarray[int]], mask: Any)...
class DataCollatorForWholeWordMask (line 1019) | class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
method __init__ (line 1027) | def __init__(self, *args, **kwargs):
function tolist (line 1038) | def tolist(x) -> list[Any]:
function to_numpy (line 1046) | def to_numpy(x) -> np.ndarray[Any]:
class DataCollatorForSOP (line 1056) | class DataCollatorForSOP(DataCollatorForLanguageModeling):
method __init__ (line 1064) | def __init__(self, *args, **kwargs):
method __call__ (line 1071) | def __call__(self, examples: list[dict[str, Any]]) -> dict[str, Any]:
method mask_tokens (line 1094) | def mask_tokens(self, inputs: Any) -> tuple[Any, Any, Any]:
class DataCollatorForPermutationLanguageModeling (line 1139) | class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
method torch_call (line 1152) | def torch_call(self, examples: list[list[int] | Any | dict[str, Any]])...
method numpy_call (line 1159) | def numpy_call(self, examples: list[list[int] | Any | dict[str, Any]])...
method torch_mask_tokens (line 1166) | def torch_mask_tokens(self, inputs: Any) -> tuple[Any, Any, Any, Any]:
method numpy_mask_tokens (line 1265) | def numpy_mask_tokens(self, inputs: Any) -> tuple[Any, Any, Any, Any]:
class DataCollatorWithFlattening (line 1364) | class DataCollatorWithFlattening(DefaultDataCollator):
method __init__ (line 1382) | def __init__(
method __call__ (line 1400) | def __call__(self, features, return_tensors=None, separator_id=None):
FILE: src/transformers/data/datasets/glue.py
class GlueDataTrainingArguments (line 35) | class GlueDataTrainingArguments:
method __post_init__ (line 60) | def __post_init__(self):
class Split (line 64) | class Split(Enum):
class GlueDataset (line 70) | class GlueDataset(Dataset):
method __init__ (line 75) | def __init__(
method __len__ (line 149) | def __len__(self):
method __getitem__ (line 152) | def __getitem__(self, i) -> InputFeatures:
method get_labels (line 155) | def get_labels(self):
FILE: src/transformers/data/datasets/squad.py
class SquadDataTrainingArguments (line 37) | class SquadDataTrainingArguments:
class Split (line 103) | class Split(Enum):
class SquadDataset (line 108) | class SquadDataset(Dataset):
method __init__ (line 114) | def __init__(
method __len__ (line 190) | def __len__(self):
method __getitem__ (line 193) | def __getitem__(self, i) -> dict[str, torch.Tensor]:
FILE: src/transformers/data/metrics/__init__.py
function simple_accuracy (line 30) | def simple_accuracy(preds, labels):
function acc_and_f1 (line 36) | def acc_and_f1(preds, labels):
function pearson_and_spearman (line 48) | def pearson_and_spearman(preds, labels):
function glue_compute_metrics (line 60) | def glue_compute_metrics(task_name, preds, labels):
function xnli_compute_metrics (line 90) | def xnli_compute_metrics(task_name, preds, labels):
FILE: src/transformers/data/metrics/squad_metrics.py
function normalize_answer (line 36) | def normalize_answer(s):
function get_tokens (line 56) | def get_tokens(s):
function compute_exact (line 62) | def compute_exact(a_gold, a_pred):
function compute_f1 (line 66) | def compute_f1(a_gold, a_pred):
function get_raw_scores (line 82) | def get_raw_scores(examples, preds):
function apply_no_ans_threshold (line 108) | def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thr...
function make_eval_dict (line 119) | def make_eval_dict(exact_scores, f1_scores, qid_list=None):
function merge_eval (line 140) | def merge_eval(main_eval, new_eval, prefix):
function find_best_thresh_v2 (line 145) | def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
function find_all_best_thresh_v2 (line 179) | def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_prob...
function find_best_thresh (line 190) | def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
function find_all_best_thresh (line 213) | def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, ...
function squad_evaluate (line 223) | def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_prob...
function get_final_text (line 254) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
function _get_best_indexes (line 348) | def _get_best_indexes(logits, n_best_size):
function _compute_softmax (line 360) | def _compute_softmax(scores):
function compute_predictions_logits (line 383) | def compute_predictions_logits(
function compute_predictions_log_probs (line 590) | def compute_predictions_log_probs(
FILE: src/transformers/data/processors/glue.py
function glue_convert_examples_to_features (line 35) | def glue_convert_examples_to_features(
function _glue_convert_examples_to_features (line 64) | def _glue_convert_examples_to_features(
class OutputMode (line 119) | class OutputMode(Enum):
class MrpcProcessor (line 124) | class MrpcProcessor(DataProcessor):
method __init__ (line 127) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 131) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 140) | def get_train_examples(self, data_dir):
method get_dev_examples (line 145) | def get_dev_examples(self, data_dir):
method get_test_examples (line 149) | def get_test_examples(self, data_dir):
method get_labels (line 153) | def get_labels(self):
method _create_examples (line 157) | def _create_examples(self, lines, set_type):
class MnliProcessor (line 171) | class MnliProcessor(DataProcessor):
method __init__ (line 174) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 178) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 187) | def get_train_examples(self, data_dir):
method get_dev_examples (line 191) | def get_dev_examples(self, data_dir):
method get_test_examples (line 195) | def get_test_examples(self, data_dir):
method get_labels (line 199) | def get_labels(self):
method _create_examples (line 203) | def _create_examples(self, lines, set_type):
class MnliMismatchedProcessor (line 217) | class MnliMismatchedProcessor(MnliProcessor):
method __init__ (line 220) | def __init__(self, *args, **kwargs):
method get_dev_examples (line 224) | def get_dev_examples(self, data_dir):
method get_test_examples (line 228) | def get_test_examples(self, data_dir):
class ColaProcessor (line 233) | class ColaProcessor(DataProcessor):
method __init__ (line 236) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 240) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 249) | def get_train_examples(self, data_dir):
method get_dev_examples (line 253) | def get_dev_examples(self, data_dir):
method get_test_examples (line 257) | def get_test_examples(self, data_dir):
method get_labels (line 261) | def get_labels(self):
method _create_examples (line 265) | def _create_examples(self, lines, set_type):
class Sst2Processor (line 280) | class Sst2Processor(DataProcessor):
method __init__ (line 283) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 287) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 296) | def get_train_examples(self, data_dir):
method get_dev_examples (line 300) | def get_dev_examples(self, data_dir):
method get_test_examples (line 304) | def get_test_examples(self, data_dir):
method get_labels (line 308) | def get_labels(self):
method _create_examples (line 312) | def _create_examples(self, lines, set_type):
class StsbProcessor (line 326) | class StsbProcessor(DataProcessor):
method __init__ (line 329) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 333) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 342) | def get_train_examples(self, data_dir):
method get_dev_examples (line 346) | def get_dev_examples(self, data_dir):
method get_test_examples (line 350) | def get_test_examples(self, data_dir):
method get_labels (line 354) | def get_labels(self):
method _create_examples (line 358) | def _create_examples(self, lines, set_type):
class QqpProcessor (line 372) | class QqpProcessor(DataProcessor):
method __init__ (line 375) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 379) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 388) | def get_train_examples(self, data_dir):
method get_dev_examples (line 392) | def get_dev_examples(self, data_dir):
method get_test_examples (line 396) | def get_test_examples(self, data_dir):
method get_labels (line 400) | def get_labels(self):
method _create_examples (line 404) | def _create_examples(self, lines, set_type):
class QnliProcessor (line 424) | class QnliProcessor(DataProcessor):
method __init__ (line 427) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 431) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 440) | def get_train_examples(self, data_dir):
method get_dev_examples (line 444) | def get_dev_examples(self, data_dir):
method get_test_examples (line 448) | def get_test_examples(self, data_dir):
method get_labels (line 452) | def get_labels(self):
method _create_examples (line 456) | def _create_examples(self, lines, set_type):
class RteProcessor (line 470) | class RteProcessor(DataProcessor):
method __init__ (line 473) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 477) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 486) | def get_train_examples(self, data_dir):
method get_dev_examples (line 490) | def get_dev_examples(self, data_dir):
method get_test_examples (line 494) | def get_test_examples(self, data_dir):
method get_labels (line 498) | def get_labels(self):
method _create_examples (line 502) | def _create_examples(self, lines, set_type):
class WnliProcessor (line 516) | class WnliProcessor(DataProcessor):
method __init__ (line 519) | def __init__(self, *args, **kwargs):
method get_example_from_tensor_dict (line 523) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 532) | def get_train_examples(self, data_dir):
method get_dev_examples (line 536) | def get_dev_examples(self, data_dir):
method get_test_examples (line 540) | def get_test_examples(self, data_dir):
method get_labels (line 544) | def get_labels(self):
method _create_examples (line 548) | def _create_examples(self, lines, set_type):
FILE: src/transformers/data/processors/squad.py
function _improve_answer_span (line 42) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, ...
function _check_is_max_context (line 55) | def _check_is_max_context(doc_spans, cur_span_index, position):
function _new_check_is_max_context (line 75) | def _new_check_is_max_context(doc_spans, cur_span_index, position):
function _is_whitespace (line 97) | def _is_whitespace(c):
function squad_convert_example_to_features (line 103) | def squad_convert_example_to_features(
function squad_convert_example_to_features_init (line 308) | def squad_convert_example_to_features_init(tokenizer_for_convert: PreTra...
function squad_convert_examples_to_features (line 313) | def squad_convert_examples_to_features(
class SquadProcessor (line 433) | class SquadProcessor(DataProcessor):
method _get_example_from_tensor_dict (line 442) | def _get_example_from_tensor_dict(self, tensor_dict, evaluate=False):
method get_examples_from_dataset (line 466) | def get_examples_from_dataset(self, dataset, evaluate=False):
method get_train_examples (line 499) | def get_train_examples(self, data_dir, filename=None):
method get_dev_examples (line 521) | def get_dev_examples(self, data_dir, filename=None):
method _create_examples (line 542) | def _create_examples(self, input_data, set_type):
class SquadV1Processor (line 579) | class SquadV1Processor(SquadProcessor):
class SquadV2Processor (line 584) | class SquadV2Processor(SquadProcessor):
class SquadExample (line 589) | class SquadExample:
method __init__ (line 604) | def __init__(
class SquadFeatures (line 652) | class SquadFeatures:
method __init__ (line 679) | def __init__(
class SquadResult (line 719) | class SquadResult:
method __init__ (line 729) | def __init__(self, unique_id, start_logits, end_logits, start_top_inde...
FILE: src/transformers/data/processors/utils.py
class InputExample (line 28) | class InputExample:
method to_json_string (line 47) | def to_json_string(self):
class InputFeatures (line 53) | class InputFeatures:
method to_json_string (line 73) | def to_json_string(self):
class DataProcessor (line 78) | class DataProcessor:
method get_example_from_tensor_dict (line 81) | def get_example_from_tensor_dict(self, tensor_dict):
method get_train_examples (line 91) | def get_train_examples(self, data_dir):
method get_dev_examples (line 95) | def get_dev_examples(self, data_dir):
method get_test_examples (line 99) | def get_test_examples(self, data_dir):
method get_labels (line 103) | def get_labels(self):
method tfds_map (line 107) | def tfds_map(self, example):
method _read_tsv (line 117) | def _read_tsv(cls, input_file, quotechar=None):
class SingleSentenceClassificationProcessor (line 123) | class SingleSentenceClassificationProcessor(DataProcessor):
method __init__ (line 126) | def __init__(self, labels=None, examples=None, mode="classification", ...
method __len__ (line 132) | def __len__(self):
method __getitem__ (line 135) | def __getitem__(self, idx):
method create_from_csv (line 141) | def create_from_csv(
method create_from_examples (line 158) | def create_from_examples(cls, texts_or_text_and_labels, labels=None, *...
method add_examples_from_csv (line 163) | def add_examples_from_csv(
method add_examples (line 193) | def add_examples(
method get_features (line 230) | def get_features(
FILE: src/transformers/data/processors/xnli.py
class XnliProcessor (line 26) | class XnliProcessor(DataProcessor):
method __init__ (line 32) | def __init__(self, language, train_language=None):
method get_train_examples (line 36) | def get_train_examples(self, data_dir):
method get_test_examples (line 57) | def get_test_examples(self, data_dir):
method get_labels (line 80) | def get_labels(self):
FILE: src/transformers/debug_utils.py
class DebugUnderflowOverflow (line 27) | class DebugUnderflowOverflow:
method __init__ (line 145) | def __init__(self, model, max_frames_to_save=21, trace_batch_nums=None...
method save_frame (line 164) | def save_frame(self, frame=None):
method expand_frame (line 170) | def expand_frame(self, line):
method trace_frames (line 173) | def trace_frames(self):
method reset_saved_frames (line 177) | def reset_saved_frames(self):
method dump_saved_frames (line 180) | def dump_saved_frames(self):
method analyse_model (line 188) | def analyse_model(self):
method analyse_variable (line 196) | def analyse_variable(self, var, ctx):
method batch_start_frame (line 206) | def batch_start_frame(self):
method batch_end_frame (line 210) | def batch_end_frame(self):
method create_frame (line 213) | def create_frame(self, module, input, output):
method register_forward_hook (line 241) | def register_forward_hook(self):
method _register_forward_hook (line 244) | def _register_forward_hook(self, module):
method forward_hook (line 247) | def forward_hook(self, module, input, output):
function get_abs_min_max (line 295) | def get_abs_min_max(var, ctx):
function detect_overflow (line 300) | def detect_overflow(var, ctx):
class DebugOption (line 346) | class DebugOption(ExplicitEnum):
FILE: src/transformers/dependency_versions_check.py
function dep_version_check (line 61) | def dep_version_check(pkg, hint=None):
FILE: src/transformers/distributed/configuration_utils.py
class DistributedConfig (line 23) | class DistributedConfig:
method from_dict (line 32) | def from_dict(cls, config_dict, **kwargs):
method to_json_file (line 52) | def to_json_file(self, json_file_path: str | os.PathLike):
method to_dict (line 68) | def to_dict(self) -> dict[str, Any]:
method __iter__ (line 76) | def __iter__(self):
method __repr__ (line 81) | def __repr__(self):
method to_json_string (line 84) | def to_json_string(self):
method update (line 92) | def update(self, **kwargs):
FILE: src/transformers/dynamic_module_utils.py
function _sanitize_module_name (line 49) | def _sanitize_module_name(name: str) -> str:
function init_hf_modules (line 85) | def init_hf_modules():
function create_dynamic_module (line 101) | def create_dynamic_module(name: str | os.PathLike) -> None:
function get_relative_imports (line 123) | def get_relative_imports(module_file: str | os.PathLike) -> list[str]:
function get_relative_import_files (line 144) | def get_relative_import_files(module_file: str | os.PathLike) -> list[str]:
function get_imports (line 176) | def get_imports(filename: str | os.PathLike) -> list[str]:
function check_imports (line 231) | def check_imports(filename: str | os.PathLike) -> list[str]:
function get_class_in_module (line 266) | def get_class_in_module(
function get_cached_module_file (line 314) | def get_cached_module_file(
function get_class_from_dynamic_module (line 476) | def get_class_from_dynamic_module(
function custom_object_save (line 586) | def custom_object_save(obj: Any, folder: str | os.PathLike, config: dict...
function _raise_timeout_error (line 662) | def _raise_timeout_error(signum, frame):
function resolve_trust_remote_code (line 672) | def resolve_trust_remote_code(
function check_python_requirements (line 757) | def check_python_requirements(path_or_repo_id, requirements_file="requir...
FILE: src/transformers/feature_extraction_sequence_utils.py
class SequenceFeatureExtractor (line 28) | class SequenceFeatureExtractor(FeatureExtractionMixin):
method __init__ (line 41) | def __init__(self, feature_size: int, sampling_rate: int, padding_valu...
method pad (line 51) | def pad(
method _pad (line 221) | def _pad(
method _truncate (line 293) | def _truncate(
method _get_padding_strategies (line 336) | def _get_padding_strategies(self, padding=False, max_length=None):
method fetch_audio (line 368) | def fetch_audio(self, audio_url_or_urls: str | list[str] | list[list[s...
FILE: src/transformers/feature_extraction_utils.py
class BatchFeature (line 58) | class BatchFeature(UserDict):
method __init__ (line 75) | def __init__(
method __getitem__ (line 85) | def __getitem__(self, item: str) -> Any:
method __getattr__ (line 95) | def __getattr__(self, item: str):
method __getstate__ (line 101) | def __getstate__(self):
method __setstate__ (line 104) | def __setstate__(self, state):
method _get_is_as_tensor_fns (line 108) | def _get_is_as_tensor_fns(self, tensor_type: str | TensorType | None =...
method convert_to_tensors (line 158) | def convert_to_tensors(
method to (line 215) | def to(self, *args, **kwargs) -> "BatchFeature":
class FeatureExtractionMixin (line 266) | class FeatureExtractionMixin(PushToHubMixin):
method __init__ (line 274) | def __init__(self, **kwargs):
method from_pretrained (line 287) | def from_pretrained(
method save_pretrained (line 383) | def save_pretrained(self, save_directory: str | os.PathLike, push_to_h...
method get_feature_extractor_dict (line 432) | def get_feature_extractor_dict(
method from_dict (line 546) | def from_dict(
method to_dict (line 584) | def to_dict(self) -> dict[str, Any]:
method from_json_file (line 598) | def from_json_file(cls, json_file: str | os.PathLike) -> "FeatureExtra...
method to_json_string (line 616) | def to_json_string(self) -> str:
method to_json_file (line 631) | def to_json_file(self, json_file_path: str | os.PathLike):
method __repr__ (line 642) | def __repr__(self):
method register_for_auto_class (line 646) | def register_for_auto_class(cls, auto_class="AutoFeatureExtractor"):
FILE: src/transformers/generation/candidate_generator.py
class CandidateGenerator (line 39) | class CandidateGenerator:
method get_candidates (line 42) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
method update_candidate_strategy (line 59) | def update_candidate_strategy(self, input_ids: torch.LongTensor, score...
class AssistedCandidateGenerator (line 78) | class AssistedCandidateGenerator(CandidateGenerator):
method __init__ (line 101) | def __init__(
method get_candidates (line 200) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
method update_candidate_strategy (line 225) | def update_candidate_strategy(self, input_ids: torch.LongTensor, score...
method _calculate_new_tokens (line 283) | def _calculate_new_tokens(self, input_ids: torch.LongTensor) -> tuple[...
method _update_past_and_masks (line 290) | def _update_past_and_masks(
method _prepare_generation_args (line 312) | def _prepare_generation_args(self, input_ids: torch.LongTensor, min_ne...
method _generate_candidates (line 322) | def _generate_candidates(self, generation_args: dict) -> tuple[torch.L...
class AssistedCandidateGeneratorDifferentTokenizers (line 341) | class AssistedCandidateGeneratorDifferentTokenizers(AssistedCandidateGen...
method __init__ (line 374) | def __init__(
method _get_longest_diag_dict (line 395) | def _get_longest_diag_dict(input_matrix, nonzero_idx):
method _get_longest_diag_index (line 431) | def _get_longest_diag_index(input_matrix):
method _get_tokens_diag (line 451) | def _get_tokens_diag(prompt, prompt_plus_new_tokens):
method convert_source_tokens_to_target_tokens (line 483) | def convert_source_tokens_to_target_tokens(
method get_candidates (line 502) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
method _prepare_assistant_input_ids (line 544) | def _prepare_assistant_input_ids(self, input_ids: torch.LongTensor) ->...
method _process_assistant_outputs (line 590) | def _process_assistant_outputs(
class _PruneReindexingLMHead (line 624) | class _PruneReindexingLMHead(nn.Module):
method __init__ (line 636) | def __init__(self, original_lm_head, assistant_overlap_token_ids):
method forward (line 642) | def forward(self, hidden_states):
class _MapInputEmbedding (line 647) | class _MapInputEmbedding(nn.Module):
method __init__ (line 648) | def __init__(self, original_embedding: nn.Embedding, assistant_overlap...
method forward (line 663) | def forward(self, input_ids: torch.LongTensor) -> torch.FloatTensor:
class AssistantToTargetTranslator (line 681) | class AssistantToTargetTranslator:
method __init__ (line 705) | def __init__(
method unmap_input_ids (line 746) | def unmap_input_ids(self):
method _get_assistant_to_target_input_ids (line 756) | def _get_assistant_to_target_input_ids(self):
method _get_suppress_input_ids (line 791) | def _get_suppress_input_ids(self) -> list[int]:
method get_target_ids (line 797) | def get_target_ids(
method get_target_logits (line 818) | def get_target_logits(self, assistant_logits: torch.FloatTensor) -> to...
class AssistantVocabTranslatorCache (line 840) | class AssistantVocabTranslatorCache:
method get_translator (line 849) | def get_translator(
method cleanup (line 876) | def cleanup(cls):
class UniversalSpeculativeDecodingGenerator (line 894) | class UniversalSpeculativeDecodingGenerator(AssistedCandidateGeneratorDi...
method __init__ (line 900) | def __init__(
method get_candidates (line 928) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
method _update_past_and_masks (line 960) | def _update_past_and_masks(self, assistant_input_ids: torch.LongTensor...
method _prepare_assistant_input_ids (line 972) | def _prepare_assistant_input_ids(self, target_input_ids: torch.LongTen...
class PromptLookupCandidateGenerator (line 1013) | class PromptLookupCandidateGenerator(CandidateGenerator):
method __init__ (line 1038) | def __init__(
method get_candidates (line 1057) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
method update_candidate_strategy (line 1151) | def update_candidate_strategy(self, input_ids: torch.LongTensor, score...
class EarlyExitCandidateGenerator (line 1168) | class EarlyExitCandidateGenerator(AssistedCandidateGenerator):
method __init__ (line 1192) | def __init__(
method get_candidates (line 1214) | def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.L...
function _prepare_attention_mask (line 1224) | def _prepare_attention_mask(model_kwargs: dict[str, Any], new_length: in...
function _prepare_position_ids (line 1260) | def _prepare_position_ids(model_kwargs: dict[str, Any], new_length: int,...
function _prepare_token_type_ids (line 1286) | def _prepare_token_type_ids(model_kwargs: dict[str, Any], new_length: in...
FILE: src/transformers/generation/configuration_utils.py
class GenerationMode (line 65) | class GenerationMode(ExplicitEnum):
class GenerationConfig (line 83) | class GenerationConfig(PushToHubMixin):
method __init__ (line 352) | def __init__(self, **kwargs):
method __hash__ (line 471) | def __hash__(self):
method __eq__ (line 474) | def __eq__(self, other):
method __repr__ (line 482) | def __repr__(self):
method get_generation_mode (line 485) | def get_generation_mode(self, assistant_model: Optional["PreTrainedMod...
method _get_default_generation_params (line 551) | def _get_default_generation_params() -> dict[str, Any]:
method validate (line 590) | def validate(self, strict=False):
method save_pretrained (line 768) | def save_pretrained(
method from_pretrained (line 828) | def from_pretrained(
method _dict_from_json_file (line 994) | def _dict_from_json_file(cls, json_file: str | os.PathLike):
method from_dict (line 1000) | def from_dict(cls, config_dict: dict[str, Any], **kwargs) -> "Generati...
method dict_dtype_to_str (line 1033) | def dict_dtype_to_str(self, d: dict[str, Any]) -> None:
method to_diff_dict (line 1045) | def to_diff_dict(self) -> dict[str, Any]:
method to_dict (line 1068) | def to_dict(self) -> dict[str, Any]:
method to_json_string (line 1089) | def to_json_string(
method to_json_file (line 1141) | def to_json_file(
method from_model_config (line 1160) | def from_model_config(cls, model_config: Union["PreTrainedConfig", dic...
method update (line 1209) | def update(self, defaults_only=False, allow_custom_entries=False, **kw...
class BaseWatermarkingConfig (line 1244) | class BaseWatermarkingConfig(ABC):
method from_dict (line 1248) | def from_dict(cls, config_dict, **kwargs):
method to_json_file (line 1269) | def to_json_file(self, json_file_path: str | os.PathLike):
method to_dict (line 1282) | def to_dict(self) -> dict[str, Any]:
method __iter__ (line 1292) | def __iter__(self):
method __repr__ (line 1295) | def __repr__(self):
method to_json_string (line 1298) | def to_json_string(self):
method update (line 1307) | def update(self, **kwargs):
method validate (line 1319) | def validate(self): ...
method construct_processor (line 1322) | def construct_processor(self, vocab_size): ...
class WatermarkingConfig (line 1326) | class WatermarkingConfig(BaseWatermarkingConfig):
method __init__ (line 1347) | def __init__(
method validate (line 1361) | def validate(self):
method construct_processor (line 1391) | def construct_processor(self, vocab_size: int, device) -> "WatermarkLo...
class SynthIDTextWatermarkingConfig (line 1404) | class SynthIDTextWatermarkingConfig(BaseWatermarkingConfig):
method __init__ (line 1448) | def __init__(
method validate (line 1466) | def validate(self):
method construct_processor (line 1480) | def construct_processor(self, vocab_size: int, device) -> "WatermarkLo...
class CompileConfig (line 1494) | class CompileConfig:
method to_dict (line 1540) | def to_dict(self) -> dict[str, Any]:
class ContinuousBatchingConfig (line 1547) | class ContinuousBatchingConfig:
method account_for_cb_deprecated_arguments (line 1645) | def account_for_cb_deprecated_arguments(
method decide_use_cuda_graphs (line 1684) | def decide_use_cuda_graphs(self, compile_config: CompileConfig | None,...
method decide_use_async_batching (line 1732) | def decide_use_async_batching(self, is_attn_mask_needed: bool) -> bool:
method resolve_sentinel_values (line 1749) | def resolve_sentinel_values(self) -> None:
method resolve_compile_configs (line 1762) | def resolve_compile_configs(
FILE: src/transformers/generation/continuous_batching/cache.py
function group_layers_by_attn_type (line 28) | def group_layers_by_attn_type(config: PreTrainedConfig) -> tuple[list[li...
class PagedAttentionCache (line 62) | class PagedAttentionCache:
method __init__ (line 119) | def __init__(
method will_allocation_be_successful (line 261) | def will_allocation_be_successful(self, num_requested_blocks: int, all...
method allocate_blocks (line 280) | def allocate_blocks(self, n_blocks: int, request_id: str, allocated_bl...
method free_blocks (line 296) | def free_blocks(self, request_id: str) -> None:
method get_num_free_blocks (line 302) | def get_num_free_blocks(self) -> int:
method extend_read_and_write_indices (line 307) | def extend_read_and_write_indices(
method fill_block_table (line 324) | def fill_block_table(
method get_seqlens_k (line 331) | def get_seqlens_k(self, past_length: int, query_length: int) -> dict[s...
method update (line 343) | def update(
method get_block_table_key (line 395) | def get_block_table_key(self, flash_attn_with_kvcache_fn: Any) -> str:
method search_prefix_match (line 411) | def search_prefix_match(self, request_id: str, prompt_ids: list[int]) ...
method mark_shareable_blocks_as_complete (line 437) | def mark_shareable_blocks_as_complete(self, state: RequestState, num_c...
method copy_cache (line 454) | def copy_cache(self, list_source_blocks: list[int], list_forked_blocks...
method fork_request (line 466) | def fork_request(self, source_request_id: str, destination_request_ids...
method free_all_requests (line 477) | def free_all_requests(self) -> None:
class PagedAttentionMemoryHandler (line 489) | class PagedAttentionMemoryHandler:
method __init__ (line 519) | def __init__(
method get_available_memory (line 546) | def get_available_memory(max_memory_percent: float = 1.0) -> int:
method infer_num_blocks_and_max_batch_tokens (line 562) | def infer_num_blocks_and_max_batch_tokens(
method compute_num_blocks_and_max_batch_tokens (line 606) | def compute_num_blocks_and_max_batch_tokens(
method compute_max_batch_tokens (line 661) | def compute_max_batch_tokens(
method compute_num_blocks (line 689) | def compute_num_blocks(
method compute_memory_footprint (line 717) | def compute_memory_footprint(
FILE: src/transformers/generation/continuous_batching/cache_manager.py
function reverse_enumerate (line 28) | def reverse_enumerate(xs: list[T]) -> Iterator[tuple[int, T]]:
class Block (line 35) | class Block: # TODO: rename to ShareableBlock and update the docs
method __init__ (line 41) | def __init__(self, id_: int, parent_id: int | None, group_id: int) -> ...
method __repr__ (line 48) | def __repr__(self) -> str:
method is_complete (line 52) | def is_complete(self) -> bool:
class BlockManager (line 56) | class BlockManager:
method __init__ (line 76) | def __init__(self, num_blocks: int, block_size: int) -> None:
method num_free_blocks (line 86) | def num_free_blocks(self) -> int:
method has_enough_free_blocks (line 90) | def has_enough_free_blocks(self, n_blocks: int) -> bool:
method get_free_blocks (line 109) | def get_free_blocks(
method fork_blocks (line 128) | def fork_blocks(
method increase_ref_count (line 184) | def increase_ref_count(self, block_id: int) -> None:
method decrease_ref_count (line 191) | def decrease_ref_count(self, block_id: int) -> None:
method free_blocks (line 203) | def free_blocks(self, blocks: list[int], shareable: bool) -> None:
method uninitialize_unshared_block (line 212) | def uninitialize_unshared_block(self, block_id: int) -> None:
method mark_shareable_blocks_as_complete (line 221) | def mark_shareable_blocks_as_complete(
method compute_hash (line 276) | def compute_hash(self, parent_hash: int | None, tokens: list[int], gro...
class CacheAllocator (line 282) | class CacheAllocator(ABC):
method allocate_blocks (line 291) | def allocate_blocks(self, n_blocks: int, request_id: str, block_manage...
method free_blocks (line 295) | def free_blocks(self, request_id: str, block_manager: BlockManager) ->...
method get_read_indices (line 306) | def get_read_indices(self, request_id: str, past_length: int, query_le...
method get_write_indices (line 310) | def get_write_indices(self, request_id: str, past_length: int, query_l...
method fill_block_table (line 314) | def fill_block_table(
method fork_blocks (line 319) | def fork_blocks(
class FullAttentionCacheAllocator (line 350) | class FullAttentionCacheAllocator(CacheAllocator):
method __init__ (line 353) | def __init__(self, index: int, block_size: int, allow_block_sharing: b...
method allocate_blocks (line 364) | def allocate_blocks(self, n_blocks: int, request_id: str, block_manage...
method get_read_indices (line 382) | def get_read_indices(self, request_id: str, past_length: int, query_le...
method get_write_indices (line 403) | def get_write_indices(self, request_id: str, past_length: int, query_l...
method fill_block_table (line 424) | def fill_block_table(
class SlidingAttentionCacheAllocator (line 441) | class SlidingAttentionCacheAllocator(CacheAllocator):
method __init__ (line 444) | def __init__(self, index: int, block_size: int, sliding_window: int) -...
method allocate_blocks (line 458) | def allocate_blocks(self, n_blocks: int, request_id: str, block_manage...
method get_read_indices (line 480) | def get_read_indices(self, request_id: str, past_length: int, query_le...
method get_write_indices (line 503) | def get_write_indices(self, request_id: str, past_length: int, query_l...
method fill_block_table (line 528) | def fill_block_table(
FILE: src/transformers/generation/continuous_batching/continuous_api.py
class ProtoPretrainedModel (line 68) | class ProtoPretrainedModel(nn.Module):
method set_attn_implementation (line 74) | def set_attn_implementation(self, attn_implementation: str) -> None:
method _get_logits_processor (line 78) | def _get_logits_processor(self, generation_config: GenerationConfig) -...
class OutputRouter (line 82) | class OutputRouter:
method __init__ (line 90) | def __init__(self) -> None:
method deliver (line 95) | def deliver(self, output: GenerationOutput) -> None:
method deliver_batch (line 105) | def deliver_batch(self, outputs: list[GenerationOutput]) -> None:
class ContinuousBatchProcessor (line 131) | class ContinuousBatchProcessor:
method __init__ (line 135) | def __init__(
method __repr__ (line 227) | def __repr__(self) -> str:
method __del__ (line 234) | def __del__(self) -> None:
method _ensure_decode_fast_path_is_available (line 240) | def _ensure_decode_fast_path_is_available(self) -> None:
method reset (line 264) | def reset(self) -> None:
method _get_new_requests (line 272) | def _get_new_requests(self) -> None:
method _handle_request_error (line 290) | def _handle_request_error(self, error: Exception, state: RequestState)...
method soft_reset_one_request (line 306) | def soft_reset_one_request(self) -> None:
method prepare_next_batch (line 334) | def prepare_next_batch(self) -> bool:
method update_batch (line 380) | def update_batch(self) -> None:
method has_pending_requests (line 450) | def has_pending_requests(self) -> bool:
method handle_batch_error (line 455) | def handle_batch_error(self, error):
method fail_all_requests (line 463) | def fail_all_requests(self, error: Exception) -> None:
method _generation_step (line 485) | def _generation_step(self, model: nn.Module, logit_processor: LogitsPr...
method capture_graph (line 520) | def capture_graph(self, forward_fn: Any, compute_stream: torch.cuda.St...
method _forward_process_and_sample (line 535) | def _forward_process_and_sample(
method _model_forward (line 552) | def _model_forward(self, model: nn.Module, batch_data: dict) -> torch....
method _process_logit (line 556) | def _process_logit(
method _sample (line 574) | def _sample(self, probs: torch.Tensor, logits_indices: torch.Tensor, o...
method warmup (line 610) | def warmup(
class ContinuousBatchingManager (line 711) | class ContinuousBatchingManager:
method __init__ (line 720) | def __init__(
method start (line 776) | def start(self) -> None:
method is_running (line 785) | def is_running(self) -> bool:
method warmup (line 789) | def warmup(self, num_query_tokens: int = 0, num_cache_tokens: int = 0)...
method stop (line 798) | def stop(self, block: bool = True, timeout: float | None = None, keep_...
method join (line 838) | def join(self, stop_trigger_time: float, timeout: float | None = None)...
method add_request (line 853) | def add_request(
method add_requests (line 896) | def add_requests(
method cancel_request (line 920) | def cancel_request(self, request_id: str) -> None:
method get_result (line 930) | def get_result(self, request_id: str | None = None, timeout: float | N...
method __iter__ (line 951) | def __iter__(self):
method request_id_iter (line 958) | def request_id_iter(self, request_id: str) -> Generator[GenerationOutp...
method register_result_handler (line 971) | def register_result_handler(self, request_id: str, callback: Callable)...
method _generation_step (line 996) | def _generation_step(self) -> None:
method _create_batch_processor (line 1002) | def _create_batch_processor(self) -> ContinuousBatchProcessor:
method _run_generation_loop (line 1036) | def _run_generation_loop(self) -> None:
method _inner_generation_loop (line 1076) | def _inner_generation_loop(self, batch_processor: ContinuousBatchProce...
method _handle_critical_error (line 1087) | def _handle_critical_error(self, error: Exception, batch_processor: Co...
class ContinuousMixin (line 1106) | class ContinuousMixin:
method init_continuous_batching (line 1119) | def init_continuous_batching(
method destroy_cached_continuous_batching_manager (line 1171) | def destroy_cached_continuous_batching_manager(self) -> None:
method continuous_batching_context_manager (line 1180) | def continuous_batching_context_manager(
method generate_batch (line 1218) | def generate_batch(
FILE: src/transformers/generation/continuous_batching/input_outputs.py
class PagedAttentionArgs (line 32) | class PagedAttentionArgs:
method asdict (line 68) | def asdict(self) -> dict[str, Any]:
class ContinuousBatchingIOs (line 86) | class ContinuousBatchingIOs:
method __init__ (line 92) | def __init__(
method _setup_static_tensors (line 133) | def _setup_static_tensors(self) -> None:
method _transfer_inputs (line 214) | def _transfer_inputs(
method _reset_static_tensors (line 246) | def _reset_static_tensors(self, full_reset: bool = False) -> None:
method reset (line 284) | def reset(self) -> None:
method get_cumulative_seqlens (line 293) | def get_cumulative_seqlens(self) -> tuple[torch.Tensor, dict[str, torc...
method carry_over_tokens (line 297) | def carry_over_tokens(
method retrieve_device_outputs (line 302) | def retrieve_device_outputs(self) -> None:
method prepare_batch_update (line 306) | def prepare_batch_update(self) -> tuple[list[FutureRequestState], list...
method prepare_batch_tensors (line 318) | def prepare_batch_tensors(
method get_model_kwargs (line 432) | def get_model_kwargs(self, use_padding: bool = False) -> dict[str, Any]:
method get_cb_kwargs (line 503) | def get_cb_kwargs(self) -> tuple[torch.Tensor, torch.Tensor, torch.Ten...
method get_graph (line 509) | def get_graph(self) -> torch.cuda.CUDAGraph | None:
method set_graph (line 517) | def set_graph(self, graph: torch.cuda.CUDAGraph) -> None:
class HostDeviceIOPair (line 521) | class HostDeviceIOPair:
method __init__ (line 522) | def __init__(
method reset (line 541) | def reset(self) -> None:
method transfer_inputs_h2d (line 548) | def transfer_inputs_h2d(self, stream: torch.cuda.Stream) -> None:
method transfer_outputs_d2h (line 551) | def transfer_outputs_d2h(self, stream: torch.cuda.Stream | None) -> None:
class ContinuousBatchingAsyncIOs (line 557) | class ContinuousBatchingAsyncIOs:
method __init__ (line 603) | def __init__(
method get_cumulative_seqlens (line 633) | def get_cumulative_seqlens(self) -> tuple[torch.Tensor, dict[str, torc...
method prepare_batch_tensors (line 637) | def prepare_batch_tensors(
method infer_carry_over_ids (line 648) | def infer_carry_over_ids(self) -> torch.Tensor:
method get_model_kwargs (line 668) | def get_model_kwargs(self, use_padding: bool = False) -> dict[str, Any]:
method get_cb_kwargs (line 675) | def get_cb_kwargs(self) -> tuple[torch.Tensor, torch.Tensor, torch.Ten...
method carry_over_tokens (line 691) | def carry_over_tokens(
method output_ids (line 708) | def output_ids(self) -> torch.Tensor:
method get_graph (line 712) | def get_graph(self) -> torch.cuda.CUDAGraph | None:
method set_graph (line 715) | def set_graph(self, graph: torch.cuda.CUDAGraph) -> None:
method use_block_table (line 719) | def use_block_table(self) -> bool:
method retrieve_device_outputs (line 723) | def retrieve_device_outputs(self) -> None:
method prepare_batch_update (line 735) | def prepare_batch_update(self) -> tuple[list[FutureRequestState], list...
method reset (line 740) | def reset(self) -> None:
FILE: src/transformers/generation/continuous_batching/requests.py
function get_device_and_memory_breakdown (line 42) | def get_device_and_memory_breakdown() -> tuple[torch.device, int, int, i...
class RequestStatus (line 81) | class RequestStatus(IntEnum):
class GenerationOutput (line 92) | class GenerationOutput:
method is_finished (line 116) | def is_finished(self) -> bool:
class RequestState (line 121) | class RequestState:
method __post_init__ (line 172) | def __post_init__(self):
method status (line 191) | def status(self) -> RequestStatus:
method status (line 195) | def status(self, value: RequestStatus):
method timestamps (line 204) | def timestamps(self) -> list[float] | None:
method log_end_of_request (line 207) | def log_end_of_request(self):
method current_len (line 216) | def current_len(self) -> int:
method generated_len (line 220) | def generated_len(self) -> int:
method update_and_check_completion (line 226) | def update_and_check_completion(self, token_id: int, logprob: float | ...
method __repr__ (line 257) | def __repr__(self):
method to_generation_output (line 271) | def to_generation_output(self):
method fork (line 288) | def fork(self, new_request_id: str) -> "RequestState":
method create_equivalent_initial_request (line 314) | def create_equivalent_initial_request(self) -> "RequestState":
class FutureRequestState (line 338) | class FutureRequestState:
method __init__ (line 344) | def __init__(self, state: RequestState, has_new_token: bool, complete_...
FILE: src/transformers/generation/continuous_batching/scheduler.py
class Scheduler (line 23) | class Scheduler(ABC):
method __init__ (line 30) | def __init__(self, cache: PagedAttentionCache):
method reset (line 39) | def reset(self) -> None:
method add_waiting_request (line 49) | def add_waiting_request(self, state: RequestState):
method schedule_batch (line 55) | def schedule_batch(
method has_pending_requests (line 66) | def has_pending_requests(self) -> bool:
method finish_request (line 71) | def finish_request(self, request_id: str) -> None:
method get_active_request_static_outputs (line 79) | def get_active_request_static_outputs(self, request_id: str) -> list[i...
method set_request_cancellation (line 86) | def set_request_cancellation(self, request_id: str):
method clear_cancelled_requests (line 92) | def clear_cancelled_requests(self):
method request_is_cancelled (line 104) | def request_is_cancelled(self, request_id: str) -> bool:
method _allocate_blocks_if_needed (line 111) | def _allocate_blocks_if_needed(self, state: RequestState, len_next_tok...
method _infer_request_tokens (line 129) | def _infer_request_tokens(self, state: RequestState, request_ids_to_re...
method _schedule_request (line 154) | def _schedule_request(
method _process_candidates (line 192) | def _process_candidates(
method _cleanup_waiting_queue (line 283) | def _cleanup_waiting_queue(self, request_ids_to_remove_from_waiting: s...
class FIFOScheduler (line 292) | class FIFOScheduler(Scheduler):
method __init__ (line 297) | def __init__(self, cache: PagedAttentionCache, safety_margin: float = ...
method schedule_batch (line 306) | def schedule_batch(
class PrefillFirstScheduler (line 348) | class PrefillFirstScheduler(Scheduler):
method schedule_batch (line 354) | def schedule_batch(
FILE: src/transformers/generation/continuous_batching/utils.py
class CudaGraphBuffer (line 25) | class CudaGraphBuffer:
method __init__ (line 28) | def __init__(self, max_size: int) -> None:
method __del__ (line 34) | def __del__(self) -> None:
method get_graph (line 40) | def get_graph(self, q_len: int, kv_len: int) -> torch.cuda.CUDAGraph |...
method plan_for_new_graph (line 46) | def plan_for_new_graph(self, silent: bool = False) -> None:
method set_graph (line 53) | def set_graph(self, q_len: int, kv_len: int, graph: torch.cuda.CUDAGra...
function attn_mask_is_needed (line 60) | def attn_mask_is_needed(config: PretrainedConfig) -> bool:
function pad_to_interval (line 65) | def pad_to_interval(size: int, interval_size: int, max_value: int) -> int:
function aligned_divide (line 73) | def aligned_divide(x: int, divide_by: int, align_to: int) -> int:
function build_attention_mask (line 80) | def build_attention_mask(
function create_warmup_future_states (line 164) | def create_warmup_future_states(
FILE: src/transformers/generation/logits_process.py
class LogitsProcessor (line 49) | class LogitsProcessor:
method __call__ (line 53) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class LogitsProcessorList (line 59) | class LogitsProcessorList(list):
method __call__ (line 66) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
method set_continuous_batching_context (line 96) | def set_continuous_batching_context(self, logits_indices: torch.Tensor...
class MinLengthLogitsProcessor (line 103) | class MinLengthLogitsProcessor(LogitsProcessor):
method __init__ (line 142) | def __init__(self, min_length: int, eos_token_id: int | list[int] | to...
method __call__ (line 155) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class MinNewTokensLengthLogitsProcessor (line 164) | class MinNewTokensLengthLogitsProcessor(LogitsProcessor):
method __init__ (line 201) | def __init__(
method __call__ (line 225) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class TemperatureLogitsWarper (line 236) | class TemperatureLogitsWarper(LogitsProcessor):
method __init__ (line 284) | def __init__(self, temperature: float):
method __call__ (line 297) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class RepetitionPenaltyLogitsProcessor (line 302) | class RepetitionPenaltyLogitsProcessor(LogitsProcessor):
method __init__ (line 352) | def __init__(self, penalty: float, prompt_ignore_length: int | None = ...
method set_continuous_batching_context (line 366) | def set_continuous_batching_context(self, logits_indices: torch.Tensor...
method __call__ (line 371) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class EncoderRepetitionPenaltyLogitsProcessor (line 414) | class EncoderRepetitionPenaltyLogitsProcessor(LogitsProcessor):
method __init__ (line 451) | def __init__(self, penalty: float, encoder_input_ids: torch.LongTensor):
method __call__ (line 459) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class TopPLogitsWarper (line 469) | class TopPLogitsWarper(LogitsProcessor):
method __init__ (line 509) | def __init__(self, top_p: float, filter_value: float = -float("Inf"), ...
method __call__ (line 521) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class TopKLogitsWarper (line 536) | class TopKLogitsWarper(LogitsProcessor):
method __init__ (line 573) | def __init__(self, top_k: int, filter_value: float = -float("Inf"), mi...
method __call__ (line 581) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class TopHLogitsWarper (line 589) | class TopHLogitsWarper(LogitsProcessor):
method __init__ (line 625) | def __init__(self, top_h: float, filter_value: float = -float("Inf")):
method __call__ (line 640) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class MinPLogitsWarper (line 695) | class MinPLogitsWarper(LogitsProcessor):
method __init__ (line 742) | def __init__(self, min_p: float, filter_value: float = -float("Inf"), ...
method __call__ (line 752) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class TypicalLogitsWarper (line 771) | class TypicalLogitsWarper(LogitsProcessor):
method __init__ (line 824) | def __init__(self, mass: float = 0.9, filter_value: float = -float("In...
method __call__ (line 836) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class EpsilonLogitsWarper (line 859) | class EpsilonLogitsWarper(LogitsProcessor):
method __init__ (line 899) | def __init__(self, epsilon: float, filter_value: float = -float("Inf")...
method __call__ (line 915) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class EtaLogitsWarper (line 928) | class EtaLogitsWarper(LogitsProcessor):
method __init__ (line 980) | def __init__(
method __call__ (line 998) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
function _get_ngrams (line 1012) | def _get_ngrams(ngram_size: int, prev_input_ids: torch.Tensor, num_hypos...
function _get_generated_ngrams (line 1041) | def _get_generated_ngrams(banned_ngrams, prev_input_ids, ngram_size, cur...
function _calc_banned_ngram_tokens (line 1064) | def _calc_banned_ngram_tokens(
class NoRepeatNGramLogitsProcessor (line 1079) | class NoRepeatNGramLogitsProcessor(LogitsProcessor):
method __init__ (line 1121) | def __init__(self, ngram_size: int):
method __call__ (line 1127) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class EncoderNoRepeatNGramLogitsProcessor (line 1138) | class EncoderNoRepeatNGramLogitsProcessor(LogitsProcessor):
method __init__ (line 1176) | def __init__(self, encoder_ngram_size: int, encoder_input_ids: torch.L...
method __call__ (line 1188) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class SequenceBiasLogitsProcessor (line 1207) | class SequenceBiasLogitsProcessor(LogitsProcessor):
method __init__ (line 1272) | def __init__(self, sequence_bias: list[list[list[int] | float]]):
method __call__ (line 1284) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
method _prepare_bias_variables (line 1317) | def _prepare_bias_variables(self, scores: torch.FloatTensor):
method _validate_arguments (line 1347) | def _validate_arguments(self):
method _convert_list_arguments_into_dict (line 1384) | def _convert_list_arguments_into_dict(self):
class NoBadWordsLogitsProcessor (line 1391) | class NoBadWordsLogitsProcessor(SequenceBiasLogitsProcessor):
method __init__ (line 1446) | def __init__(self, bad_words_ids: list[list[int]], eos_token_id: int |...
method _validate_arguments (line 1465) | def _validate_arguments(self):
class PrefixConstrainedLogitsProcessor (line 1480) | class PrefixConstrainedLogitsProcessor(LogitsProcessor):
method __init__ (line 1527) | def __init__(self, prefix_allowed_tokens_fn: Callable[[int, torch.Tens...
method __call__ (line 1532) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class ForcedBOSTokenLogitsProcessor (line 1552) | class ForcedBOSTokenLogitsProcessor(LogitsProcessor):
method __init__ (line 1584) | def __init__(self, bos_token_id: int):
method __call__ (line 1588) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class ForcedEOSTokenLogitsProcessor (line 1597) | class ForcedEOSTokenLogitsProcessor(LogitsProcessor):
method __init__ (line 1631) | def __init__(self, max_length: int, eos_token_id: int | list[int] | to...
method __call__ (line 1644) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class InfNanRemoveLogitsProcessor (line 1653) | class InfNanRemoveLogitsProcessor(LogitsProcessor):
method __call__ (line 1663) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class ExponentialDecayLengthPenalty (line 1674) | class ExponentialDecayLengthPenalty(LogitsProcessor):
method __init__ (line 1742) | def __init__(
method __call__ (line 1761) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class LogitNormalization (line 1775) | class LogitNormalization(LogitsProcessor):
method __call__ (line 1807) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class SuppressTokensAtBeginLogitsProcessor (line 1812) | class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor):
method __init__ (line 1847) | def __init__(self, begin_suppress_tokens, begin_index, device: str = "...
method set_begin_index (line 1851) | def set_begin_index(self, begin_index):
method __call__ (line 1855) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class SuppressTokensLogitsProcessor (line 1865) | class SuppressTokensLogitsProcessor(LogitsProcessor):
method __init__ (line 1894) | def __init__(self, suppress_tokens, device: str = "cpu"):
method __call__ (line 1898) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class WhisperTimeStampLogitsProcessor (line 1905) | class WhisperTimeStampLogitsProcessor(LogitsProcessor):
method __init__ (line 1963) | def __init__(
method set_begin_index (line 1992) | def set_begin_index(self, begin_index):
method __call__ (line 1996) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class WhisperNoSpeechDetection (line 2046) | class WhisperNoSpeechDetection(LogitsProcessor):
method __init__ (line 2052) | def __init__(self, no_speech_token: int, begin_index: int, scores_is_l...
method set_model (line 2067) | def set_model(self, model):
method set_inputs (line 2070) | def set_inputs(self, inputs):
method no_speech_prob (line 2080) | def no_speech_prob(self):
method set_begin_index (line 2083) | def set_begin_index(self, begin_index):
method __call__ (line 2087) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class ClassifierFreeGuidanceLogitsProcessor (line 2111) | class ClassifierFreeGuidanceLogitsProcessor(LogitsProcessor):
method __init__ (line 2150) | def __init__(self, guidance_scale):
method __call__ (line 2160) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class AlternatingCodebooksLogitsProcessor (line 2175) | class AlternatingCodebooksLogitsProcessor(LogitsProcessor):
method __init__ (line 2196) | def __init__(self, input_start_len: int, semantic_vocab_size: int, cod...
method __call__ (line 2204) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class UnbatchedClassifierFreeGuidanceLogitsProcessor (line 2220) | class UnbatchedClassifierFreeGuidanceLogitsProcessor(LogitsProcessor):
method __init__ (line 2272) | def __init__(
method get_unconditional_logits (line 2290) | def get_unconditional_logits(self, input_ids):
method __call__ (line 2326) | def __call__(self, input_ids, scores):
class BarkEosPrioritizerLogitsProcessor (line 2338) | class BarkEosPrioritizerLogitsProcessor(LogitsProcessor):
method __init__ (line 2355) | def __init__(self, eos_token_id: int | list[int] | torch.Tensor, min_e...
method __call__ (line 2370) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class WatermarkLogitsProcessor (line 2385) | class WatermarkLogitsProcessor(LogitsProcessor):
method __init__ (line 2447) | def __init__(
method set_seed (line 2476) | def set_seed(self, input_seq: torch.LongTensor):
method _get_greenlist_ids (line 2486) | def _get_greenlist_ids(self, input_seq: torch.LongTensor) -> torch.Lon...
method _score_rejection_sampling (line 2492) | def _score_rejection_sampling(self, input_seq: torch.LongTensor, score...
method __call__ (line 2508) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class SynthIDTextWatermarkState (line 2527) | class SynthIDTextWatermarkState:
method __init__ (line 2530) | def __init__(
class SynthIDTextWatermarkLogitsProcessor (line 2558) | class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor):
method __init__ (line 2630) | def __init__(
method _init_state (line 2663) | def _init_state(self, batch_size: int):
method update_scores (line 2672) | def update_scores(self, scores: torch.FloatTensor, g_values: torch.Flo...
method __call__ (line 2697) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
method accumulate_hash (line 2759) | def accumulate_hash(
method compute_ngram_keys (line 2796) | def compute_ngram_keys(self, ngrams: torch.LongTensor) -> torch.LongTe...
method _compute_keys (line 2829) | def _compute_keys(
method sample_g_values (line 2869) | def sample_g_values(self, ngram_keys: torch.LongTensor) -> torch.LongT...
method _check_input_ids_shape (line 2889) | def _check_input_ids_shape(self, input_ids: torch.LongTensor):
method compute_g_values (line 2894) | def compute_g_values(self, input_ids: torch.LongTensor) -> torch.LongT...
method compute_context_repetition_mask (line 2910) | def compute_context_repetition_mask(self, input_ids: torch.LongTensor)...
method compute_eos_token_mask (line 2956) | def compute_eos_token_mask(self, input_ids: torch.LongTensor, eos_toke...
method expected_mean_g_value (line 2982) | def expected_mean_g_value(self, vocab_size: int, coinflip_prob: float ...
class DiaClassifierFreeGuidanceLogitsProcessor (line 3000) | class DiaClassifierFreeGuidanceLogitsProcessor(LogitsProcessor):
method __init__ (line 3024) | def __init__(self, guidance_scale: float, guidance_top_k: int | None =...
method __call__ (line 3040) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class DiaEOSChannelFilterLogitsProcessor (line 3066) | class DiaEOSChannelFilterLogitsProcessor(LogitsProcessor):
method __init__ (line 3089) | def __init__(self, num_channels: int, eos_token_id: int):
method __call__ (line 3099) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class DiaEOSDelayPatternLogitsProcessor (line 3138) | class DiaEOSDelayPatternLogitsProcessor(LogitsProcessor):
method __init__ (line 3175) | def __init__(self, delay_pattern: list[int], eos_token_id: int, max_ge...
method __call__ (line 3185) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
FILE: src/transformers/generation/stopping_criteria.py
class StoppingCriteria (line 45) | class StoppingCriteria(ABC):
method __call__ (line 53) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class MaxLengthCriteria (line 57) | class MaxLengthCriteria(StoppingCriteria):
method __init__ (line 69) | def __init__(self, max_length: int, max_position_embeddings: int | Non...
method __call__ (line 74) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class MaxTimeCriteria (line 86) | class MaxTimeCriteria(StoppingCriteria):
method __init__ (line 99) | def __init__(self, max_time: float, initial_timestamp: float | None = ...
method __call__ (line 104) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class StopStringCriteria (line 109) | class StopStringCriteria(StoppingCriteria):
method __init__ (line 240) | def __init__(self, tokenizer: PreTrainedTokenizerBase, stop_strings: s...
method clean_and_embed_tokens_with_cache (line 254) | def clean_and_embed_tokens_with_cache(self, token_list, token_indices,...
method clean_tokenizer_vocab (line 276) | def clean_tokenizer_vocab(tokenizer, static_prefix="abcdef"):
method _stop_string_get_matching_positions (line 297) | def _stop_string_get_matching_positions(
method _stop_string_create_embedding_vec (line 338) | def _stop_string_create_embedding_vec(token_list, token_indices, stop_...
method __call__ (line 389) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class EosTokenCriteria (line 450) | class EosTokenCriteria(StoppingCriteria):
method __init__ (line 460) | def __init__(self, eos_token_id: int | list[int] | torch.Tensor):
method __call__ (line 468) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class ConfidenceCriteria (line 474) | class ConfidenceCriteria(StoppingCriteria):
method __init__ (line 484) | def __init__(self, assistant_confidence_threshold):
method __call__ (line 487) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
class StoppingCriteriaList (line 495) | class StoppingCriteriaList(list):
method __call__ (line 497) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
method max_length (line 504) | def max_length(self) -> int | None:
function validate_stopping_criteria (line 511) | def validate_stopping_criteria(stopping_criteria: StoppingCriteriaList, ...
FILE: src/transformers/generation/streamers.py
class BaseStreamer (line 27) | class BaseStreamer:
method put (line 32) | def put(self, value):
method end (line 36) | def end(self):
class TextStreamer (line 41) | class TextStreamer(BaseStreamer):
method __init__ (line 75) | def __init__(self, tokenizer: PreTrainedTokenizerBase, skip_prompt: bo...
method put (line 85) | def put(self, value):
method end (line 119) | def end(self):
method on_finalized_text (line 133) | def on_finalized_text(self, text: str, stream_end: bool = False):
method _is_chinese_char (line 137) | def _is_chinese_char(self, cp):
class TextIteratorStreamer (line 162) | class TextIteratorStreamer(TextStreamer):
method __init__ (line 208) | def __init__(
method on_finalized_text (line 220) | def on_finalized_text(self, text: str, stream_end: bool = False):
method __iter__ (line 226) | def __iter__(self):
method __next__ (line 229) | def __next__(self):
class AsyncTextIteratorStreamer (line 237) | class AsyncTextIteratorStreamer(TextStreamer):
method __init__ (line 290) | def __init__(
method on_finalized_text (line 306) | def on_finalized_text(self, text: str, stream_end: bool = False):
method __aiter__ (line 312) | def __aiter__(self):
method __anext__ (line 315) | async def __anext__(self):
FILE: src/transformers/generation/utils.py
class GenerateDecoderOnlyOutput (line 148) | class GenerateDecoderOnlyOutput(ModelOutput):
class GenerateEncoderDecoderOutput (line 184) | class GenerateEncoderDecoderOutput(ModelOutput):
class GenerateBeamDecoderOnlyOutput (line 232) | class GenerateBeamDecoderOnlyOutput(ModelOutput):
class GenerateBeamEncoderDecoderOutput (line 276) | class GenerateBeamEncoderDecoderOutput(ModelOutput):
class GenerationMixin (line 338) | class GenerationMixin(ContinuousMixin):
method adjust_generation_fn (line 370) | def adjust_generation_fn(
method load_custom_generate (line 433) | def load_custom_generate(
method prepare_inputs_for_generation (line 494) | def prepare_inputs_for_generation(
method _prepare_model_inputs (line 600) | def _prepare_model_inputs(
method _maybe_initialize_input_ids_for_generation (line 665) | def _maybe_initialize_input_ids_for_generation(
method _prepare_position_ids_for_generation (line 705) | def _prepare_position_ids_for_generation(self, inputs_tensor, model_kw...
method _prepare_attention_mask_for_generation (line 729) | def _prepare_attention_mask_for_generation(
method _prepare_encoder_decoder_kwargs_for_generation (line 763) | def _prepare_encoder_decoder_kwargs_for_generation(
method _prepare_decoder_input_ids_for_generation (line 804) | def _prepare_decoder_input_ids_for_generation(
method _expand_inputs_for_generation (line 864) | def _expand_inputs_for_generation(
method _update_model_kwargs_for_generation (line 894) | def _update_model_kwargs_for_generation(
method _get_candidate_generator (line 944) | def _get_candidate_generator(
method _get_logits_processor (line 1029) | def _get_logits_processor(
method _get_stopping_criteria (line 1252) | def _get_stopping_criteria(
method _merge_criteria_processor_list (line 1290) | def _merge_criteria_processor_list(
method compute_transition_scores (line 1327) | def compute_transition_scores(
method _validate_generation_mode (line 1451) | def _validate_generation_mode(
method _validate_model_kwargs (line 1499) | def _validate_model_kwargs(self: "GenerativePreTrainedModel", model_kw...
method _validate_generated_length (line 1554) | def _validate_generated_length(
method _prepare_generated_length (line 1600) | def _prepare_generated_length(
method _prepare_generation_config (line 1656) | def _prepare_generation_config(
method _prepare_static_cache (line 1723) | def _prepare_static_cache(
method _supports_default_dynamic_cache (line 1775) | def _supports_default_dynamic_cache(cls: type["GenerativePreTrainedMod...
method _prepare_cache_for_generation (line 1793) | def _prepare_cache_for_generation(
method _supports_logits_to_keep (line 1902) | def _supports_logits_to_keep(self: "GenerativePreTrainedModel") -> bool:
method _prepare_special_tokens (line 1909) | def _prepare_special_tokens(
method _valid_auto_compile_criteria (line 1988) | def _valid_auto_compile_criteria(
method _optimize_model_for_decode (line 2048) | def _optimize_model_for_decode(self: "GenerativePreTrainedModel"):
method _get_deprecated_gen_repo (line 2065) | def _get_deprecated_gen_repo(
method _extract_generation_mode_kwargs (line 2089) | def _extract_generation_mode_kwargs(
method generate (line 2123) | def generate(
method _has_unfinished_sequences (line 2547) | def _has_unfinished_sequences(self, this_peer_finished: bool, synced_g...
method heal_tokens (line 2565) | def heal_tokens(
method _sample (line 2650) | def _sample(
method _flatten_beam_dim (line 2836) | def _flatten_beam_dim(tensor: torch.Tensor) -> torch.Tensor:
method _unflatten_beam_dim (line 2842) | def _unflatten_beam_dim(tensor: torch.Tensor, batch_size: int, num_bea...
method _gather_beams (line 2848) | def _gather_beams(tensor: torch.Tensor, beam_indices: torch.Tensor) ->...
method _check_early_stop_heuristic (line 2868) | def _check_early_stop_heuristic(
method _beam_search_has_unfinished_sequences (line 2915) | def _beam_search_has_unfinished_sequences(
method _get_top_k_continuations (line 2937) | def _get_top_k_continuations(
method _get_running_beams_for_next_iteration (line 2991) | def _get_running_beams_for_next_iteration(
method _update_finished_beams (line 3013) | def _update_finished_beams(
method _beam_search (line 3068) | def _beam_search(
method _assisted_decoding (line 3417) | def _assisted_decoding(
method _prefill (line 3716) | def _prefill(
function _speculative_sampling (line 3804) | def _speculative_sampling(
function _split_model_outputs (line 3860) | def _split_model_outputs(outputs, new_outputs, cur_len, added_len, is_de...
FILE: src/transformers/generation/watermarking.py
class WatermarkDetectorOutput (line 39) | class WatermarkDetectorOutput:
class WatermarkDetector (line 71) | class WatermarkDetector:
method __init__ (line 123) | def __init__(
method _get_ngram_score (line 146) | def _get_ngram_score(self, prefix: torch.LongTensor, target: int):
method _score_ngrams_in_passage (line 150) | def _score_ngrams_in_passage(self, input_ids: torch.LongTensor):
method _compute_z_score (line 180) | def _compute_z_score(self, green_token_count: np.ndarray, total_num_to...
method _compute_pval (line 187) | def _compute_pval(self, x, loc=0, scale=1):
method __call__ (line 191) | def __call__(
class BayesianDetectorConfig (line 243) | class BayesianDetectorConfig(PreTrainedConfig):
method __init__ (line 258) | def __init__(self, watermarking_depth: int | None = None, base_rate: f...
method set_detector_information (line 267) | def set_detector_information(self, model_name, watermarking_config):
class BayesianWatermarkDetectorModelOutput (line 273) | class BayesianWatermarkDetectorModelOutput(ModelOutput):
class BayesianDetectorWatermarkedLikelihood (line 288) | class BayesianDetectorWatermarkedLikelihood(nn.Module):
method __init__ (line 294) | def __init__(self, watermarking_depth: int):
method _compute_latents (line 301) | def _compute_latents(self, g_values: torch.Tensor) -> tuple[torch.Tens...
method forward (line 333) | def forward(self, g_values: torch.Tensor) -> torch.Tensor:
class BayesianDetectorModel (line 350) | class BayesianDetectorModel(PreTrainedModel):
method __init__ (line 379) | def __init__(self, config):
method _init_weights (line 390) | def _init_weights(self, module):
method _compute_posterior (line 395) | def _compute_posterior(
method forward (line 437) | def forward(
class SynthIDTextWatermarkDetector (line 481) | class SynthIDTextWatermarkDetector:
method __init__ (line 518) | def __init__(
method __call__ (line 528) | def __call__(self, tokenized_outputs: torch.Tensor):
FILE: src/transformers/hf_argparser.py
function string_to_bool (line 36) | def string_to_bool(v):
function make_choice_type_function (line 49) | def make_choice_type_function(choices: list) -> Callable[[str], Any]:
function HfArg (line 64) | def HfArg(
class HfArgumentParser (line 111) | class HfArgumentParser(ArgumentParser):
method __init__ (line 128) | def __init__(self, dataclass_types: DataClassType | Iterable[DataClass...
method _parse_dataclass_field (line 146) | def _parse_dataclass_field(parser: ArgumentParser, field: dataclasses....
method _add_dataclass_arguments (line 251) | def _add_dataclass_arguments(self, dtype: DataClassType):
method parse_args_into_dataclasses (line 272) | def parse_args_into_dataclasses(
method parse_dict (line 358) | def parse_dict(self, args: dict[str, Any], allow_extra_keys: bool = Fa...
method parse_json_file (line 386) | def parse_json_file(self, json_file: str | os.PathLik
Copy disabled (too large)
Download .json
Condensed preview — 5705 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (82,521K chars).
[
{
"path": ".ai/AGENTS.md",
"chars": 4370,
"preview": "## Useful commands\n- `make style`: runs formatters and linters (ruff), necessary to pass code style checks\n- `make typin"
},
{
"path": ".ai/skills/add-or-fix-type-checking/SKILL.md",
"chars": 10302,
"preview": "---\nname: add-or-fix-type-checking\ndescription: Fixes broken typing checks detected by ty, make typing, or make check-re"
},
{
"path": ".circleci/TROUBLESHOOT.md",
"chars": 319,
"preview": "# Troubleshooting\n\nThis is a document explaining how to deal with various issues on Circle-CI. The entries may include a"
},
{
"path": ".circleci/config.yml",
"chars": 10117,
"preview": "version: 2.1\nsetup: true\norbs:\n continuation: circleci/continuation@0.1.0\n\nparameters:\n nightly:\n type: boo"
},
{
"path": ".circleci/create_circleci_config.py",
"chars": 20434,
"preview": "# coding=utf-8\n# Copyright 2022 The HuggingFace Inc. team.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": ".circleci/parse_test_outputs.py",
"chars": 2590,
"preview": "import argparse\nimport re\n\n\ndef parse_pytest_output(file_path):\n skipped_tests = {}\n skipped_count = 0\n with op"
},
{
"path": ".git-blame-ignore-revs",
"chars": 41,
"preview": "8008e6c83e1467dbe0ae3c81d19b29c17f4ff456\n"
},
{
"path": ".gitattributes",
"chars": 51,
"preview": "*.py\teol=lf\n*.rst\teol=lf\n*.md\teol=lf\n*.mdx eol=lf"
},
{
"path": ".github/ISSUE_TEMPLATE/bug-report.yml",
"chars": 4576,
"preview": "name: \"\\U0001F41B Bug Report\"\ndescription: Submit a bug report to help us improve transformers\nlabels: [ \"bug\" ]\nbody:\n "
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 529,
"preview": "blank_issues_enabled: true\nversion: 2.1\ncontact_links:\n - name: Model checkpoints on the Hugging Face Hub\n url: http"
},
{
"path": ".github/ISSUE_TEMPLATE/feature-request.yml",
"chars": 1100,
"preview": "name: \"\\U0001F680 Feature request\"\ndescription: Submit a proposal/request for a new transformers feature\nlabels: [ \"Feat"
},
{
"path": ".github/ISSUE_TEMPLATE/i18n.md",
"chars": 2829,
"preview": "---\nname: 🌐 Translating a new language?\nabout: Start a new translation effort in your language\ntitle: '[i18n-<languageCo"
},
{
"path": ".github/ISSUE_TEMPLATE/migration.yml",
"chars": 2726,
"preview": "name: \"\\U0001F4DA Migration from pytorch-pretrained-bert or pytorch-transformers\"\ndescription: Report a problem when mig"
},
{
"path": ".github/ISSUE_TEMPLATE/new-model-addition.yml",
"chars": 1077,
"preview": "name: \"\\U0001F31F New model addition\"\ndescription: Submit a proposal/request to implement a new model\nlabels: [ \"New mod"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 4058,
"preview": "# What does this PR do?\n\n<!--\nCongratulations! You've made it this far! You're not quite done yet though.\n\nOnce merged, "
},
{
"path": ".github/conda/build.sh",
"chars": 69,
"preview": "$PYTHON setup.py install # Python command to install the script.\n"
},
{
"path": ".github/conda/meta.yaml",
"chars": 971,
"preview": "{% set name = \"transformers\" %}\n\npackage:\n name: \"{{ name|lower }}\"\n version: \"{{ TRANSFORMERS_VERSION }}\"\n\nsource:\n "
},
{
"path": ".github/copilot-instructions.md",
"chars": 3529,
"preview": "# copilot-instructions.md Guide for Hugging Face Transformers\n\nThis copilot-instructions.md file provides guidance for c"
},
{
"path": ".github/scripts/assign_reviewers.py",
"chars": 4180,
"preview": "# coding=utf-8\n# Copyright 2025 the HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache License, V"
},
{
"path": ".github/scripts/codeowners_for_review_action",
"chars": 21802,
"preview": "# Top-level rules are matched only if nothing else matches\n* @Rocketknight1 @ArthurZucker # if no one is pinged based on"
},
{
"path": ".github/workflows/TROUBLESHOOT.md",
"chars": 382,
"preview": "# Troubleshooting\n\nThis is a document explaining how to deal with various issues on github-actions self-hosted CI. The e"
},
{
"path": ".github/workflows/add-model-like.yml",
"chars": 2656,
"preview": "name: Add model like runner\n\non:\n push:\n branches:\n - none # put main here when this is fixed\n #pull_request:\n"
},
{
"path": ".github/workflows/anti-slop.yml",
"chars": 2238,
"preview": "name: Anti-Slop\n\npermissions:\n contents: read\n issues: read\n pull-requests: write\n\non:\n pull_request_target:\n typ"
},
{
"path": ".github/workflows/assign-reviewers.yml",
"chars": 649,
"preview": "name: Assign PR Reviewers\non:\n pull_request_target:\n branches:\n - main\n types: [ready_for_review]\n\njobs:\n a"
},
{
"path": ".github/workflows/benchmark.yml",
"chars": 2405,
"preview": "name: Self-hosted runner (benchmark)\r\n\r\non:\r\n push:\r\n branches: [main]\r\n pull_request:\r\n types: [ opened, labele"
},
{
"path": ".github/workflows/benchmark_v2.yml",
"chars": 2021,
"preview": "name: Benchmark v2 Framework\n\non:\n workflow_dispatch:\n\nenv:\n HF_HOME: /mnt/cache\n TRANSFORMERS_IS_CI: yes\n # For gat"
},
{
"path": ".github/workflows/benchmark_v2_a10_caller.yml",
"chars": 565,
"preview": "name: Benchmark v2 Scheduled Runner - A10 Single-GPU\n\non:\n workflow_dispatch:\n\njobs:\n benchmark-v2-default:\n name: "
},
{
"path": ".github/workflows/benchmark_v2_mi325_caller.yml",
"chars": 633,
"preview": "name: Benchmark v2 Scheduled Runner - MI325 Single-GPU\n\non:\n workflow_dispatch:\n\njobs:\n benchmark-v2-default:\n name"
},
{
"path": ".github/workflows/build-ci-docker-images.yml",
"chars": 2621,
"preview": "name: Build pr ci-docker\n\non:\n push:\n branches:\n - push-ci-image # for now let's only build on this branch\n re"
},
{
"path": ".github/workflows/build-docker-images.yml",
"chars": 9482,
"preview": "name: Build docker images (scheduled)\n\non:\n push:\n branches:\n - build_ci_docker_image*\n repository_dispatch:\n "
},
{
"path": ".github/workflows/build-nightly-ci-docker-images.yml",
"chars": 2001,
"preview": "name: Build docker images (Nightly CI)\n\non:\n workflow_call:\n inputs:\n job:\n required: true\n type:"
},
{
"path": ".github/workflows/build-past-ci-docker-images.yml",
"chars": 3225,
"preview": "name: Build docker images (Past CI)\n\non:\n push:\n branches:\n - build_past_ci_docker_image*\n\nconcurrency:\n group"
},
{
"path": ".github/workflows/build_documentation.yml",
"chars": 990,
"preview": "name: Build documentation\n\non:\n workflow_dispatch:\n push:\n branches:\n - main\n - doc-builder*\n - v*-r"
},
{
"path": ".github/workflows/build_pr_documentation.yml",
"chars": 1156,
"preview": "name: Build PR Documentation\n\non:\n pull_request:\n merge_group:\n\nconcurrency:\n group: ${{ github.workflow }}-${{ githu"
},
{
"path": ".github/workflows/check-workflow-permissions.yml",
"chars": 549,
"preview": "---\nname: Check Permissions Advisor\n\non:\n workflow_dispatch:\n inputs:\n workflow_name:\n description: 'Wor"
},
{
"path": ".github/workflows/check_failed_tests.yml",
"chars": 14423,
"preview": "name: Process failed tests\n\non:\n workflow_call:\n inputs:\n docker:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/check_tiny_models.yml",
"chars": 2800,
"preview": "name: Check Tiny Models\r\n\r\non:\r\n push:\r\n branches:\r\n - check_tiny_models*\r\n repository_dispatch:\r\n schedule:\r"
},
{
"path": ".github/workflows/circleci-failure-summary-comment.yml",
"chars": 10540,
"preview": "name: CircleCI Failure Summary Comment\n\non:\n pull_request_target:\n types: [opened, synchronize, reopened]\n\njobs:\n c"
},
{
"path": ".github/workflows/codeql.yml",
"chars": 521,
"preview": "---\nname: CodeQL Security Analysis\n\non:\n push:\n branches: [\"main\", \"fix_security_issue_*\"]\n # pull_request:\n # b"
},
{
"path": ".github/workflows/collated-reports.yml",
"chars": 1399,
"preview": "name: CI collated reports\n\non:\n workflow_call:\n inputs:\n job:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/doctest_job.yml",
"chars": 2596,
"preview": "name: Doctest job\n\non:\n workflow_call:\n inputs:\n job_splits:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/doctests.yml",
"chars": 2925,
"preview": "name: Doctests\n\non:\n push:\n branches:\n - run_doctest*\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * "
},
{
"path": ".github/workflows/extras-smoke-test.yml",
"chars": 6219,
"preview": "name: Extras Smoke Test\n\non:\n schedule:\n # Run every night at 3 AM UTC\n - cron: \"0 3 * * *\"\nenv:\n SLACK_CHANNEL_"
},
{
"path": ".github/workflows/get-pr-info.yml",
"chars": 8005,
"preview": "name: Get PR commit SHA\non:\n workflow_call:\n inputs:\n pr_number:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/get-pr-number.yml",
"chars": 1371,
"preview": "name: Get PR number\non:\n workflow_call:\n outputs:\n PR_NUMBER:\n description: \"The extracted PR number\"\n "
},
{
"path": ".github/workflows/model_jobs.yml",
"chars": 8424,
"preview": "name: model jobs\n\non:\n workflow_call:\n inputs:\n folder_slices:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/model_jobs_intel_gaudi.yml",
"chars": 4115,
"preview": "name: model jobs\n\non:\n workflow_call:\n inputs:\n folder_slices:\n required: true\n type: string\n "
},
{
"path": ".github/workflows/new_model_pr_merged_notification.yml",
"chars": 2244,
"preview": "# Used to notify core maintainers about new model PR being merged\nname: New model PR merged notification\n\non:\n push:\n "
},
{
"path": ".github/workflows/pr-repo-consistency-bot.yml",
"chars": 15452,
"preview": "name: PR Repo. Consistency Bot\n\non:\n issue_comment:\n types:\n - created\n branches-ignore:\n - main\nconcur"
},
{
"path": ".github/workflows/pr_build_doc_with_comment.yml",
"chars": 5600,
"preview": "name: PR - build doc via comment\non:\n issue_comment:\n types:\n - created\n branches-ignore:\n - main\nconcu"
},
{
"path": ".github/workflows/pr_slow_ci_suggestion.yml",
"chars": 6932,
"preview": "name: PR slow CI - Suggestion\non:\n pull_request_target:\n types: [opened, synchronize, reopened]\n\njobs:\n get-pr-numb"
},
{
"path": ".github/workflows/push-important-models.yml",
"chars": 6248,
"preview": "name: Slow tests on important models (on Push - A10)\n\non:\n push:\n branches: [ main ]\n\njobs:\n get_modified_models:\n "
},
{
"path": ".github/workflows/release-conda.yml",
"chars": 1077,
"preview": "name: Release - Conda\n\non:\n push:\n tags:\n - v*\n branches:\n - conda_*\n\nenv:\n ANACONDA_API_TOKEN: ${{ se"
},
{
"path": ".github/workflows/release.yml",
"chars": 1416,
"preview": "name: Release\non:\n push:\n tags:\n - v*\n branches:\n - 'v*-release'\n\njobs:\n build_and_test:\n name: bui"
},
{
"path": ".github/workflows/self-comment-ci.yml",
"chars": 20688,
"preview": "name: PR comment GitHub CI\n\non:\n issue_comment:\n types:\n - created\n branches-ignore:\n - main\nconcurrenc"
},
{
"path": ".github/workflows/self-nightly-caller.yml",
"chars": 1741,
"preview": "name: Nvidia CI with nightly torch\n\non:\n repository_dispatch:\n # triggered when the daily scheduled Nvidia CI is compl"
},
{
"path": ".github/workflows/self-nightly-past-ci-caller.yml",
"chars": 3432,
"preview": "name: Self-hosted runner (nightly-past-ci-caller)\r\n\r\non:\r\n schedule:\r\n - cron: \"17 2,14 * * *\"\r\n push:\r\n branche"
},
{
"path": ".github/workflows/self-past-caller.yml",
"chars": 1124,
"preview": "name: Self-hosted runner (past-ci)\n\n\non:\n workflow_call:\n inputs:\n framework:\n required: true\n ty"
},
{
"path": ".github/workflows/self-scheduled-amd-caller.yml",
"chars": 338,
"preview": "name: Self-hosted runner (AMD scheduled CI caller)\n\non:\n schedule:\n - cron: \"17 5 * * *\"\n\njobs:\n run_scheduled_amd_"
},
{
"path": ".github/workflows/self-scheduled-amd-mi250-caller.yml",
"chars": 1968,
"preview": "name: Self-hosted runner (AMD mi250 scheduled CI caller)\n\non:\n workflow_run:\n workflows: [\"Self-hosted runner (AMD s"
},
{
"path": ".github/workflows/self-scheduled-amd-mi325-caller.yml",
"chars": 2535,
"preview": "name: Self-hosted runner scale set (AMD mi325 scheduled CI caller)\n\n# Note: For every job in this workflow, the name of "
},
{
"path": ".github/workflows/self-scheduled-amd-mi355-caller.yml",
"chars": 2329,
"preview": "name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)\n\n# Note: For every job in this workflow, the name of "
},
{
"path": ".github/workflows/self-scheduled-caller.yml",
"chars": 4141,
"preview": "name: Nvidia CI\n\non:\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * *\"\n push:\n branches:\n - run_nvidi"
},
{
"path": ".github/workflows/self-scheduled-flash-attn-caller.yml",
"chars": 1679,
"preview": "name: Nvidia CI - Flash Attn\n\non:\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * *\"\n push:\n branches:\n "
},
{
"path": ".github/workflows/self-scheduled-intel-gaudi.yml",
"chars": 11949,
"preview": "name: Self-hosted runner (scheduled-intel-gaudi)\n\non:\n workflow_call:\n inputs:\n job:\n required: true\n "
},
{
"path": ".github/workflows/self-scheduled-intel-gaudi3-caller.yml",
"chars": 2129,
"preview": "name: Self-hosted runner (Intel Gaudi3 scheduled CI caller)\n\non:\n repository_dispatch:\n workflow_dispatch:\n schedule:"
},
{
"path": ".github/workflows/self-scheduled.yml",
"chars": 25486,
"preview": "name: Nvidia CI (job definitions)\n\n# Note that each job's dependencies go into a corresponding docker file.\n#\n# For exam"
},
{
"path": ".github/workflows/slack-report.yml",
"chars": 4280,
"preview": "name: CI slack report\n\non:\n workflow_call:\n inputs:\n job:\n required: true\n type: string\n sla"
},
{
"path": ".github/workflows/ssh-runner.yml",
"chars": 6198,
"preview": "name: SSH into our runners\n\non:\n workflow_dispatch:\n inputs:\n runner_type:\n description: 'Type of runner"
},
{
"path": ".github/workflows/stale.yml",
"chars": 588,
"preview": "name: Stale Bot\n\non:\n schedule:\n - cron: \"0 8 * * *\"\n\njobs:\n close_stale_issues:\n name: Close Stale Issues\n i"
},
{
"path": ".github/workflows/trl-ci-bot.yml",
"chars": 2992,
"preview": "# This workflow allows trusted contributors to trigger TRL CI runs against\n# specific Transformers commits by commenting"
},
{
"path": ".github/workflows/trufflehog.yml",
"chars": 362,
"preview": "on:\n push:\n\nname: Secret Leaks\n\npermissions:\n contents: read\n\njobs:\n trufflehog:\n runs-on: ubuntu-latest\n steps"
},
{
"path": ".github/workflows/update_metdata.yml",
"chars": 622,
"preview": "name: Update Transformers metadata\n\non:\n push:\n branches:\n - main\n - update_transformers_metadata*\n\njobs:\n"
},
{
"path": ".github/workflows/upload_pr_documentation.yml",
"chars": 385,
"preview": "name: Upload PR Documentation\n\non:\n workflow_run:\n workflows: [\"Build PR Documentation\"]\n types:\n - complete"
},
{
"path": ".gitignore",
"chars": 2059,
"preview": "# Initially taken from Github's Python gitignore file\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$"
},
{
"path": "CITATION.cff",
"chars": 2331,
"preview": "cff-version: \"1.2.0\"\r\ndate-released: 2020-10\r\nmessage: \"If you use this software, please cite it using these metadata.\"\r"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 5489,
"preview": "\n# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make particip"
},
{
"path": "CONTRIBUTING.md",
"chars": 30843,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "ISSUES.md",
"chars": 18789,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "LICENSE",
"chars": 11418,
"preview": "Copyright 2018- The Hugging Face team. All rights reserved.\n\n Apache License\n "
},
{
"path": "MIGRATION_GUIDE_V5.md",
"chars": 41318,
"preview": "<!---\nCopyright 2025 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "Makefile",
"chars": 3112,
"preview": "# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)\nexport PYTHONPATH = "
},
{
"path": "README.md",
"chars": 17816,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "SECURITY.md",
"chars": 2015,
"preview": "# Security Policy\n\n## Hugging Face Hub, remote artefacts, and remote code\n\nTransformers is open-source software that is "
},
{
"path": "awesome-transformers.md",
"chars": 39627,
"preview": "# Awesome projects built with Transformers\n\nThis page lists awesome projects built on top of Transformers. Transformers "
},
{
"path": "benchmark/.gitignore",
"chars": 18,
"preview": "benchmark_results/"
},
{
"path": "benchmark/README.md",
"chars": 2400,
"preview": "# Benchmarks\n\nYou might want to add new benchmarks.\n\nYou will need to define a python function named `run_benchmark` in "
},
{
"path": "benchmark/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "benchmark/benches/llama.py",
"chars": 14722,
"preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "benchmark/benchmark.py",
"chars": 12159,
"preview": "# Copyright 2024 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "benchmark/benchmarks_entrypoint.py",
"chars": 19806,
"preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "benchmark/config/generation.yaml",
"chars": 1172,
"preview": "defaults:\n - benchmark # inheriting benchmark schema\n - scenario: inference\n - launcher: process\n - backend: pytorch"
},
{
"path": "benchmark/default.yml",
"chars": 194,
"preview": "apiVersion: 1\n\nproviders:\n - name: 'Transformers Benchmarks'\n orgId: 1\n type: file\n updateIntervalSeconds: 10\n"
},
{
"path": "benchmark/grafana_dashboard.json",
"chars": 65620,
"preview": "{\n \"annotations\": {\n \"list\": [\n {\n \"builtIn\": 1,\n \"datasource\": {\n \"type\": \"grafana\",\n "
},
{
"path": "benchmark/grafana_datasource.yaml",
"chars": 462,
"preview": "apiVersion: 1\ndatasources:\n - name: grafana-postgresql-datasource\n uid: be28nkzirtb0gd\n type: postgres\n url: $"
},
{
"path": "benchmark/optimum_benchmark_wrapper.py",
"chars": 656,
"preview": "import argparse\nimport subprocess\n\n\ndef main(config_dir, config_name, args):\n subprocess.run(\n [\"optimum-bench"
},
{
"path": "benchmark/requirements.txt",
"chars": 59,
"preview": "gpustat==1.1.1\npsutil==6.0.0\npsycopg2==2.9.9\npandas>=1.5.0\n"
},
{
"path": "benchmark/utils/init_db.sql",
"chars": 0,
"preview": ""
},
{
"path": "benchmark_v2/.gitignore",
"chars": 47,
"preview": "benchmark_results/\nbenchmark_results_profiles/\n"
},
{
"path": "benchmark_v2/README.md",
"chars": 3664,
"preview": "# Benchmarking v2\n\nA comprehensive benchmarking framework for transformer models that supports multiple execution modes "
},
{
"path": "benchmark_v2/benchmark_scripts/continuous_batching_overall.py",
"chars": 5134,
"preview": "import argparse\nimport json\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom tabulate import tabulate\n\n\nSCRIPT"
},
{
"path": "benchmark_v2/framework/benchmark_config.py",
"chars": 12913,
"preview": "import hashlib\nimport itertools\nimport json\nimport logging\nfrom functools import lru_cache\nfrom typing import Any\n\nimpor"
},
{
"path": "benchmark_v2/framework/benchmark_runner.py",
"chars": 21553,
"preview": "import gc\nimport json\nimport logging\nimport os\nimport pathlib\nimport re\nimport tempfile\nimport time\nfrom datetime import"
},
{
"path": "benchmark_v2/framework/data_classes.py",
"chars": 7162,
"preview": "from dataclasses import dataclass\nfrom datetime import datetime, timezone\nfrom typing import Any\n\nimport numpy as np\n\nfr"
},
{
"path": "benchmark_v2/framework/hardware_metrics.py",
"chars": 12148,
"preview": "import logging\nimport subprocess\nimport sys\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom log"
},
{
"path": "benchmark_v2/requirements.txt",
"chars": 117,
"preview": "numpy>=1.21.0\npsutil>=5.8.0\nnvidia-ml-py>=12.0.0\ntorch>=2.0.0\ndatasets>=2.10.0\nhuggingface_hub>=0.16.0\namdsmi>=7.0.2\n"
},
{
"path": "benchmark_v2/run_benchmarks.py",
"chars": 6020,
"preview": "#!/usr/bin/env python3\n# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License"
},
{
"path": "conftest.py",
"chars": 6268,
"preview": "# Copyright 2020 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "docker/README.md",
"chars": 1115,
"preview": "# Dockers for `transformers`\n\nIn this folder you will find various docker files, and some subfolders. \n- dockerfiles (ex"
},
{
"path": "docker/consistency.dockerfile",
"chars": 746,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nUSER root\nARG REF=main\nRUN apt-get update && apt-get install -y time"
},
{
"path": "docker/custom-tokenizers.dockerfile",
"chars": 1921,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y libs"
},
{
"path": "docker/examples-torch.dockerfile",
"chars": 1380,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "docker/exotic-models.dockerfile",
"chars": 1717,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y libs"
},
{
"path": "docker/pipeline-torch.dockerfile",
"chars": 1288,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "docker/quality.dockerfile",
"chars": 421,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y time"
},
{
"path": "docker/torch-light.dockerfile",
"chars": 1326,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "docker/transformers-all-latest-gpu/Dockerfile",
"chars": 5001,
"preview": "FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\n# U"
},
{
"path": "docker/transformers-doc-builder/Dockerfile",
"chars": 936,
"preview": "FROM python:3.10\nLABEL maintainer=\"Hugging Face\"\n\nRUN apt update\nRUN git clone https://github.com/huggingface/transforme"
},
{
"path": "docker/transformers-gpu/Dockerfile",
"chars": 931,
"preview": "FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04\nLABEL maintainer=\"Hugging Face\"\nLABEL repository=\"transformers\"\n\nRUN apt "
},
{
"path": "docker/transformers-intel-cpu/Dockerfile",
"chars": 2238,
"preview": "FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04 AS base\nLABEL maintainer=\"Hugging Face\"\nSHELL [\"/bin/ba"
},
{
"path": "docker/transformers-pytorch-amd-gpu/Dockerfile",
"chars": 2275,
"preview": "FROM rocm/pytorch:rocm7.1_ubuntu22.04_py3.10_pytorch_release_2.8.0\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND="
},
{
"path": "docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile",
"chars": 2018,
"preview": "FROM rocm/dev-ubuntu-22.04:6.2.4\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\nARG PYTORCH='2.6.0'"
},
{
"path": "docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile",
"chars": 3044,
"preview": "# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html\nFROM nvcr.io/nvidia/pytorch:24.08"
},
{
"path": "docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile",
"chars": 3316,
"preview": "# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11\nFROM nvcr.io/nvidia/pyt"
},
{
"path": "docker/transformers-pytorch-gpu/Dockerfile",
"chars": 1877,
"preview": "FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\nRUN"
},
{
"path": "docker/transformers-pytorch-tpu/Dockerfile",
"chars": 2877,
"preview": "FROM google/cloud-sdk:slim\n\n# Build args.\nARG GITHUB_REF=refs/heads/main\n\n# TODO: This Dockerfile installs pytorch/xla 3"
},
{
"path": "docker/transformers-pytorch-tpu/bert-base-cased.jsonnet",
"chars": 937,
"preview": "local base = import 'templates/base.libsonnet';\nlocal tpus = import 'templates/tpus.libsonnet';\nlocal utils = import \"te"
},
{
"path": "docker/transformers-pytorch-tpu/dataset.yaml",
"chars": 797,
"preview": "apiVersion: v1\nkind: PersistentVolume\nmetadata:\n name: huggingface-cluster-disk\nspec:\n storageClassName: \"\"\n capacity"
},
{
"path": "docker/transformers-pytorch-tpu/docker-entrypoint.sh",
"chars": 247,
"preview": "#!/bin/bash\nsource ~/.bashrc\necho \"running docker-entrypoint.sh\"\nconda activate container\necho $KUBE_GOOGLE_CLOUD_TPU_EN"
},
{
"path": "docker/transformers-pytorch-xpu/Dockerfile",
"chars": 3907,
"preview": "FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04 AS base\nLABEL maintainer=\"Hugging Face\"\n\nSHELL [\"/bin/b"
},
{
"path": "docker/transformers-quantization-latest-gpu/Dockerfile",
"chars": 4356,
"preview": "FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\n# U"
},
{
"path": "docs/README.md",
"chars": 17125,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "docs/TRANSLATING.md",
"chars": 2609,
"preview": "# Translating the Transformers documentation into your language\n\nAs part of our mission to democratize machine learning,"
},
{
"path": "docs/source/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "docs/source/ar/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "docs/source/ar/_toctree.yml",
"chars": 27427,
"preview": "- sections:\n - local: index\n title: 🤗 المحولات\n - local: quicktour\n title: جولة سريعة\n - local: installation\n "
},
{
"path": "docs/source/ar/accelerate.md",
"chars": 3962,
"preview": "# التدريب الموزع باستخدام 🤗 Accelerate\n\n\nمع تزايد حجم النماذج اللغوية، برز التوازي كأحد الاستراتيجيات لتدريب نماذج أكبر"
},
{
"path": "docs/source/ar/attention.md",
"chars": 2693,
"preview": "# آليات الانتباه \n\nتستخدم معظم نماذج المحول (Transformer) الانتباه الكامل بحيث تكون مصفوفة الانتباه ذات الأبعاد المتساوي"
},
{
"path": "docs/source/ar/autoclass_tutorial.md",
"chars": 6815,
"preview": "# تحميل نماذج مدربة مسبقًا باستخدام AutoClass\nلم ترغب في إنشاء محول معماري لمؤشر الترابط الخاص بك، فهناك العديد من محولا"
},
{
"path": "docs/source/ar/bertology.md",
"chars": 1536,
"preview": "# BERTology\n\nيُشهد في الآونة الأخيرة نمو مجال دراسي يُعنى باستكشاف آلية عمل نماذج المحولات الضخمة مثل BERT (والذي يُطلق "
},
{
"path": "docs/source/ar/chat_templating.md",
"chars": 44211,
"preview": "# قوالب نماذج الدردشة\n\n## مقدمة\n\nتعد **الدردشة** أحد استخدامات نماذج اللغات الكبيرة (LLMs) شائعة الاستخدام بشكل متزايد. "
},
{
"path": "docs/source/ar/community.md",
"chars": 26369,
"preview": "# مجتمع المطورين\n\nهذه الصفحة تجمع الموارد حول 🤗 Transformers التي طورها المجتمع.\n\n## موارد المجتمع:\n\n| المصدر | "
},
{
"path": "docs/source/ar/conversations.md",
"chars": 15217,
"preview": "# الدردشة مع المحوّلات \n\nإذا كنت تقرأ هذه المقالة، فمن المؤكد أنك على علم بـ **نماذج الدردشة**. نماذج الدردشة هي أنظمة ذ"
},
{
"path": "docs/source/ar/create_a_model.md",
"chars": 15517,
"preview": "# إنشاء بنية مخصصة\n\nتحدد فئة [`AutoClass`](model_doc/auto) تلقائيًا بنية النموذج وتقوم بتنزيل تكوين وأوزان مسبقين للنموذ"
},
{
"path": "docs/source/ar/custom_models.md",
"chars": 13583,
"preview": "# بناء نماذج مخصصة\n\nتم تصميم مكتبة 🤗 Transformers لتكون قابلة للتوسيع بسهولة. كل نموذج مُشفّر بالكامل في مجلد فرعي معين "
},
{
"path": "docs/source/ar/fast_tokenizers.md",
"chars": 2042,
"preview": "# استخدام مجزئيات النصوص من 🤗 Tokenizers\n\nيعتمد [`PreTrainedTokenizerFast`] على مكتبة [🤗 Tokenizers](https://huggingface"
},
{
"path": "docs/source/ar/gguf.md",
"chars": 2873,
"preview": "# GGUF وتفاعلها مع المحولات\n\nتُستخدم صيغة ملف GGUF لتخزين النماذج للاستدلال باستخدام [GGML](https://github.com/ggerganov"
},
{
"path": "docs/source/ar/glossary.md",
"chars": 23382,
"preview": "# قاموس المصطلحات\n\nيحدد هذا المسرد مصطلحات التعلم الآلي العامة و 🤗 Transformers لمساعدتك على فهم الوثائق بشكل أفضل.\n\n## "
},
{
"path": "docs/source/ar/how_to_hack_models.md",
"chars": 8257,
"preview": "# كيفية تعديل أي نموذج من نماذج Transformers\n\nتوفر مكتبة [🤗 Transformers](https://github.com/huggingface/transformers) م"
},
{
"path": "docs/source/ar/index.md",
"chars": 41106,
"preview": "# 🤗 Transformers: لمحة عامة\n\nأحدث ما في مجال التعلم الآلي لـ [PyTorch](https://pytorch.org/) و [TensorFlow](https://www."
},
{
"path": "docs/source/ar/installation.md",
"chars": 8072,
"preview": "# التثبيت (Installation)\n\nقم بتثبيت مكتبة 🤗 Transformers المناسبة لمكتبة التعلم العميق التي تستخدمها، وقم بإعداد ذاكرة ا"
},
{
"path": "docs/source/ar/llm_tutorial.md",
"chars": 14459,
"preview": "# التوليد باستخدام نماذج اللغات الكبيرة (LLMs)\n\n[[open-in-colab]]\n\nتعد LLMs، أو نماذج اللغة الكبيرة، المكون الرئيسي وراء"
},
{
"path": "docs/source/ar/llm_tutorial_optimization.md",
"chars": 42706,
"preview": "# تحسين نماذج اللغة الكبيرة من حيث السرعة والذاكرة\n\n\n[[open-in-colab]]\n\nتحقق نماذج اللغة الكبيرة (LLMs) مثل GPT3/4، [Fal"
},
{
"path": "docs/source/ar/model_memory_anatomy.md",
"chars": 10041,
"preview": "# تشريح عملية تدريب النموذج\n\nلفهم تقنيات تحسين الأداء التي يمكن تطبيقها لتحسين كفاءة استخدام الذاكرة وسرعة تدريب النموذج"
},
{
"path": "docs/source/ar/model_sharing.md",
"chars": 7562,
"preview": "# شارك نموذجك مع العالم\n\nأظهرت آخر درسين تعليميين كيفية ضبط نموذج بدقة باستخدام PyTorch و Keras و 🤗 Accelerate لعمليات ا"
},
{
"path": "docs/source/ar/model_summary.md",
"chars": 15044,
"preview": "# عائلة نماذج المحول\n\nمنذ إطلاقه في عام 2017، ألهم نموذج [المحول الأصلي](https://huggingface.co/papers/1706.03762) (راجع"
},
{
"path": "docs/source/ar/modular_transformers.md",
"chars": 8466,
"preview": "# المحولات النمطية\n\nمكتبة `transformers` هي إطار عمل ذو فلسفة محدد؛ يتم تعريف فلسفتنا في [الدليل المفاهيمي](./philosophy"
},
{
"path": "docs/source/ar/multilingual.md",
"chars": 7060,
"preview": "# النماذج متعددة اللغات للاستدلال\n\nهناك العديد من النماذج متعددة اللغات في مكتبة 🤗 Transformers، وتختلف طريقة استخدامها "
},
{
"path": "docs/source/ar/notebooks.md",
"chars": 35356,
"preview": "# دفاتر ملاحظات 🤗 Transformers\n\nيمكنك أن تجد هنا قائمة بدفاتر الملاحظات الرسمية التي تقدمها Hugging Face.\n\nكما نود أن ند"
},
{
"path": "docs/source/ar/pad_truncation.md",
"chars": 6558,
"preview": "# الحشو والتقليم \n\nغالبًا ما تختلف مدخلات الدُفعات في الطول، لذا لا يمكن تحويلها إلى مصفوفات ذات حجم ثابت .يُعدّ الحشو و"
},
{
"path": "docs/source/ar/peft.md",
"chars": 7714,
"preview": "# تحميل المحوّلات باستخدام 🤗 PEFT\n\n[[open-in-colab]]\n\nتقنية \"التدريب الدقيق ذو الكفاءة البارامتيرية\" (PEFT)](https://hug"
},
{
"path": "docs/source/ar/perplexity.md",
"chars": 6564,
"preview": "# التعقيد اللغوي للنماذج ذات الطول الثابت\n\n[[open-in-colab]]\n\n التعقيد اللغوي (PPL) هي واحدة من أكثر المقاييس شيوعًا لتق"
},
{
"path": "docs/source/ar/philosophy.md",
"chars": 5026,
"preview": "# الفلسفة\n\nتُعد 🤗 Transformers مكتبة برمجية ذات رؤية واضحة صُممت من أجل:\n\n- الباحثون والمُتعلّمون في مجال التعلم الآلي م"
},
{
"path": "docs/source/ar/pipeline_tutorial.md",
"chars": 15195,
"preview": "# خطوط الأنابيب الاستدلال\n\nيجعل [`pipeline`] من السهل استخدام أي نموذج من [Hub](https://huggingface.co/models) للاستدلال"
},
{
"path": "docs/source/ar/pipeline_webserver.md",
"chars": 6036,
"preview": "# استخدام قنوات المعالجة لخادم ويب \n\n<Tip>\n\nيُعدّ إنشاء محرك استدلال أمرًا معقدًا، ويعتمد الحل \"الأفضل\" على مساحة مشكلتك"
},
{
"path": "docs/source/ar/preprocessing.md",
"chars": 22007,
"preview": "# المعالجة المسبقة Preprocessing\n\n[[open-in-colab]]\n\nقبل تدريب نموذج على مجموعة بيانات، يجب معالجتها مسبقًا وفقًا تنسيق "
},
{
"path": "docs/source/ar/quicktour.md",
"chars": 19898,
"preview": "# جولة سريعة\n\n[[open-in-colab]]\n\nابدأ رحلتك مع مكتبة 🤗 Transformers! سواء كنت مطورًا أو مستخدمًا عاديًا، ستساعدك هذه الج"
},
{
"path": "docs/source/ar/run_scripts.md",
"chars": 13240,
"preview": "# التدريب باستخدام نص برمجى\n\nبالإضافة إلى دفاتر الملاحظات [notebooks](./notebooks) الخاصة بـ 🤗 Transformers، هناك أيضًا "
},
{
"path": "docs/source/ar/sagemaker.md",
"chars": 454,
"preview": "# تشغيل التدريب على Amazon SageMaker\n\nتم نقل التوثيق إلى [hf.co/docs/sagemaker](https://huggingface.co/docs/sagemaker). "
},
{
"path": "docs/source/ar/serialization.md",
"chars": 6471,
"preview": "# التصدير إلى ONNX\n\nغالباً ما يتطلب نشر نماذج 🤗 Transformers في بيئات الإنتاج أو يمكن أن يستفيد من تصدير النماذج إلى تنس"
},
{
"path": "docs/source/ar/task_summary.md",
"chars": 18033,
"preview": "# ما الذي تستطيع مكتبة 🤗 Transformers القيام به؟\n\nمكتبة 🤗 Transformers هي مجموعة من النماذج المُدرّبة مسبقًا الأفضل في ف"
},
{
"path": "docs/source/ar/tasks/language_modeling.md",
"chars": 13356,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "docs/source/ar/tasks/masked_language_modeling.md",
"chars": 13072,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "docs/source/ar/tasks/multiple_choice.md",
"chars": 10493,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/ar/tasks/question_answering.md",
"chars": 11002,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/ar/tasks/sequence_classification.md",
"chars": 9637,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "docs/source/ar/tasks/summarization.md",
"chars": 17245,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/ar/tasks/token_classification.md",
"chars": 13219,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\tLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/ar/tasks/translation.md",
"chars": 10675,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/ar/tasks_explained.md",
"chars": 25069,
"preview": "# كيف تُنجز نماذج 🤗 Transformers المهام؟\n\nفي [ما الذي يمكن أن تفعله نماذج 🤗 Transformers](task_summary)، تعلمت عن معالجة"
},
{
"path": "docs/source/ar/tiktoken.md",
"chars": 1767,
"preview": "# Tiktoken والتفاعل مع Transformers\n\nيتم دمج دعم ملفات نموذج tiktoken بسلاسة في 🤗 transformers عند تحميل النماذج\n`from_p"
},
{
"path": "docs/source/ar/tokenizer_summary.md",
"chars": 15163,
"preview": "# ملخص عن المجزئات اللغوية\n\n[[open-in-colab]]\n\nفي هذه الصفحة، سنتناول بالتفصيل عملية التجزئة.\n\n<Youtube id=\"VFp38yj8h3A\""
},
{
"path": "docs/source/ar/trainer.md",
"chars": 25407,
"preview": "# Trainer\n\nتُتيح وحدة [`Trainer`] حلقة تدريب وتقييم متكاملة لنماذج PyTorch المطبقة في مكتبة Transformers. تحتاج فقط إلى "
},
{
"path": "docs/source/ar/training.md",
"chars": 12110,
"preview": "# ضبط نموذج مُدرب مسبقًا\n\nهناك فوائد كبيرة لاستخدام نموذج مُدرب مسبقًا. فهو يقلل من تكاليف الحوسبة، ويحد من أثرنا البيئي"
},
{
"path": "docs/source/ar/troubleshooting.md",
"chars": 7689,
"preview": "# استكشاف الأخطاء وإصلاحها\n\nتحدث الأخطاء أحيانًا، لكننا هنا للمساعدة! يغطي هذا الدليل بعض المشكلات الأكثر شيوعًا التي وا"
},
{
"path": "docs/source/de/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "docs/source/de/_toctree.yml",
"chars": 1186,
"preview": "- sections:\n - local: index\n title: 🤗 Transformers\n - local: quicktour\n title: Schnellstart\n - local: installat"
},
{
"path": "docs/source/de/accelerate.md",
"chars": 5252,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/add_new_model.md",
"chars": 60967,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/add_new_pipeline.md",
"chars": 11928,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/autoclass_tutorial.md",
"chars": 6163,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/contributing.md",
"chars": 20953,
"preview": "<!---\nCopyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "docs/source/de/index.md",
"chars": 60546,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/installation.md",
"chars": 9905,
"preview": "<!---\nCopyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "docs/source/de/llm_tutorial.md",
"chars": 13034,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/model_sharing.md",
"chars": 10061,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/peft.md",
"chars": 8433,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "docs/source/de/pipeline_tutorial.md",
"chars": 8291,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "docs/source/de/pr_checks.md",
"chars": 13035,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "docs/source/de/preprocessing.md",
"chars": 22900,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
}
]
// ... and 5505 more files (download for full content)
About this extraction
This page contains the full source code of the huggingface/transformers GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 5705 files (74.1 MB), approximately 19.7M tokens, and a symbol index with 65457 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.