Copy disabled (too large)
Download .txt
Showing preview only (78,530K chars total). Download the full file to get everything.
Repository: Ablustrund/MPLSandbox
Branch: main
Commit: dd96e06d4edb
Files: 4523
Total size: 90.7 MB
Directory structure:
gitextract_kdapck8a/
├── LICENSE
├── README.md
├── mplsandbox/
│ ├── __init__.py
│ ├── analyzetools.py
│ ├── const.py
│ ├── sandbox.py
│ ├── tool.py
│ └── utils.py
├── mplsandbox_for_rl/
│ ├── README.md
│ ├── config.py
│ ├── config.yaml
│ ├── data/
│ │ ├── add_index.py
│ │ ├── train.json
│ │ ├── train_all.json
│ │ ├── valid.json
│ │ └── valid_all.json
│ ├── data_helper.py
│ ├── generate_utils.py
│ ├── generation_config.json
│ ├── llama/
│ │ ├── __init__.py
│ │ ├── llama_model.py
│ │ ├── llama_trainer.py
│ │ ├── modeling_moe.py
│ │ └── reward/
│ │ ├── __init__.py
│ │ └── llama_reward_model.py
│ ├── log/
│ │ └── mplsandbox_for_ppo50_beta005_rollout1_0508_debug.log
│ ├── metric.py
│ ├── metric_utils.py
│ ├── ppo/
│ │ ├── __init__.py
│ │ ├── ppo_datahelper.py
│ │ ├── ppo_trainer.py
│ │ └── ppo_utils.py
│ ├── requirements.txt
│ ├── scheduler.py
│ ├── tensorboard_log/
│ │ └── ppo/
│ │ ├── GoReturn_evalstep50_beta005_rollout1_0508_debug/
│ │ │ ├── events.out.tfevents.1724743396.llm1.271880.0
│ │ │ ├── events.out.tfevents.1724927232.llm1.394070.0
│ │ │ ├── events.out.tfevents.1724932911.llm1.405634.0
│ │ │ ├── events.out.tfevents.1724982924.llm1.419045.0
│ │ │ ├── events.out.tfevents.1725000736.llm1.430015.0
│ │ │ ├── events.out.tfevents.1725001088.llm1.432277.0
│ │ │ ├── events.out.tfevents.1725178222.llm1.478675.0
│ │ │ ├── events.out.tfevents.1725180164.llm1.483165.0
│ │ │ ├── events.out.tfevents.1725181196.llm1.486635.0
│ │ │ ├── events.out.tfevents.1725182089.llm1.489725.0
│ │ │ ├── events.out.tfevents.1725182662.llm1.492414.0
│ │ │ ├── events.out.tfevents.1725186372.llm1.505624.0
│ │ │ ├── events.out.tfevents.1725186685.llm1.507779.0
│ │ │ ├── events.out.tfevents.1725186819.llm1.509958.0
│ │ │ ├── events.out.tfevents.1725187117.llm1.512114.0
│ │ │ ├── events.out.tfevents.1725187314.llm1.514285.0
│ │ │ ├── events.out.tfevents.1725241169.llm1.533455.0
│ │ │ ├── events.out.tfevents.1725242595.llm1.538369.0
│ │ │ ├── events.out.tfevents.1725258146.llm1.1526176.0
│ │ │ ├── events.out.tfevents.1725266674.llm1.2196706.0
│ │ │ ├── events.out.tfevents.1725267306.llm1.2247374.0
│ │ │ ├── events.out.tfevents.1725268298.llm1.2318320.0
│ │ │ ├── events.out.tfevents.1725269766.llm1.2321657.0
│ │ │ ├── events.out.tfevents.1725282491.llm1.2710308.0
│ │ │ ├── events.out.tfevents.1725283474.llm1.2793280.0
│ │ │ ├── events.out.tfevents.1725287656.llm1.3115365.0
│ │ │ ├── events.out.tfevents.1725288391.llm1.3177375.0
│ │ │ ├── events.out.tfevents.1725346126.llm1.888255.0
│ │ │ ├── events.out.tfevents.1725354538.llm1.1599322.0
│ │ │ ├── events.out.tfevents.1744640674.n211.1164140.0
│ │ │ ├── events.out.tfevents.1744641932.n211.1171336.0
│ │ │ ├── events.out.tfevents.1744642057.n211.1173796.0
│ │ │ ├── events.out.tfevents.1744644164.n211.1188180.0
│ │ │ ├── events.out.tfevents.1744647751.n211.1208086.0
│ │ │ ├── events.out.tfevents.1744649599.n211.1223334.0
│ │ │ ├── events.out.tfevents.1744650450.n211.1230053.0
│ │ │ ├── events.out.tfevents.1744652152.n211.1240128.0
│ │ │ ├── events.out.tfevents.1744652296.n211.1241825.0
│ │ │ ├── events.out.tfevents.1744653799.n211.1252570.0
│ │ │ ├── events.out.tfevents.1744653913.n211.1253768.0
│ │ │ ├── events.out.tfevents.1744654033.n211.1255378.0
│ │ │ ├── events.out.tfevents.1744654092.n211.1256279.0
│ │ │ ├── events.out.tfevents.1744655044.n211.1262359.0
│ │ │ ├── events.out.tfevents.1744692411.n211.1452941.0
│ │ │ ├── events.out.tfevents.1744704887.n211.1517461.0
│ │ │ ├── events.out.tfevents.1744704949.n211.1518406.0
│ │ │ ├── events.out.tfevents.1744705307.n211.1521301.0
│ │ │ ├── events.out.tfevents.1744705581.n211.1523626.0
│ │ │ ├── events.out.tfevents.1744706082.n211.1527175.0
│ │ │ ├── events.out.tfevents.1744706273.n211.1528838.0
│ │ │ ├── events.out.tfevents.1744706473.n211.1530754.0
│ │ │ ├── events.out.tfevents.1744706726.n211.1533256.0
│ │ │ ├── events.out.tfevents.1744709157.n211.1549899.0
│ │ │ ├── events.out.tfevents.1744709846.n211.1554540.0
│ │ │ ├── events.out.tfevents.1744719922.n211.1610921.0
│ │ │ ├── events.out.tfevents.1744720643.n211.1615790.0
│ │ │ └── events.out.tfevents.1744720998.n211.1618813.0
│ │ └── mplsandbox_for_ppo50_beta005_rollout1_0508_debug/
│ │ ├── events.out.tfevents.1744721546.n211.1623949.0
│ │ ├── events.out.tfevents.1744722319.n211.1631146.0
│ │ └── events.out.tfevents.1744722598.n211.1634201.0
│ ├── tmp/
│ │ └── GoReturn_evalstep50_beta005_rollout1_0508_debug/
│ │ └── experiences/
│ │ └── experiences_0.json
│ ├── tokenizer.py
│ ├── train_ppo.py
│ ├── train_ppo.sh
│ ├── trainer.py
│ ├── transformers/
│ │ ├── .circleci/
│ │ │ ├── TROUBLESHOOT.md
│ │ │ ├── config.yml
│ │ │ ├── create_circleci_config.py
│ │ │ └── parse_test_outputs.py
│ │ ├── .coveragerc
│ │ ├── .gitattributes
│ │ ├── .github/
│ │ │ ├── ISSUE_TEMPLATE/
│ │ │ │ ├── bug-report.yml
│ │ │ │ ├── config.yml
│ │ │ │ ├── feature-request.yml
│ │ │ │ ├── i18n.md
│ │ │ │ ├── migration.yml
│ │ │ │ └── new-model-addition.yml
│ │ │ ├── PULL_REQUEST_TEMPLATE.md
│ │ │ ├── conda/
│ │ │ │ ├── build.sh
│ │ │ │ └── meta.yaml
│ │ │ └── workflows/
│ │ │ ├── TROUBLESHOOT.md
│ │ │ ├── add-model-like.yml
│ │ │ ├── benchmark.yml
│ │ │ ├── build-ci-docker-images.yml
│ │ │ ├── build-docker-images.yml
│ │ │ ├── build-nightly-ci-docker-images.yml
│ │ │ ├── build-past-ci-docker-images.yml
│ │ │ ├── build_documentation.yml
│ │ │ ├── build_pr_documentation.yml
│ │ │ ├── check_tiny_models.yml
│ │ │ ├── doctest_job.yml
│ │ │ ├── doctests.yml
│ │ │ ├── model_jobs.yml
│ │ │ ├── push-important-models.yml
│ │ │ ├── release-conda.yml
│ │ │ ├── self-nightly-caller.yml
│ │ │ ├── self-nightly-past-ci-caller.yml
│ │ │ ├── self-past-caller.yml
│ │ │ ├── self-pr-slow-ci.yml
│ │ │ ├── self-push-amd-mi210-caller.yml
│ │ │ ├── self-push-amd-mi250-caller.yml
│ │ │ ├── self-push-amd-mi300-caller.yml
│ │ │ ├── self-push-amd.yml
│ │ │ ├── self-push-caller.yml
│ │ │ ├── self-push.yml
│ │ │ ├── self-scheduled-amd-caller.yml
│ │ │ ├── self-scheduled-amd-mi210-caller.yml
│ │ │ ├── self-scheduled-amd-mi250-caller.yml
│ │ │ ├── self-scheduled-amd-mi300-caller.yml
│ │ │ ├── self-scheduled-amd.yml
│ │ │ ├── self-scheduled-caller.yml
│ │ │ ├── self-scheduled.yml
│ │ │ ├── slack-report.yml
│ │ │ ├── ssh-runner.yml
│ │ │ ├── stale.yml
│ │ │ ├── trufflehog.yml
│ │ │ ├── update_metdata.yml
│ │ │ └── upload_pr_documentation.yml
│ │ ├── .gitignore
│ │ ├── CITATION.cff
│ │ ├── CODE_OF_CONDUCT.md
│ │ ├── CONTRIBUTING.md
│ │ ├── ISSUES.md
│ │ ├── LICENSE
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── SECURITY.md
│ │ ├── awesome-transformers.md
│ │ ├── benchmark/
│ │ │ ├── __init__.py
│ │ │ ├── benchmark.py
│ │ │ ├── config/
│ │ │ │ └── generation.yaml
│ │ │ └── optimum_benchmark_wrapper.py
│ │ ├── conftest.py
│ │ ├── docker/
│ │ │ ├── consistency.dockerfile
│ │ │ ├── custom-tokenizers.dockerfile
│ │ │ ├── examples-tf.dockerfile
│ │ │ ├── examples-torch.dockerfile
│ │ │ ├── exotic-models.dockerfile
│ │ │ ├── jax-light.dockerfile
│ │ │ ├── pipeline-tf.dockerfile
│ │ │ ├── pipeline-torch.dockerfile
│ │ │ ├── quality.dockerfile
│ │ │ ├── tf-light.dockerfile
│ │ │ ├── torch-jax-light.dockerfile
│ │ │ ├── torch-light.dockerfile
│ │ │ ├── torch-tf-light.dockerfile
│ │ │ ├── transformers-all-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-doc-builder/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-past-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-amd-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-amd-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-nightly-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-tpu/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── bert-base-cased.jsonnet
│ │ │ │ ├── dataset.yaml
│ │ │ │ └── docker-entrypoint.sh
│ │ │ ├── transformers-quantization-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ └── transformers-tensorflow-gpu/
│ │ │ └── Dockerfile
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── TRANSLATING.md
│ │ │ └── source/
│ │ │ ├── _config.py
│ │ │ ├── de/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── training.md
│ │ │ │ └── transformers_agents.md
│ │ │ ├── en/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _redirects.yml
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── agents.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── benchmarks.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── conversations.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── fsdp.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── gguf.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── internal/
│ │ │ │ │ ├── audio_utils.md
│ │ │ │ │ ├── file_utils.md
│ │ │ │ │ ├── generation_utils.md
│ │ │ │ │ ├── image_processing_utils.md
│ │ │ │ │ ├── modeling_utils.md
│ │ │ │ │ ├── pipelines_utils.md
│ │ │ │ │ ├── time_series_utils.md
│ │ │ │ │ ├── tokenization_utils.md
│ │ │ │ │ └── trainer_utils.md
│ │ │ │ ├── kv_cache.md
│ │ │ │ ├── llm_optims.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── llm_tutorial_optimization.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ ├── agent.md
│ │ │ │ │ ├── backbones.md
│ │ │ │ │ ├── callback.md
│ │ │ │ │ ├── configuration.md
│ │ │ │ │ ├── data_collator.md
│ │ │ │ │ ├── deepspeed.md
│ │ │ │ │ ├── feature_extractor.md
│ │ │ │ │ ├── image_processor.md
│ │ │ │ │ ├── keras_callbacks.md
│ │ │ │ │ ├── logging.md
│ │ │ │ │ ├── model.md
│ │ │ │ │ ├── onnx.md
│ │ │ │ │ ├── optimizer_schedules.md
│ │ │ │ │ ├── output.md
│ │ │ │ │ ├── pipelines.md
│ │ │ │ │ ├── processors.md
│ │ │ │ │ ├── quantization.md
│ │ │ │ │ ├── text_generation.md
│ │ │ │ │ ├── tokenizer.md
│ │ │ │ │ └── trainer.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── albert.md
│ │ │ │ │ ├── align.md
│ │ │ │ │ ├── altclip.md
│ │ │ │ │ ├── audio-spectrogram-transformer.md
│ │ │ │ │ ├── auto.md
│ │ │ │ │ ├── autoformer.md
│ │ │ │ │ ├── bark.md
│ │ │ │ │ ├── bart.md
│ │ │ │ │ ├── barthez.md
│ │ │ │ │ ├── bartpho.md
│ │ │ │ │ ├── beit.md
│ │ │ │ │ ├── bert-generation.md
│ │ │ │ │ ├── bert-japanese.md
│ │ │ │ │ ├── bert.md
│ │ │ │ │ ├── bertweet.md
│ │ │ │ │ ├── big_bird.md
│ │ │ │ │ ├── bigbird_pegasus.md
│ │ │ │ │ ├── biogpt.md
│ │ │ │ │ ├── bit.md
│ │ │ │ │ ├── blenderbot-small.md
│ │ │ │ │ ├── blenderbot.md
│ │ │ │ │ ├── blip-2.md
│ │ │ │ │ ├── blip.md
│ │ │ │ │ ├── bloom.md
│ │ │ │ │ ├── bort.md
│ │ │ │ │ ├── bridgetower.md
│ │ │ │ │ ├── bros.md
│ │ │ │ │ ├── byt5.md
│ │ │ │ │ ├── camembert.md
│ │ │ │ │ ├── canine.md
│ │ │ │ │ ├── chameleon.md
│ │ │ │ │ ├── chinese_clip.md
│ │ │ │ │ ├── clap.md
│ │ │ │ │ ├── clip.md
│ │ │ │ │ ├── clipseg.md
│ │ │ │ │ ├── clvp.md
│ │ │ │ │ ├── code_llama.md
│ │ │ │ │ ├── codegen.md
│ │ │ │ │ ├── cohere.md
│ │ │ │ │ ├── conditional_detr.md
│ │ │ │ │ ├── convbert.md
│ │ │ │ │ ├── convnext.md
│ │ │ │ │ ├── convnextv2.md
│ │ │ │ │ ├── cpm.md
│ │ │ │ │ ├── cpmant.md
│ │ │ │ │ ├── ctrl.md
│ │ │ │ │ ├── cvt.md
│ │ │ │ │ ├── dac.md
│ │ │ │ │ ├── data2vec.md
│ │ │ │ │ ├── dbrx.md
│ │ │ │ │ ├── deberta-v2.md
│ │ │ │ │ ├── deberta.md
│ │ │ │ │ ├── decision_transformer.md
│ │ │ │ │ ├── deformable_detr.md
│ │ │ │ │ ├── deit.md
│ │ │ │ │ ├── deplot.md
│ │ │ │ │ ├── depth_anything.md
│ │ │ │ │ ├── depth_anything_v2.md
│ │ │ │ │ ├── deta.md
│ │ │ │ │ ├── detr.md
│ │ │ │ │ ├── dialogpt.md
│ │ │ │ │ ├── dinat.md
│ │ │ │ │ ├── dinov2.md
│ │ │ │ │ ├── distilbert.md
│ │ │ │ │ ├── dit.md
│ │ │ │ │ ├── donut.md
│ │ │ │ │ ├── dpr.md
│ │ │ │ │ ├── dpt.md
│ │ │ │ │ ├── efficientformer.md
│ │ │ │ │ ├── efficientnet.md
│ │ │ │ │ ├── electra.md
│ │ │ │ │ ├── encodec.md
│ │ │ │ │ ├── encoder-decoder.md
│ │ │ │ │ ├── ernie.md
│ │ │ │ │ ├── ernie_m.md
│ │ │ │ │ ├── esm.md
│ │ │ │ │ ├── falcon.md
│ │ │ │ │ ├── falcon_mamba.md
│ │ │ │ │ ├── fastspeech2_conformer.md
│ │ │ │ │ ├── flan-t5.md
│ │ │ │ │ ├── flan-ul2.md
│ │ │ │ │ ├── flaubert.md
│ │ │ │ │ ├── flava.md
│ │ │ │ │ ├── fnet.md
│ │ │ │ │ ├── focalnet.md
│ │ │ │ │ ├── fsmt.md
│ │ │ │ │ ├── funnel.md
│ │ │ │ │ ├── fuyu.md
│ │ │ │ │ ├── gemma.md
│ │ │ │ │ ├── gemma2.md
│ │ │ │ │ ├── git.md
│ │ │ │ │ ├── glpn.md
│ │ │ │ │ ├── gpt-sw3.md
│ │ │ │ │ ├── gpt2.md
│ │ │ │ │ ├── gpt_bigcode.md
│ │ │ │ │ ├── gpt_neo.md
│ │ │ │ │ ├── gpt_neox.md
│ │ │ │ │ ├── gpt_neox_japanese.md
│ │ │ │ │ ├── gptj.md
│ │ │ │ │ ├── gptsan-japanese.md
│ │ │ │ │ ├── graphormer.md
│ │ │ │ │ ├── grounding-dino.md
│ │ │ │ │ ├── groupvit.md
│ │ │ │ │ ├── herbert.md
│ │ │ │ │ ├── hiera.md
│ │ │ │ │ ├── hubert.md
│ │ │ │ │ ├── ibert.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── idefics2.md
│ │ │ │ │ ├── imagegpt.md
│ │ │ │ │ ├── informer.md
│ │ │ │ │ ├── instructblip.md
│ │ │ │ │ ├── instructblipvideo.md
│ │ │ │ │ ├── jamba.md
│ │ │ │ │ ├── jetmoe.md
│ │ │ │ │ ├── jukebox.md
│ │ │ │ │ ├── kosmos-2.md
│ │ │ │ │ ├── layoutlm.md
│ │ │ │ │ ├── layoutlmv2.md
│ │ │ │ │ ├── layoutlmv3.md
│ │ │ │ │ ├── layoutxlm.md
│ │ │ │ │ ├── led.md
│ │ │ │ │ ├── levit.md
│ │ │ │ │ ├── lilt.md
│ │ │ │ │ ├── llama.md
│ │ │ │ │ ├── llama2.md
│ │ │ │ │ ├── llama3.md
│ │ │ │ │ ├── llava.md
│ │ │ │ │ ├── llava_next.md
│ │ │ │ │ ├── llava_next_video.md
│ │ │ │ │ ├── longformer.md
│ │ │ │ │ ├── longt5.md
│ │ │ │ │ ├── luke.md
│ │ │ │ │ ├── lxmert.md
│ │ │ │ │ ├── m2m_100.md
│ │ │ │ │ ├── madlad-400.md
│ │ │ │ │ ├── mamba.md
│ │ │ │ │ ├── mamba2.md
│ │ │ │ │ ├── marian.md
│ │ │ │ │ ├── markuplm.md
│ │ │ │ │ ├── mask2former.md
│ │ │ │ │ ├── maskformer.md
│ │ │ │ │ ├── matcha.md
│ │ │ │ │ ├── mbart.md
│ │ │ │ │ ├── mctct.md
│ │ │ │ │ ├── mega.md
│ │ │ │ │ ├── megatron-bert.md
│ │ │ │ │ ├── megatron_gpt2.md
│ │ │ │ │ ├── mgp-str.md
│ │ │ │ │ ├── mistral.md
│ │ │ │ │ ├── mixtral.md
│ │ │ │ │ ├── mluke.md
│ │ │ │ │ ├── mms.md
│ │ │ │ │ ├── mobilebert.md
│ │ │ │ │ ├── mobilenet_v1.md
│ │ │ │ │ ├── mobilenet_v2.md
│ │ │ │ │ ├── mobilevit.md
│ │ │ │ │ ├── mobilevitv2.md
│ │ │ │ │ ├── mpnet.md
│ │ │ │ │ ├── mpt.md
│ │ │ │ │ ├── mra.md
│ │ │ │ │ ├── mt5.md
│ │ │ │ │ ├── musicgen.md
│ │ │ │ │ ├── musicgen_melody.md
│ │ │ │ │ ├── mvp.md
│ │ │ │ │ ├── nat.md
│ │ │ │ │ ├── nemotron.md
│ │ │ │ │ ├── nezha.md
│ │ │ │ │ ├── nllb-moe.md
│ │ │ │ │ ├── nllb.md
│ │ │ │ │ ├── nougat.md
│ │ │ │ │ ├── nystromformer.md
│ │ │ │ │ ├── olmo.md
│ │ │ │ │ ├── oneformer.md
│ │ │ │ │ ├── open-llama.md
│ │ │ │ │ ├── openai-gpt.md
│ │ │ │ │ ├── opt.md
│ │ │ │ │ ├── owlv2.md
│ │ │ │ │ ├── owlvit.md
│ │ │ │ │ ├── paligemma.md
│ │ │ │ │ ├── patchtsmixer.md
│ │ │ │ │ ├── patchtst.md
│ │ │ │ │ ├── pegasus.md
│ │ │ │ │ ├── pegasus_x.md
│ │ │ │ │ ├── perceiver.md
│ │ │ │ │ ├── persimmon.md
│ │ │ │ │ ├── phi.md
│ │ │ │ │ ├── phi3.md
│ │ │ │ │ ├── phobert.md
│ │ │ │ │ ├── pix2struct.md
│ │ │ │ │ ├── plbart.md
│ │ │ │ │ ├── poolformer.md
│ │ │ │ │ ├── pop2piano.md
│ │ │ │ │ ├── prophetnet.md
│ │ │ │ │ ├── pvt.md
│ │ │ │ │ ├── pvt_v2.md
│ │ │ │ │ ├── qdqbert.md
│ │ │ │ │ ├── qwen2.md
│ │ │ │ │ ├── qwen2_audio.md
│ │ │ │ │ ├── qwen2_moe.md
│ │ │ │ │ ├── rag.md
│ │ │ │ │ ├── realm.md
│ │ │ │ │ ├── recurrent_gemma.md
│ │ │ │ │ ├── reformer.md
│ │ │ │ │ ├── regnet.md
│ │ │ │ │ ├── rembert.md
│ │ │ │ │ ├── resnet.md
│ │ │ │ │ ├── retribert.md
│ │ │ │ │ ├── roberta-prelayernorm.md
│ │ │ │ │ ├── roberta.md
│ │ │ │ │ ├── roc_bert.md
│ │ │ │ │ ├── roformer.md
│ │ │ │ │ ├── rt_detr.md
│ │ │ │ │ ├── rwkv.md
│ │ │ │ │ ├── sam.md
│ │ │ │ │ ├── seamless_m4t.md
│ │ │ │ │ ├── seamless_m4t_v2.md
│ │ │ │ │ ├── segformer.md
│ │ │ │ │ ├── seggpt.md
│ │ │ │ │ ├── sew-d.md
│ │ │ │ │ ├── sew.md
│ │ │ │ │ ├── siglip.md
│ │ │ │ │ ├── speech-encoder-decoder.md
│ │ │ │ │ ├── speech_to_text.md
│ │ │ │ │ ├── speech_to_text_2.md
│ │ │ │ │ ├── speecht5.md
│ │ │ │ │ ├── splinter.md
│ │ │ │ │ ├── squeezebert.md
│ │ │ │ │ ├── stablelm.md
│ │ │ │ │ ├── starcoder2.md
│ │ │ │ │ ├── superpoint.md
│ │ │ │ │ ├── swiftformer.md
│ │ │ │ │ ├── swin.md
│ │ │ │ │ ├── swin2sr.md
│ │ │ │ │ ├── swinv2.md
│ │ │ │ │ ├── switch_transformers.md
│ │ │ │ │ ├── t5.md
│ │ │ │ │ ├── t5v1.1.md
│ │ │ │ │ ├── table-transformer.md
│ │ │ │ │ ├── tapas.md
│ │ │ │ │ ├── tapex.md
│ │ │ │ │ ├── time_series_transformer.md
│ │ │ │ │ ├── timesformer.md
│ │ │ │ │ ├── trajectory_transformer.md
│ │ │ │ │ ├── transfo-xl.md
│ │ │ │ │ ├── trocr.md
│ │ │ │ │ ├── tvlt.md
│ │ │ │ │ ├── tvp.md
│ │ │ │ │ ├── udop.md
│ │ │ │ │ ├── ul2.md
│ │ │ │ │ ├── umt5.md
│ │ │ │ │ ├── unispeech-sat.md
│ │ │ │ │ ├── unispeech.md
│ │ │ │ │ ├── univnet.md
│ │ │ │ │ ├── upernet.md
│ │ │ │ │ ├── van.md
│ │ │ │ │ ├── video_llava.md
│ │ │ │ │ ├── videomae.md
│ │ │ │ │ ├── vilt.md
│ │ │ │ │ ├── vipllava.md
│ │ │ │ │ ├── vision-encoder-decoder.md
│ │ │ │ │ ├── vision-text-dual-encoder.md
│ │ │ │ │ ├── visual_bert.md
│ │ │ │ │ ├── vit.md
│ │ │ │ │ ├── vit_hybrid.md
│ │ │ │ │ ├── vit_mae.md
│ │ │ │ │ ├── vit_msn.md
│ │ │ │ │ ├── vitdet.md
│ │ │ │ │ ├── vitmatte.md
│ │ │ │ │ ├── vits.md
│ │ │ │ │ ├── vivit.md
│ │ │ │ │ ├── wav2vec2-bert.md
│ │ │ │ │ ├── wav2vec2-conformer.md
│ │ │ │ │ ├── wav2vec2.md
│ │ │ │ │ ├── wav2vec2_phoneme.md
│ │ │ │ │ ├── wavlm.md
│ │ │ │ │ ├── whisper.md
│ │ │ │ │ ├── xclip.md
│ │ │ │ │ ├── xglm.md
│ │ │ │ │ ├── xlm-prophetnet.md
│ │ │ │ │ ├── xlm-roberta-xl.md
│ │ │ │ │ ├── xlm-roberta.md
│ │ │ │ │ ├── xlm-v.md
│ │ │ │ │ ├── xlm.md
│ │ │ │ │ ├── xlnet.md
│ │ │ │ │ ├── xls_r.md
│ │ │ │ │ ├── xlsr_wav2vec2.md
│ │ │ │ │ ├── xmod.md
│ │ │ │ │ ├── yolos.md
│ │ │ │ │ ├── yoso.md
│ │ │ │ │ └── zoedepth.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── notebooks.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_torch_compile.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_gpu_one.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quantization/
│ │ │ │ │ ├── aqlm.md
│ │ │ │ │ ├── awq.md
│ │ │ │ │ ├── bitsandbytes.md
│ │ │ │ │ ├── contribute.md
│ │ │ │ │ ├── eetq.md
│ │ │ │ │ ├── fbgemm_fp8.md
│ │ │ │ │ ├── gptq.md
│ │ │ │ │ ├── hqq.md
│ │ │ │ │ ├── optimum.md
│ │ │ │ │ ├── overview.md
│ │ │ │ │ ├── quanto.md
│ │ │ │ │ └── torchao.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_feature_extraction.md
│ │ │ │ │ ├── image_text_to_text.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── mask_generation.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── text-to-speech.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ ├── training.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── es/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ └── summarization.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ └── training.md
│ │ │ ├── fr/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── in_translation.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts_fr.md
│ │ │ │ └── tutoriel_pipeline.md
│ │ │ ├── hi/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── pipeline_tutorial.md
│ │ │ ├── it/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── community.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── migration.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_many.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_infer_special.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ └── training.md
│ │ │ ├── ja/
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── benchmarks.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── custom_tools.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── internal/
│ │ │ │ │ ├── audio_utils.md
│ │ │ │ │ ├── file_utils.md
│ │ │ │ │ ├── generation_utils.md
│ │ │ │ │ ├── image_processing_utils.md
│ │ │ │ │ ├── modeling_utils.md
│ │ │ │ │ ├── pipelines_utils.md
│ │ │ │ │ ├── time_series_utils.md
│ │ │ │ │ ├── tokenization_utils.md
│ │ │ │ │ └── trainer_utils.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ ├── agent.md
│ │ │ │ │ ├── callback.md
│ │ │ │ │ ├── configuration.md
│ │ │ │ │ ├── data_collator.md
│ │ │ │ │ ├── deepspeed.md
│ │ │ │ │ ├── feature_extractor.md
│ │ │ │ │ ├── image_processor.md
│ │ │ │ │ ├── keras_callbacks.md
│ │ │ │ │ ├── logging.md
│ │ │ │ │ ├── model.md
│ │ │ │ │ ├── onnx.md
│ │ │ │ │ ├── optimizer_schedules.md
│ │ │ │ │ ├── output.md
│ │ │ │ │ ├── pipelines.md
│ │ │ │ │ ├── processors.md
│ │ │ │ │ ├── quantization.md
│ │ │ │ │ ├── text_generation.md
│ │ │ │ │ ├── tokenizer.md
│ │ │ │ │ └── trainer.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── albert.md
│ │ │ │ │ ├── align.md
│ │ │ │ │ ├── altclip.md
│ │ │ │ │ ├── audio-spectrogram-transformer.md
│ │ │ │ │ ├── auto.md
│ │ │ │ │ ├── autoformer.md
│ │ │ │ │ ├── bark.md
│ │ │ │ │ ├── bart.md
│ │ │ │ │ ├── barthez.md
│ │ │ │ │ ├── bartpho.md
│ │ │ │ │ ├── beit.md
│ │ │ │ │ ├── bert-generation.md
│ │ │ │ │ ├── bert-japanese.md
│ │ │ │ │ ├── bert.md
│ │ │ │ │ ├── bertweet.md
│ │ │ │ │ ├── big_bird.md
│ │ │ │ │ ├── bigbird_pegasus.md
│ │ │ │ │ ├── biogpt.md
│ │ │ │ │ ├── bit.md
│ │ │ │ │ ├── blenderbot-small.md
│ │ │ │ │ ├── blenderbot.md
│ │ │ │ │ ├── blip-2.md
│ │ │ │ │ ├── blip.md
│ │ │ │ │ ├── bloom.md
│ │ │ │ │ ├── bort.md
│ │ │ │ │ ├── bridgetower.md
│ │ │ │ │ ├── bros.md
│ │ │ │ │ ├── byt5.md
│ │ │ │ │ ├── camembert.md
│ │ │ │ │ ├── canine.md
│ │ │ │ │ ├── chinese_clip.md
│ │ │ │ │ ├── clap.md
│ │ │ │ │ ├── clip.md
│ │ │ │ │ ├── clipseg.md
│ │ │ │ │ ├── clvp.md
│ │ │ │ │ ├── code_llama.md
│ │ │ │ │ ├── codegen.md
│ │ │ │ │ ├── conditional_detr.md
│ │ │ │ │ ├── convbert.md
│ │ │ │ │ ├── convnext.md
│ │ │ │ │ ├── convnextv2.md
│ │ │ │ │ ├── cpm.md
│ │ │ │ │ ├── cpmant.md
│ │ │ │ │ ├── ctrl.md
│ │ │ │ │ ├── cvt.md
│ │ │ │ │ ├── data2vec.md
│ │ │ │ │ ├── deberta-v2.md
│ │ │ │ │ ├── deberta.md
│ │ │ │ │ ├── decision_transformer.md
│ │ │ │ │ ├── deformable_detr.md
│ │ │ │ │ ├── deit.md
│ │ │ │ │ ├── deplot.md
│ │ │ │ │ ├── deta.md
│ │ │ │ │ ├── detr.md
│ │ │ │ │ ├── dialogpt.md
│ │ │ │ │ └── dinat.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_many.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_infer_special.md
│ │ │ │ ├── perf_torch_compile.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_gpu_one.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── text-to-speech.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── training.md
│ │ │ │ ├── transformers_agents.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── ko/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── fsdp.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── in_translation.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── llm_tutorial_optimization.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ └── agent.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── llama.md
│ │ │ │ │ ├── llama2.md
│ │ │ │ │ └── whisper.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quantization/
│ │ │ │ │ ├── awq.md
│ │ │ │ │ ├── bitsandbytes.md
│ │ │ │ │ ├── eetq.md
│ │ │ │ │ ├── gptq.md
│ │ │ │ │ └── quanto.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_feature_extraction.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── mask_generation.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ ├── training.md
│ │ │ │ ├── transformers_agents.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── ms/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── index.md
│ │ │ ├── pt/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ └── token_classification.md
│ │ │ │ └── training.md
│ │ │ ├── te/
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── index.md
│ │ │ │ └── quicktour.md
│ │ │ ├── tr/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── index.md
│ │ │ └── zh/
│ │ │ ├── _toctree.yml
│ │ │ ├── accelerate.md
│ │ │ ├── add_new_pipeline.md
│ │ │ ├── autoclass_tutorial.md
│ │ │ ├── big_models.md
│ │ │ ├── chat_templating.md
│ │ │ ├── contributing.md
│ │ │ ├── create_a_model.md
│ │ │ ├── custom_models.md
│ │ │ ├── debugging.md
│ │ │ ├── fast_tokenizers.md
│ │ │ ├── fsdp.md
│ │ │ ├── hpo_train.md
│ │ │ ├── index.md
│ │ │ ├── installation.md
│ │ │ ├── internal/
│ │ │ │ ├── audio_utils.md
│ │ │ │ ├── file_utils.md
│ │ │ │ ├── generation_utils.md
│ │ │ │ ├── image_processing_utils.md
│ │ │ │ ├── modeling_utils.md
│ │ │ │ ├── pipelines_utils.md
│ │ │ │ ├── time_series_utils.md
│ │ │ │ ├── tokenization_utils.md
│ │ │ │ └── trainer_utils.md
│ │ │ ├── llm_tutorial.md
│ │ │ ├── main_classes/
│ │ │ │ ├── agent.md
│ │ │ │ ├── callback.md
│ │ │ │ ├── configuration.md
│ │ │ │ ├── data_collator.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── feature_extractor.md
│ │ │ │ ├── image_processor.md
│ │ │ │ ├── keras_callbacks.md
│ │ │ │ ├── logging.md
│ │ │ │ ├── model.md
│ │ │ │ ├── onnx.md
│ │ │ │ ├── optimizer_schedules.md
│ │ │ │ ├── output.md
│ │ │ │ ├── pipelines.md
│ │ │ │ ├── processors.md
│ │ │ │ ├── quantization.md
│ │ │ │ ├── text_generation.md
│ │ │ │ ├── tokenizer.md
│ │ │ │ └── trainer.md
│ │ │ ├── model_sharing.md
│ │ │ ├── multilingual.md
│ │ │ ├── peft.md
│ │ │ ├── perf_hardware.md
│ │ │ ├── perf_torch_compile.md
│ │ │ ├── performance.md
│ │ │ ├── philosophy.md
│ │ │ ├── pipeline_tutorial.md
│ │ │ ├── preprocessing.md
│ │ │ ├── quicktour.md
│ │ │ ├── run_scripts.md
│ │ │ ├── serialization.md
│ │ │ ├── task_summary.md
│ │ │ ├── tasks/
│ │ │ │ └── asr.md
│ │ │ ├── tf_xla.md
│ │ │ ├── tflite.md
│ │ │ ├── tokenizer_summary.md
│ │ │ ├── torchscript.md
│ │ │ ├── training.md
│ │ │ └── transformers_agents.md
│ │ ├── examples/
│ │ │ ├── README.md
│ │ │ ├── diff-conversion/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_examples.sh
│ │ │ │ ├── diff_dummy.py
│ │ │ │ ├── diff_my_new_model.py
│ │ │ │ ├── diff_my_new_model2.py
│ │ │ │ ├── diff_new_model.py
│ │ │ │ └── diff_super.py
│ │ │ ├── flax/
│ │ │ │ ├── README.md
│ │ │ │ ├── _tests_requirements.txt
│ │ │ │ ├── conftest.py
│ │ │ │ ├── image-captioning/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── create_model_from_encoder_decoder_models.py
│ │ │ │ │ └── run_image_captioning_flax.py
│ │ │ │ ├── language-modeling/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_bart_dlm_flax.py
│ │ │ │ │ ├── run_clm_flax.py
│ │ │ │ │ ├── run_mlm_flax.py
│ │ │ │ │ ├── run_t5_mlm_flax.py
│ │ │ │ │ └── t5_tokenizer_model.py
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── speech-recognition/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_speech_recognition_seq2seq.py
│ │ │ │ ├── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_summarization_flax.py
│ │ │ │ ├── test_flax_examples.py
│ │ │ │ ├── text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_glue.py
│ │ │ │ ├── token-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_ner.py
│ │ │ │ └── vision/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_image_classification.py
│ │ │ ├── legacy/
│ │ │ │ ├── README.md
│ │ │ │ ├── benchmarking/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── plot_csv_file.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_benchmark.py
│ │ │ │ ├── multiple_choice/
│ │ │ │ │ ├── run_multiple_choice.py
│ │ │ │ │ └── utils_multiple_choice.py
│ │ │ │ ├── pytorch-lightning/
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue.py
│ │ │ │ │ ├── run_glue.sh
│ │ │ │ │ ├── run_ner.py
│ │ │ │ │ ├── run_ner.sh
│ │ │ │ │ └── run_pos.sh
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_squad.py
│ │ │ │ │ └── run_squad_trainer.py
│ │ │ │ ├── run_camembert.py
│ │ │ │ ├── run_chinese_ref.py
│ │ │ │ ├── run_language_modeling.py
│ │ │ │ ├── run_openai_gpt.py
│ │ │ │ ├── run_swag.py
│ │ │ │ ├── run_transfo_xl.py
│ │ │ │ ├── seq2seq/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_model_to_fp16.py
│ │ │ │ │ ├── download_wmt.py
│ │ │ │ │ ├── finetune.sh
│ │ │ │ │ ├── finetune_tpu.sh
│ │ │ │ │ ├── finetune_trainer.py
│ │ │ │ │ ├── minify_dataset.py
│ │ │ │ │ ├── old_test_calculate_rouge.py
│ │ │ │ │ ├── old_test_datasets.py
│ │ │ │ │ ├── old_test_fsmt_bleu_score.py
│ │ │ │ │ ├── old_test_seq2seq_examples.py
│ │ │ │ │ ├── old_test_seq2seq_examples_multi_gpu.py
│ │ │ │ │ ├── old_test_tatoeba_conversion.py
│ │ │ │ │ ├── pack_dataset.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── romanian_postprocessing.md
│ │ │ │ │ ├── rouge_cli.py
│ │ │ │ │ ├── run_distributed_eval.py
│ │ │ │ │ ├── run_eval.py
│ │ │ │ │ ├── run_eval_search.py
│ │ │ │ │ ├── save_len_file.py
│ │ │ │ │ ├── save_randomly_initialized_model.py
│ │ │ │ │ ├── sentence_splitter.py
│ │ │ │ │ ├── seq2seq_trainer.py
│ │ │ │ │ ├── seq2seq_training_args.py
│ │ │ │ │ ├── test_data/
│ │ │ │ │ │ ├── fsmt/
│ │ │ │ │ │ │ ├── build-eval-data.py
│ │ │ │ │ │ │ └── fsmt_val_data.json
│ │ │ │ │ │ └── wmt_en_ro/
│ │ │ │ │ │ ├── test.source
│ │ │ │ │ │ ├── test.target
│ │ │ │ │ │ ├── train.len
│ │ │ │ │ │ ├── train.source
│ │ │ │ │ │ ├── train.target
│ │ │ │ │ │ ├── val.len
│ │ │ │ │ │ ├── val.source
│ │ │ │ │ │ └── val.target
│ │ │ │ │ ├── train_distil_marian_enro.sh
│ │ │ │ │ ├── train_distil_marian_enro_tpu.sh
│ │ │ │ │ ├── train_distilbart_cnn.sh
│ │ │ │ │ ├── train_mbart_cc25_enro.sh
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── xla_spawn.py
│ │ │ │ └── token-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── run.sh
│ │ │ │ ├── run_chunk.sh
│ │ │ │ ├── run_ner.py
│ │ │ │ ├── run_pos.sh
│ │ │ │ ├── scripts/
│ │ │ │ │ └── preprocess.py
│ │ │ │ ├── tasks.py
│ │ │ │ └── utils_ner.py
│ │ │ ├── pytorch/
│ │ │ │ ├── README.md
│ │ │ │ ├── _tests_requirements.txt
│ │ │ │ ├── audio-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_audio_classification.py
│ │ │ │ ├── conftest.py
│ │ │ │ ├── contrastive-image-text/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_clip.py
│ │ │ │ ├── image-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_image_classification.py
│ │ │ │ │ └── run_image_classification_no_trainer.py
│ │ │ │ ├── image-pretraining/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_mae.py
│ │ │ │ │ ├── run_mim.py
│ │ │ │ │ └── run_mim_no_trainer.py
│ │ │ │ ├── instance-segmentation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_instance_segmentation.py
│ │ │ │ │ └── run_instance_segmentation_no_trainer.py
│ │ │ │ ├── language-modeling/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_clm.py
│ │ │ │ │ ├── run_clm_no_trainer.py
│ │ │ │ │ ├── run_fim.py
│ │ │ │ │ ├── run_fim_no_trainer.py
│ │ │ │ │ ├── run_mlm.py
│ │ │ │ │ ├── run_mlm_no_trainer.py
│ │ │ │ │ └── run_plm.py
│ │ │ │ ├── multiple-choice/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_no_trainer.sh
│ │ │ │ │ ├── run_swag.py
│ │ │ │ │ └── run_swag_no_trainer.py
│ │ │ │ ├── object-detection/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_object_detection.py
│ │ │ │ │ └── run_object_detection_no_trainer.py
│ │ │ │ ├── old_test_xla_examples.py
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_qa.py
│ │ │ │ │ ├── run_qa_beam_search.py
│ │ │ │ │ ├── run_qa_beam_search_no_trainer.py
│ │ │ │ │ ├── run_qa_no_trainer.py
│ │ │ │ │ ├── run_seq2seq_qa.py
│ │ │ │ │ ├── trainer_qa.py
│ │ │ │ │ ├── trainer_seq2seq_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── semantic-segmentation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_semantic_segmentation.py
│ │ │ │ │ └── run_semantic_segmentation_no_trainer.py
│ │ │ │ ├── speech-pretraining/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_wav2vec2_pretraining_no_trainer.py
│ │ │ │ ├── speech-recognition/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_speech_recognition_ctc.py
│ │ │ │ │ ├── run_speech_recognition_ctc_adapter.py
│ │ │ │ │ └── run_speech_recognition_seq2seq.py
│ │ │ │ ├── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_summarization.py
│ │ │ │ │ └── run_summarization_no_trainer.py
│ │ │ │ ├── test_accelerate_examples.py
│ │ │ │ ├── test_pytorch_examples.py
│ │ │ │ ├── text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_classification.py
│ │ │ │ │ ├── run_glue.py
│ │ │ │ │ ├── run_glue_no_trainer.py
│ │ │ │ │ └── run_xnli.py
│ │ │ │ ├── text-generation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_generation.py
│ │ │ │ │ └── run_generation_contrastive_search.py
│ │ │ │ ├── token-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run.sh
│ │ │ │ │ ├── run_ner.py
│ │ │ │ │ ├── run_ner_no_trainer.py
│ │ │ │ │ └── run_no_trainer.sh
│ │ │ │ ├── translation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_translation.py
│ │ │ │ │ └── run_translation_no_trainer.py
│ │ │ │ └── xla_spawn.py
│ │ │ ├── research_projects/
│ │ │ │ ├── README.md
│ │ │ │ ├── adversarial/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_hans.py
│ │ │ │ │ └── utils_hans.py
│ │ │ │ ├── bert-loses-patience/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pabee/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── modeling_pabee_albert.py
│ │ │ │ │ │ └── modeling_pabee_bert.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue_with_pabee.py
│ │ │ │ │ └── test_run_glue_with_pabee.py
│ │ │ │ ├── bertabs/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bertabs.py
│ │ │ │ │ ├── convert_bertabs_original_pytorch_checkpoint.py
│ │ │ │ │ ├── modeling_bertabs.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_summarization.py
│ │ │ │ │ ├── test_utils_summarization.py
│ │ │ │ │ └── utils_summarization.py
│ │ │ │ ├── bertology/
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_bertology.py
│ │ │ │ │ └── run_prune_gpt.py
│ │ │ │ ├── codeparrot/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── examples/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ └── train_complexity_predictor.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── scripts/
│ │ │ │ │ ├── arguments.py
│ │ │ │ │ ├── bpe_training.py
│ │ │ │ │ ├── codeparrot_training.py
│ │ │ │ │ ├── human_eval.py
│ │ │ │ │ ├── initialize_model.py
│ │ │ │ │ ├── minhash_deduplication.py
│ │ │ │ │ ├── preprocessing.py
│ │ │ │ │ ├── pretokenizing.py
│ │ │ │ │ ├── tests/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── test_deduplicate.py
│ │ │ │ │ └── validation_loss.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_decision_transformer.py
│ │ │ │ ├── deebert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── entropy_eval.sh
│ │ │ │ │ ├── eval_deebert.sh
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue_deebert.py
│ │ │ │ │ ├── src/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── modeling_highway_bert.py
│ │ │ │ │ │ └── modeling_highway_roberta.py
│ │ │ │ │ ├── test_glue_deebert.py
│ │ │ │ │ └── train_deebert.sh
│ │ │ │ ├── distillation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── distiller.py
│ │ │ │ │ ├── grouped_batch_sampler.py
│ │ │ │ │ ├── lm_seqs_dataset.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_squad_w_distillation.py
│ │ │ │ │ ├── scripts/
│ │ │ │ │ │ ├── binarized_data.py
│ │ │ │ │ │ ├── extract.py
│ │ │ │ │ │ ├── extract_distilbert.py
│ │ │ │ │ │ └── token_counts.py
│ │ │ │ │ ├── train.py
│ │ │ │ │ ├── training_configs/
│ │ │ │ │ │ ├── distilbert-base-cased.json
│ │ │ │ │ │ ├── distilbert-base-multilingual-cased.json
│ │ │ │ │ │ ├── distilbert-base-uncased.json
│ │ │ │ │ │ ├── distilgpt2.json
│ │ │ │ │ │ └── distilroberta-base.json
│ │ │ │ │ └── utils.py
│ │ │ │ ├── fsner/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pyproject.toml
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── setup.py
│ │ │ │ │ └── src/
│ │ │ │ │ └── fsner/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── model.py
│ │ │ │ │ └── tokenizer_utils.py
│ │ │ │ ├── information-gain-filtration/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── igf/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── igf.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_clm_igf.py
│ │ │ │ ├── jax-projects/
│ │ │ │ │ ├── HOW_TO_PROPOSE_PROJECT.md
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── big_bird/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── bigbird_flax.py
│ │ │ │ │ │ ├── evaluate.py
│ │ │ │ │ │ ├── prepare_natural_questions.py
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ ├── sweep_flax.yaml
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── dataset-streaming/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ └── run_mlm_flax_stream.py
│ │ │ │ │ ├── hybrid_clip/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── configuration_hybrid_clip.py
│ │ │ │ │ │ ├── modeling_hybrid_clip.py
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ └── run_hybrid_clip.py
│ │ │ │ │ ├── model_parallel/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── partitions.py
│ │ │ │ │ │ └── run_clm_mp.py
│ │ │ │ │ └── wav2vec2/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── run_wav2vec2_pretrain_flax.py
│ │ │ │ ├── layoutlmv3/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_funsd_cord.py
│ │ │ │ ├── longform-qa/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── eli5_app.py
│ │ │ │ │ ├── eli5_utils.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── luke/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── luke_utils.py
│ │ │ │ │ └── run_luke_ner_no_trainer.py
│ │ │ │ ├── lxmert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── demo.ipynb
│ │ │ │ │ ├── extracting_data.py
│ │ │ │ │ ├── modeling_frcnn.py
│ │ │ │ │ ├── processing_image.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── visualizing_image.py
│ │ │ │ ├── mlm_wwm/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_chinese_ref.py
│ │ │ │ │ └── run_mlm_wwm.py
│ │ │ │ ├── mm-imdb/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_mmimdb.py
│ │ │ │ │ └── utils_mmimdb.py
│ │ │ │ ├── movement-pruning/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── Saving_PruneBERT.ipynb
│ │ │ │ │ ├── bertarize.py
│ │ │ │ │ ├── counts_parameters.py
│ │ │ │ │ ├── emmental/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_bert_masked.py
│ │ │ │ │ │ ├── modeling_bert_masked.py
│ │ │ │ │ │ └── modules/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── binarizer.py
│ │ │ │ │ │ └── masked_nn.py
│ │ │ │ │ ├── masked_run_glue.py
│ │ │ │ │ ├── masked_run_squad.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── onnx/
│ │ │ │ │ └── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── bart_onnx/
│ │ │ │ │ │ ├── generation_onnx.py
│ │ │ │ │ │ └── reduce_onnx_size.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_onnx_exporter.py
│ │ │ │ ├── performer/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── full_script.sh
│ │ │ │ │ ├── modeling_flax_performer.py
│ │ │ │ │ ├── modeling_flax_performer_utils.py
│ │ │ │ │ ├── run_mlm_performer.py
│ │ │ │ │ └── sanity_script.sh
│ │ │ │ ├── pplm/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pplm_classification_head.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_pplm.py
│ │ │ │ │ └── run_pplm_discrim_train.py
│ │ │ │ ├── quantization-qdqbert/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── evaluate-hf-trt-qa.py
│ │ │ │ │ ├── ort-infer-benchmark.py
│ │ │ │ │ ├── quant_trainer.py
│ │ │ │ │ ├── run_quant_qa.py
│ │ │ │ │ ├── trainer_quant_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── rag/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── _test_finetune_rag.py
│ │ │ │ │ ├── callbacks_rag.py
│ │ │ │ │ ├── consolidate_rag_checkpoint.py
│ │ │ │ │ ├── distributed_pytorch_retriever.py
│ │ │ │ │ ├── distributed_ray_retriever.py
│ │ │ │ │ ├── eval_rag.py
│ │ │ │ │ ├── finetune_rag.py
│ │ │ │ │ ├── finetune_rag.sh
│ │ │ │ │ ├── finetune_rag_ray.sh
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── parse_dpr_relevance_data.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── test_data/
│ │ │ │ │ │ └── my_knowledge_dataset.csv
│ │ │ │ │ ├── test_distributed_retriever.py
│ │ │ │ │ ├── use_own_knowledge_dataset.py
│ │ │ │ │ └── utils_rag.py
│ │ │ │ ├── rag-end2end-retriever/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── callbacks_rag.py
│ │ │ │ │ ├── distributed_ray_retriever.py
│ │ │ │ │ ├── eval_rag.py
│ │ │ │ │ ├── finetune_rag.py
│ │ │ │ │ ├── finetune_rag_ray_end2end.sh
│ │ │ │ │ ├── kb_encode_utils.py
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── test_run/
│ │ │ │ │ │ ├── dummy-kb/
│ │ │ │ │ │ │ └── my_knowledge_dataset.csv
│ │ │ │ │ │ ├── dummy-train-data/
│ │ │ │ │ │ │ ├── test.source
│ │ │ │ │ │ │ ├── test.target
│ │ │ │ │ │ │ ├── train.source
│ │ │ │ │ │ │ ├── train.target
│ │ │ │ │ │ │ ├── val.source
│ │ │ │ │ │ │ └── val.target
│ │ │ │ │ │ ├── test_finetune.sh
│ │ │ │ │ │ └── test_rag_new_features.sh
│ │ │ │ │ ├── use_own_knowledge_dataset.py
│ │ │ │ │ └── utils_rag.py
│ │ │ │ ├── robust-speech-event/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── run_speech_recognition_ctc_bnb.py
│ │ │ │ │ └── run_speech_recognition_ctc_streaming.py
│ │ │ │ ├── self-training-text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── finetuning.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run.sh
│ │ │ │ │ └── selftraining.py
│ │ │ │ ├── seq2seq-distillation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── _test_bash_script.py
│ │ │ │ │ ├── _test_make_student.py
│ │ │ │ │ ├── _test_seq2seq_examples.py
│ │ │ │ │ ├── _test_seq2seq_examples_multi_gpu.py
│ │ │ │ │ ├── callbacks.py
│ │ │ │ │ ├── convert_pl_checkpoint_to_hf.py
│ │ │ │ │ ├── distil_marian_enro_teacher.sh
│ │ │ │ │ ├── distil_marian_no_teacher.sh
│ │ │ │ │ ├── distillation.py
│ │ │ │ │ ├── dynamic_bs_example.sh
│ │ │ │ │ ├── finetune.py
│ │ │ │ │ ├── finetune.sh
│ │ │ │ │ ├── finetune_bart_tiny.sh
│ │ │ │ │ ├── finetune_pegasus_xsum.sh
│ │ │ │ │ ├── finetune_t5.sh
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── make_student.py
│ │ │ │ │ ├── precomputed_pseudo_labels.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_eval.py
│ │ │ │ │ ├── sentence_splitter.py
│ │ │ │ │ ├── train_distilbart_cnn.sh
│ │ │ │ │ ├── train_distilbart_xsum.sh
│ │ │ │ │ ├── train_mbart_cc25_enro.sh
│ │ │ │ │ └── utils.py
│ │ │ │ ├── tapex/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_tabfact_with_tapex.py
│ │ │ │ │ ├── run_wikisql_with_tapex.py
│ │ │ │ │ ├── run_wikitablequestions_with_tapex.py
│ │ │ │ │ └── wikisql_utils.py
│ │ │ │ ├── token-healing/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── run_token_healing.py
│ │ │ │ ├── visual_bert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── demo.ipynb
│ │ │ │ │ ├── extracting_data.py
│ │ │ │ │ ├── modeling_frcnn.py
│ │ │ │ │ ├── processing_image.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── visualizing_image.py
│ │ │ │ ├── vqgan-clip/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── VQGAN_CLIP.py
│ │ │ │ │ ├── img_processing.py
│ │ │ │ │ ├── loaders.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── utils.py
│ │ │ │ ├── wav2vec2/
│ │ │ │ │ ├── FINE_TUNE_XLSR_WAV2VEC2.md
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── alignment.py
│ │ │ │ │ ├── ds_config_wav2vec2_zero2.json
│ │ │ │ │ ├── ds_config_wav2vec2_zero3.json
│ │ │ │ │ ├── finetune_base_100.sh
│ │ │ │ │ ├── finetune_base_timit_asr.sh
│ │ │ │ │ ├── finetune_large_lv60_100.sh
│ │ │ │ │ ├── finetune_large_lv60_timit_asr.sh
│ │ │ │ │ ├── finetune_large_xlsr_53_arabic_speech_corpus.sh
│ │ │ │ │ ├── finetune_wav2vec2_xlsr_turkish.sh
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_alignment.sh
│ │ │ │ │ ├── run_asr.py
│ │ │ │ │ ├── run_common_voice.py
│ │ │ │ │ ├── run_pretrain.py
│ │ │ │ │ ├── test_wav2vec2_deepspeed.py
│ │ │ │ │ └── vocab/
│ │ │ │ │ └── buckwalter.json
│ │ │ │ ├── xtreme-s/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_xtreme_s.py
│ │ │ │ └── zero-shot-distillation/
│ │ │ │ ├── README.md
│ │ │ │ └── distill_classifier.py
│ │ │ ├── run_on_remote.py
│ │ │ └── tensorflow/
│ │ │ ├── README.md
│ │ │ ├── _tests_requirements.txt
│ │ │ ├── benchmarking/
│ │ │ │ ├── README.md
│ │ │ │ ├── plot_csv_file.py
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_benchmark_tf.py
│ │ │ ├── contrastive-image-text/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_clip.py
│ │ │ ├── image-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_image_classification.py
│ │ │ ├── language-modeling/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_clm.py
│ │ │ │ └── run_mlm.py
│ │ │ ├── language-modeling-tpu/
│ │ │ │ ├── README.md
│ │ │ │ ├── prepare_tfrecord_shards.py
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_mlm.py
│ │ │ │ └── train_unigram.py
│ │ │ ├── multiple-choice/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_swag.py
│ │ │ ├── question-answering/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_qa.py
│ │ │ │ └── utils_qa.py
│ │ │ ├── summarization/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_summarization.py
│ │ │ ├── test_tensorflow_examples.py
│ │ │ ├── text-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_glue.py
│ │ │ │ └── run_text_classification.py
│ │ │ ├── token-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_ner.py
│ │ │ └── translation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_translation.py
│ │ ├── hubconf.py
│ │ ├── i18n/
│ │ │ ├── README_de.md
│ │ │ ├── README_es.md
│ │ │ ├── README_fr.md
│ │ │ ├── README_hd.md
│ │ │ ├── README_ja.md
│ │ │ ├── README_ko.md
│ │ │ ├── README_pt-br.md
│ │ │ ├── README_ru.md
│ │ │ ├── README_te.md
│ │ │ ├── README_vi.md
│ │ │ ├── README_zh-hans.md
│ │ │ └── README_zh-hant.md
│ │ ├── model_cards/
│ │ │ └── README.md
│ │ ├── notebooks/
│ │ │ └── README.md
│ │ ├── pyproject.toml
│ │ ├── scripts/
│ │ │ ├── benchmark/
│ │ │ │ └── trainer-benchmark.py
│ │ │ ├── check_tokenizers.py
│ │ │ ├── distributed/
│ │ │ │ └── torch-distributed-gpu-test.py
│ │ │ ├── fsmt/
│ │ │ │ ├── convert-allenai-wmt16.sh
│ │ │ │ ├── convert-allenai-wmt19.sh
│ │ │ │ ├── convert-facebook-wmt19.sh
│ │ │ │ ├── eval-allenai-wmt16.sh
│ │ │ │ ├── eval-allenai-wmt19.sh
│ │ │ │ ├── eval-facebook-wmt19.sh
│ │ │ │ ├── fsmt-make-super-tiny-model.py
│ │ │ │ ├── fsmt-make-tiny-model.py
│ │ │ │ ├── gen-card-allenai-wmt16.py
│ │ │ │ ├── gen-card-allenai-wmt19.py
│ │ │ │ ├── gen-card-facebook-wmt19.py
│ │ │ │ ├── s3-move.sh
│ │ │ │ └── tests-to-run.sh
│ │ │ ├── pegasus/
│ │ │ │ └── build_test_sample_spm_no_bos.py
│ │ │ ├── stale.py
│ │ │ └── tatoeba/
│ │ │ ├── README.md
│ │ │ └── upload_models.sh
│ │ ├── setup.py
│ │ ├── src/
│ │ │ └── transformers/
│ │ │ ├── __init__.py
│ │ │ ├── activations.py
│ │ │ ├── activations_tf.py
│ │ │ ├── agents/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── agent_types.py
│ │ │ │ ├── agents.py
│ │ │ │ ├── default_tools.py
│ │ │ │ ├── document_question_answering.py
│ │ │ │ ├── evaluate_agent.py
│ │ │ │ ├── image_question_answering.py
│ │ │ │ ├── llm_engine.py
│ │ │ │ ├── monitoring.py
│ │ │ │ ├── prompts.py
│ │ │ │ ├── python_interpreter.py
│ │ │ │ ├── speech_to_text.py
│ │ │ │ ├── text_to_speech.py
│ │ │ │ ├── tools.py
│ │ │ │ └── translation.py
│ │ │ ├── audio_utils.py
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark.py
│ │ │ │ ├── benchmark_args.py
│ │ │ │ ├── benchmark_args_tf.py
│ │ │ │ ├── benchmark_args_utils.py
│ │ │ │ ├── benchmark_tf.py
│ │ │ │ └── benchmark_utils.py
│ │ │ ├── cache_utils.py
│ │ │ ├── commands/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── add_new_model_like.py
│ │ │ │ ├── convert.py
│ │ │ │ ├── download.py
│ │ │ │ ├── env.py
│ │ │ │ ├── lfs.py
│ │ │ │ ├── pt_to_tf.py
│ │ │ │ ├── run.py
│ │ │ │ ├── serving.py
│ │ │ │ ├── train.py
│ │ │ │ ├── transformers_cli.py
│ │ │ │ └── user.py
│ │ │ ├── configuration_utils.py
│ │ │ ├── convert_graph_to_onnx.py
│ │ │ ├── convert_pytorch_checkpoint_to_tf2.py
│ │ │ ├── convert_slow_tokenizer.py
│ │ │ ├── convert_slow_tokenizers_checkpoints_to_fast.py
│ │ │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data_collator.py
│ │ │ │ ├── datasets/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── glue.py
│ │ │ │ │ ├── language_modeling.py
│ │ │ │ │ └── squad.py
│ │ │ │ ├── metrics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── squad_metrics.py
│ │ │ │ └── processors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── glue.py
│ │ │ │ ├── squad.py
│ │ │ │ ├── utils.py
│ │ │ │ └── xnli.py
│ │ │ ├── debug_utils.py
│ │ │ ├── deepspeed.py
│ │ │ ├── dependency_versions_check.py
│ │ │ ├── dependency_versions_table.py
│ │ │ ├── dynamic_module_utils.py
│ │ │ ├── feature_extraction_sequence_utils.py
│ │ │ ├── feature_extraction_utils.py
│ │ │ ├── file_utils.py
│ │ │ ├── generation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── beam_constraints.py
│ │ │ │ ├── beam_search.py
│ │ │ │ ├── candidate_generator.py
│ │ │ │ ├── configuration_utils.py
│ │ │ │ ├── flax_logits_process.py
│ │ │ │ ├── flax_utils.py
│ │ │ │ ├── logits_process.py
│ │ │ │ ├── stopping_criteria.py
│ │ │ │ ├── streamers.py
│ │ │ │ ├── tf_logits_process.py
│ │ │ │ ├── tf_utils.py
│ │ │ │ ├── utils.py
│ │ │ │ └── watermarking.py
│ │ │ ├── hf_argparser.py
│ │ │ ├── hyperparameter_search.py
│ │ │ ├── image_processing_base.py
│ │ │ ├── image_processing_utils.py
│ │ │ ├── image_processing_utils_fast.py
│ │ │ ├── image_transforms.py
│ │ │ ├── image_utils.py
│ │ │ ├── integrations/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aqlm.py
│ │ │ │ ├── awq.py
│ │ │ │ ├── bitsandbytes.py
│ │ │ │ ├── deepspeed.py
│ │ │ │ ├── eetq.py
│ │ │ │ ├── fbgemm_fp8.py
│ │ │ │ ├── ggml.py
│ │ │ │ ├── hqq.py
│ │ │ │ ├── integration_utils.py
│ │ │ │ ├── peft.py
│ │ │ │ ├── quanto.py
│ │ │ │ └── tpu.py
│ │ │ ├── keras_callbacks.py
│ │ │ ├── kernels/
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── cpu/
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ └── ms_deform_attn_cpu.h
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cuh
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── deta/
│ │ │ │ │ ├── cpu/
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ └── ms_deform_attn_cpu.h
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cuh
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── mra/
│ │ │ │ │ ├── cuda_kernel.cu
│ │ │ │ │ ├── cuda_kernel.h
│ │ │ │ │ ├── cuda_launch.cu
│ │ │ │ │ ├── cuda_launch.h
│ │ │ │ │ └── torch_extension.cpp
│ │ │ │ ├── rwkv/
│ │ │ │ │ ├── wkv_cuda.cu
│ │ │ │ │ ├── wkv_cuda_bf16.cu
│ │ │ │ │ └── wkv_op.cpp
│ │ │ │ └── yoso/
│ │ │ │ ├── common.h
│ │ │ │ ├── common_cuda.h
│ │ │ │ ├── common_cuda_device.h
│ │ │ │ ├── fast_lsh_cumulation.cu
│ │ │ │ ├── fast_lsh_cumulation.h
│ │ │ │ ├── fast_lsh_cumulation_cuda.cu
│ │ │ │ ├── fast_lsh_cumulation_cuda.h
│ │ │ │ └── fast_lsh_cumulation_torch.cpp
│ │ │ ├── modelcard.py
│ │ │ ├── modeling_attn_mask_utils.py
│ │ │ ├── modeling_flash_attention_utils.py
│ │ │ ├── modeling_flax_outputs.py
│ │ │ ├── modeling_flax_pytorch_utils.py
│ │ │ ├── modeling_flax_utils.py
│ │ │ ├── modeling_gguf_pytorch_utils.py
│ │ │ ├── modeling_outputs.py
│ │ │ ├── modeling_rope_utils.py
│ │ │ ├── modeling_tf_outputs.py
│ │ │ ├── modeling_tf_pytorch_utils.py
│ │ │ ├── modeling_tf_utils.py
│ │ │ ├── modeling_utils.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── albert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_albert.py
│ │ │ │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_albert.py
│ │ │ │ │ ├── modeling_flax_albert.py
│ │ │ │ │ ├── modeling_tf_albert.py
│ │ │ │ │ ├── tokenization_albert.py
│ │ │ │ │ └── tokenization_albert_fast.py
│ │ │ │ ├── align/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_align.py
│ │ │ │ │ ├── convert_align_tf_to_hf.py
│ │ │ │ │ ├── modeling_align.py
│ │ │ │ │ └── processing_align.py
│ │ │ │ ├── altclip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_altclip.py
│ │ │ │ │ ├── modeling_altclip.py
│ │ │ │ │ └── processing_altclip.py
│ │ │ │ ├── audio_spectrogram_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_audio_spectrogram_transformer.py
│ │ │ │ │ ├── convert_audio_spectrogram_transformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_audio_spectrogram_transformer.py
│ │ │ │ │ └── modeling_audio_spectrogram_transformer.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_factory.py
│ │ │ │ │ ├── configuration_auto.py
│ │ │ │ │ ├── feature_extraction_auto.py
│ │ │ │ │ ├── image_processing_auto.py
│ │ │ │ │ ├── modeling_auto.py
│ │ │ │ │ ├── modeling_flax_auto.py
│ │ │ │ │ ├── modeling_tf_auto.py
│ │ │ │ │ ├── processing_auto.py
│ │ │ │ │ └── tokenization_auto.py
│ │ │ │ ├── autoformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_autoformer.py
│ │ │ │ │ └── modeling_autoformer.py
│ │ │ │ ├── bark/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bark.py
│ │ │ │ │ ├── convert_suno_to_hf.py
│ │ │ │ │ ├── generation_configuration_bark.py
│ │ │ │ │ ├── modeling_bark.py
│ │ │ │ │ └── processing_bark.py
│ │ │ │ ├── bart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bart.py
│ │ │ │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bart.py
│ │ │ │ │ ├── modeling_flax_bart.py
│ │ │ │ │ ├── modeling_tf_bart.py
│ │ │ │ │ ├── tokenization_bart.py
│ │ │ │ │ └── tokenization_bart_fast.py
│ │ │ │ ├── barthez/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_barthez.py
│ │ │ │ │ └── tokenization_barthez_fast.py
│ │ │ │ ├── bartpho/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bartpho.py
│ │ │ │ ├── beit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_beit.py
│ │ │ │ │ ├── convert_beit_unilm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_beit.py
│ │ │ │ │ ├── image_processing_beit.py
│ │ │ │ │ ├── modeling_beit.py
│ │ │ │ │ └── modeling_flax_beit.py
│ │ │ │ ├── bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bert.py
│ │ │ │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py
│ │ │ │ │ ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bert.py
│ │ │ │ │ ├── modeling_flax_bert.py
│ │ │ │ │ ├── modeling_tf_bert.py
│ │ │ │ │ ├── tokenization_bert.py
│ │ │ │ │ ├── tokenization_bert_fast.py
│ │ │ │ │ └── tokenization_bert_tf.py
│ │ │ │ ├── bert_generation/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bert_generation.py
│ │ │ │ │ ├── modeling_bert_generation.py
│ │ │ │ │ └── tokenization_bert_generation.py
│ │ │ │ ├── bert_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bert_japanese.py
│ │ │ │ ├── bertweet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bertweet.py
│ │ │ │ ├── big_bird/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_big_bird.py
│ │ │ │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_big_bird.py
│ │ │ │ │ ├── modeling_flax_big_bird.py
│ │ │ │ │ ├── tokenization_big_bird.py
│ │ │ │ │ └── tokenization_big_bird_fast.py
│ │ │ │ ├── bigbird_pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bigbird_pegasus.py
│ │ │ │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py
│ │ │ │ │ └── modeling_bigbird_pegasus.py
│ │ │ │ ├── biogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_biogpt.py
│ │ │ │ │ ├── convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_biogpt.py
│ │ │ │ │ └── tokenization_biogpt.py
│ │ │ │ ├── bit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bit.py
│ │ │ │ │ ├── convert_bit_to_pytorch.py
│ │ │ │ │ ├── image_processing_bit.py
│ │ │ │ │ └── modeling_bit.py
│ │ │ │ ├── blenderbot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blenderbot.py
│ │ │ │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_blenderbot.py
│ │ │ │ │ ├── modeling_flax_blenderbot.py
│ │ │ │ │ ├── modeling_tf_blenderbot.py
│ │ │ │ │ ├── tokenization_blenderbot.py
│ │ │ │ │ └── tokenization_blenderbot_fast.py
│ │ │ │ ├── blenderbot_small/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blenderbot_small.py
│ │ │ │ │ ├── modeling_blenderbot_small.py
│ │ │ │ │ ├── modeling_flax_blenderbot_small.py
│ │ │ │ │ ├── modeling_tf_blenderbot_small.py
│ │ │ │ │ ├── tokenization_blenderbot_small.py
│ │ │ │ │ └── tokenization_blenderbot_small_fast.py
│ │ │ │ ├── blip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blip.py
│ │ │ │ │ ├── convert_blip_original_pytorch_to_hf.py
│ │ │ │ │ ├── image_processing_blip.py
│ │ │ │ │ ├── modeling_blip.py
│ │ │ │ │ ├── modeling_blip_text.py
│ │ │ │ │ ├── modeling_tf_blip.py
│ │ │ │ │ ├── modeling_tf_blip_text.py
│ │ │ │ │ └── processing_blip.py
│ │ │ │ ├── blip_2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blip_2.py
│ │ │ │ │ ├── convert_blip_2_original_to_pytorch.py
│ │ │ │ │ ├── modeling_blip_2.py
│ │ │ │ │ └── processing_blip_2.py
│ │ │ │ ├── bloom/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bloom.py
│ │ │ │ │ ├── convert_bloom_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bloom.py
│ │ │ │ │ ├── modeling_flax_bloom.py
│ │ │ │ │ └── tokenization_bloom_fast.py
│ │ │ │ ├── bridgetower/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bridgetower.py
│ │ │ │ │ ├── image_processing_bridgetower.py
│ │ │ │ │ ├── modeling_bridgetower.py
│ │ │ │ │ └── processing_bridgetower.py
│ │ │ │ ├── bros/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bros.py
│ │ │ │ │ ├── convert_bros_to_pytorch.py
│ │ │ │ │ ├── modeling_bros.py
│ │ │ │ │ └── processing_bros.py
│ │ │ │ ├── byt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ └── tokenization_byt5.py
│ │ │ │ ├── camembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_camembert.py
│ │ │ │ │ ├── modeling_camembert.py
│ │ │ │ │ ├── modeling_tf_camembert.py
│ │ │ │ │ ├── tokenization_camembert.py
│ │ │ │ │ └── tokenization_camembert_fast.py
│ │ │ │ ├── canine/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_canine.py
│ │ │ │ │ ├── convert_canine_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_canine.py
│ │ │ │ │ └── tokenization_canine.py
│ │ │ │ ├── chameleon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_chameleon.py
│ │ │ │ │ ├── convert_chameleon_weights_to_hf.py
│ │ │ │ │ ├── image_processing_chameleon.py
│ │ │ │ │ ├── modeling_chameleon.py
│ │ │ │ │ └── processing_chameleon.py
│ │ │ │ ├── chinese_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_chinese_clip.py
│ │ │ │ │ ├── convert_chinese_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_chinese_clip.py
│ │ │ │ │ ├── image_processing_chinese_clip.py
│ │ │ │ │ ├── modeling_chinese_clip.py
│ │ │ │ │ └── processing_chinese_clip.py
│ │ │ │ ├── clap/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clap.py
│ │ │ │ │ ├── convert_clap_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_clap.py
│ │ │ │ │ ├── modeling_clap.py
│ │ │ │ │ └── processing_clap.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clip.py
│ │ │ │ │ ├── convert_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_clip.py
│ │ │ │ │ ├── image_processing_clip.py
│ │ │ │ │ ├── modeling_clip.py
│ │ │ │ │ ├── modeling_flax_clip.py
│ │ │ │ │ ├── modeling_tf_clip.py
│ │ │ │ │ ├── processing_clip.py
│ │ │ │ │ ├── tokenization_clip.py
│ │ │ │ │ └── tokenization_clip_fast.py
│ │ │ │ ├── clipseg/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clipseg.py
│ │ │ │ │ ├── convert_clipseg_original_pytorch_to_hf.py
│ │ │ │ │ ├── modeling_clipseg.py
│ │ │ │ │ └── processing_clipseg.py
│ │ │ │ ├── clvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clvp.py
│ │ │ │ │ ├── convert_clvp_to_hf.py
│ │ │ │ │ ├── feature_extraction_clvp.py
│ │ │ │ │ ├── modeling_clvp.py
│ │ │ │ │ ├── number_normalizer.py
│ │ │ │ │ ├── processing_clvp.py
│ │ │ │ │ └── tokenization_clvp.py
│ │ │ │ ├── code_llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_code_llama.py
│ │ │ │ │ └── tokenization_code_llama_fast.py
│ │ │ │ ├── codegen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_codegen.py
│ │ │ │ │ ├── modeling_codegen.py
│ │ │ │ │ ├── tokenization_codegen.py
│ │ │ │ │ └── tokenization_codegen_fast.py
│ │ │ │ ├── cohere/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cohere.py
│ │ │ │ │ ├── modeling_cohere.py
│ │ │ │ │ └── tokenization_cohere_fast.py
│ │ │ │ ├── conditional_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_conditional_detr.py
│ │ │ │ │ ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_conditional_detr.py
│ │ │ │ │ ├── image_processing_conditional_detr.py
│ │ │ │ │ └── modeling_conditional_detr.py
│ │ │ │ ├── convbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convbert.py
│ │ │ │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py
│ │ │ │ │ ├── modeling_convbert.py
│ │ │ │ │ ├── modeling_tf_convbert.py
│ │ │ │ │ ├── tokenization_convbert.py
│ │ │ │ │ └── tokenization_convbert_fast.py
│ │ │ │ ├── convnext/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convnext.py
│ │ │ │ │ ├── convert_convnext_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_convnext.py
│ │ │ │ │ ├── image_processing_convnext.py
│ │ │ │ │ ├── modeling_convnext.py
│ │ │ │ │ └── modeling_tf_convnext.py
│ │ │ │ ├── convnextv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convnextv2.py
│ │ │ │ │ ├── convert_convnextv2_to_pytorch.py
│ │ │ │ │ ├── modeling_convnextv2.py
│ │ │ │ │ └── modeling_tf_convnextv2.py
│ │ │ │ ├── cpm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_cpm.py
│ │ │ │ │ └── tokenization_cpm_fast.py
│ │ │ │ ├── cpmant/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cpmant.py
│ │ │ │ │ ├── modeling_cpmant.py
│ │ │ │ │ └── tokenization_cpmant.py
│ │ │ │ ├── ctrl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ctrl.py
│ │ │ │ │ ├── modeling_ctrl.py
│ │ │ │ │ ├── modeling_tf_ctrl.py
│ │ │ │ │ └── tokenization_ctrl.py
│ │ │ │ ├── cvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cvt.py
│ │ │ │ │ ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_cvt.py
│ │ │ │ │ └── modeling_tf_cvt.py
│ │ │ │ ├── dac/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dac.py
│ │ │ │ │ ├── convert_dac_checkpoint.py
│ │ │ │ │ ├── feature_extraction_dac.py
│ │ │ │ │ └── modeling_dac.py
│ │ │ │ ├── data2vec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_data2vec_audio.py
│ │ │ │ │ ├── configuration_data2vec_text.py
│ │ │ │ │ ├── configuration_data2vec_vision.py
│ │ │ │ │ ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_data2vec_audio.py
│ │ │ │ │ ├── modeling_data2vec_text.py
│ │ │ │ │ ├── modeling_data2vec_vision.py
│ │ │ │ │ └── modeling_tf_data2vec_vision.py
│ │ │ │ ├── dbrx/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dbrx.py
│ │ │ │ │ └── modeling_dbrx.py
│ │ │ │ ├── deberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deberta.py
│ │ │ │ │ ├── modeling_deberta.py
│ │ │ │ │ ├── modeling_tf_deberta.py
│ │ │ │ │ ├── tokenization_deberta.py
│ │ │ │ │ └── tokenization_deberta_fast.py
│ │ │ │ ├── deberta_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deberta_v2.py
│ │ │ │ │ ├── modeling_deberta_v2.py
│ │ │ │ │ ├── modeling_tf_deberta_v2.py
│ │ │ │ │ ├── tokenization_deberta_v2.py
│ │ │ │ │ └── tokenization_deberta_v2_fast.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_decision_transformer.py
│ │ │ │ │ └── modeling_decision_transformer.py
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deformable_detr.py
│ │ │ │ │ ├── convert_deformable_detr_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_deformable_detr.py
│ │ │ │ │ ├── image_processing_deformable_detr.py
│ │ │ │ │ ├── load_custom.py
│ │ │ │ │ └── modeling_deformable_detr.py
│ │ │ │ ├── deit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deit.py
│ │ │ │ │ ├── convert_deit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_deit.py
│ │ │ │ │ ├── image_processing_deit.py
│ │ │ │ │ ├── modeling_deit.py
│ │ │ │ │ └── modeling_tf_deit.py
│ │ │ │ ├── deprecated/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── bort/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py
│ │ │ │ │ ├── deta/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_deta.py
│ │ │ │ │ │ ├── convert_deta_resnet_to_pytorch.py
│ │ │ │ │ │ ├── convert_deta_swin_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_deta.py
│ │ │ │ │ │ └── modeling_deta.py
│ │ │ │ │ ├── efficientformer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_efficientformer.py
│ │ │ │ │ │ ├── convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_efficientformer.py
│ │ │ │ │ │ ├── modeling_efficientformer.py
│ │ │ │ │ │ └── modeling_tf_efficientformer.py
│ │ │ │ │ ├── ernie_m/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_ernie_m.py
│ │ │ │ │ │ ├── modeling_ernie_m.py
│ │ │ │ │ │ └── tokenization_ernie_m.py
│ │ │ │ │ ├── gptsan_japanese/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_gptsan_japanese.py
│ │ │ │ │ │ ├── convert_gptsan_tf_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── modeling_gptsan_japanese.py
│ │ │ │ │ │ └── tokenization_gptsan_japanese.py
│ │ │ │ │ ├── graphormer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── algos_graphormer.pyx
│ │ │ │ │ │ ├── collating_graphormer.py
│ │ │ │ │ │ ├── configuration_graphormer.py
│ │ │ │ │ │ └── modeling_graphormer.py
│ │ │ │ │ ├── jukebox/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_jukebox.py
│ │ │ │ │ │ ├── convert_jukebox.py
│ │ │ │ │ │ ├── modeling_jukebox.py
│ │ │ │ │ │ └── tokenization_jukebox.py
│ │ │ │ │ ├── mctct/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mctct.py
│ │ │ │ │ │ ├── feature_extraction_mctct.py
│ │ │ │ │ │ ├── modeling_mctct.py
│ │ │ │ │ │ └── processing_mctct.py
│ │ │ │ │ ├── mega/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mega.py
│ │ │ │ │ │ ├── convert_mega_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ └── modeling_mega.py
│ │ │ │ │ ├── mmbt/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mmbt.py
│ │ │ │ │ │ └── modeling_mmbt.py
│ │ │ │ │ ├── nat/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_nat.py
│ │ │ │ │ │ └── modeling_nat.py
│ │ │ │ │ ├── nezha/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_nezha.py
│ │ │ │ │ │ └── modeling_nezha.py
│ │ │ │ │ ├── open_llama/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_open_llama.py
│ │ │ │ │ │ └── modeling_open_llama.py
│ │ │ │ │ ├── qdqbert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_qdqbert.py
│ │ │ │ │ │ └── modeling_qdqbert.py
│ │ │ │ │ ├── realm/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_realm.py
│ │ │ │ │ │ ├── modeling_realm.py
│ │ │ │ │ │ ├── retrieval_realm.py
│ │ │ │ │ │ ├── tokenization_realm.py
│ │ │ │ │ │ └── tokenization_realm_fast.py
│ │ │ │ │ ├── retribert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_retribert.py
│ │ │ │ │ │ ├── modeling_retribert.py
│ │ │ │ │ │ ├── tokenization_retribert.py
│ │ │ │ │ │ └── tokenization_retribert_fast.py
│ │ │ │ │ ├── speech_to_text_2/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_speech_to_text_2.py
│ │ │ │ │ │ ├── modeling_speech_to_text_2.py
│ │ │ │ │ │ ├── processing_speech_to_text_2.py
│ │ │ │ │ │ └── tokenization_speech_to_text_2.py
│ │ │ │ │ ├── tapex/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── tokenization_tapex.py
│ │ │ │ │ ├── trajectory_transformer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_trajectory_transformer.py
│ │ │ │ │ │ ├── convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ └── modeling_trajectory_transformer.py
│ │ │ │ │ ├── transfo_xl/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_transfo_xl.py
│ │ │ │ │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── modeling_tf_transfo_xl.py
│ │ │ │ │ │ ├── modeling_tf_transfo_xl_utilities.py
│ │ │ │ │ │ ├── modeling_transfo_xl.py
│ │ │ │ │ │ ├── modeling_transfo_xl_utilities.py
│ │ │ │ │ │ └── tokenization_transfo_xl.py
│ │ │ │ │ ├── tvlt/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_tvlt.py
│ │ │ │ │ │ ├── feature_extraction_tvlt.py
│ │ │ │ │ │ ├── image_processing_tvlt.py
│ │ │ │ │ │ ├── modeling_tvlt.py
│ │ │ │ │ │ └── processing_tvlt.py
│ │ │ │ │ ├── van/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_van.py
│ │ │ │ │ │ ├── convert_van_to_pytorch.py
│ │ │ │ │ │ └── modeling_van.py
│ │ │ │ │ ├── vit_hybrid/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_vit_hybrid.py
│ │ │ │ │ │ ├── convert_vit_hybrid_timm_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_vit_hybrid.py
│ │ │ │ │ │ └── modeling_vit_hybrid.py
│ │ │ │ │ └── xlm_prophetnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_prophetnet.py
│ │ │ │ │ ├── modeling_xlm_prophetnet.py
│ │ │ │ │ └── tokenization_xlm_prophetnet.py
│ │ │ │ ├── depth_anything/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_depth_anything.py
│ │ │ │ │ ├── convert_depth_anything_to_hf.py
│ │ │ │ │ └── modeling_depth_anything.py
│ │ │ │ ├── detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_detr.py
│ │ │ │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_detr_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_detr.py
│ │ │ │ │ ├── image_processing_detr.py
│ │ │ │ │ └── modeling_detr.py
│ │ │ │ ├── dialogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ ├── dinat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dinat.py
│ │ │ │ │ └── modeling_dinat.py
│ │ │ │ ├── dinov2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dinov2.py
│ │ │ │ │ ├── convert_dinov2_to_hf.py
│ │ │ │ │ ├── modeling_dinov2.py
│ │ │ │ │ └── modeling_flax_dinov2.py
│ │ │ │ ├── distilbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_distilbert.py
│ │ │ │ │ ├── modeling_distilbert.py
│ │ │ │ │ ├── modeling_flax_distilbert.py
│ │ │ │ │ ├── modeling_tf_distilbert.py
│ │ │ │ │ ├── tokenization_distilbert.py
│ │ │ │ │ └── tokenization_distilbert_fast.py
│ │ │ │ ├── dit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── convert_dit_unilm_to_pytorch.py
│ │ │ │ ├── donut/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_donut_swin.py
│ │ │ │ │ ├── convert_donut_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_donut.py
│ │ │ │ │ ├── image_processing_donut.py
│ │ │ │ │ ├── modeling_donut_swin.py
│ │ │ │ │ └── processing_donut.py
│ │ │ │ ├── dpr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dpr.py
│ │ │ │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_dpr.py
│ │ │ │ │ ├── modeling_tf_dpr.py
│ │ │ │ │ ├── tokenization_dpr.py
│ │ │ │ │ └── tokenization_dpr_fast.py
│ │ │ │ ├── dpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dpt.py
│ │ │ │ │ ├── convert_dinov2_depth_to_hf.py
│ │ │ │ │ ├── convert_dpt_beit_to_hf.py
│ │ │ │ │ ├── convert_dpt_hybrid_to_pytorch.py
│ │ │ │ │ ├── convert_dpt_swinv2_to_hf.py
│ │ │ │ │ ├── convert_dpt_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_dpt.py
│ │ │ │ │ ├── image_processing_dpt.py
│ │ │ │ │ └── modeling_dpt.py
│ │ │ │ ├── efficientnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_efficientnet.py
│ │ │ │ │ ├── convert_efficientnet_to_pytorch.py
│ │ │ │ │ ├── image_processing_efficientnet.py
│ │ │ │ │ └── modeling_efficientnet.py
│ │ │ │ ├── electra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_electra.py
│ │ │ │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_electra.py
│ │ │ │ │ ├── modeling_flax_electra.py
│ │ │ │ │ ├── modeling_tf_electra.py
│ │ │ │ │ ├── tokenization_electra.py
│ │ │ │ │ └── tokenization_electra_fast.py
│ │ │ │ ├── encodec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_encodec.py
│ │ │ │ │ ├── convert_encodec_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_encodec.py
│ │ │ │ │ └── modeling_encodec.py
│ │ │ │ ├── encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_encoder_decoder.py
│ │ │ │ │ ├── modeling_encoder_decoder.py
│ │ │ │ │ ├── modeling_flax_encoder_decoder.py
│ │ │ │ │ └── modeling_tf_encoder_decoder.py
│ │ │ │ ├── ernie/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ernie.py
│ │ │ │ │ └── modeling_ernie.py
│ │ │ │ ├── esm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_esm.py
│ │ │ │ │ ├── convert_esm.py
│ │ │ │ │ ├── modeling_esm.py
│ │ │ │ │ ├── modeling_esmfold.py
│ │ │ │ │ ├── modeling_tf_esm.py
│ │ │ │ │ ├── openfold_utils/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── chunk_utils.py
│ │ │ │ │ │ ├── data_transforms.py
│ │ │ │ │ │ ├── feats.py
│ │ │ │ │ │ ├── loss.py
│ │ │ │ │ │ ├── protein.py
│ │ │ │ │ │ ├── residue_constants.py
│ │ │ │ │ │ ├── rigid_utils.py
│ │ │ │ │ │ └── tensor_utils.py
│ │ │ │ │ └── tokenization_esm.py
│ │ │ │ ├── falcon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_falcon.py
│ │ │ │ │ ├── convert_custom_code_checkpoint.py
│ │ │ │ │ └── modeling_falcon.py
│ │ │ │ ├── falcon_mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_falcon_mamba.py
│ │ │ │ │ └── modeling_falcon_mamba.py
│ │ │ │ ├── fastspeech2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fastspeech2_conformer.py
│ │ │ │ │ ├── convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hifigan.py
│ │ │ │ │ ├── convert_model_with_hifigan.py
│ │ │ │ │ ├── modeling_fastspeech2_conformer.py
│ │ │ │ │ └── tokenization_fastspeech2_conformer.py
│ │ │ │ ├── flaubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_flaubert.py
│ │ │ │ │ ├── modeling_flaubert.py
│ │ │ │ │ ├── modeling_tf_flaubert.py
│ │ │ │ │ └── tokenization_flaubert.py
│ │ │ │ ├── flava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_flava.py
│ │ │ │ │ ├── convert_dalle_to_flava_codebook.py
│ │ │ │ │ ├── convert_flava_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_flava.py
│ │ │ │ │ ├── image_processing_flava.py
│ │ │ │ │ ├── modeling_flava.py
│ │ │ │ │ └── processing_flava.py
│ │ │ │ ├── fnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fnet.py
│ │ │ │ │ ├── convert_fnet_original_flax_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_fnet.py
│ │ │ │ │ ├── tokenization_fnet.py
│ │ │ │ │ └── tokenization_fnet_fast.py
│ │ │ │ ├── focalnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_focalnet.py
│ │ │ │ │ ├── convert_focalnet_to_hf_format.py
│ │ │ │ │ └── modeling_focalnet.py
│ │ │ │ ├── fsmt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fsmt.py
│ │ │ │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_fsmt.py
│ │ │ │ │ └── tokenization_fsmt.py
│ │ │ │ ├── funnel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_funnel.py
│ │ │ │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_funnel.py
│ │ │ │ │ ├── modeling_tf_funnel.py
│ │ │ │ │ ├── tokenization_funnel.py
│ │ │ │ │ └── tokenization_funnel_fast.py
│ │ │ │ ├── fuyu/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fuyu.py
│ │ │ │ │ ├── convert_fuyu_model_weights_to_hf.py
│ │ │ │ │ ├── image_processing_fuyu.py
│ │ │ │ │ ├── modeling_fuyu.py
│ │ │ │ │ └── processing_fuyu.py
│ │ │ │ ├── gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gemma.py
│ │ │ │ │ ├── convert_gemma_weights_to_hf.py
│ │ │ │ │ ├── diff_gemma.py
│ │ │ │ │ ├── modeling_flax_gemma.py
│ │ │ │ │ ├── modeling_gemma.py
│ │ │ │ │ ├── tokenization_gemma.py
│ │ │ │ │ └── tokenization_gemma_fast.py
│ │ │ │ ├── gemma2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gemma2.py
│ │ │ │ │ ├── convert_gemma2_weights_to_hf.py
│ │ │ │ │ ├── diff_gemma2.py
│ │ │ │ │ └── modeling_gemma2.py
│ │ │ │ ├── git/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_git.py
│ │ │ │ │ ├── convert_git_to_pytorch.py
│ │ │ │ │ ├── modeling_git.py
│ │ │ │ │ └── processing_git.py
│ │ │ │ ├── glpn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_glpn.py
│ │ │ │ │ ├── convert_glpn_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_glpn.py
│ │ │ │ │ ├── image_processing_glpn.py
│ │ │ │ │ └── modeling_glpn.py
│ │ │ │ ├── gpt2/
│ │ │ │ │ ├── CONVERSION.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt2.py
│ │ │ │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_gpt2.py
│ │ │ │ │ ├── modeling_gpt2.py
│ │ │ │ │ ├── modeling_tf_gpt2.py
│ │ │ │ │ ├── tokenization_gpt2.py
│ │ │ │ │ ├── tokenization_gpt2_fast.py
│ │ │ │ │ └── tokenization_gpt2_tf.py
│ │ │ │ ├── gpt_bigcode/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_bigcode.py
│ │ │ │ │ └── modeling_gpt_bigcode.py
│ │ │ │ ├── gpt_neo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neo.py
│ │ │ │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_gpt_neo.py
│ │ │ │ │ └── modeling_gpt_neo.py
│ │ │ │ ├── gpt_neox/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neox.py
│ │ │ │ │ ├── modeling_gpt_neox.py
│ │ │ │ │ └── tokenization_gpt_neox_fast.py
│ │ │ │ ├── gpt_neox_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neox_japanese.py
│ │ │ │ │ ├── modeling_gpt_neox_japanese.py
│ │ │ │ │ └── tokenization_gpt_neox_japanese.py
│ │ │ │ ├── gpt_sw3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_megatron_to_pytorch.py
│ │ │ │ │ └── tokenization_gpt_sw3.py
│ │ │ │ ├── gptj/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gptj.py
│ │ │ │ │ ├── modeling_flax_gptj.py
│ │ │ │ │ ├── modeling_gptj.py
│ │ │ │ │ └── modeling_tf_gptj.py
│ │ │ │ ├── grounding_dino/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_grounding_dino.py
│ │ │ │ │ ├── convert_grounding_dino_to_hf.py
│ │ │ │ │ ├── image_processing_grounding_dino.py
│ │ │ │ │ ├── modeling_grounding_dino.py
│ │ │ │ │ └── processing_grounding_dino.py
│ │ │ │ ├── groupvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_groupvit.py
│ │ │ │ │ ├── convert_groupvit_nvlab_to_hf.py
│ │ │ │ │ ├── modeling_groupvit.py
│ │ │ │ │ └── modeling_tf_groupvit.py
│ │ │ │ ├── herbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_herbert.py
│ │ │ │ │ └── tokenization_herbert_fast.py
│ │ │ │ ├── hiera/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_hiera.py
│ │ │ │ │ ├── convert_hiera_to_hf.py
│ │ │ │ │ └── modeling_hiera.py
│ │ │ │ ├── hubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_hubert.py
│ │ │ │ │ ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_hubert.py
│ │ │ │ │ └── modeling_tf_hubert.py
│ │ │ │ ├── ibert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ibert.py
│ │ │ │ │ ├── modeling_ibert.py
│ │ │ │ │ └── quant_modules.py
│ │ │ │ ├── idefics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_idefics.py
│ │ │ │ │ ├── image_processing_idefics.py
│ │ │ │ │ ├── modeling_idefics.py
│ │ │ │ │ ├── modeling_tf_idefics.py
│ │ │ │ │ ├── perceiver.py
│ │ │ │ │ ├── perceiver_tf.py
│ │ │ │ │ ├── processing_idefics.py
│ │ │ │ │ ├── vision.py
│ │ │ │ │ └── vision_tf.py
│ │ │ │ ├── idefics2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_idefics2.py
│ │ │ │ │ ├── convert_idefics2_weights_to_hf.py
│ │ │ │ │ ├── image_processing_idefics2.py
│ │ │ │ │ ├── modeling_idefics2.py
│ │ │ │ │ └── processing_idefics2.py
│ │ │ │ ├── imagegpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_imagegpt.py
│ │ │ │ │ ├── convert_imagegpt_original_tf2_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_imagegpt.py
│ │ │ │ │ ├── image_processing_imagegpt.py
│ │ │ │ │ └── modeling_imagegpt.py
│ │ │ │ ├── informer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_informer.py
│ │ │ │ │ └── modeling_informer.py
│ │ │ │ ├── instructblip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_instructblip.py
│ │ │ │ │ ├── convert_instructblip_original_to_pytorch.py
│ │ │ │ │ ├── modeling_instructblip.py
│ │ │ │ │ └── processing_instructblip.py
│ │ │ │ ├── instructblipvideo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_instructblipvideo.py
│ │ │ │ │ ├── convert_instructblipvideo_original_to_pytorch.py
│ │ │ │ │ ├── diff_instructblipvideo.py
│ │ │ │ │ ├── image_processing_instructblipvideo.py
│ │ │ │ │ ├── modeling_instructblipvideo.py
│ │ │ │ │ └── processing_instructblipvideo.py
│ │ │ │ ├── jamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_jamba.py
│ │ │ │ │ └── modeling_jamba.py
│ │ │ │ ├── jetmoe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_jetmoe.py
│ │ │ │ │ └── modeling_jetmoe.py
│ │ │ │ ├── kosmos2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_kosmos2.py
│ │ │ │ │ ├── convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_kosmos2.py
│ │ │ │ │ └── processing_kosmos2.py
│ │ │ │ ├── layoutlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlm.py
│ │ │ │ │ ├── modeling_layoutlm.py
│ │ │ │ │ ├── modeling_tf_layoutlm.py
│ │ │ │ │ ├── tokenization_layoutlm.py
│ │ │ │ │ └── tokenization_layoutlm_fast.py
│ │ │ │ ├── layoutlmv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlmv2.py
│ │ │ │ │ ├── feature_extraction_layoutlmv2.py
│ │ │ │ │ ├── image_processing_layoutlmv2.py
│ │ │ │ │ ├── modeling_layoutlmv2.py
│ │ │ │ │ ├── processing_layoutlmv2.py
│ │ │ │ │ ├── tokenization_layoutlmv2.py
│ │ │ │ │ └── tokenization_layoutlmv2_fast.py
│ │ │ │ ├── layoutlmv3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlmv3.py
│ │ │ │ │ ├── feature_extraction_layoutlmv3.py
│ │ │ │ │ ├── image_processing_layoutlmv3.py
│ │ │ │ │ ├── modeling_layoutlmv3.py
│ │ │ │ │ ├── modeling_tf_layoutlmv3.py
│ │ │ │ │ ├── processing_layoutlmv3.py
│ │ │ │ │ ├── tokenization_layoutlmv3.py
│ │ │ │ │ └── tokenization_layoutlmv3_fast.py
│ │ │ │ ├── layoutxlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── processing_layoutxlm.py
│ │ │ │ │ ├── tokenization_layoutxlm.py
│ │ │ │ │ └── tokenization_layoutxlm_fast.py
│ │ │ │ ├── led/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_led.py
│ │ │ │ │ ├── modeling_led.py
│ │ │ │ │ ├── modeling_tf_led.py
│ │ │ │ │ ├── tokenization_led.py
│ │ │ │ │ └── tokenization_led_fast.py
│ │ │ │ ├── levit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_levit.py
│ │ │ │ │ ├── convert_levit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_levit.py
│ │ │ │ │ ├── image_processing_levit.py
│ │ │ │ │ └── modeling_levit.py
│ │ │ │ ├── lilt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_lilt.py
│ │ │ │ │ └── modeling_lilt.py
│ │ │ │ ├── llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llama.py
│ │ │ │ │ ├── convert_llama_weights_to_hf.py
│ │ │ │ │ ├── modeling_flax_llama.py
│ │ │ │ │ ├── modeling_llama.py
│ │ │ │ │ ├── tokenization_llama.py
│ │ │ │ │ └── tokenization_llama_fast.py
│ │ │ │ ├── llava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava.py
│ │ │ │ │ ├── convert_llava_weights_to_hf.py
│ │ │ │ │ ├── modeling_llava.py
│ │ │ │ │ └── processing_llava.py
│ │ │ │ ├── llava_next/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava_next.py
│ │ │ │ │ ├── convert_llava_next_weights_to_hf.py
│ │ │ │ │ ├── image_processing_llava_next.py
│ │ │ │ │ ├── modeling_llava_next.py
│ │ │ │ │ └── processing_llava_next.py
│ │ │ │ ├── llava_next_video/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava_next_video.py
│ │ │ │ │ ├── convert_llava_next_video_weights_to_hf.py
│ │ │ │ │ ├── diff_llava_next_video.py
│ │ │ │ │ ├── image_processing_llava_next_video.py
│ │ │ │ │ ├── modeling_llava_next_video.py
│ │ │ │ │ └── processing_llava_next_video.py
│ │ │ │ ├── longformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_longformer.py
│ │ │ │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│ │ │ │ │ ├── modeling_longformer.py
│ │ │ │ │ ├── modeling_tf_longformer.py
│ │ │ │ │ ├── tokenization_longformer.py
│ │ │ │ │ └── tokenization_longformer_fast.py
│ │ │ │ ├── longt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_longt5.py
│ │ │ │ │ ├── convert_longt5x_checkpoint_to_flax.py
│ │ │ │ │ ├── modeling_flax_longt5.py
│ │ │ │ │ └── modeling_longt5.py
│ │ │ │ ├── luke/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_luke.py
│ │ │ │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_luke.py
│ │ │ │ │ └── tokenization_luke.py
│ │ │ │ ├── lxmert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_lxmert.py
│ │ │ │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_lxmert.py
│ │ │ │ │ ├── modeling_tf_lxmert.py
│ │ │ │ │ ├── tokenization_lxmert.py
│ │ │ │ │ └── tokenization_lxmert_fast.py
│ │ │ │ ├── m2m_100/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_m2m_100.py
│ │ │ │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_m2m_100.py
│ │ │ │ │ └── tokenization_m2m_100.py
│ │ │ │ ├── mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mamba.py
│ │ │ │ │ ├── convert_mamba_ssm_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_mamba.py
│ │ │ │ ├── mamba2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mamba2.py
│ │ │ │ │ ├── convert_mamba2_ssm_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_mamba2.py
│ │ │ │ ├── marian/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_marian.py
│ │ │ │ │ ├── convert_marian_tatoeba_to_pytorch.py
│ │ │ │ │ ├── convert_marian_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_marian.py
│ │ │ │ │ ├── modeling_marian.py
│ │ │ │ │ ├── modeling_tf_marian.py
│ │ │ │ │ └── tokenization_marian.py
│ │ │ │ ├── markuplm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_markuplm.py
│ │ │ │ │ ├── feature_extraction_markuplm.py
│ │ │ │ │ ├── modeling_markuplm.py
│ │ │ │ │ ├── processing_markuplm.py
│ │ │ │ │ ├── tokenization_markuplm.py
│ │ │ │ │ └── tokenization_markuplm_fast.py
│ │ │ │ ├── mask2former/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mask2former.py
│ │ │ │ │ ├── convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── image_processing_mask2former.py
│ │ │ │ │ └── modeling_mask2former.py
│ │ │ │ ├── maskformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_maskformer.py
│ │ │ │ │ ├── configuration_maskformer_swin.py
│ │ │ │ │ ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_maskformer_resnet_to_pytorch.py
│ │ │ │ │ ├── convert_maskformer_swin_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_maskformer.py
│ │ │ │ │ ├── image_processing_maskformer.py
│ │ │ │ │ ├── modeling_maskformer.py
│ │ │ │ │ └── modeling_maskformer_swin.py
│ │ │ │ ├── mbart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mbart.py
│ │ │ │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_mbart.py
│ │ │ │ │ ├── modeling_mbart.py
│ │ │ │ │ ├── modeling_tf_mbart.py
│ │ │ │ │ ├── tokenization_mbart.py
│ │ │ │ │ └── tokenization_mbart_fast.py
│ │ │ │ ├── mbart50/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_mbart50.py
│ │ │ │ │ └── tokenization_mbart50_fast.py
│ │ │ │ ├── megatron_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_megatron_bert.py
│ │ │ │ │ ├── convert_megatron_bert_checkpoint.py
│ │ │ │ │ └── modeling_megatron_bert.py
│ │ │ │ ├── megatron_gpt2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── checkpoint_reshaping_and_interoperability.py
│ │ │ │ │ └── convert_megatron_gpt2_checkpoint.py
│ │ │ │ ├── mgp_str/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mgp_str.py
│ │ │ │ │ ├── modeling_mgp_str.py
│ │ │ │ │ ├── processing_mgp_str.py
│ │ │ │ │ └── tokenization_mgp_str.py
│ │ │ │ ├── mistral/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mistral.py
│ │ │ │ │ ├── convert_mistral_weights_to_hf.py
│ │ │ │ │ ├── modeling_flax_mistral.py
│ │ │ │ │ ├── modeling_mistral.py
│ │ │ │ │ └── modeling_tf_mistral.py
│ │ │ │ ├── mixtral/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mixtral.py
│ │ │ │ │ ├── convert_mixtral_weights_to_hf.py
│ │ │ │ │ └── modeling_mixtral.py
│ │ │ │ ├── mluke/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── tokenization_mluke.py
│ │ │ │ ├── mobilebert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilebert.py
│ │ │ │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_mobilebert.py
│ │ │ │ │ ├── modeling_tf_mobilebert.py
│ │ │ │ │ ├── tokenization_mobilebert.py
│ │ │ │ │ └── tokenization_mobilebert_fast.py
│ │ │ │ ├── mobilenet_v1/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilenet_v1.py
│ │ │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilenet_v1.py
│ │ │ │ │ ├── image_processing_mobilenet_v1.py
│ │ │ │ │ └── modeling_mobilenet_v1.py
│ │ │ │ ├── mobilenet_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilenet_v2.py
│ │ │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilenet_v2.py
│ │ │ │ │ ├── image_processing_mobilenet_v2.py
│ │ │ │ │ └── modeling_mobilenet_v2.py
│ │ │ │ ├── mobilevit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilevit.py
│ │ │ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilevit.py
│ │ │ │ │ ├── image_processing_mobilevit.py
│ │ │ │ │ ├── modeling_mobilevit.py
│ │ │ │ │ └── modeling_tf_mobilevit.py
│ │ │ │ ├── mobilevitv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilevitv2.py
│ │ │ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ │ │ └── modeling_mobilevitv2.py
│ │ │ │ ├── mpnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mpnet.py
│ │ │ │ │ ├── modeling_mpnet.py
│ │ │ │ │ ├── modeling_tf_mpnet.py
│ │ │ │ │ ├── tokenization_mpnet.py
│ │ │ │ │ └── tokenization_mpnet_fast.py
│ │ │ │ ├── mpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mpt.py
│ │ │ │ │ └── modeling_mpt.py
│ │ │ │ ├── mra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mra.py
│ │ │ │ │ ├── convert_mra_pytorch_to_pytorch.py
│ │ │ │ │ └── modeling_mra.py
│ │ │ │ ├── mt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mt5.py
│ │ │ │ │ ├── modeling_flax_mt5.py
│ │ │ │ │ ├── modeling_mt5.py
│ │ │ │ │ └── modeling_tf_mt5.py
│ │ │ │ ├── musicgen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_musicgen.py
│ │ │ │ │ ├── convert_musicgen_transformers.py
│ │ │ │ │ ├── modeling_musicgen.py
│ │ │ │ │ └── processing_musicgen.py
│ │ │ │ ├── musicgen_melody/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_musicgen_melody.py
│ │ │ │ │ ├── convert_musicgen_melody_transformers.py
│ │ │ │ │ ├── feature_extraction_musicgen_melody.py
│ │ │ │ │ ├── modeling_musicgen_melody.py
│ │ │ │ │ └── processing_musicgen_melody.py
│ │ │ │ ├── mvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mvp.py
│ │ │ │ │ ├── modeling_mvp.py
│ │ │ │ │ ├── tokenization_mvp.py
│ │ │ │ │ └── tokenization_mvp_fast.py
│ │ │ │ ├── nemotron/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nemotron.py
│ │ │ │ │ ├── convert_nemotron_nemo_to_hf.py
│ │ │ │ │ └── modeling_nemotron.py
│ │ │ │ ├── nllb/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_nllb.py
│ │ │ │ │ └── tokenization_nllb_fast.py
│ │ │ │ ├── nllb_moe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nllb_moe.py
│ │ │ │ │ ├── convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_nllb_moe.py
│ │ │ │ ├── nougat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_nougat_to_hf.py
│ │ │ │ │ ├── image_processing_nougat.py
│ │ │ │ │ ├── processing_nougat.py
│ │ │ │ │ └── tokenization_nougat_fast.py
│ │ │ │ ├── nystromformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nystromformer.py
│ │ │ │ │ ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_nystromformer.py
│ │ │ │ ├── olmo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_olmo.py
│ │ │ │ │ ├── convert_olmo_weights_to_hf.py
│ │ │ │ │ └── modeling_olmo.py
│ │ │ │ ├── oneformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_oneformer.py
│ │ │ │ │ ├── convert_to_hf_oneformer.py
│ │ │ │ │ ├── image_processing_oneformer.py
│ │ │ │ │ ├── modeling_oneformer.py
│ │ │ │ │ └── processing_oneformer.py
│ │ │ │ ├── openai/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_openai.py
│ │ │ │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_openai.py
│ │ │ │ │ ├── modeling_tf_openai.py
│ │ │ │ │ ├── tokenization_openai.py
│ │ │ │ │ └── tokenization_openai_fast.py
│ │ │ │ ├── opt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_opt.py
│ │ │ │ │ ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_opt.py
│ │ │ │ │ ├── modeling_opt.py
│ │ │ │ │ └── modeling_tf_opt.py
│ │ │ │ ├── owlv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_owlv2.py
│ │ │ │ │ ├── convert_owlv2_to_hf.py
│ │ │ │ │ ├── image_processing_owlv2.py
│ │ │ │ │ ├── modeling_owlv2.py
│ │ │ │ │ └── processing_owlv2.py
│ │ │ │ ├── owlvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_owlvit.py
│ │ │ │ │ ├── convert_owlvit_original_flax_to_hf.py
│ │ │ │ │ ├── feature_extraction_owlvit.py
│ │ │ │ │ ├── image_processing_owlvit.py
│ │ │ │ │ ├── modeling_owlvit.py
│ │ │ │ │ └── processing_owlvit.py
│ │ │ │ ├── paligemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_paligemma.py
│ │ │ │ │ ├── convert_paligemma_weights_to_hf.py
│ │ │ │ │ ├── modeling_paligemma.py
│ │ │ │ │ └── processing_paligemma.py
│ │ │ │ ├── patchtsmixer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_patchtsmixer.py
│ │ │ │ │ └── modeling_patchtsmixer.py
│ │ │ │ ├── patchtst/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_patchtst.py
│ │ │ │ │ └── modeling_patchtst.py
│ │ │ │ ├── pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pegasus.py
│ │ │ │ │ ├── convert_pegasus_tf_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_pegasus.py
│ │ │ │ │ ├── modeling_pegasus.py
│ │ │ │ │ ├── modeling_tf_pegasus.py
│ │ │ │ │ ├── tokenization_pegasus.py
│ │ │ │ │ └── tokenization_pegasus_fast.py
│ │ │ │ ├── pegasus_x/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pegasus_x.py
│ │ │ │ │ └── modeling_pegasus_x.py
│ │ │ │ ├── perceiver/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_perceiver.py
│ │ │ │ │ ├── convert_perceiver_haiku_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_perceiver.py
│ │ │ │ │ ├── image_processing_perceiver.py
│ │ │ │ │ ├── modeling_perceiver.py
│ │ │ │ │ └── tokenization_perceiver.py
│ │ │ │ ├── persimmon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_persimmon.py
│ │ │ │ │ ├── convert_persimmon_weights_to_hf.py
│ │ │ │ │ └── modeling_persimmon.py
│ │ │ │ ├── phi/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_phi.py
│ │ │ │ │ ├── convert_phi_weights_to_hf.py
│ │ │ │ │ └── modeling_phi.py
│ │ │ │ ├── phi3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_phi3.py
│ │ │ │ │ └── modeling_phi3.py
│ │ │ │ ├── phobert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_phobert.py
│ │ │ │ ├── pix2struct/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pix2struct.py
│ │ │ │ │ ├── convert_pix2struct_original_pytorch_to_hf.py
│ │ │ │ │ ├── image_processing_pix2struct.py
│ │ │ │ │ ├── modeling_pix2struct.py
│ │ │ │ │ └── processing_pix2struct.py
│ │ │ │ ├── plbart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_plbart.py
│ │ │ │ │ ├── convert_plbart_original_checkpoint_to_torch.py
│ │ │ │ │ ├── modeling_plbart.py
│ │ │ │ │ └── tokenization_plbart.py
│ │ │ │ ├── poolformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_poolformer.py
│ │ │ │ │ ├── convert_poolformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_poolformer.py
│ │ │ │ │ ├── image_processing_poolformer.py
│ │ │ │ │ └── modeling_poolformer.py
│ │ │ │ ├── pop2piano/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pop2piano.py
│ │ │ │ │ ├── convert_pop2piano_weights_to_hf.py
│ │ │ │ │ ├── feature_extraction_pop2piano.py
│ │ │ │ │ ├── modeling_pop2piano.py
│ │ │ │ │ ├── processing_pop2piano.py
│ │ │ │ │ └── tokenization_pop2piano.py
│ │ │ │ ├── prophetnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_prophetnet.py
│ │ │ │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_prophetnet.py
│ │ │ │ │ └── tokenization_prophetnet.py
│ │ │ │ ├── pvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pvt.py
│ │ │ │ │ ├── convert_pvt_to_pytorch.py
│ │ │ │ │ ├── image_processing_pvt.py
│ │ │ │ │ └── modeling_pvt.py
│ │ │ │ ├── pvt_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pvt_v2.py
│ │ │ │ │ ├── convert_pvt_v2_to_pytorch.py
│ │ │ │ │ └── modeling_pvt_v2.py
│ │ │ │ ├── qwen2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2.py
│ │ │ │ │ ├── modeling_qwen2.py
│ │ │ │ │ ├── tokenization_qwen2.py
│ │ │ │ │ └── tokenization_qwen2_fast.py
│ │ │ │ ├── qwen2_audio/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2_audio.py
│ │ │ │ │ ├── modeling_qwen2_audio.py
│ │ │ │ │ └── processing_qwen2_audio.py
│ │ │ │ ├── qwen2_moe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2_moe.py
│ │ │ │ │ └── modeling_qwen2_moe.py
│ │ │ │ ├── rag/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rag.py
│ │ │ │ │ ├── modeling_rag.py
│ │ │ │ │ ├── modeling_tf_rag.py
│ │ │ │ │ ├── retrieval_rag.py
│ │ │ │ │ └── tokenization_rag.py
│ │ │ │ ├── recurrent_gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_recurrent_gemma.py
│ │ │ │ │ ├── convert_recurrent_gemma_to_hf.py
│ │ │ │ │ └── modeling_recurrent_gemma.py
│ │ │ │ ├── reformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_reformer.py
│ │ │ │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_reformer.py
│ │ │ │ │ ├── tokenization_reformer.py
│ │ │ │ │ └── tokenization_reformer_fast.py
│ │ │ │ ├── regnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_regnet.py
│ │ │ │ │ ├── convert_regnet_seer_10b_to_pytorch.py
│ │ │ │ │ ├── convert_regnet_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_regnet.py
│ │ │ │ │ ├── modeling_regnet.py
│ │ │ │ │ └── modeling_tf_regnet.py
│ │ │ │ ├── rembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rembert.py
│ │ │ │ │ ├── convert_rembert_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_rembert.py
│ │ │ │ │ ├── modeling_tf_rembert.py
│ │ │ │ │ ├── tokenization_rembert.py
│ │ │ │ │ └── tokenization_rembert_fast.py
│ │ │ │ ├── resnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_resnet.py
│ │ │ │ │ ├── convert_resnet_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_resnet.py
│ │ │ │ │ ├── modeling_resnet.py
│ │ │ │ │ └── modeling_tf_resnet.py
│ │ │ │ ├── roberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roberta.py
│ │ │ │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roberta.py
│ │ │ │ │ ├── modeling_roberta.py
│ │ │ │ │ ├── modeling_tf_roberta.py
│ │ │ │ │ ├── tokenization_roberta.py
│ │ │ │ │ └── tokenization_roberta_fast.py
│ │ │ │ ├── roberta_prelayernorm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roberta_prelayernorm.py
│ │ │ │ │ ├── convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roberta_prelayernorm.py
│ │ │ │ │ ├── modeling_roberta_prelayernorm.py
│ │ │ │ │ └── modeling_tf_roberta_prelayernorm.py
│ │ │ │ ├── roc_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roc_bert.py
│ │ │ │ │ ├── modeling_roc_bert.py
│ │ │ │ │ └── tokenization_roc_bert.py
│ │ │ │ ├── roformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roformer.py
│ │ │ │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roformer.py
│ │ │ │ │ ├── modeling_roformer.py
│ │ │ │ │ ├── modeling_tf_roformer.py
│ │ │ │ │ ├── tokenization_roformer.py
│ │ │ │ │ ├── tokenization_roformer_fast.py
│ │ │ │ │ └── tokenization_utils.py
│ │ │ │ ├── rt_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rt_detr.py
│ │ │ │ │ ├── configuration_rt_detr_resnet.py
│ │ │ │ │ ├── convert_rt_detr_original_pytorch_checkpoint_to_hf.py
│ │ │ │ │ ├── image_processing_rt_detr.py
│ │ │ │ │ ├── modeling_rt_detr.py
│ │ │ │ │ └── modeling_rt_detr_resnet.py
│ │ │ │ ├── rwkv/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rwkv.py
│ │ │ │ │ ├── convert_rwkv_checkpoint_to_hf.py
│ │ │ │ │ └── modeling_rwkv.py
│ │ │ │ ├── sam/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sam.py
│ │ │ │ │ ├── convert_sam_to_hf.py
│ │ │ │ │ ├── image_processing_sam.py
│ │ │ │ │ ├── modeling_sam.py
│ │ │ │ │ ├── modeling_tf_sam.py
│ │ │ │ │ └── processing_sam.py
│ │ │ │ ├── seamless_m4t/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seamless_m4t.py
│ │ │ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ │ │ ├── feature_extraction_seamless_m4t.py
│ │ │ │ │ ├── modeling_seamless_m4t.py
│ │ │ │ │ ├── processing_seamless_m4t.py
│ │ │ │ │ ├── tokenization_seamless_m4t.py
│ │ │ │ │ └── tokenization_seamless_m4t_fast.py
│ │ │ │ ├── seamless_m4t_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seamless_m4t_v2.py
│ │ │ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ │ │ └── modeling_seamless_m4t_v2.py
│ │ │ │ ├── segformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_segformer.py
│ │ │ │ │ ├── convert_segformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_segformer.py
│ │ │ │ │ ├── image_processing_segformer.py
│ │ │ │ │ ├── modeling_segformer.py
│ │ │ │ │ └── modeling_tf_segformer.py
│ │ │ │ ├── seggpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seggpt.py
│ │ │ │ │ ├── convert_seggpt_to_hf.py
│ │ │ │ │ ├── image_processing_seggpt.py
│ │ │ │ │ └── modeling_seggpt.py
│ │ │ │ ├── sew/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sew.py
│ │ │ │ │ ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_sew.py
│ │ │ │ ├── sew_d/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sew_d.py
│ │ │ │ │ ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_sew_d.py
│ │ │ │ ├── siglip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_siglip.py
│ │ │ │ │ ├── convert_siglip_to_hf.py
│ │ │ │ │ ├── image_processing_siglip.py
│ │ │ │ │ ├── modeling_siglip.py
│ │ │ │ │ ├── processing_siglip.py
│ │ │ │ │ └── tokenization_siglip.py
│ │ │ │ ├── speech_encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speech_encoder_decoder.py
│ │ │ │ │ ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ │ │ ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_speech_encoder_decoder.py
│ │ │ │ │ └── modeling_speech_encoder_decoder.py
│ │ │ │ ├── speech_to_text/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speech_to_text.py
│ │ │ │ │ ├── convert_s2t_fairseq_to_tfms.py
│ │ │ │ │ ├── feature_extraction_speech_to_text.py
│ │ │ │ │ ├── modeling_speech_to_text.py
│ │ │ │ │ ├── modeling_tf_speech_to_text.py
│ │ │ │ │ ├── processing_speech_to_text.py
│ │ │ │ │ └── tokenization_speech_to_text.py
│ │ │ │ ├── speecht5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speecht5.py
│ │ │ │ │ ├── convert_hifigan.py
│ │ │ │ │ ├── convert_speecht5_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_speecht5.py
│ │ │ │ │ ├── modeling_speecht5.py
│ │ │ │ │ ├── number_normalizer.py
│ │ │ │ │ ├── processing_speecht5.py
│ │ │ │ │ └── tokenization_speecht5.py
│ │ │ │ ├── splinter/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_splinter.py
│ │ │ │ │ ├── modeling_splinter.py
│ │ │ │ │ ├── tokenization_splinter.py
│ │ │ │ │ └── tokenization_splinter_fast.py
│ │ │ │ ├── squeezebert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_squeezebert.py
│ │ │ │ │ ├── modeling_squeezebert.py
│ │ │ │ │ ├── tokenization_squeezebert.py
│ │ │ │ │ └── tokenization_squeezebert_fast.py
│ │ │ │ ├── stablelm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_stablelm.py
│ │ │ │ │ └── modeling_stablelm.py
│ │ │ │ ├── starcoder2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_starcoder2.py
│ │ │ │ │ └── modeling_starcoder2.py
│ │ │ │ ├── superpoint/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_superpoint.py
│ │ │ │ │ ├── convert_superpoint_to_pytorch.py
│ │ │ │ │ ├── image_processing_superpoint.py
│ │ │ │ │ └── modeling_superpoint.py
│ │ │ │ ├── swiftformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swiftformer.py
│ │ │ │ │ ├── convert_swiftformer_original_to_hf.py
│ │ │ │ │ ├── modeling_swiftformer.py
│ │ │ │ │ └── modeling_tf_swiftformer.py
│ │ │ │ ├── swin/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swin.py
│ │ │ │ │ ├── convert_swin_simmim_to_pytorch.py
│ │ │ │ │ ├── convert_swin_timm_to_pytorch.py
│ │ │ │ │ ├── modeling_swin.py
│ │ │ │ │ └── modeling_tf_swin.py
│ │ │ │ ├── swin2sr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swin2sr.py
│ │ │ │ │ ├── convert_swin2sr_original_to_pytorch.py
│ │ │ │ │ ├── image_processing_swin2sr.py
│ │ │ │ │ └── modeling_swin2sr.py
│ │ │ │ ├── swinv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swinv2.py
│ │ │ │ │ ├── convert_swinv2_timm_to_pytorch.py
│ │ │ │ │ └── modeling_swinv2.py
│ │ │ │ ├── switch_transformers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_switch_transformers.py
│ │ │ │ │ ├── convert_big_switch.py
│ │ │ │ │ ├── convert_switch_transformers_original_flax_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_switch_transformers.py
│ │ │ │ ├── t5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_t5.py
│ │ │ │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_t5x_checkpoint_to_flax.py
│ │ │ │ │ ├── convert_t5x_checkpoint_to_pytorch.py
│ │ │ │ │ ├── download_from_gcp.sh
│ │ │ │ │ ├── modeling_flax_t5.py
│ │ │ │ │ ├── modeling_t5.py
│ │ │ │ │ ├── modeling_tf_t5.py
│ │ │ │ │ ├── tokenization_t5.py
│ │ │ │ │ └── tokenization_t5_fast.py
│ │ │ │ ├── table_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_table_transformer.py
│ │ │ │ │ ├── convert_table_transformer_to_hf.py
│ │ │ │ │ ├── convert_table_transformer_to_hf_no_timm.py
│ │ │ │ │ └── modeling_table_transformer.py
│ │ │ │ ├── tapas/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_tapas.py
│ │ │ │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tapas.py
│ │ │ │ │ ├── modeling_tf_tapas.py
│ │ │ │ │ └── tokenization_tapas.py
│ │ │ │ ├── time_series_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_time_series_transformer.py
│ │ │ │ │ └── modeling_time_series_transformer.py
│ │ │ │ ├── timesformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_timesformer.py
│ │ │ │ │ ├── convert_timesformer_to_pytorch.py
│ │ │ │ │ └── modeling_timesformer.py
│ │ │ │ ├── timm_backbone/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_timm_backbone.py
│ │ │ │ │ └── modeling_timm_backbone.py
│ │ │ │ ├── trocr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_trocr.py
│ │ │ │ │ ├── convert_trocr_unilm_to_pytorch.py
│ │ │ │ │ ├── modeling_trocr.py
│ │ │ │ │ └── processing_trocr.py
│ │ │ │ ├── tvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_tvp.py
│ │ │ │ │ ├── image_processing_tvp.py
│ │ │ │ │ ├── modeling_tvp.py
│ │ │ │ │ └── processing_tvp.py
│ │ │ │ ├── udop/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_udop.py
│ │ │ │ │ ├── convert_udop_to_hf.py
│ │ │ │ │ ├── modeling_udop.py
│ │ │ │ │ ├── processing_udop.py
│ │ │ │ │ ├── tokenization_udop.py
│ │ │ │ │ └── tokenization_udop_fast.py
│ │ │ │ ├── umt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_umt5.py
│ │ │ │ │ ├── convert_umt5_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_umt5.py
│ │ │ │ ├── unispeech/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_unispeech.py
│ │ │ │ │ ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_unispeech.py
│ │ │ │ ├── unispeech_sat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_unispeech_sat.py
│ │ │ │ │ ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_unispeech_sat.py
│ │ │ │ ├── univnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_univnet.py
│ │ │ │ │ ├── convert_univnet.py
│ │ │ │ │ ├── feature_extraction_univnet.py
│ │ │ │ │ └── modeling_univnet.py
│ │ │ │ ├── upernet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_upernet.py
│ │ │ │ │ ├── convert_convnext_upernet_to_pytorch.py
│ │ │ │ │ ├── convert_swin_upernet_to_pytorch.py
│ │ │ │ │ └── modeling_upernet.py
│ │ │ │ ├── video_llava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_video_llava.py
│ │ │ │ │ ├── convert_video_llava_weights_to_hf.py
│ │ │ │ │ ├── image_processing_video_llava.py
│ │ │ │ │ ├── modeling_video_llava.py
│ │ │ │ │ └── processing_video_llava.py
│ │ │ │ ├── videomae/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_videomae.py
│ │ │ │ │ ├── convert_videomae_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_videomae.py
│ │ │ │ │ ├── image_processing_videomae.py
│ │ │ │ │ └── modeling_videomae.py
│ │ │ │ ├── vilt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vilt.py
│ │ │ │ │ ├── convert_vilt_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_vilt.py
│ │ │ │ │ ├── image_processing_vilt.py
│ │ │ │ │ ├── modeling_vilt.py
│ │ │ │ │ └── processing_vilt.py
│ │ │ │ ├── vipllava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vipllava.py
│ │ │ │ │ ├── convert_vipllava_weights_to_hf.py
│ │ │ │ │ └── modeling_vipllava.py
│ │ │ │ ├── vision_encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vision_encoder_decoder.py
│ │ │ │ │ ├── modeling_flax_vision_encoder_decoder.py
│ │ │ │ │ ├── modeling_tf_vision_encoder_decoder.py
│ │ │ │ │ └── modeling_vision_encoder_decoder.py
│ │ │ │ ├── vision_text_dual_encoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_flax_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_tf_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_vision_text_dual_encoder.py
│ │ │ │ │ └── processing_vision_text_dual_encoder.py
│ │ │ │ ├── visual_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_visual_bert.py
│ │ │ │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_visual_bert.py
│ │ │ │ ├── vit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit.py
│ │ │ │ │ ├── convert_dino_to_pytorch.py
│ │ │ │ │ ├── convert_vit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_vit.py
│ │ │ │ │ ├── image_processing_vit.py
│ │ │ │ │ ├── image_processing_vit_fast.py
│ │ │ │ │ ├── modeling_flax_vit.py
│ │ │ │ │ ├── modeling_tf_vit.py
│ │ │ │ │ └── modeling_vit.py
│ │ │ │ ├── vit_mae/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit_mae.py
│ │ │ │ │ ├── convert_vit_mae_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_vit_mae.py
│ │ │ │ │ └── modeling_vit_mae.py
│ │ │ │ ├── vit_msn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit_msn.py
│ │ │ │ │ ├── convert_msn_to_pytorch.py
│ │ │ │ │ └── modeling_vit_msn.py
│ │ │ │ ├── vitdet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vitdet.py
│ │ │ │ │ └── modeling_vitdet.py
│ │ │ │ ├── vitmatte/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vitmatte.py
│ │ │ │ │ ├── convert_vitmatte_to_hf.py
│ │ │ │ │ ├── image_processing_vitmatte.py
│ │ │ │ │ └── modeling_vitmatte.py
│ │ │ │ ├── vits/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vits.py
│ │ │ │ │ ├── convert_original_checkpoint.py
│ │ │ │ │ ├── modeling_vits.py
│ │ │ │ │ └── tokenization_vits.py
│ │ │ │ ├── vivit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vivit.py
│ │ │ │ │ ├── convert_vivit_flax_to_pytorch.py
│ │ │ │ │ ├── image_processing_vivit.py
│ │ │ │ │ └── modeling_vivit.py
│ │ │ │ ├── wav2vec2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2.py
│ │ │ │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_wav2vec2.py
│ │ │ │ │ ├── modeling_flax_wav2vec2.py
│ │ │ │ │ ├── modeling_tf_wav2vec2.py
│ │ │ │ │ ├── modeling_wav2vec2.py
│ │ │ │ │ ├── processing_wav2vec2.py
│ │ │ │ │ └── tokenization_wav2vec2.py
│ │ │ │ ├── wav2vec2_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2_bert.py
│ │ │ │ │ ├── convert_wav2vec2_seamless_checkpoint.py
│ │ │ │ │ ├── modeling_wav2vec2_bert.py
│ │ │ │ │ └── processing_wav2vec2_bert.py
│ │ │ │ ├── wav2vec2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2_conformer.py
│ │ │ │ │ ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_wav2vec2_conformer.py
│ │ │ │ ├── wav2vec2_phoneme/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_wav2vec2_phoneme.py
│ │ │ │ ├── wav2vec2_with_lm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── processing_wav2vec2_with_lm.py
│ │ │ │ ├── wavlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wavlm.py
│ │ │ │ │ ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_wavlm.py
│ │ │ │ ├── whisper/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_whisper.py
│ │ │ │ │ ├── convert_openai_to_hf.py
│ │ │ │ │ ├── english_normalizer.py
│ │ │ │ │ ├── feature_extraction_whisper.py
│ │ │ │ │ ├── generation_whisper.py
│ │ │ │ │ ├── modeling_flax_whisper.py
│ │ │ │ │ ├── modeling_tf_whisper.py
│ │ │ │ │ ├── modeling_whisper.py
│ │ │ │ │ ├── processing_whisper.py
│ │ │ │ │ ├── tokenization_whisper.py
│ │ │ │ │ └── tokenization_whisper_fast.py
│ │ │ │ ├── x_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_x_clip.py
│ │ │ │ │ ├── convert_x_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── modeling_x_clip.py
│ │ │ │ │ └── processing_x_clip.py
│ │ │ │ ├── xglm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xglm.py
│ │ │ │ │ ├── convert_xglm_original_ckpt_to_trfms.py
│ │ │ │ │ ├── modeling_flax_xglm.py
│ │ │ │ │ ├── modeling_tf_xglm.py
│ │ │ │ │ ├── modeling_xglm.py
│ │ │ │ │ ├── tokenization_xglm.py
│ │ │ │ │ └── tokenization_xglm_fast.py
│ │ │ │ ├── xlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm.py
│ │ │ │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_xlm.py
│ │ │ │ │ ├── modeling_xlm.py
│ │ │ │ │ └── tokenization_xlm.py
│ │ │ │ ├── xlm_roberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_roberta.py
│ │ │ │ │ ├── modeling_flax_xlm_roberta.py
│ │ │ │ │ ├── modeling_tf_xlm_roberta.py
│ │ │ │ │ ├── modeling_xlm_roberta.py
│ │ │ │ │ ├── tokenization_xlm_roberta.py
│ │ │ │ │ └── tokenization_xlm_roberta_fast.py
│ │ │ │ ├── xlm_roberta_xl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_roberta_xl.py
│ │ │ │ │ ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_xlm_roberta_xl.py
│ │ │ │ ├── xlnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlnet.py
│ │ │ │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_xlnet.py
│ │ │ │ │ ├── modeling_xlnet.py
│ │ │ │ │ ├── tokenization_xlnet.py
│ │ │ │ │ └── tokenization_xlnet_fast.py
│ │ │ │ ├── xmod/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xmod.py
│ │ │ │ │ ├── convert_xmod_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_xmod.py
│ │ │ │ ├── yolos/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_yolos.py
│ │ │ │ │ ├── convert_yolos_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_yolos.py
│ │ │ │ │ ├── image_processing_yolos.py
│ │ │ │ │ └── modeling_yolos.py
│ │ │ │ ├── yoso/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_yoso.py
│ │ │ │ │ ├── convert_yoso_pytorch_to_pytorch.py
│ │ │ │ │ └── modeling_yoso.py
│ │ │ │ └── zoedepth/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_zoedepth.py
│ │ │ │ ├── convert_zoedepth_to_hf.py
│ │ │ │ ├── image_processing_zoedepth.py
│ │ │ │ └── modeling_zoedepth.py
│ │ │ ├── onnx/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __main__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── convert.py
│ │ │ │ ├── features.py
│ │ │ │ └── utils.py
│ │ │ ├── optimization.py
│ │ │ ├── optimization_tf.py
│ │ │ ├── pipelines/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── audio_classification.py
│ │ │ │ ├── audio_utils.py
│ │ │ │ ├── automatic_speech_recognition.py
│ │ │ │ ├── base.py
│ │ │ │ ├── depth_estimation.py
│ │ │ │ ├── document_question_answering.py
│ │ │ │ ├── feature_extraction.py
│ │ │ │ ├── fill_mask.py
│ │ │ │ ├── image_classification.py
│ │ │ │ ├── image_feature_extraction.py
│ │ │ │ ├── image_segmentation.py
│ │ │ │ ├── image_to_image.py
│ │ │ │ ├── image_to_text.py
│ │ │ │ ├── mask_generation.py
│ │ │ │ ├── object_detection.py
│ │ │ │ ├── pt_utils.py
│ │ │ │ ├── question_answering.py
│ │ │ │ ├── table_question_answering.py
│ │ │ │ ├── text2text_generation.py
│ │ │ │ ├── text_classification.py
│ │ │ │ ├── text_generation.py
│ │ │ │ ├── text_to_audio.py
│ │ │ │ ├── token_classification.py
│ │ │ │ ├── video_classification.py
│ │ │ │ ├── visual_question_answering.py
│ │ │ │ ├── zero_shot_audio_classification.py
│ │ │ │ ├── zero_shot_classification.py
│ │ │ │ ├── zero_shot_image_classification.py
│ │ │ │ └── zero_shot_object_detection.py
│ │ │ ├── processing_utils.py
│ │ │ ├── pytorch_utils.py
│ │ │ ├── quantizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto.py
│ │ │ │ ├── base.py
│ │ │ │ ├── quantizer_aqlm.py
│ │ │ │ ├── quantizer_awq.py
│ │ │ │ ├── quantizer_bnb_4bit.py
│ │ │ │ ├── quantizer_bnb_8bit.py
│ │ │ │ ├── quantizer_eetq.py
│ │ │ │ ├── quantizer_fbgemm_fp8.py
│ │ │ │ ├── quantizer_gptq.py
│ │ │ │ ├── quantizer_hqq.py
│ │ │ │ ├── quantizer_quanto.py
│ │ │ │ ├── quantizer_torchao.py
│ │ │ │ └── quantizers_utils.py
│ │ │ ├── safetensors_conversion.py
│ │ │ ├── sagemaker/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer_sm.py
│ │ │ │ └── training_args_sm.py
│ │ │ ├── testing_utils.py
│ │ │ ├── tf_utils.py
│ │ │ ├── time_series_utils.py
│ │ │ ├── tokenization_utils.py
│ │ │ ├── tokenization_utils_base.py
│ │ │ ├── tokenization_utils_fast.py
│ │ │ ├── trainer.py
│ │ │ ├── trainer_callback.py
│ │ │ ├── trainer_pt_utils.py
│ │ │ ├── trainer_seq2seq.py
│ │ │ ├── trainer_utils.py
│ │ │ ├── training_args.py
│ │ │ ├── training_args_seq2seq.py
│ │ │ ├── training_args_tf.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── backbone_utils.py
│ │ │ ├── bitsandbytes.py
│ │ │ ├── chat_template_utils.py
│ │ │ ├── constants.py
│ │ │ ├── deprecation.py
│ │ │ ├── doc.py
│ │ │ ├── dummy_detectron2_objects.py
│ │ │ ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
│ │ │ ├── dummy_flax_objects.py
│ │ │ ├── dummy_keras_nlp_objects.py
│ │ │ ├── dummy_music_objects.py
│ │ │ ├── dummy_pt_objects.py
│ │ │ ├── dummy_sentencepiece_and_tokenizers_objects.py
│ │ │ ├── dummy_sentencepiece_objects.py
│ │ │ ├── dummy_speech_objects.py
│ │ │ ├── dummy_tensorflow_text_objects.py
│ │ │ ├── dummy_tf_objects.py
│ │ │ ├── dummy_tokenizers_objects.py
│ │ │ ├── dummy_torchaudio_objects.py
│ │ │ ├── dummy_torchvision_objects.py
│ │ │ ├── dummy_vision_objects.py
│ │ │ ├── fx.py
│ │ │ ├── generic.py
│ │ │ ├── hp_naming.py
│ │ │ ├── hub.py
│ │ │ ├── import_utils.py
│ │ │ ├── logging.py
│ │ │ ├── model_parallel_utils.py
│ │ │ ├── notebook.py
│ │ │ ├── peft_utils.py
│ │ │ ├── quantization_config.py
│ │ │ ├── sentencepiece_model_pb2.py
│ │ │ ├── sentencepiece_model_pb2_new.py
│ │ │ └── versions.py
│ │ ├── templates/
│ │ │ ├── adding_a_missing_tokenization_test/
│ │ │ │ ├── README.md
│ │ │ │ ├── cookiecutter-template-{{cookiecutter.modelname}}/
│ │ │ │ │ └── test_tokenization_{{cookiecutter.lowercase_modelname}}.py
│ │ │ │ └── cookiecutter.json
│ │ │ ├── adding_a_new_example_script/
│ │ │ │ ├── README.md
│ │ │ │ ├── cookiecutter.json
│ │ │ │ └── {{cookiecutter.directory_name}}/
│ │ │ │ └── run_{{cookiecutter.example_shortcut}}.py
│ │ │ └── adding_a_new_model/
│ │ │ ├── ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md
│ │ │ ├── README.md
│ │ │ └── open_model_proposals/
│ │ │ ├── ADD_BIG_BIRD.md
│ │ │ └── README.md
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── agents/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_agent_types.py
│ │ │ │ ├── test_agents.py
│ │ │ │ ├── test_document_question_answering.py
│ │ │ │ ├── test_final_answer.py
│ │ │ │ ├── test_image_question_answering.py
│ │ │ │ ├── test_python_interpreter.py
│ │ │ │ ├── test_speech_to_text.py
│ │ │ │ ├── test_text_to_speech.py
│ │ │ │ ├── test_tools_common.py
│ │ │ │ └── test_translation.py
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_benchmark.py
│ │ │ │ └── test_benchmark_tf.py
│ │ │ ├── bettertransformer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_integration.py
│ │ │ ├── deepspeed/
│ │ │ │ ├── ds_config_zero2.json
│ │ │ │ ├── ds_config_zero3.json
│ │ │ │ ├── test_deepspeed.py
│ │ │ │ ├── test_model_zoo.py
│ │ │ │ └── vit_feature_extractor.json
│ │ │ ├── extended/
│ │ │ │ └── test_trainer_ext.py
│ │ │ ├── fixtures/
│ │ │ │ ├── add_distilbert_like_config.json
│ │ │ │ ├── dummy-config.json
│ │ │ │ ├── dummy_feature_extractor_config.json
│ │ │ │ ├── empty.txt
│ │ │ │ ├── input.txt
│ │ │ │ ├── merges.txt
│ │ │ │ ├── preprocessor_config.json
│ │ │ │ ├── sample_text.txt
│ │ │ │ ├── sample_text_no_unicode.txt
│ │ │ │ ├── spiece.model
│ │ │ │ ├── test_entity_vocab.json
│ │ │ │ ├── test_sentencepiece.model
│ │ │ │ ├── test_sentencepiece_bpe.model
│ │ │ │ ├── test_sentencepiece_bpe_char.model
│ │ │ │ ├── test_sentencepiece_no_bos.model
│ │ │ │ ├── test_sentencepiece_with_bytefallback.model
│ │ │ │ ├── tests_samples/
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── COCO/
│ │ │ │ │ │ ├── coco_annotations.txt
│ │ │ │ │ │ └── coco_panoptic_annotations.txt
│ │ │ │ │ ├── GermEval/
│ │ │ │ │ │ ├── dev.txt
│ │ │ │ │ │ ├── labels.txt
│ │ │ │ │ │ └── train.txt
│ │ │ │ │ ├── MRPC/
│ │ │ │ │ │ ├── dev.csv
│ │ │ │ │ │ ├── dev.tsv
│ │ │ │ │ │ ├── train.csv
│ │ │ │ │ │ └── train.tsv
│ │ │ │ │ ├── SQUAD/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── STS-B/
│ │ │ │ │ │ ├── dev.tsv
│ │ │ │ │ │ └── train.tsv
│ │ │ │ │ ├── conll/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── swag/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── wiki_text/
│ │ │ │ │ │ └── wiki_00
│ │ │ │ │ ├── wmt16/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── wmt_en_ro/
│ │ │ │ │ │ ├── test.json
│ │ │ │ │ │ ├── train.json
│ │ │ │ │ │ └── val.json
│ │ │ │ │ └── xsum/
│ │ │ │ │ └── sample.json
│ │ │ │ ├── vocab.json
│ │ │ │ └── vocab.txt
│ │ │ ├── fsdp/
│ │ │ │ └── test_fsdp.py
│ │ │ ├── generation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_beam_constraints.py
│ │ │ │ ├── test_beam_search.py
│ │ │ │ ├── test_configuration_utils.py
│ │ │ │ ├── test_flax_logits_process.py
│ │ │ │ ├── test_flax_utils.py
│ │ │ │ ├── test_framework_agnostic.py
│ │ │ │ ├── test_logits_process.py
│ │ │ │ ├── test_stopping_criteria.py
│ │ │ │ ├── test_streamers.py
│ │ │ │ ├── test_tf_logits_process.py
│ │ │ │ ├── test_tf_utils.py
│ │ │ │ └── test_utils.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── albert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_albert.py
│ │ │ │ │ ├── test_modeling_flax_albert.py
│ │ │ │ │ ├── test_modeling_tf_albert.py
│ │ │ │ │ └── test_tokenization_albert.py
│ │ │ │ ├── align/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_align.py
│ │ │ │ │ └── test_processor_align.py
│ │ │ │ ├── altclip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_altclip.py
│ │ │ │ ├── audio_spectrogram_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_audio_spectrogram_transformer.py
│ │ │ │ │ └── test_modeling_audio_spectrogram_transformer.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_configuration_auto.py
│ │ │ │ │ ├── test_feature_extraction_auto.py
│ │ │ │ │ ├── test_image_processing_auto.py
│ │ │ │ │ ├── test_modeling_auto.py
│ │ │ │ │ ├── test_modeling_flax_auto.py
│ │ │ │ │ ├── test_modeling_tf_auto.py
│ │ │ │ │ ├── test_modeling_tf_pytorch.py
│ │ │ │ │ ├── test_processor_auto.py
│ │ │ │ │ └── test_tokenization_auto.py
│ │ │ │ ├── autoformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_autoformer.py
│ │ │ │ ├── bark/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bark.py
│ │ │ │ │ └── test_processor_bark.py
│ │ │ │ ├── bart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bart.py
│ │ │ │ │ ├── test_modeling_flax_bart.py
│ │ │ │ │ ├── test_modeling_tf_bart.py
│ │ │ │ │ └── test_tokenization_bart.py
│ │ │ │ ├── barthez/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_barthez.py
│ │ │ │ ├── bartpho/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bartpho.py
│ │ │ │ ├── beit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_beit.py
│ │ │ │ │ ├── test_modeling_beit.py
│ │ │ │ │ └── test_modeling_flax_beit.py
│ │ │ │ ├── bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bert.py
│ │ │ │ │ ├── test_modeling_flax_bert.py
│ │ │ │ │ ├── test_modeling_tf_bert.py
│ │ │ │ │ ├── test_tokenization_bert.py
│ │ │ │ │ └── test_tokenization_bert_tf.py
│ │ │ │ ├── bert_generation/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bert_generation.py
│ │ │ │ │ └── test_tokenization_bert_generation.py
│ │ │ │ ├── bert_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bert_japanese.py
│ │ │ │ ├── bertweet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bertweet.py
│ │ │ │ ├── big_bird/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_big_bird.py
│ │ │ │ │ ├── test_modeling_flax_big_bird.py
│ │ │ │ │ └── test_tokenization_big_bird.py
│ │ │ │ ├── bigbird_pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bigbird_pegasus.py
│ │ │ │ ├── biogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_biogpt.py
│ │ │ │ │ └── test_tokenization_biogpt.py
│ │ │ │ ├── bit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bit.py
│ │ │ │ ├── blenderbot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blenderbot.py
│ │ │ │ │ ├── test_modeling_flax_blenderbot.py
│ │ │ │ │ ├── test_modeling_tf_blenderbot.py
│ │ │ │ │ └── test_tokenization_blenderbot.py
│ │ │ │ ├── blenderbot_small/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blenderbot_small.py
│ │ │ │ │ ├── test_modeling_flax_blenderbot_small.py
│ │ │ │ │ ├── test_modeling_tf_blenderbot_small.py
│ │ │ │ │ └── test_tokenization_blenderbot_small.py
│ │ │ │ ├── blip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_blip.py
│ │ │ │ │ ├── test_modeling_blip.py
│ │ │ │ │ ├── test_modeling_blip_text.py
│ │ │ │ │ ├── test_modeling_tf_blip.py
│ │ │ │ │ ├── test_modeling_tf_blip_text.py
│ │ │ │ │ └── test_processor_blip.py
│ │ │ │ ├── blip_2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blip_2.py
│ │ │ │ │ └── test_processor_blip_2.py
│ │ │ │ ├── bloom/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bloom.py
│ │ │ │ │ ├── test_modeling_flax_bloom.py
│ │ │ │ │ └── test_tokenization_bloom.py
│ │ │ │ ├── bridgetower/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_bridgetower.py
│ │ │ │ │ └── test_modeling_bridgetower.py
│ │ │ │ ├── bros/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bros.py
│ │ │ │ ├── byt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_byt5.py
│ │ │ │ ├── camembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_camembert.py
│ │ │ │ │ ├── test_modeling_tf_camembert.py
│ │ │ │ │ └── test_tokenization_camembert.py
│ │ │ │ ├── canine/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_canine.py
│ │ │ │ │ └── test_tokenization_canine.py
│ │ │ │ ├── chameleon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_chameleon.py
│ │ │ │ │ └── test_modeling_chameleon.py
│ │ │ │ ├── chinese_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_chinese_clip.py
│ │ │ │ │ ├── test_modeling_chinese_clip.py
│ │ │ │ │ └── test_processor_chinese_clip.py
│ │ │ │ ├── clap/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_clap.py
│ │ │ │ │ ├── test_modeling_clap.py
│ │ │ │ │ └── test_processor_clap.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_clip.py
│ │ │ │ │ ├── test_modeling_clip.py
│ │ │ │ │ ├── test_modeling_flax_clip.py
│ │ │ │ │ ├── test_modeling_tf_clip.py
│ │ │ │ │ ├── test_processor_clip.py
│ │ │ │ │ └── test_tokenization_clip.py
│ │ │ │ ├── clipseg/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_clipseg.py
│ │ │ │ │ └── test_processor_clipseg.py
│ │ │ │ ├── clvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_clvp.py
│ │ │ │ │ ├── test_modeling_clvp.py
│ │ │ │ │ ├── test_processor_clvp.py
│ │ │ │ │ └── test_tokenization_clvp.py
│ │ │ │ ├── code_llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_code_llama.py
│ │ │ │ ├── codegen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_codegen.py
│ │ │ │ │ └── test_tokenization_codegen.py
│ │ │ │ ├── cohere/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cohere.py
│ │ │ │ │ └── test_tokenization_cohere.py
│ │ │ │ ├── conditional_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_conditional_detr.py
│ │ │ │ │ └── test_modeling_conditional_detr.py
│ │ │ │ ├── convbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_convbert.py
│ │ │ │ │ └── test_modeling_tf_convbert.py
│ │ │ │ ├── convnext/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_convnext.py
│ │ │ │ │ ├── test_modeling_convnext.py
│ │ │ │ │ └── test_modeling_tf_convnext.py
│ │ │ │ ├── convnextv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_convnextv2.py
│ │ │ │ │ └── test_modeling_tf_convnextv2.py
│ │ │ │ ├── cpm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_cpm.py
│ │ │ │ ├── cpmant/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cpmant.py
│ │ │ │ │ └── test_tokenization_cpmant.py
│ │ │ │ ├── ctrl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_ctrl.py
│ │ │ │ │ ├── test_modeling_tf_ctrl.py
│ │ │ │ │ └── test_tokenization_ctrl.py
│ │ │ │ ├── cvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cvt.py
│ │ │ │ │ └── test_modeling_tf_cvt.py
│ │ │ │ ├── dac/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_dac.py
│ │ │ │ │ └── test_modeling_dac.py
│ │ │ │ ├── data2vec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_data2vec_audio.py
│ │ │ │ │ ├── test_modeling_data2vec_text.py
│ │ │ │ │ ├── test_modeling_data2vec_vision.py
│ │ │ │ │ └── test_modeling_tf_data2vec_vision.py
│ │ │ │ ├── dbrx/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dbrx.py
│ │ │ │ ├── deberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_deberta.py
│ │ │ │ │ ├── test_modeling_tf_deberta.py
│ │ │ │ │ └── test_tokenization_deberta.py
│ │ │ │ ├── deberta_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_deberta_v2.py
│ │ │ │ │ ├── test_modeling_tf_deberta_v2.py
│ │ │ │ │ └── test_tokenization_deberta_v2.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_decision_transformer.py
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_deformable_detr.py
│ │ │ │ │ └── test_modeling_deformable_detr.py
│ │ │ │ ├── deit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_deit.py
│ │ │ │ │ ├── test_modeling_deit.py
│ │ │ │ │ └── test_modeling_tf_deit.py
│ │ │ │ ├── depth_anything/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_depth_anything.py
│ │ │ │ ├── detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_detr.py
│ │ │ │ │ └── test_modeling_detr.py
│ │ │ │ ├── dinat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dinat.py
│ │ │ │ ├── dinov2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_dinov2.py
│ │ │ │ │ └── test_modeling_flax_dinov2.py
│ │ │ │ ├── distilbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_distilbert.py
│ │ │ │ │ ├── test_modeling_flax_distilbert.py
│ │ │ │ │ ├── test_modeling_tf_distilbert.py
│ │ │ │ │ └── test_tokenization_distilbert.py
│ │ │ │ ├── dit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dit.py
│ │ │ │ ├── donut/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_donut.py
│ │ │ │ │ ├── test_modeling_donut_swin.py
│ │ │ │ │ └── test_processing_donut.py
│ │ │ │ ├── dpr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_dpr.py
│ │ │ │ │ ├── test_modeling_tf_dpr.py
│ │ │ │ │ └── test_tokenization_dpr.py
│ │ │ │ ├── dpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_dpt.py
│ │ │ │ │ ├── test_modeling_dpt.py
│ │ │ │ │ ├── test_modeling_dpt_auto_backbone.py
│ │ │ │ │ └── test_modeling_dpt_hybrid.py
│ │ │ │ ├── efficientnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_efficientnet.py
│ │ │ │ │ └── test_modeling_efficientnet.py
│ │ │ │ ├── electra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_electra.py
│ │ │ │ │ ├── test_modeling_flax_electra.py
│ │ │ │ │ ├── test_modeling_tf_electra.py
│ │ │ │ │ └── test_tokenization_electra.py
│ │ │ │ ├── encodec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_encodec.py
│ │ │ │ │ └── test_modeling_encodec.py
│ │ │ │ ├── encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_encoder_decoder.py
│ │ │ │ │ ├── test_modeling_flax_encoder_decoder.py
│ │ │ │ │ └── test_modeling_tf_encoder_decoder.py
│ │ │ │ ├── ernie/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_ernie.py
│ │ │ │ ├── esm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_esm.py
│ │ │ │ │ ├── test_modeling_esmfold.py
│ │ │ │ │ ├── test_modeling_tf_esm.py
│ │ │ │ │ └── test_tokenization_esm.py
│ │ │ │ ├── falcon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_falcon.py
│ │ │ │ ├── falcon_mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_falcon_mamba.py
│ │ │ │ ├── fastspeech2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fastspeech2_conformer.py
│ │ │ │ │ └── test_tokenization_fastspeech2_conformer.py
│ │ │ │ ├── flaubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flaubert.py
│ │ │ │ │ ├── test_modeling_tf_flaubert.py
│ │ │ │ │ └── test_tokenization_flaubert.py
│ │ │ │ ├── flava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_flava.py
│ │ │ │ │ ├── test_modeling_flava.py
│ │ │ │ │ └── test_processor_flava.py
│ │ │ │ ├── fnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fnet.py
│ │ │ │ │ └── test_tokenization_fnet.py
│ │ │ │ ├── focalnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_focalnet.py
│ │ │ │ ├── fsmt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fsmt.py
│ │ │ │ │ └── test_tokenization_fsmt.py
│ │ │ │ ├── funnel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_funnel.py
│ │ │ │ │ ├── test_modeling_tf_funnel.py
│ │ │ │ │ └── test_tokenization_funnel.py
│ │ │ │ ├── fuyu/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_fuyu.py
│ │ │ │ │ ├── test_modeling_fuyu.py
│ │ │ │ │ └── test_processing_fuyu.py
│ │ │ │ ├── gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gemma.py
│ │ │ │ │ ├── test_modeling_gemma.py
│ │ │ │ │ └── test_tokenization_gemma.py
│ │ │ │ ├── gemma2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gemma2.py
│ │ │ │ ├── git/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_git.py
│ │ │ │ │ └── test_processor_git.py
│ │ │ │ ├── glpn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_glpn.py
│ │ │ │ │ └── test_modeling_glpn.py
│ │ │ │ ├── gpt2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gpt2.py
│ │ │ │ │ ├── test_modeling_gpt2.py
│ │ │ │ │ ├── test_modeling_tf_gpt2.py
│ │ │ │ │ ├── test_tokenization_gpt2.py
│ │ │ │ │ └── test_tokenization_gpt2_tf.py
│ │ │ │ ├── gpt_bigcode/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gpt_bigcode.py
│ │ │ │ ├── gpt_neo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gpt_neo.py
│ │ │ │ │ └── test_modeling_gpt_neo.py
│ │ │ │ ├── gpt_neox/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gpt_neox.py
│ │ │ │ ├── gpt_neox_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_gpt_neox_japanese.py
│ │ │ │ │ └── test_tokenization_gpt_neox_japanese.py
│ │ │ │ ├── gpt_sw3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_gpt_sw3.py
│ │ │ │ ├── gptj/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gptj.py
│ │ │ │ │ ├── test_modeling_gptj.py
│ │ │ │ │ └── test_modeling_tf_gptj.py
│ │ │ │ ├── grounding_dino/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_grounding_dino.py
│ │ │ │ │ ├── test_modeling_grounding_dino.py
│ │ │ │ │ └── test_processor_grounding_dino.py
│ │ │ │ ├── groupvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_groupvit.py
│ │ │ │ │ └── test_modeling_tf_groupvit.py
│ │ │ │ ├── herbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_herbert.py
│ │ │ │ ├── hiera/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_hiera.py
│ │ │ │ ├── hubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_hubert.py
│ │ │ │ │ └── test_modeling_tf_hubert.py
│ │ │ │ ├── ibert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_ibert.py
│ │ │ │ ├── idefics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test
================================================
FILE CONTENTS
================================================
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# ✨ MPLSandbox
MPLSandbox is an out-of-the-box multi-programming language sandbox designed to provide unified and comprehensive feedback from compiler and analysis tools for LLMs.
https://arxiv.org/abs/2410.23074
<img width="950" alt="image" src="https://github.com/user-attachments/assets/792e9800-ad98-472a-96ff-b78725f94597">
[](./LICENSE)
# 🔍 Introduction
We propose MPLSandbox, an out-of-the-box sandbox designed to provide unified compiler feedback across multiple programming languages.
Additionally, it integrates traditional code analysis tools, delivering comprehensive code information to LLMs from numerous perspectives.
MPLSandbox simplifies code analysis for researchers, and can be seamlessly integrated into LLM training and application processes to enhance the performance of LLMs in a range of code-related tasks.
MPLSandbox consists of three core modules:
### Multi-Programming Language Sandbox Environment
This Module can provide unified compiler feedback by compiling and executing the code.
The code and unit test samples are sent to the sub-sandbox of the corresponding programming language for isolated execution to obtain compiler feedback.
The sandbox ensures the program executes safely without jeopardizing the external environment or interrupting the training process.
### Code Analysis Module
This module includes multiple traditional analysis tools to offer a comprehensive analysis report from numerous perspectives.
It provides a comprehensive code analysis from multiple perspectives, such as static analysis (i.e., potential bug detection and code smell analysis) and dynamic analysis (i.e., fuzz testing and efficiency analysis).
Additionally, this module can also assess other input information besides the code, such as evaluating the coverage of unit tests for the code, aiding researchers in improving the quality of these unit tests.
### Information Integration Module
This module integrates compilation feedback and various analysis results to accomplish a range of complex code-related tasks.
It integrates these results for LLMs to improve the quality of generated code and enhance their performance on a range of code-related tasks.
# 🛠️ Setup
## Install MPLSandbox
The user can create and install MPLSandbox using the following command:
```bash
git clone git@github.com:Ablustrund/MPLSandbox.git
cd MPLSandbox
pip install .
# pip install -e . ## for editable mode
```
## Prepare the Docker Images
First, users need to prepare the Docker images on the host machine. After extensive testing, we have installed the necessary dependencies in Docker containers for various languages and packaged these custom Docker containers into the corresponding images as follows. We hope that users can directly use our open-source images because this can, to some extent, reduce the hassle of installing dependencies for various languages.
**Python**: [mplsandbox-python-3.9.19-v1](https://drive.google.com/file/d/1kkwwj1HbODHi2-Ws0wbXCPSPt4GHr3No/view?usp=drive_link)
**Java**: [mplsandbox-java-11.0.12-v1](https://drive.google.com/file/d/1dtThSM-N93evTl5IRBongd3KyoNA-eUt/view?usp=drive_link)
**JavaScript**: [mplsandbox-javascript-22-v1](https://drive.google.com/file/d/1pxHhzNm7OMij9AvJQiYcnIXKu8TrP74E/view?usp=drive_link)
**C++**: [mplsandbox-cpp-11.2.0-v1](https://drive.google.com/file/d/1gEGoiG2WYsJp1tDQNmBp5-q1zhctG4vD/view?usp=drive_link)
**Go**: [mplsandbox-golang-1.17.0-v1](https://drive.google.com/file/d/1CZGpnoJnSn2yHEPA4WOWWFjSge2z_5lQ/view?usp=drive_link)
**Ruby**: [mplsandbox-ruby-3.0.2-v1](https://drive.google.com/file/d/1VrOkLUF7P9zapvTDYE5PBLqLrwHdeunu/view?usp=drive_link)
**TypeScript**: [mplsandbox-typescript-1-22-v1](https://drive.google.com/file/d/1DPg_fQlwiSFG9wZpIKNB8UhC6AwdnlZn/view?usp=drive_link)
**Bash**: [mplsandbox-bash-v1](https://drive.google.com/file/d/10WHK6vxipTf8Kq5qN6ZEWdWRIR0kXVZe/view?usp=drive_link)
We recommend that users manually download these image files and then use the following command to import them into Docker:
```bash
docker load < <path_to_downloaded_image>
```
If users wish to use custom images, we recommend modifying the `DefaultImage` class in `/mplsandbox/const.py` to define their own images.
# 📚 Usage
## Use in the Project
Users can start mplsandbox and run it with the following lines of code:
```python
from mplsandbox import MPLSANDBOX
data = {
"question":"Define get_sum_of_two_numbers():\n \"\"\"Write a function that takes two integers as input and returns their sum.\n\n -----Input-----\n \n The input consists of multiple test cases. Each test case contains two integers $a$ and $b$ ($-10^9 \\le a, b \\le 10^9$).\n \n -----Output-----\n \n For each test case, print the sum of the two integers.\n \n -----Example-----\n Input\n 3\n 1 2 ↵\n -1 1 ↵\n 1000000000 1000000000\n \n Output\n 3\n 0\n 2000000000\n \"\"\"",
"code": 'def get_sum_of_two_numbers():\n a, b = map(int, input().split(" "))\n print(a * b)\nget_sum_of_two_numbers()',
"unit_cases": {
"inputs": ["1 2", "3 4"],
"outputs": ["3", "7"]
},
"lang": "python"
} # or a JSON file path
executor = MPLSANDBOX(data)
result = executor.run(analysis_type="all")
```
The specific descriptions of all fields in the data are as follows:
| Field | Description |
|----------------|-------------|
| `question`     | (Required) Specifies the natural-language question (problem description) associated with the code. |
| `code` | (Required) Specifies the code to be executed. |
| `unit_cases` | (Required) Specifies the unit test cases, including `inputs` and expected `outputs`. |
| `lang` | (Optional) Specifies the language of the code. If not specified, it can be set to `"AUTO"` for automatic recognition. |
| `libraries` | (Optional) Specifies a list of dependency library names that need to be installed. |
| `client`       | (Optional) Specifies the Docker client instance to be used. |
| `image` | (Optional) Specifies the docker image used to run the code. |
| `dockerfile` | (Optional) Specifies the path to the dockerfile used to build a custom docker image. |
| `keep_template` | (Optional) If it is set to `True`, the template files will be kept after the code is run. |
| `verbose` | (Optional) If it is set to `True`, verbose output will be enabled to assist with debugging and diagnosing issues. |
| `app` | (Optional) If it is set to `True`, app mode will be enabled, facilitating the deployment of services on the server. |
## Use from the Command Line
We also provide the following command-line interface to scan the `data.json` file and output the report to the `report.txt` file:
```bash
mplsandbox --data /path/to/your/data.json --report /path/to/your/report.txt
```
## Use as a Service
MPLSandbox often serves as a node for emitting code-related signals, so configuring the corresponding services is very important. We have provided a simple service demo in the `scripts` directory, and users can run this demo with the following command:
```bash
cd scripts
python ./app.py
```
Then, users can access the service using the curl command or other methods, and the format example is in `scripts/test_app.sh`
```bash
./test_app.sh
```
## Providing feedback signals in RL
MPLSandbox can also provide stable compilation feedback signals for RLCF tasks. For specific implementation details, please refer to the `mplsandbox_for_rl` project.
# 🧑💻 Developing
We are working hard to refactor and improve the open-source version of MPLSandbox so that it closely matches the functionality of the version used internally by the Meituan LLM Team. We are currently reconstructing the analysis tools for languages such as Go, JavaScript, and Ruby to achieve better code analysis and automated testing.
# 👀 Citation
```bibtex
@misc{dou2024MPLSandbox,
title={Multi-Programming Language Sandbox for LLMs},
author={Shihan Dou and Jiazheng Zhang and Jianxiang Zang and Yunbo Tao and Haoxiang Jia and Shichun Liu and Yuming Yang and Shenxi Wu and Shaoqing Zhang and Muling Wu and Changze Lv and Limao Xiong and Wenyu Zhan and Lin Zhang and Rongxiang Weng and Jingang Wang and Xunliang Cai and Yueming Wu and Ming Wen and Rui Zheng and Tao Ji and Yixin Cao and Tao Gui and Xipeng Qiu and Qi Zhang and Xuanjing Huang},
year={2024},
eprint={2410.23074},
archivePrefix={arXiv},
primaryClass={cs.SE},
url={https://arxiv.org/abs/2410.23074},
}
```
```bibtex
@article{dou2024s,
title={What's Wrong with Your Code Generated by Large Language Models? An Extensive Study},
author={Dou, Shihan and Jia, Haoxiang and Wu, Shenxi and Zheng, Huiyuan and Zhou, Weikang and Wu, Muling and Chai, Mingxu and Fan, Jessica and Huang, Caishuang and Tao, Yunbo and others},
journal={arXiv preprint arXiv:2407.06153},
year={2024}
}
```
================================================
FILE: mplsandbox/__init__.py
================================================
# from sandbox import * # noqa: F401
# from .const import * # noqa: F401, F403
from .tool import MPLSANDBOX
================================================
FILE: mplsandbox/analyzetools.py
================================================
import io
import os
import docker
import tarfile
from typing import List, Optional
from docker.models.images import Image
from docker.models.containers import Container
from mplsandbox.utils import (
image_exists,
get_libraries_installation_command,
get_code_file_extension,
get_code_execution_command,
)
from mplsandbox.utils import ConsoleOutput
from mplsandbox.const import (
Language,
LanguageValues,
DefaultImage,
NotSupportedLibraryInstallation,
CONTAINER_LANGUAGE_MAPPING,
)
import ast
import astpretty
from pyflowchart import Flowchart
import javalang
import sys
import re
class AnalyzeTools:
    """Docker-backed sandbox for executing code and running analysis tools.

    Manages the lifecycle of a Docker container for a given language image:
    builds/pulls the image if needed, starts a container (with escalating
    memory limits on failure), copies user code into it, runs the code
    (optionally piping unit-test input into stdin), and — for Python —
    drives analysis tools (pylint, coverage, cProfile, bandit, ast/flowchart)
    inside the container.

    Intended to be used as a context manager::

        with AnalyzeTools(lang="python") as tools:
            output = tools.run(code)
    """

    def __init__(
        self,
        client: Optional[docker.DockerClient] = None,
        image: Optional[str] = None,
        dockerfile: Optional[str] = None,
        lang: str = Language.PYTHON,
        keep_template: bool = False,
        verbose: bool = False,
    ):
        """Configure the sandbox; no container is started until open().

        :param client: existing Docker client; a local one is created if None.
        :param image: image name/tag to run. Mutually exclusive with `dockerfile`.
        :param dockerfile: path to a Dockerfile to build a custom image from.
        :param lang: one of the values in const.Language.
        :param keep_template: if True, an image built/pulled here is kept afterwards.
        :param verbose: if True, print progress/diagnostic messages.
        :raises ValueError: if both image and dockerfile are given, or lang is unknown.
        """
        self._validate_inputs(image, dockerfile, lang)
        self.verbose = verbose
        self.lang = lang
        self.client = client or self._create_docker_client()
        # Fall back to the language's default image name declared in const.DefaultImage.
        self.image = image or DefaultImage.__dict__[lang.upper()]
        self.dockerfile = dockerfile
        self.container = None
        self.path = None
        self.keep_template = keep_template
        # True only when this instance built or pulled the image itself.
        self.is_create_template = False
        # True only when this instance started the container itself.
        self.is_create_container = False
        # Escalation ladder tried in order when container creation hits memory errors.
        self.memory_limits = ['2G', '4G', '8G', '16G']

    def __enter__(self):
        # Context-manager entry: start the session (image + container).
        self.open()
        return self

    def __exit__(self, *args, **kwargs):
        # Context-manager exit: tear down the container we created (if any).
        self.close()

    def _validate_inputs(self, image, dockerfile, lang):
        """Reject contradictory image sources and unsupported languages."""
        if image and dockerfile:
            raise ValueError("Only one of image or dockerfile should be provided")
        if lang not in LanguageValues:
            raise ValueError(
                f"Language {lang} is not supported. Must be one of {LanguageValues}"
            )

    def _create_docker_client(self):
        """Create a Docker client from the local environment (DOCKER_HOST etc.)."""
        if self.verbose:
            print("Using local Docker context since client is not provided..")
        return docker.from_env()

    def open(self):
        """Prepare the image (build or pull) and start the container."""
        if self.dockerfile:
            self._build_image_from_dockerfile()
        elif isinstance(self.image, str):
            self._pull_image_if_needed()
        self._run_container()
        assert self.container != None

    def _build_image_from_dockerfile(self):
        """Build a custom image from self.dockerfile; tags it sandbox-<lang>-<dir>."""
        self.path = os.path.dirname(self.dockerfile)
        if self.verbose:
            print(f"Building docker image from {self.dockerfile}")
            if self.keep_template:
                print(
                    "Since the `keep_template` flag is set to True, the docker image will not be removed after the session ends and remains for future use."
                )
        self.image, _ = self.client.images.build(
            path=self.path,
            dockerfile=os.path.basename(self.dockerfile),
            tag=f"sandbox-{self.lang.lower()}-{os.path.basename(self.path)}",
        )
        self.is_create_template = True

    def _pull_image_if_needed(self):
        """Pull the image if it is not present locally; otherwise resolve it.

        Sets is_create_template when we pulled it, so close() logic elsewhere
        can decide whether removal is this instance's responsibility.
        """
        if not image_exists(self.client, self.image):
            if self.verbose:
                print(f"Pulling image {self.image}..")
                if self.keep_template:
                    print(
                        "Since the `keep_template` flag is set to True, the docker image will not be removed after the session ends and remains for future use."
                    )
            self.image = self.client.images.pull(self.image)
            self.is_create_template = True
        else:
            self.image = self.client.images.get(self.image)
            if self.verbose:
                print(f"Using image {self.image.tags[-1]}")

    def _get_existing_container(self):
        """Return a running container for this image, or None if there is none."""
        containers = self.client.containers.list(filters={"ancestor": self.image, "status":"running"})
        if containers:
            return containers[0]
        return None

    def _check_container_exists(self, container_id):
        """Return True if the daemon knows a container with this id."""
        try:
            container = self.client.api.inspect_container(container_id)
            return True
        except docker.errors.NotFound:
            return False

    def _run_container(self):
        """Start a detached TTY container, escalating the memory limit on failure.

        NOTE(review): with detach=True, containers.run usually raises
        ImageNotFound/APIError rather than ContainerError — confirm the
        memory-retry path is actually reachable.
        """
        for memory_limit in self.memory_limits:
            try:
                self.container = self.client.containers.run(
                    self.image,
                    detach=True,
                    tty=True,
                    mem_limit=memory_limit
                )
                self.is_create_container = True
                return  # If container is created successfully, return
            except docker.errors.ContainerError as e:
                if 'memory' in str(e):  # Check if the error is related to memory
                    if self.verbose:
                        print(f"Memory error occurred. Trying with {memory_limit}...")
                    continue
                else:
                    raise  # If it's not a memory error, raise the exception
        raise RuntimeError("All memory limits have been tried. Failed to create container.")

    def close(self):
        """Remove the container, but only if this instance created it."""
        if self.is_create_container:
            self._remove_container()

    def _commit_container(self):
        """Commit the container's current state back onto the image's last tag."""
        if isinstance(self.image, Image):
            self.container.commit(self.image.tags[-1])

    def _remove_container(self):
        """Force-remove the container and drop our reference to it."""
        self.container.remove(force=True)
        self.container = None

    def _remove_image_if_needed(self):
        """Remove an image we created, unless keep_template or other containers use it.

        NOTE(review): relies on self._is_image_in_use(), which is not defined in
        this class — verify before calling this method.
        """
        if self.is_create_template and not self.keep_template:
            if not self._is_image_in_use():
                self._remove_image()
            elif self.verbose:
                print(
                    f"Image {self.image.tags[-1]} is in use by other containers. Skipping removal.."
                )

    def _remove_image(self):
        """Remove the image regardless of whether we hold it as a name or an Image."""
        if isinstance(self.image, str):
            self.client.images.remove(self.image)
        elif isinstance(self.image, Image):
            self.image.remove(force=True)
        else:
            raise ValueError("Invalid image type")

    def _build_sh(self, code_dest_file, unit_input):
        """Build a shell script that runs each execution command for the language,
        piping `unit_input` (with newlines escaped for `echo -e`) into stdin."""
        commands = get_code_execution_command(self.lang, code_dest_file)
        sh_commands = ""
        for command in commands:
            # Escape real newlines so `echo -e` re-expands them inside the container.
            unit_input = unit_input.replace("\n", "\\n")
            sh_commands += (
                f'echo -e "{unit_input}" | ' + command if unit_input else command
            )
            sh_commands += "\n"
        return sh_commands

    def run(self, code: str, unit_input: str = None, libraries: Optional[List] = None) -> ConsoleOutput:
        """Execute `code` in the container and return its console output.

        :param code: source text to run.
        :param unit_input: optional stdin text; when given, a wrapper shell
            script is generated and executed instead of the bare command.
        :param libraries: optional list of libraries to install first.
        :raises RuntimeError: if the session has not been opened.

        NOTE(review): on a memory-related ContainerError this recreates the
        session and re-runs, but the retry's output is not returned (implicit
        None) — confirm callers tolerate that.
        """
        self._ensure_session_is_open()
        self._install_libraries_if_needed(libraries)
        code_file, code_dest_file = self._prepare_code_file(code)
        self._copy_code_to_container(code_file, code_dest_file)
        try:
            if unit_input:
                sh_text = self._build_sh(code_dest_file, unit_input)
                sh_file, sh_dest_file = self._prepare_sh_file(sh_text)
                self._copy_code_to_container(sh_file, sh_dest_file)
                return self._execute_sh_in_container(sh_dest_file)
            else:
                return self._execute_code_in_container(code_dest_file)
        except docker.errors.ContainerError as e:
            if 'memory' in str(e).lower():  # Check if the error is related to memory
                self._increase_memory_and_rerun(code, unit_input, libraries)
            else:
                raise

    def _ensure_session_is_open(self):
        """Raise if open() has not been called (no live container)."""
        if not self.container:
            raise RuntimeError(
                "Session is not open. Please call open() method before running code."
            )

    def _install_libraries_if_needed(self, libraries):
        """Install the given libraries, rejecting languages that do not support it."""
        if libraries:
            if self.lang.upper() in NotSupportedLibraryInstallation:
                raise ValueError(
                    f"Library installation has not been supported for {self.lang} yet!"
                )
            self._install_libraries(libraries)

    def _install_libraries(self, libraries):
        """Run the per-language install command for each library inside the container."""
        if self.lang == Language.GO:
            # Go needs a module directory before `go get` style installs work.
            self._prepare_go_environment()
        for library in libraries:
            command = get_libraries_installation_command(self.lang, library)
            # NOTE(review): install_feedback is captured but never used/checked.
            install_feedback=self.execute_command(
                command, workdir="/example" if self.lang == Language.GO else None
            )

    def _prepare_go_environment(self):
        """Create /example and initialise a Go module inside the container."""
        self.execute_command("mkdir -p /example")
        self.execute_command("go mod init example", workdir="/example")
        self.execute_command("go mod tidy", workdir="/example")

    def _prepare_code_file(self, code):
        """Write `code` to a host-side temp file; return (host_path, container_path).

        Go code must live in the module directory /example; other languages
        reuse the same /tmp path inside the container.
        """
        code_file = f"/tmp/code.{get_code_file_extension(self.lang)}"
        code_dest_file = "/example/code.go" if self.lang == Language.GO else code_file
        with open(code_file, "w") as f:
            f.write(code)
        return code_file, code_dest_file

    def _prepare_sh_file(self, sh):
        """Write the generated shell script to a host-side temp file;
        return (host_path, container_path)."""
        sh_file = f"/tmp/run.sh"
        sh_dest_file = "/example/run.sh" if self.lang == Language.GO else sh_file
        with open(sh_file, "w") as f:
            f.write(sh)
        return sh_file, sh_dest_file

    def _copy_code_to_container(self, src, dest):
        """Thin alias for copy_to_runtime."""
        self.copy_to_runtime(src, dest)

    def _execute_code_in_container(self, code_dest_file, unit_input=None):
        """Run each execution command for the language; return the last command's output.

        NOTE(review): the memory-retry branch calls _increase_memory_and_rerun
        with only 2 arguments, but that method takes (code, unit_input,
        libraries) — this call would raise TypeError if ever reached. Confirm.
        """
        output = ConsoleOutput("")
        commands = get_code_execution_command(self.lang, code_dest_file)
        for command in commands:
            try:
                output = self.execute_command(
                    command, workdir="/example" if self.lang == Language.GO else None
                )
            except docker.errors.ContainerError as e:
                if 'memory' in str(e).lower():  # Check if the error is related to memory
                    self._increase_memory_and_rerun(code_dest_file, unit_input)
                    return self._execute_code_in_container(code_dest_file, unit_input)
                else:
                    raise
        return output

    def _execute_sh_in_container(self, sh_dest_file):
        """chmod +x the copied script and run it with /bin/bash; return run output.

        NOTE(review): a literal tab separates "/bin/bash" from the script path;
        exec appears to tolerate it as whitespace — confirm before changing.
        """
        output = ConsoleOutput("")
        source_bash = "chmod +x " + sh_dest_file
        output2 = ConsoleOutput("")
        output2 = self.execute_command(
            source_bash, workdir="/example" if self.lang == Language.GO else None
        )
        run_bash = "/bin/bash\t" + sh_dest_file
        output = self.execute_command(
            run_bash, workdir="/example" if self.lang == Language.GO else None
        )
        return output

    def copy_from_runtime(self, src: str, dest: str):
        """Copy a file out of the container to the host.

        :raises FileNotFoundError: if `src` does not exist in the container.
        """
        self._ensure_session_is_open()
        if self.verbose:
            print(f"Copying {self.container.short_id}:{src} to {dest}..")
        self._extract_file_from_container(src, dest)

    def _extract_file_from_container(self, src, dest):
        """Fetch `src` as a tar archive from the container and extract it
        into the directory containing `dest`."""
        bits, stat = self.container.get_archive(src)
        if stat["size"] == 0:
            raise FileNotFoundError(f"File {src} not found in the container")
        tarstream = io.BytesIO(b"".join(bits))
        with tarfile.open(fileobj=tarstream, mode="r") as tar:
            tar.extractall(os.path.dirname(dest))

    def copy_to_runtime(self, src: str, dest: str):
        """Copy a host file into the container, creating the target directory."""
        self._ensure_session_is_open()
        self._create_directory_if_needed(dest)
        self._copy_file_to_container(src, dest)

    def _create_directory_if_needed(self, dest):
        """Ensure the parent directory of `dest` exists inside the container.

        NOTE(review): mkdir -p is run unconditionally and then again if the
        `test -d` probe fails — the second mkdir looks redundant.
        """
        directory = os.path.dirname(dest)
        self.container.exec_run(f"mkdir -p {directory}")
        if directory and not self.container.exec_run(f"test -d {directory}")[0] == 0:
            self.container.exec_run(f"mkdir -p {directory}")
            print('create successfully')
            if self.verbose:
                print(f"Creating directory {self.container.short_id}:{directory}")

    def _create_directory_if_needed_tmp(self, dest):
        """Ensure the directory `dest` itself (not its parent) exists in the container."""
        directory = dest
        if directory and not self.container.exec_run(f"test -d {directory}")[0] == 0:
            self.container.exec_run(f"mkdir -p {directory}")
            if self.verbose:
                print(f"Creating directory {self.container.short_id}:{directory}")

    def _copy_file_to_container(self, src, dest):
        """Stream `src` into the container as a single-entry tar archive,
        extracted into dest's directory (entry name is src's basename)."""
        # print(f"Copying {src} to {self.container.short_id}:{dest}..")
        tarstream = io.BytesIO()
        with tarfile.open(fileobj=tarstream, mode="w") as tar:
            tar.add(src, arcname=os.path.basename(src))
        tarstream.seek(0)
        self.container.put_archive(os.path.dirname(dest), tarstream)

    def _add_directory_to_tar(self,tar, path, arcname):
        """Recursively add the contents of `path` to `tar`, preserving
        relative paths under `arcname`."""
        for root, dirs, files in os.walk(path):
            for dir in dirs:
                dir_path = os.path.join(root, dir)
                tar.add(dir_path, arcname=os.path.join(arcname, os.path.relpath(dir_path, path)))
            for file in files:
                file_path = os.path.join(root, file)
                tar.add(file_path, arcname=os.path.join(arcname, os.path.relpath(file_path, path)))

    def copy_directory_to_container(self, src_dir):
        """Copy the host directory `src_dir` into /tmp/tools inside the container."""
        print("begin copy the tools to the container..")
        dest_dir = "/tmp/tools"
        self._ensure_session_is_open()
        self._create_directory_if_needed_tmp(dest_dir)
        # NOTE(review): `cd` in its own exec has no lasting effect; likely vestigial.
        exit_code, output = self.container.exec_run(f"cd {dest_dir}")
        # print(f"Copying {src_dir} to {self.container.short_id}:{dest_dir}..")
        tarstream = io.BytesIO()
        with tarfile.open(fileobj=tarstream, mode='w') as tar:
            self._add_directory_to_tar(tar, src_dir, arcname='')
        tarstream.seek(0)
        self.container.put_archive(dest_dir, tarstream)
        # Sanity listing of the copied java tools; result is currently unused.
        command = f"ls {dest_dir}/java"
        exit_code, output = self.container.exec_run(command)

    def execute_command(
        self, command: Optional[str], workdir: Optional[str] = None
    ) -> ConsoleOutput:
        """Run a shell command in the container and return its combined output.

        :param command: non-empty command string.
        :param workdir: optional working directory inside the container.
        :raises ValueError: if command is empty.
        :raises RuntimeError: if the session is not open.
        """
        self._validate_command(command)
        self._ensure_session_is_open()
        if self.verbose:
            print(f"Executing command: {command}")
        return self._run_command_in_container(command, workdir)

    def _validate_command(self, command):
        """Reject empty/None commands."""
        if not command:
            raise ValueError("Command cannot be empty")

    def _run_command_in_container(self, command, workdir):
        """exec_run the command with a streaming TTY and accumulate chunks.

        With stream=True the returned exit code is None; only output is kept.
        """
        if workdir:
            exit_code, exec_log = self.container.exec_run(
                command, stream=True, tty=True, workdir=workdir
            )
        else:
            exit_code, exec_log = self.container.exec_run(
                command, stream=True, tty=True
            )
        output = ""
        if self.verbose:
            print("Output:", end=" ")
        for chunk in exec_log:
            chunk_str = chunk.decode("utf-8")
            output += chunk_str
            if self.verbose:
                print(chunk_str, end="")
        return ConsoleOutput(output)

    def _increase_memory_and_rerun(self, code, unit_input, libraries):
        """Recreate the session and re-run the code.

        NOTE(review): this does not actually raise the memory limit — open()
        restarts the ladder from memory_limits[0]. The rerun's output is also
        discarded. Confirm intended behavior.
        """
        self.close()
        self.open()
        self.run(code, unit_input, libraries)

    def call_tool_python(self, code, unit_inputs, analysis) -> str:
        """Run a Python analysis inside the container and return its report.

        :param code: Python source text to analyse.
        :param unit_inputs: list of stdin strings (used by the coverage and
            profiling analyses; ignored by the static ones).
        :param analysis: one of "code_smell_analysis" (pylint),
            "unit_test_analysis" (coverage), "code_efficiency_evaluation"
            (cProfile), "code_bug_analysis" (bandit),
            "code_basic_analysis" (host-side ast + flowchart).
        :returns: despite the `-> str` annotation, this returns a str for a
            single report, a dict for code_basic_analysis, or a list of dicts
            for multi-input coverage/profiling runs.
        """
        self._ensure_session_is_open()
        # print(self.container.exec_run("pip list coverage")[1].decode('utf-8'))
        analysis_info = analysis.replace("_"," ")
        print(f"Executing Python {analysis_info}...")
        # Tools are (re)installed in the container on every call.
        self._install_libraries_if_needed(["coverage", "bandit", "pylint"])
        commands = []
        tmp_outputs = {}
        code_file, code_dest_file = self._prepare_code_file(code)
        if analysis == "code_smell_analysis":
            command = "pylint "+code_dest_file+"\n"
            commands.append(command)
        elif analysis == "unit_test_analysis":
            # One coverage run + report per unit input.
            for unit_input in unit_inputs:
                unit_input = unit_input.replace("\n", "\\n")
                command = f'echo "{unit_input}"' +f" | coverage run {code_dest_file}"+"\n"+"coverage "+"report"+"\n"
                commands.append(command)
        elif analysis == "code_efficiency_evaluation":
            # One cProfile run per unit input.
            for unit_input in unit_inputs:
                unit_input = unit_input.replace("\n", "\\n")
                command = f'echo "{unit_input}"' +f" | python -m cProfile {code_dest_file}"+"\n"
                commands.append(command)
        elif analysis == "code_bug_analysis":
            command = "bandit "+"-r "+f"{code_dest_file}"+"\n"
            commands.append(command)
        elif analysis == "code_basic_analysis":
            # Runs entirely on the host: pretty-printed AST + pyflowchart CFG.
            command = ""
            try:
                tree = ast.parse(code)
                # astpretty prints to stdout, so capture it via redirection.
                captured_output = io.StringIO()
                original_stdout = sys.stdout
                sys.stdout = captured_output
                astpretty.pprint(tree)
                sys.stdout = original_stdout
                captured_output.seek(0)
                ast_pretty_printed = captured_output.read()
                fc = Flowchart.from_code(code)
                cfg_printed = fc.flowchart()
            except Exception as e:
                # On AST failure, report the error but still attempt the flowchart.
                ast_pretty_printed = str(e)
                try:
                    fc = Flowchart.from_code(code)
                    cfg_printed = fc.flowchart()
                except Exception as e:
                    cfg_printed = str(e)
            tmp_outputs = {"ast":ast_pretty_printed, "cfg":cfg_printed}
        outputs = []
        # Execute each prepared command as a shell script in the container.
        for i, command in enumerate(commands):
            sh_file, sh_dest_file = self._prepare_sh_file(command)
            self._copy_code_to_container(code_file, code_dest_file)
            self._copy_code_to_container(sh_file, sh_dest_file)
            output = self._execute_sh_in_container(sh_dest_file).text
            if analysis == "unit_test_analysis":
                # Strip a leading echoed line (literal "\r\n" escape in the regex).
                output = re.sub(r'^\d+\\r\\n', '', output, count=1)
            outputs.append(output)
        if analysis == "code_basic_analysis":
            return tmp_outputs
        else:
            if len(outputs) == 1:
                return outputs[0]
            else:
                # Multiple unit inputs: parse each raw report into a dict.
                # NOTE(review): keying by unit_input collapses duplicate inputs.
                outputs_dict = {}
                if analysis == "unit_test_analysis":
                    for unit_input, output in zip(unit_inputs, outputs):
                        outputs_dict.update({unit_input : output})
                    result = []
                    for input_key, output_str in outputs_dict.items():
                        cleaned_str = re.sub(r'^\d+\r\n', '', output_str)
                        lines = cleaned_str.split('\r\n')
                        # The "TOTAL" row of `coverage report` carries the summary.
                        total_line = next(line for line in lines if "TOTAL" in line)
                        parts = re.split(r'\s+', total_line.strip())
                        result.append({
                            "Unit Input": input_key,
                            "Total Lines": int(parts[1]),
                            "Miss": int(parts[2]),
                            "Cover Rate": parts[3]
                        })
                else:
                    for unit_input, output in zip(unit_inputs, outputs):
                        outputs_dict.update({unit_input : output})
                    result = []
                    for input_key, output_str in outputs_dict.items():
                        cleaned_str = re.sub(r'^\d+\r\n', '', output_str)
                        lines = [
                            line.strip()
                            for line in cleaned_str.split('\r\n')
                            if line.strip() and not line.startswith(('Ordered by:', 'ncalls tottime'))
                        ]
                        func_data = []
                        for line in lines:
                            # cProfile stat rows start with "<ncalls> <tottime>".
                            if re.match(r'^\d+\s+[\d.]+', line):
                                parts = re.split(r'\s+', line)
                                func_data.append({
                                    "ncalls": parts[0],
                                    "tottime": f"{float(parts[1]):.6f} s",
                                    "percall": f"{float(parts[2]):.6f} s",
                                    "cumtime": f"{float(parts[3]):.6f} s",
                                    "function location": parts[4]
                                })
                        result.append({
                            "Unit Input": input_key,
                            "Total Calls": len(func_data),
                            "Total Time": next((line for line in lines if "function calls in" in line), ""),
                            "Functions": func_data
                        })
                return result
# def call_tool_java(self, code, tool_name):
# print("Executing Java tool...")
# tool_name = tool_name.lower()
# code_file, code_dest_file = self._prepare_code_file(code)
# script_dir = os.path.dirname(os.path.abspath(__file__))
# classname = os.path.splitext(os.path.basename(code_dest_file))[0]
# directory_path = os.path.dirname(code_dest_file)
# src_dir = os.path.join(script_dir, "tools")
# self.copy_directory_to_container(src_dir)
# if tool_name == "javalang" or tool_name == "basic-ast":
# print(f"Tool -{tool_name}- execution succeed:")
# commands = []
# tokens = javalang.tokenizer.tokenize(code)
# parser = javalang.parser.Parser(tokens)
# tree = parser.parse()
# important_node_types = (
# javalang.tree.ClassDeclaration,
# javalang.tree.MethodDeclaration,
# javalang.tree.IfStatement,
# javalang.tree.VariableDeclaration,
# javalang.tree.BinaryOperation,
# javalang.tree.MethodInvocation,
# )
# def node_to_dict(node):
# if not isinstance(node, javalang.ast.Node):
# return str(node)
# result = {
# "type": type(node).__name__,
# "properties": {}
# }
# for attr, value in node.__dict__.items():
# if isinstance(value, list):
# result["properties"][attr] = [node_to_dict(item) for item in value]
# elif isinstance(value, javalang.ast.Node):
# result["properties"][attr] = node_to_dict(value)
# else:
# result["properties"][attr] = str(value)
# return result
# ast_dict = node_to_dict(tree)
# formatted_ast = json.dumps(ast_dict, indent=4)
# print(formatted_ast)
# elif tool_name == "soot" or tool_name == "basic-cfg":
# commands = [
# "java","-cp","tmp/tools/java/soot-4.5.0-jar-with-dependencies.jar",
# "soot.Main",
# "-pp",
# "-cp","/tmp",
# "-process-dir",classname,
# "-allow-phantom-refs",
# "-w",
# "-p","cg","enabled:true",
# "-p", "jb", "enabled:true",
# "-f", "J",
# "-d", "tmp/tools/temp/soot"
# ]
# to_class_command = f"javac -d tmp tmp/{classname}.java"
# command = " ".join(commands)
# command = "\n".join([to_class_command, command])
# command = to_class_command
# elif tool_name == "pmd" or tool_name == "smell":
# commands = [
# 'tmp/tools/java/pmd-bin-7.6.0/bin/pmd', 'check',
# '-d', code_dest_file,
# '-R', 'tmp/tools/java/pmd-bin-7.6.0/pmd-pmd_releases-7.6.0/pmd-java/src/main/resources/rulesets/java/quickstart.xml',
# '-f', 'text'
# ]
# command = " ".join(commands)
# elif tool_name == "jacoco" or tool_name == "coverage":
# commands = [[
# "java",
# "-javaagent:tmp/tools/java/jacoco-0.8.12/lib/jacocoagent.jar=destfile=tmp/tools/temp/jacoco.exec",
# "-cp", "/tmp", classname
# ],
# [
# "java",
# "-jar", "tmp/tools/java/jacoco-0.8.12/lib/jacococli.jar",
# "report", "tmp/tools/temp/jacoco.exec",
# "--classfiles", "/tmp",
# "--sourcefiles", classname,
# "--csv", "/dev/stdout"
# ]
# ]
# sh_file, sh_dest_file = self._prepare_sh_file(command)
# self._copy_code_to_container(code_file, code_dest_file)
# self._copy_code_to_container(sh_file, sh_dest_file)
# output = self._execute_sh_in_container(sh_dest_file)
# print(output.text)
# return output.text
================================================
FILE: mplsandbox/const.py
================================================
from dataclasses import dataclass
@dataclass
class Language:
    """Canonical language identifiers used throughout MPLSandbox.

    These are plain class attributes (no annotations), so @dataclass
    generates no fields; the class serves as a namespace of string constants.
    """
    PYTHON = "python"
    JAVA = "java"
    JAVASCRIPT = "javascript"
    CPP = "cpp"
    GO = "go"
    RUBY = "ruby"
    RUST = "rust"
    BASH = "bash"
    TYPESCRIPT = "typescript"
@dataclass
class FILE_EXTENSION_MAPPING:
    """Maps source-file extensions to Language identifiers (e.g. for
    auto-detecting the language of a code file)."""
    MAPPING = {
        ".py": Language.PYTHON,
        ".java": Language.JAVA,
        ".js": Language.JAVASCRIPT,
        ".cpp": Language.CPP,
        ".go": Language.GO,
        ".rb": Language.RUBY,
        ".rs": Language.RUST,
        ".sh": Language.BASH,
        ".ts": Language.TYPESCRIPT
    }
@dataclass
class CONTAINER_LANGUAGE_MAPPING:
    """Maps each Language to a pre-existing Docker container short id.

    NOTE(review): these hard-coded ids are specific to one host's Docker
    daemon and will not exist elsewhere — verify before relying on them.
    Bash and TypeScript have no entry here.
    """
    MAPPING = {
        Language.PYTHON: "7365b5c6ffaa",
        Language.JAVA: "6063cee04450",
        Language.JAVASCRIPT: "ce41b82700ca",
        Language.CPP: "2094ef9598af",
        Language.GO: "928e44a0b293",
        Language.RUBY: "e65e98d3a186",
        Language.RUST: "6c4c831e80d5",
    }
# @dataclass
# class DefaultImage:
# PYTHON = "python:3.9.19-bullseye"
# JAVA = "openjdk:11.0.12-jdk-bullseye"
# JAVASCRIPT = "node:22-bullseye"
# CPP = "gcc:11.2.0-bullseye"
# GO = "golang:1.17.0-bullseye"
# RUBY = "ruby:3.0.2-bullseye"
# RUST = "rust:latest"
# TYPESCRIPT = "node:22-bullseye"
# BASH = "bash:latest"
@dataclass
class DefaultImage:
    """Default Docker image name per language (uppercase attribute names are
    looked up via DefaultImage.__dict__[lang.upper()] by the sandbox classes).

    These are the project's custom images (see the README download links),
    replacing the stock python/openjdk/node/... base images.
    """
    PYTHON = "mplsandbox-python-3.9.19-v1"
    JAVA = "mplsandbox-java-11.0.12-v1"
    JAVASCRIPT = "mplsandbox-javascript-22-v1"
    CPP = "mplsandbox-cpp-11.2.0-v1"
    GO = "mplsandbox-golang-1.17.0-v1"
    RUBY = "mplsandbox-ruby-3.0.2-v1"
    RUST = "mplsandbox-rust-latest-v1"
    TYPESCRIPT = "mplsandbox-typescript-1-22-v1"
    BASH = "mplsandbox-bash-v1"
class CodeType:
    """How generated code receives its input: via standard input ('stdin')
    or via a direct function call ('call')."""
    STDIN = 'stdin'
    CALL = 'call'
# Languages whose sandboxes cannot install extra libraries at runtime
# (compared against `lang.upper()` by the sandbox classes).
NotSupportedLibraryInstallation = ["JAVA"]

# All supported language identifier strings, collected from the public
# (non-dunder) attributes of the Language namespace.
LanguageValues = [
    value
    for name, value in vars(Language).items()
    if not name.startswith("__")
]

# All supported code-input modes, collected the same way from CodeType.
CodeTypeValues = [
    value for name, value in vars(CodeType).items() if not name.startswith("__")
]
================================================
FILE: mplsandbox/sandbox.py
================================================
import io
import os
import docker
import tarfile
from typing import List, Optional
from docker.models.images import Image
from docker.models.containers import Container
from mplsandbox.utils import (
image_exists,
get_libraries_installation_command,
get_code_file_extension,
get_code_execution_command,
)
from mplsandbox.utils import ConsoleOutput
from mplsandbox.const import (
Language,
LanguageValues,
DefaultImage,
NotSupportedLibraryInstallation,
CONTAINER_LANGUAGE_MAPPING,
)
class Sandbox:
def __init__(
self,
client: Optional[docker.DockerClient] = None,
image: Optional[str] = None,
dockerfile: Optional[str] = None,
lang: str = Language.PYTHON,
keep_template: bool = False,
verbose: bool = False,
):
self._validate_inputs(image, dockerfile, lang)
self.verbose = verbose
self.lang = lang
self.client = client or self._create_docker_client()
self.image = image or DefaultImage.__dict__[lang.upper()]
self.dockerfile = dockerfile
self.container = None
self.path = None
self.keep_template = keep_template
self.is_create_template = False
self.is_create_container = False
self.memory_limits = ['2G', '4G', '8G', '16G']
def __enter__(self):
self.open()
return self
def __exit__(self, *args, **kwargs):
self.close()
def _validate_inputs(self, image, dockerfile, lang):
if image and dockerfile:
raise ValueError("Only one of image or dockerfile should be provided")
if lang not in LanguageValues:
raise ValueError(
f"Language {lang} is not supported. Must be one of {LanguageValues}"
)
def _create_docker_client(self):
if self.verbose:
print("Using local Docker context since client is not provided..")
return docker.from_env()
def open(self):
print("Opening sandbox session..")
if self.dockerfile:
self._build_image_from_dockerfile()
elif isinstance(self.image, str):
self._pull_image_if_needed()
# if not self._is_image_in_use():
# self._run_container()
# else:
# self._get_container()
self._run_container()
assert self.container != None
def _build_image_from_dockerfile(self):
self.path = os.path.dirname(self.dockerfile)
if self.verbose:
print(f"Building docker image from {self.dockerfile}")
if self.keep_template:
print(
"Since the `keep_template` flag is set to True, the docker image will not be removed after the session ends and remains for future use."
)
self.image, _ = self.client.images.build(
path=self.path,
dockerfile=os.path.basename(self.dockerfile),
tag=f"sandbox-{self.lang.lower()}-{os.path.basename(self.path)}",
)
self.is_create_template = True
def _pull_image_if_needed(self):
if not image_exists(self.client, self.image):
if self.verbose:
print(f"Pulling image {self.image}..")
if self.keep_template:
print(
"Since the `keep_template` flag is set to True, the docker image will not be removed after the session ends and remains for future use."
)
self.image = self.client.images.pull(self.image)
self.is_create_template = True
else:
self.image = self.client.images.get(self.image)
if self.verbose:
print(f"Using image {self.image.tags[-1]}")
def _get_existing_container(self):
containers = self.client.containers.list(filters={"ancestor": self.image, "status":"running"})
if containers:
return containers[0]
return None
def _check_container_exists(self, container_id):
try:
container = self.client.api.inspect_container(container_id)
return True
except docker.errors.NotFound:
return False
# def _run_container(self):
# existing_container = self._get_existing_container()
# if existing_container:
# self.container = existing_container
# elif self._check_container_exists(CONTAINER_LANGUAGE_MAPPING.MAPPING.get(self.lang)):
# self.container = self.client.containers.get(CONTAINER_LANGUAGE_MAPPING.MAPPING.get(self.lang))
# else:
# print(f"Container for {self.lang} is not found. Creating a new one..")
# self.container = self.client.containers.run(self.image, detach=True, tty=True)
# self.is_create_container = True
# def _run_container(self):
# self.client.containers.run(self.image, detach=True, tty=True)
# def _get_container(self):
# containers = self.client.containers.list(all=True)
# image_id = (
# self.image.id
# if isinstance(self.image, Image)
# else self.client.images.get(self.image).id
# )
# for container in containers:
# if container.image.id == image_id:
# self.container = container
# break
def _run_container(self):
for memory_limit in self.memory_limits:
try:
self.container = self.client.containers.run(
self.image,
detach=True,
tty=True,
mem_limit=memory_limit
)
self.is_create_container = True
return # If container is created successfully, return
except docker.errors.ContainerError as e:
if 'memory' in str(e): # Check if the error is related to memory
if self.verbose:
print(f"Memory error occurred. Trying with {memory_limit}...")
continue
else:
raise # If it's not a memory error, raise the exception
raise RuntimeError("All memory limits have been tried. Failed to create container.")
def close(self):
if self.is_create_container:
self._remove_container()
# if self.container:
# self._commit_container()
# self._remove_container()
# self._remove_image_if_needed()
def _commit_container(self):
if isinstance(self.image, Image):
self.container.commit(self.image.tags[-1])
def _remove_container(self):
self.container.remove(force=True)
self.container = None
def _remove_image_if_needed(self):
if self.is_create_template and not self.keep_template:
if not self._is_image_in_use():
self._remove_image()
elif self.verbose:
print(
f"Image {self.image.tags[-1]} is in use by other containers. Skipping removal.."
)
# def _is_image_in_use(self):
# containers = self.client.containers.list(all=True)
# image_id = (
# self.image.id
# if isinstance(self.image, Image)
# else self.client.images.get(self.image).id
# )
# return any(container.image.id == image_id for container in containers)
def _remove_image(self):
if isinstance(self.image, str):
self.client.images.remove(self.image)
elif isinstance(self.image, Image):
self.image.remove(force=True)
else:
raise ValueError("Invalid image type")
def _build_sh(self, code_dest_file, unit_input):
commands = get_code_execution_command(self.lang, code_dest_file)
sh_commands = ""
for command in commands:
unit_input = unit_input.replace("\n", "\\n")
sh_commands += (
f'echo -e "{unit_input}" | ' + command if unit_input else command
)
sh_commands += "\n"
return sh_commands
# def run(
# self, code: str, unit_input: str = None, libraries: Optional[List] = None
# ) -> ConsoleOutput:
# self._ensure_session_is_open()
# self._install_libraries_if_needed(libraries)
# code_file, code_dest_file = self._prepare_code_file(code)
# self._copy_code_to_container(code_file, code_dest_file)
# if unit_input:
# sh_text = self._build_sh(code_dest_file, unit_input)
# sh_file, sh_dest_file = self._prepare_sh_file(sh_text)
# self._copy_code_to_container(sh_file, sh_dest_file)
# return self._execute_sh_in_container(sh_dest_file)
# else:
# return self._execute_code_in_container(code_dest_file)
def run(self, code: str, unit_input: str = None, libraries: Optional[List] = None) -> ConsoleOutput:
    """Execute `code` inside the open container session.

    :param code: source text to execute
    :param unit_input: optional stdin for the program (triggers the run.sh path)
    :param libraries: optional libraries to install first
    :return: ConsoleOutput of the execution
    :raises RuntimeError: if the session is not open
    :raises docker.errors.ContainerError: for non-memory container failures

    Bug fix: the memory-error recovery branch previously discarded the
    retry's result and fell through, making run() return None; the retry's
    ConsoleOutput is now returned to the caller.
    """
    self._ensure_session_is_open()
    self._install_libraries_if_needed(libraries)
    code_file, code_dest_file = self._prepare_code_file(code)
    self._copy_code_to_container(code_file, code_dest_file)
    try:
        if unit_input:
            # Wrap the execution commands in a shell script that feeds stdin.
            sh_text = self._build_sh(code_dest_file, unit_input)
            sh_file, sh_dest_file = self._prepare_sh_file(sh_text)
            self._copy_code_to_container(sh_file, sh_dest_file)
            return self._execute_sh_in_container(sh_dest_file)
        return self._execute_code_in_container(code_dest_file)
    except docker.errors.ContainerError as e:
        # Heuristic recovery: recycle the container once when the failure
        # message looks memory-related, and return the retry's result.
        if 'memory' in str(e).lower():
            return self._increase_memory_and_rerun(code, unit_input, libraries)
        raise
def _ensure_session_is_open(self):
if not self.container:
raise RuntimeError(
"Session is not open. Please call open() method before running code."
)
def _install_libraries_if_needed(self, libraries):
    """Install the requested libraries; no-op when none are given.

    Raises:
        ValueError: when the language does not support library installation.
    """
    if not libraries:
        return
    if self.lang.upper() in NotSupportedLibraryInstallation:
        raise ValueError(
            f"Library installation has not been supported for {self.lang} yet!"
        )
    self._install_libraries(libraries)
def _install_libraries(self, libraries):
    """Run the per-language install command for each library. Go first gets
    a module directory prepared and installs inside /example."""
    if self.lang == Language.GO:
        self._prepare_go_environment()
    for library in libraries:
        install_cmd = get_libraries_installation_command(self.lang, library)
        self.execute_command(
            install_cmd, workdir="/example" if self.lang == Language.GO else None
        )
def _prepare_go_environment(self):
self.execute_command("mkdir -p /example")
self.execute_command("go mod init example", workdir="/example")
self.execute_command("go mod tidy", workdir="/example")
def _prepare_code_file(self, code):
    """Write `code` to a host-side temp file.

    Returns (host_path, container_path); Go sources must live in the
    /example module directory inside the container.
    """
    host_path = f"/tmp/code.{get_code_file_extension(self.lang)}"
    container_path = "/example/code.go" if self.lang == Language.GO else host_path
    with open(host_path, "w") as handle:
        handle.write(code)
    return host_path, container_path
def _prepare_sh_file(self, sh):
    """Write the generated shell script to /tmp/run.sh on the host.

    Returns (host_path, container_path); for Go the script is placed under
    /example in the container.
    """
    host_path = "/tmp/run.sh"
    container_path = "/example/run.sh" if self.lang == Language.GO else host_path
    with open(host_path, "w") as handle:
        handle.write(sh)
    return host_path, container_path
def _copy_code_to_container(self, src, dest):
    # Thin alias for copy_to_runtime(); kept for readability at call sites.
    self.copy_to_runtime(src, dest)
# def _execute_code_in_container(self, code_dest_file, unit_input=None):
# output = ConsoleOutput("")
# commands = get_code_execution_command(self.lang, code_dest_file)
# for command in commands:
# output = self.execute_command(
# command, workdir="/example" if self.lang == Language.GO else None
# )
# return output
def _execute_code_in_container(self, code_dest_file, unit_input=None):
    """Run each execution command for the current language and return the
    ConsoleOutput of the last one.

    On a memory-related ContainerError the container is recycled once and
    the whole command sequence is retried.

    Bug fix: ``_increase_memory_and_rerun`` takes (code, unit_input,
    libraries); the previous 2-argument call raised TypeError inside the
    recovery path. NOTE(review): the helper is handed a file path where
    run() expects source text -- confirm the intended recovery semantics.
    """
    output = ConsoleOutput("")
    commands = get_code_execution_command(self.lang, code_dest_file)
    workdir = "/example" if self.lang == Language.GO else None
    for command in commands:
        try:
            output = self.execute_command(command, workdir=workdir)
        except docker.errors.ContainerError as e:
            if 'memory' in str(e).lower():  # memory-related failure heuristic
                self._increase_memory_and_rerun(code_dest_file, unit_input, None)
                return self._execute_code_in_container(code_dest_file, unit_input)
            raise
    return output
def _execute_sh_in_container(self, sh_dest_file):
    """Make the uploaded shell script executable, run it with bash, and
    return the run's ConsoleOutput."""
    workdir = "/example" if self.lang == Language.GO else None
    # Step 1: mark the script executable (output intentionally ignored).
    self.execute_command("chmod +x " + sh_dest_file, workdir=workdir)
    # Step 2: run it. NOTE(review): the separator between /bin/bash and the
    # script path is a literal TAB, preserved from the original -- confirm
    # the exec API tokenises it as intended.
    return self.execute_command("/bin/bash\t" + sh_dest_file, workdir=workdir)
def copy_from_runtime(self, src: str, dest: str):
    """Fetch `src` from the container filesystem into host path `dest`."""
    self._ensure_session_is_open()
    if self.verbose:
        print(f"Copying {self.container.short_id}:{src} to {dest}..")
    self._extract_file_from_container(src, dest)
def _extract_file_from_container(self, src, dest):
bits, stat = self.container.get_archive(src)
if stat["size"] == 0:
raise FileNotFoundError(f"File {src} not found in the container")
tarstream = io.BytesIO(b"".join(bits))
with tarfile.open(fileobj=tarstream, mode="r") as tar:
tar.extractall(os.path.dirname(dest))
def copy_to_runtime(self, src: str, dest: str):
    """Upload host file `src` into the container at `dest`, creating the
    destination directory first when it is missing."""
    self._ensure_session_is_open()
    self._create_directory_if_needed(dest)
    self._copy_file_to_container(src, dest)
def _create_directory_if_needed(self, dest):
directory = os.path.dirname(dest)
if directory and not self.container.exec_run(f"test -d {directory}")[0] == 0:
self.container.exec_run(f"mkdir -p {directory}")
if self.verbose:
print(f"Creating directory {self.container.short_id}:{directory}")
def _copy_file_to_container(self, src, dest):
tarstream = io.BytesIO()
with tarfile.open(fileobj=tarstream, mode="w") as tar:
tar.add(src, arcname=os.path.basename(src))
tarstream.seek(0)
self.container.put_archive(os.path.dirname(dest), tarstream)
def execute_command(
    self, command: Optional[str], workdir: Optional[str] = None
) -> ConsoleOutput:
    """Validate `command`, ensure the session is open, then run it in the
    container and return its collected output."""
    self._validate_command(command)
    self._ensure_session_is_open()
    if self.verbose:
        print(f"Executing command: {command}")
    return self._run_command_in_container(command, workdir)
def _validate_command(self, command):
if not command:
raise ValueError("Command cannot be empty")
def _run_command_in_container(self, command, workdir):
    """Exec `command` with a streaming TTY, accumulate the decoded chunks,
    and wrap them in a ConsoleOutput.

    NOTE(review): with stream=True the exec API's first tuple element is not
    a usable exit code, so failures are only visible in the text -- confirm
    callers rely solely on the output.
    """
    exec_kwargs = {"stream": True, "tty": True}
    if workdir:
        exec_kwargs["workdir"] = workdir
    _exit_code, exec_log = self.container.exec_run(command, **exec_kwargs)
    if self.verbose:
        print("Output:", end=" ")
    chunks = []
    for chunk in exec_log:
        decoded = chunk.decode("utf-8")
        chunks.append(decoded)
        if self.verbose:
            print(decoded, end="")
    return ConsoleOutput("".join(chunks))
def _increase_memory_and_rerun(self, code, unit_input, libraries):
self.close()
self.open()
self.run(code, unit_input, libraries)
================================================
FILE: mplsandbox/tool.py
================================================
import argparse
from mplsandbox.sandbox import Sandbox
from mplsandbox.analyzetools import AnalyzeTools
from mplsandbox.const import LanguageValues, CodeType
import docker
from flask import jsonify
import traceback
import logging
from mplsandbox.utils import *
class MPLSANDBOX:
    """High-level entry point: executes user code against unit tests inside a
    Docker sandbox and aggregates reward, compiler feedback and analysis
    feedback into a single report dict."""
    def __init__(self, *args, **kwargs):
        # Accepts either a path to a JSON config file or an already-built dict.
        if isinstance(args[0], str):
            data_path = args[0]
            try:
                with open(data_path, 'r') as file:
                    self.args = json.load(file)
            except Exception as e:
                raise ValueError(f"Failed to read JSON from the provided path: {e}")
        elif isinstance(args[0], dict):
            self.args = args[0]
        else:
            raise ValueError("Only dictionary type arguments or string paths to JSON files are accepted")
        # When truthy, errors are returned as Flask (jsonify, status) pairs
        # instead of being raised.
        self.app = self.args.get("app", False)
    def process_config(self):
        # Validate the raw config and unpack the fields the sandbox needs.
        # Returns the 10-tuple consumed by get_basic_info / code_analyze_feedback.
        code = self.args.get('code')
        unit_dict = self.args.get('unit_cases')
        libraries = self.args.get('libraries', [])
        question = self.args.get('question')
        lang = self.args.get('lang', "AUTO")
        if lang == "AUTO":
            # Fall back to content-based language detection.
            lang = detect_language(code)
        client = docker.from_env() if self.args.get('client') else None
        image = self.args.get('image', None)
        docker_file = self.args.get('docker_file', None)
        keep_template = self.args.get('keep_template', True)
        verbose = self.args.get('verbose', False)
        if not code:
            raise_error_templete("No code provided", 400, self.app)
        # NOTE(review): unit_dict is not checked for None before indexing; a
        # config without 'unit_cases' raises TypeError here -- confirm intent.
        if len(unit_dict["inputs"]) != len(unit_dict["outputs"]):
            raise_error_templete("Input and output cases in unit-test should be same.", 400, self.app)
        if libraries is not None:
            if not isinstance(libraries, list) or not all(isinstance(lib, str) for lib in libraries):
                raise_error_templete({"Libraries must be a list of strings"}, 400, self.app)
        if lang not in LanguageValues:
            raise_error_templete(f"Invalid language specified.", 400, self.app)
        return client, image, docker_file, lang, keep_template, verbose, code, unit_dict, libraries, question
    def get_basic_info(self, show_per_unit_feedback=False):
        # Run every unit test in the sandbox and build the result record:
        # reward, compiler feedback, correct rate plus an echo of the inputs.
        client, image, docker_file, lang, keep_template, verbose, code, unit_dict, libraries, question = self.process_config()
        try:
            with Sandbox(client=client,
                         image=image,
                         dockerfile=docker_file,
                         lang=lang,
                         keep_template=keep_template,
                         verbose=verbose,
                         ) as session:
                results = []
                correct_num = 0
                for unit_input, unit_answer in zip(unit_dict["inputs"], unit_dict["outputs"]):
                    output = session.run(code, unit_input, libraries=libraries)
                    # Whitespace-insensitive comparison of expected vs actual.
                    if_correct = output_answer_check(unit_answer.strip(), output.text.strip())
                    if if_correct:
                        correct_num += 1
                    tmp_text = remove_ansi_codes(output.text)
                    results.append(tmp_text)
                    # NOTE(review): only the LAST unit's reward survives this
                    # loop; with zero units `reward` is unbound and the
                    # division below raises -- confirm inputs are non-empty.
                    reward = get_reward(tmp_text, lang, if_correct)
                correct_rate = correct_num / len(unit_dict["inputs"])
                compiler_feedback, compiler_feedback_per_unit = results, results
                # -0.3 encodes "ran fine but wrong answer" (see get_reward).
                if reward == -0.3:
                    compiler_feedback = f"AssertionError:\nInput:{unit_dict['inputs']}\nOutput:{results}\nRequired Output:{unit_dict['outputs']}"
                    compiler_feedback_per_unit = f"AssertionError:\nInput:{unit_dict['inputs'][0]}\nOutput:{results[0]}\nRequired Output:{unit_dict['outputs'][0]}"
                results_dict = {"reward": reward,
                                "compiler_feedback": compiler_feedback,
                                "correct_rate": correct_rate,
                                "question": question,
                                "code": code,
                                "inputs": unit_dict["inputs"],
                                "required_outputs": unit_dict["outputs"],
                                "language": lang}
                if show_per_unit_feedback:
                    results_dict["compiler_feedback_per_unit"] = compiler_feedback_per_unit
                return results_dict
        except Exception as e:
            # Uniform error envelope: JSON response in app mode, dict otherwise.
            error_type = type(e).__name__
            error_message = str(e)
            stack_trace = traceback.format_exc()
            logging.error(f"Error during code execution: {error_message}, Stack trace: {stack_trace}")
            error_dict = {"error": {"type": error_type, "message": error_message, "stack_trace": stack_trace}}
            return (jsonify(error_dict), 500) if self.app else error_dict
    def code_analyze_feedback(self, analysis_type):
        # Run static/dynamic analysis tools over the code. Only python is
        # handled today; other languages return an empty dict.
        client, image, docker_file, lang, keep_template, verbose, code, unit_dict, libraries, question = self.process_config()
        try:
            with AnalyzeTools(client=client,
                              image=image,
                              dockerfile=docker_file,
                              lang=lang,
                              keep_template=keep_template,
                              verbose=verbose,
                              ) as session:
                output = None
                output_dict = dict()
                analysis_list = ["all","code_basic_analysis","code_smell_analysis","code_bug_analysis","unit_test_analysis","code_efficiency_evaluation"]
                assert analysis_type in analysis_list, f"Invalid analysis type. Available types are {analysis_list}"
                if lang == "python":
                    if analysis_type == "all":
                        # "all" expands to every concrete analysis type.
                        for sub_type in analysis_list[1:]:
                            output = session.call_tool_python(code=code,unit_inputs=unit_dict["inputs"],analysis=sub_type)
                            output_dict[sub_type] = output
                    else:
                        output = session.call_tool_python(code=code,unit_inputs=unit_dict["inputs"],analysis=analysis_type)
                        output_dict[analysis_type] = output
                return output_dict
        except Exception as e:
            error_type = type(e).__name__
            error_message = str(e)
            stack_trace = traceback.format_exc()
            logging.error(f"Error during code execution: {error_message}, Stack trace: {stack_trace}")
            error_dict = {"error": {"type": error_type, "message": error_message, "stack_trace": stack_trace}}
            return (jsonify(error_dict), 500) if self.app else error_dict
    # Commented-out OpenAI-based analysis helper retained from upstream:
    # def get_ai_anlysis(self, openai_api_key, model):
    #     basic_info = self.get_basic_info(show_per_unit_feedback=True)
    #     prompt = f"""You are a very professional {basic_info["language"]} code analyst;\n
    #     Now, for the question: {basic_info["question"]};\n
    #     I have written a piece of code: {basic_info["code"]}, Please note that the code here is used for individual unit testing;\n
    #     The given input is: {basic_info["input"][0]};\n
    #     the required answer is: {basic_info["required_output"][0]};\n
    #     and the compiler's feedback is: {basic_info["compiler_feedback_per_unit"]};\n
    #     Please analyze the problem, given code, given input, required answer, and compiler feedback comprehensively.
    #     Please revise the code according to the requirements of the problem to complete the correct code.\n
    #     """
    #     context = [{'role': 'user', "content": prompt}]
    #     anlysis_report = get_completion_from_messages(openai_api_key, context, model) if basic_info['correct_rate'] != 1 else "The code is correct."
    #     anlysis_info = basic_info.copy().updata({"anlysis_report": anlysis_report})
    #     return anlysis_info
    def run(self,analysis_type="all"):
        # Combine basic execution feedback with the requested analysis feedback.
        basic_info = self.get_basic_info()
        analysis_info = self.code_analyze_feedback(analysis_type)
        result = basic_info
        if analysis_info is not None:
            result.update(analysis_info)
        return result
def main():
    """CLI entry point: run the full sandbox analysis on --data and write a
    plain-text report to --report."""
    parser = argparse.ArgumentParser(description="MPLSandbox Code Executor for Command Lines")
    parser.add_argument("--data", type=str, help="Path to the JSON data file")
    parser.add_argument("--report", type=str, help="Path to the TXT report")
    cli = parser.parse_args()
    report = MPLSANDBOX(cli.data).run(analysis_type="all")
    with open(cli.report, 'w', encoding='utf-8') as handle:
        handle.write("Report\n")
        handle.write("=" * 50 + "\n")
        for key, value in report.items():
            handle.write(f"{key}:\n")
            # Render nested dicts and lists one level deep, indented.
            if isinstance(value, dict):
                for sub_key, sub_value in value.items():
                    handle.write(f" {sub_key}: {sub_value}\n")
            elif isinstance(value, list):
                handle.write(f" {', '.join(map(str, value))}\n")
            else:
                handle.write(f" {value}\n")
            handle.write("\n")
if __name__ == "__main__":
    main()
================================================
FILE: mplsandbox/utils.py
================================================
import docker
import docker.errors
from typing import Optional
import json
from docker import DockerClient
import docker
import re
from openai import OpenAI
from flask import jsonify
from guesslang import Guess
from mplsandbox.const import Language, FILE_EXTENSION_MAPPING
class ConsoleOutput:
    """Read-only wrapper around the text captured from a container command."""
    def __init__(self, text: str):
        # Stored privately; exposed through the read-only `text` property.
        self._text = text
    @property
    def text(self):
        """The captured console text."""
        return self._text
    def __str__(self):
        return f"ConsoleOutput(text={self.text})"
def image_exists(client: DockerClient, image: str) -> bool:
    """
    Check if a Docker image exists locally.
    :param client: Docker client
    :param image: Docker image name or tag
    :return: True if the image exists, False otherwise

    Fix: the original ``except Exception as e: raise e`` clause was a no-op
    re-raise and has been removed; unexpected errors propagate unchanged.
    """
    try:
        client.images.get(image)
        return True
    except docker.errors.ImageNotFound:
        return False
def get_libraries_installation_command(lang: str, library: str) -> Optional[str]:
    """
    Build the shell command that installs `library` for `lang`.
    :param lang: Programming language (a Language constant)
    :param library: Single library name
    :return: Installation command string
    :raises ValueError: for unsupported languages
    """
    install_templates = {
        Language.PYTHON: f"pip install {library}",
        Language.JAVA: f"mvn install:install-file -Dfile={library}",
        Language.JAVASCRIPT: f"yarn add {library}",
        Language.CPP: f"apt-get install {library}",
        Language.GO: f"go get -u {library}",
        Language.RUBY: f"gem install {library}"
    }
    if lang not in install_templates:
        raise ValueError(f"Language {lang} is not supported")
    return install_templates[lang]
def get_code_file_extension(lang: str) -> str:
    """
    Get the source-file extension for the given language.
    :param lang: Programming language
    :return: File extension (without the leading dot)
    :raises ValueError: if the language is not supported

    Fix: TYPESCRIPT and BASH are executable via get_code_execution_command
    but were missing here, so preparing their code files always raised;
    both mappings are now present for consistency.
    """
    extensions = {
        Language.PYTHON: "py",
        Language.JAVA: "java",
        Language.JAVASCRIPT: "js",
        Language.CPP: "cpp",
        Language.GO: "go",
        Language.RUBY: "rb",
        Language.RUST: "rs",
        Language.TYPESCRIPT: "ts",
        Language.BASH: "sh"
    }
    if lang not in extensions:
        raise ValueError(f"Language {lang} is not supported")
    return extensions[lang]
def get_code_execution_command(lang: str, code_file: str) -> list:
    """
    Return the execution command for the given language and code file.
    :param lang: Language of the code
    :param code_file: Path to the code file
    :return: List of execution commands
    :raises ValueError: if the language is not supported
    """
    # Multi-element lists are compile-then-run sequences (cpp, rust, bash).
    commands = {
        Language.PYTHON: [f"python {code_file}"],
        Language.JAVA: [f"java {code_file}"],
        Language.JAVASCRIPT: [f"node {code_file}"],
        Language.CPP: [f"g++ -o a.out {code_file}", "./a.out"],
        Language.GO: [f"go run {code_file}"],
        Language.RUBY: [f"ruby {code_file}"],
        # NOTE(review): rustc writes its binary to the current directory by
        # default, while these commands address `code_file` minus extension
        # -- confirm the paths line up inside the sandbox image.
        Language.RUST: [f"rustc {code_file}", f"chmod +x {code_file.split('.')[0]}", f"{code_file.split('.')[0]}"],
        Language.TYPESCRIPT: [f"ts-node \"{code_file}\""],
        Language.BASH: [f"chmod +x {code_file}" , f"\"{code_file}\""]
    }
    if lang not in commands:
        raise ValueError(f"Language {lang} is not supported")
    return commands[lang]
def raise_error_templete(error_message: str, number: int, app=False):
    """Report an error either as a Flask (json, status) pair or by raising.

    :param error_message: message to report
    :param number: HTTP status code (only used when app is truthy)
    :param app: True when running inside the Flask service
    :raises ValueError: when app is falsy
    """
    if not app:
        raise ValueError(f"{error_message}")
    return jsonify({"error": f"{error_message}"}), number
def extract_libraries(code: str, language: str) -> list:
    """Best-effort scrape of imported/included library names from source.

    Supported: python, go, cpp, javascript, java, ruby; any other language
    yields an empty list.
    """
    patterns = {
        "python": (r'import (\w+)|from (\w+)', True),
        "go": (r'import "(.*?)"', False),
        "cpp": (r'#include <(.*?)>', False),
        "javascript": (r'require\("(.*?)"\)|import .* from "(.*?)"', True),
        "java": (r'import (.*?);', False),
        "ruby": (r'require "(.*?)"', False),
    }
    if language not in patterns:
        return []
    pattern, grouped = patterns[language]
    matches = re.findall(pattern, code)
    if grouped:
        # Alternation patterns yield tuples; keep whichever group matched.
        return [name for pair in matches for name in pair if name]
    return matches
def generate_install_commands(language: str, libraries: list) -> str:
    """Render a package-manager install command for `libraries`.

    Returns "" for languages without a supported package manager (e.g. cpp).
    Java yields Maven dependency XML snippets rather than a shell command.
    """
    joined = " ".join(libraries)
    if language == "python":
        return "pip install " + joined
    if language == "go":
        return "go get " + joined
    if language == "javascript":
        return "npm install " + joined
    if language == "java":
        # One <dependency> block per library, placeholder group/version.
        return "\n".join(
            f'<dependency>\n <groupId>groupId</groupId>\n <artifactId>{lib}</artifactId>\n <version>version</version>\n</dependency>'
            for lib in libraries
        )
    if language == "ruby":
        return "gem install " + joined
    return ""
def detect_language_via_file_extension(file_extension: str) -> str:
    # Map a file extension to a language name via the project-level table;
    # returns None for unknown extensions.
    return FILE_EXTENSION_MAPPING.MAPPING.get(file_extension, None)
def detect_language(code: str) -> str:
    """Guess the programming language of `code` with guesslang, lower-cased."""
    detected = Guess().language_name(code)
    # Only lower-case real strings; pass any non-str result through unchanged.
    return detected.lower() if isinstance(detected, str) else detected
def remove_ansi_codes(text):
    """Strip ANSI escape sequences (colors, cursor movement) from `text`."""
    return re.sub(r'\x1B[@-_][0-?]*[ -/]*[@-~]', '', text)
def output_answer_check(answer, output):
    """Compare expected answer and program output while ignoring whitespace
    differences: real newlines, literal "\\n" sequences, and spaces."""
    def _canonical(value):
        return value.replace("\n", "").replace("\\n", "").replace(" ", "")
    return _canonical(answer) == _canonical(output)
def read_code_file(code_file_path):
    """Return the raw text of the code file at `code_file_path`."""
    with open(code_file_path, "r") as handle:
        return handle.read()
def read_unit_file(unit_file_path):
    """Parse and return the JSON unit-test description at `unit_file_path`."""
    with open(unit_file_path, "r") as handle:
        return json.load(handle)
def read_libraries_file(library_file_path):
    """Return one library name per line from the file; [] when the path is
    falsy (no libraries requested)."""
    if not library_file_path:
        return []
    with open(library_file_path, "r") as handle:
        return [line.strip() for line in handle.readlines()]
def read_question_file(question_file_path):
    """Return the full text of the question/problem statement file."""
    with open(question_file_path, "r") as handle:
        return handle.read()
def get_reward(output, lang, if_correct):
    """Map compiler/runtime feedback to a scalar reward.

    1 for a correct answer; otherwise -1 for compile/syntax-level failures,
    -0.6 for other reported errors, and -0.3 as the default (wrong answer
    or unrecognised language).
    """
    if if_correct:
        return 1
    reward_mapping = {
        "python": lambda output: -1 if "SyntaxError" in output else -0.6 if "Error" in output else -0.3,
        "java": lambda output: -1 if "error: compilation failed" in output and "/tmp/code" in output else -0.6 if "error" in output else -0.3,
        "cpp": lambda output: -1 if "/tmp/code" in output else -0.6 if "error" in output else -0.3,
        "javascript": lambda output: -1 if "ReferenceError" in output and "/tmp/code" in output else -0.6 if "Error" in output else -0.3,
        "typescript": lambda output: -1 if "TypeScript" in output and "error" in output else -0.6 if "error" in output else -0.3,
        "bash": lambda output: -1 if "bash:" in output and "command not found" in output else -0.6 if "bash:" in output and "line" in output else -0.3,
        "go": lambda output: -1 if "go:" in output and "build" in output else -0.6 if "error" in output else -0.3,
    }
    scorer = reward_mapping.get(lang)
    return scorer(output) if scorer else -0.3
def get_completion_from_messages(api_key, messages, model, temperature=0):
    """Send a chat-completion request through the configured OpenAI-compatible
    endpoint and return the first choice's message content."""
    client = OpenAI(
        api_key=api_key,
        base_url="https://api3.apifans.com/v1",
    )
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return completion.choices[0].message.content
================================================
FILE: mplsandbox_for_rl/README.md
================================================
# Providing feedback signals in RL
We validated the effectiveness of MPLSandbox in providing compiler feedback by integrating it into the RLCF framework, which significantly enhanced the code generation capabilities of LLMs. This code serves as a demonstration of using MPLSandbox's compiler feedback to provide reward signals for the PPO algorithm.
## Usage
First, clone the MPLSandbox repository and install it using pip.
```bash
git clone git@github.com:Ablustrund/MPLSandbox.git
cd MPLSandbox
pip install .
```
Then, clone the mplsandbox_for_rl repository and install the requirements.
```bash
cd mplsandbox_for_rl
pip install -r requirements.txt
```
Finally, run the training script to train the PPO agent using MPLSandbox's compiler feedback.
```bash
bash train_ppo.sh
```
## Citation
This project is based on [MOSS-RLHF](https://openlmlab.github.io/MOSS-RLHF/). If you use MPLSandbox or mplsandbox_for_rl in your research, please cite the following paper:
```bibtex
@misc{dou2024MPLSandbox,
title={Multi-Programming Language Sandbox for LLMs},
author={Shihan Dou and Jiazheng Zhang and Jianxiang Zang and Yunbo Tao and Haoxiang Jia and Shichun Liu and Yuming Yang and Shenxi Wu and Shaoqing Zhang and Muling Wu and Changze Lv and Limao Xiong and Wenyu Zhan and Lin Zhang and Rongxiang Weng and Jingang Wang and Xunliang Cai and Yueming Wu and Ming Wen and Rui Zheng and Tao Ji and Yixin Cao and Tao Gui and Xipeng Qiu and Qi Zhang and Xuanjing Huang},
year={2024},
eprint={2410.23074},
archivePrefix={arXiv},
primaryClass={cs.SE},
url={https://arxiv.org/abs/2410.23074},
}
```
```bibtex
@article{zheng2023secrets,
title={Secrets of rlhf in large language models part i: Ppo},
author={Zheng, Rui and Dou, Shihan and Gao, Songyang and Hua, Yuan and Shen, Wei and Wang, Binghai and Liu, Yan and Jin, Senjie and Liu, Qin and Zhou, Yuhao and others},
journal={arXiv preprint arXiv:2307.04964},
year={2023}
}
```
================================================
FILE: mplsandbox_for_rl/config.py
================================================
import argparse
def parse_args(*args):
    """Build and parse the full RLHF/generation CLI.

    Positional ``*args`` may contain callables; each is invoked with the
    parser so task-specific scripts can register extra arguments before
    parsing. Returns the parsed argparse Namespace.
    """
    parser = argparse.ArgumentParser(description='generation model config.')
    # Model (chitchat) args
    parser.add_argument('--hf_model_name', type=str, default='hfl/chinese-roberta-wwm-ext', help='Hugging model name used to load vocabs, configs and pretained models')
    parser.add_argument('--init_from_hf_pretrain', action='store_true', help='whether to load weights from hugging face')
    parser.add_argument('--delimiter', type=str, default='\n', help='delimiter to seperate dialog history')
    parser.add_argument('--model_type', type=str, default='llama', help='model type')
    # parser.add_argument('--vocab_path', type=str, default=None, help='a customized vocabulary to override the default huggingface vocab')
    # parser.add_argument('--hidden_size', type=int, default=None, help='customize model if "init_from_hf_pretrain" is False')
    # parser.add_argument('--num_heads', type=int, default=None, help='customize model if "init_from_hf_pretrain" is False')
    # parser.add_argument('--num_layers', type=int, default=None, help='customize model if "init_from_hf_pretrain" is False')
    # parser.add_argument('--intermediate_size', type=int, default=None, help='customize model if "init_from_hf_pretrain" is False')
    # parser.add_argument('--layernorm_type', type=str, default='post')
    # parser.add_argument('--n_layers_freeze', type=int, default=0)
    # GPT (decode-only model) args
    # parser.add_argument('--separate_context_response', action='store_true', help='if true, calculate the loss of last utterance (response) only')
    parser.add_argument('--separate_prompt', type=str, default='P1: |P2: ')
    parser.add_argument('--no_prompt', action='store_true', help='Enable pure next token prediction pretraining')
    # parser.add_argument('--force_p2_response', action='store_true', help='When separate_prompt, set the prefix of response is always p2 (the even number of uttrs)')
    # Different task args
    parser.add_argument('--add_kd', action='store_true', help='add knowledge part for each dialog')
    parser.add_argument('--kd_len', type=int, default=256, help='max length of knowledge')
    parser.add_argument('--add_role', action='store_true', help='train role chat task')
    parser.add_argument('--multi_role', action='store_true')
    # Checkpoint args
    parser.add_argument('--model_file', type=str, default='./ckpts', help='checkpoint path, used for save model and continuous training from a breakpoint')
    parser.add_argument('--init_model', type=str, default=None, help='checkpoint used to initialize the model, used for fine-tuning')
    parser.add_argument('--init_model1', type=str, default=None, help='checkpoint used to initialize the model, used for fine-tuning')
    parser.add_argument('--init_model2', type=str, default=None, help='checkpoint used to initialize the model, used for fine-tuning')
    parser.add_argument('--hdfs_ckpt_path', type=str, default=None, help='upload/download checkpoints to/from HDFS')
    # Dataset args
    parser.add_argument('--batch_size', type=int, default=32, help='batch size')
    parser.add_argument('--context_truncate', type=int, default=2048, help='max length for history')
    parser.add_argument('--label_truncate', type=int, default=None, help='max length for response')
    parser.add_argument('--dynamic_batching', action='store_true', help='perform dynamic batching instead of fixed batchsize to accelerate training. the max tokens for each batch equals to batchsize * (context_trunc + label_trunc)')
    parser.add_argument('--data_path', type=str, default='./data', help='dataset folder path')
    parser.add_argument('--use_chunk_data', action='store_true', help='data that cannot fit in the memory and split into chunks')
    parser.add_argument('--num_workers', type=int, default=1, help='>0 for multiprocessing data loader')
    parser.add_argument('--num_prefetch', type=int, default=32, help='num of batches for each prefetch process')
    parser.add_argument('--verbose', type=int, default=1)
    parser.add_argument('--openai_style_prompt', action='store_true', help='use openai style instead of xP3 style prompt')
    parser.add_argument('--belle_style_prompt', action='store_true', help='use belle style prompt')
    parser.add_argument('--chatglm_style_prompt', action='store_true', help='use chatglm style prompt')
    parser.add_argument('--plug_style_prompt', action='store_true', help='use chatplug style prompt')
    parser.add_argument('--merge_role_prompts', action='store_true', help='ROLE: merge inline and init role information')
    parser.add_argument('--no_split_dialog', action='store_true')
    # Inference args
    parser.add_argument('--beam_size', type=int, default=1, help='num of candidates for decoding')
    parser.add_argument('--beam_groups', type=int, default=1)
    parser.add_argument('--group_delay', type=int ,default=1, help='num of steps before applying grouped beam search')
    parser.add_argument('--max_ts', type=int, default=128, help='max tokens to generate.')
    parser.add_argument('--temperature', type=float, default=1., help='temperature to rescale the logits before softmax.')
    parser.add_argument('--repetition_penalty', type=float, default=1., help='avoid from generation repetition tokens')
    parser.add_argument('--context_repetition_penalty', type=float, default=1., help='avoid from generation repetition tokens')
    parser.add_argument('--beam_min_length', type=int, default=0, help='minimal length to generate')
    parser.add_argument('--inference', type=str, default='beam', help='decoding algorithm')
    parser.add_argument('--topp', type=float, default=0.9, help='p for nucleus sampling')
    parser.add_argument('--beam_length_penalty', type=float, default=1., help='rescore the generation outputs to penalize short sequences')
    parser.add_argument('--length_penalty_version', type=str, default='eva')
    parser.add_argument('--bleu_backend', type=str, default='sacre', help='backend used for calculating BLEU')
    parser.add_argument('--bleu_level', type=str, default='sentence')
    parser.add_argument('--cider_sigma', type=float, default=15., help='sigma for CIDEr')
    parser.add_argument('--lang', type=str, default='zh', help='language the model trained on')
    parser.add_argument('--num_examples', type=int, default=999999, help='num of examples to generate')
    parser.add_argument('--no_repeat_ngram', type=int, default=-1, help='ngrams that are penalized for second time generation')
    parser.add_argument('--ngram_blacklist', type=str, default=None, help='a blacklist of ngrams forbid for generation. TODO. ')
    parser.add_argument('--no_history', action='store_true', help='do not record dialog history for interactive inference')
    parser.add_argument('--use_huggingface_generate', action='store_true', help='use huggingface generate() interface instead')
    # Training args
    parser.add_argument('--skip_generation', action='store_true', help='limited metrics for faster evaluation')
    parser.add_argument('--train_steps', type=int, default=999999, help='max train steps')
    parser.add_argument('--warmup_steps', type=int, default=10000, help='steps for learning rate warmup')
    # parser.add_argument('--grad_norm', type=float, default=1., help='max norm of gradients')
    parser.add_argument('--save_freq', type=int, default=1000, help='save checkpoint for every num of steps')
    parser.add_argument('--validation_metric', type=str, default='loss', help='metric to select the best model')
    parser.add_argument('--lr', type=float, default=1e-4, help='learning rate. will be ignored if using noam scheduler')
    parser.add_argument('--beta1', type=float, default=0.9, help='adam beta1')
    parser.add_argument('--beta2', type=float, default=0.995, help='adam beta2')
    parser.add_argument('--eps', type=float, default=1e-8, help='optimizer eps')
    parser.add_argument('--weight_decay', type=float, default=0., help='l2 weight decay')
    parser.add_argument('--scheduler', type=str, default='invsqrt', help='learning rate scheduler')
    parser.add_argument('--reduce_factor', type=float, default=0.5, help='reduceonplateau args')
    parser.add_argument('--reduce_patience', type=int, default=0, help='reduceonplateau args')
    parser.add_argument('--patience', type=int, default=999999, help='stop train patient')
    parser.add_argument('--tensorboard_logdir', type=str, default=None, help='path to write tensorboard logs')
    parser.add_argument('--label_smoothing', type=float, default=0., help='label smoothing rate for nll loss')
    parser.add_argument('--gradient_checkpoint', action='store_true', help='enable gradient checkpointing during training, which can expand almost 4x batchsize')
    # parser.add_argument('--8bit_optim', action='store_true')
    parser.add_argument('--stable_embedding', action='store_true')
    parser.add_argument('--fp32_loss', action='store_true', help='use fp32 to calculate cross-entropy loss, enable when numeric stability problem occurs')
    # Self-chat args
    # parser.add_argument('--selfchat_turns', type=int, default=9, help='num of turns for self-chat')
    # parser.add_argument('--selfchat_datasource', type=str, default=None, help='source of data for self-chat. None means stdin')
    # parser.add_argument('--selfchat_return_topk', action='store_true', help='get all candidates instead of top1')
    # tsp args
    parser.add_argument('--tsp_build_prob', type=float, default=0., help='the prob of each sample that will be built as a tsp positive sample online')
    # RM args
    parser.add_argument('--sampling_offtopic_prob', type=float, default=0., help='prob to sample offtopic response as negative sample')
    parser.add_argument('--reward_lm_loss_factor', type=float, default=0., help='calculate lm loss on rm model')
    # RLHF args
    parser.add_argument('--n_rollouts', type=int, default=128, help='num of responses to sample per iter')
    parser.add_argument('--n_candidates', type=int, default=1)
    parser.add_argument('--rollout_batch_size', type=int, default=4)
    parser.add_argument('--clip_reward', type=float, default=10.)
    parser.add_argument('--ref_mean', type=float, default=None)
    parser.add_argument('--ref_std', type=float, default=None)
    parser.add_argument('--pg_clip', type=float, default=0.2)
    parser.add_argument('--value_clip', type=float, default=0.2)
    parser.add_argument('--vf_loss_weight', type=float, default=1.)
    parser.add_argument('--init_actor', type=str, default=None)
    # parser.add_argument('--init_critic', type=str, default=None)
    parser.add_argument('--init_reward', type=str, default=None)
    parser.add_argument('--gamma', type=float, default=1.)
    parser.add_argument('--lam', type=float, default=0.95)
    parser.add_argument('--beta', type=float, default=0.02)
    # parser.add_argument('--ema', type=float, default=0.992)
    parser.add_argument('--rlhf_logdir', type=str, default='tmp')
    parser.add_argument('--debug', action='store_true', help='debug')
    # additional args not handled here: let caller-supplied hooks extend the parser.
    for func in args:
        if callable(func):
            func(parser)
    args = parser.parse_args()
    return args
================================================
FILE: mplsandbox_for_rl/config.yaml
================================================
compute_environment: LOCAL_MACHINE
deepspeed_config:
gradient_accumulation_steps: 1
gradient_clipping: 1.0
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: false
zero_stage: 2
distributed_type: DEEPSPEED
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
machine_rank: 0
main_process_ip: 10.176.98.78
main_process_port: 10538
main_training_function: main
megatron_lm_config: {}
mixed_precision: 'bf16'
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
================================================
FILE: mplsandbox_for_rl/data/add_index.py
================================================
import json

# Take the first 16 samples of the validation set, tag each with a
# sequential integer "id" field, and write them to a new JSON file.
# Paths are hard-coded for the original training environment.
SRC_PATH = '/root/StepCoder-main/data/valid.json'
DST_PATH = '/root/StepCoder-main/data/valid_2.json'

# Use context managers so file handles are closed deterministically
# (the original left both handles to the garbage collector).
with open(SRC_PATH, 'r') as src:
    ori = json.load(src)

data = ori[:16]
for count, d in enumerate(data):
    d['id'] = count

with open(DST_PATH, 'w') as dst:
    json.dump(data, dst, indent=4, ensure_ascii=False)
================================================
FILE: mplsandbox_for_rl/data/train.json
================================================
[
{
"prompt": "\ndef IgUhb():\n \"\"\"There are n student groups at the university. During the study day, each group can take no more than 7 classes. Seven time slots numbered from 1 to 7 are allocated for the classes.\n\nThe schedule on Monday is known for each group, i. e. time slots when group will have classes are known.\n\nYour task is to determine the minimum number of rooms needed to hold classes for all groups on Monday. Note that one room can hold at most one group class in a single time slot.\n\n\n-----Input-----\n\nThe first line contains a single integer n (1 ≤ n ≤ 1000) — the number of groups. \n\nEach of the following n lines contains a sequence consisting of 7 zeroes and ones — the schedule of classes on Monday for a group. If the symbol in a position equals to 1 then the group has class in the corresponding time slot. In the other case, the group has no class in the corresponding time slot.\n\n\n-----Output-----\n\nPrint minimum number of rooms needed to hold all groups classes on Monday.\n\n\n-----Examples-----\nInput\n2\n0101010\n1010101\n\nOutput\n1\n\nInput\n3\n0101011\n0011001\n0110111\n\nOutput\n3\n\n\n\n-----Note-----\n\nIn the first example one room is enough. It will be occupied in each of the seven time slot by the first group or by the second group.\n\nIn the second example three rooms is enough, because in the seventh time slot all three groups have classes.\n \"\"\"\n",
"canonical_solution": "\ndef IgUhb():\n strings = int(input())\n \n count = [0 for x in range(7)]\n \n for k in range(strings):\n s = input()\n for index in range(7):\n if s[index] == '1':\n count[index] += 1\n \n print(max(count))\n ",
"inputs": [
"2\n0101010\n1010101\n",
"3\n0101011\n0011001\n0110111\n",
"1\n0111000\n"
],
"outputs": [
"1\n",
"3\n",
"1\n"
],
"starter_code": "\ndef IgUhb():\n",
"scope": [
[
"Function Body",
2,
13
],
[
"List Comprehension",
5,
5
],
[
"For Loop Body",
7,
11
],
[
"For Loop Body",
9,
11
],
[
"If Statement Body",
10,
11
]
],
"difficulty": "competition",
"id": 0
},
{
"prompt": "\ndef XKxTL():\n \"\"\"Today Pari and Arya are playing a game called Remainders.\n\nPari chooses two positive integer x and k, and tells Arya k but not x. Arya have to find the value $x \\text{mod} k$. There are n ancient numbers c_1, c_2, ..., c_{n} and Pari has to tell Arya $x \\operatorname{mod} c_{i}$ if Arya wants. Given k and the ancient values, tell us if Arya has a winning strategy independent of value of x or not. Formally, is it true that Arya can understand the value $x \\text{mod} k$ for any positive integer x?\n\nNote, that $x \\text{mod} y$ means the remainder of x after dividing it by y.\n\n\n-----Input-----\n\nThe first line of the input contains two integers n and k (1 ≤ n, k ≤ 1 000 000) — the number of ancient integers and value k that is chosen by Pari.\n\nThe second line contains n integers c_1, c_2, ..., c_{n} (1 ≤ c_{i} ≤ 1 000 000).\n\n\n-----Output-----\n\nPrint \"Yes\" (without quotes) if Arya has a winning strategy independent of value of x, or \"No\" (without quotes) otherwise.\n\n\n-----Examples-----\nInput\n4 5\n2 3 5 12\n\nOutput\nYes\n\nInput\n2 7\n2 3\n\nOutput\nNo\n\n\n\n-----Note-----\n\nIn the first sample, Arya can understand $x \\operatorname{mod} 5$ because 5 is one of the ancient numbers.\n\nIn the second sample, Arya can't be sure what $x \\text{mod} 7$ is. For example 1 and 7 have the same remainders after dividing by 2 and 3, but they differ in remainders after dividing by 7.\n \"\"\"\n",
"canonical_solution": "from math import *\nfrom sys import *\ndef XKxTL():\n n, k = map(int, stdin.readline().split(\" \"))\n a = 1;\n for c in map(int, stdin.readline().split(\" \")):\n a = a * (gcd(k, c)//gcd(a, c))\n if a%k == 0:\n print(\"Yes\")\n else:\n print(\"No\")",
"inputs": [
"4 5\n2 3 5 12\n",
"2 7\n2 3\n",
"1 6\n8\n"
],
"outputs": [
"Yes\n",
"No\n",
"No\n"
],
"starter_code": "\ndef XKxTL():\n",
"scope": [
[
"Function Body",
3,
11
],
[
"For Loop Body",
6,
7
],
[
"If Statement Body",
8,
11
]
],
"difficulty": "competition",
"id": 1
},
{
"prompt": "\ndef WUbvY():\n \"\"\"Recently, Dima met with Sasha in a philatelic store, and since then they are collecting coins together. Their favorite occupation is to sort collections of coins. Sasha likes having things in order, that is why he wants his coins to be arranged in a row in such a way that firstly come coins out of circulation, and then come coins still in circulation. \n\nFor arranging coins Dima uses the following algorithm. One step of his algorithm looks like the following:\n\n He looks through all the coins from left to right; If he sees that the i-th coin is still in circulation, and (i + 1)-th coin is already out of circulation, he exchanges these two coins and continues watching coins from (i + 1)-th. \n\nDima repeats the procedure above until it happens that no two coins were exchanged during this procedure. Dima calls hardness of ordering the number of steps required for him according to the algorithm above to sort the sequence, e.g. the number of times he looks through the coins from the very beginning. For example, for the ordered sequence hardness of ordering equals one.\n\nToday Sasha invited Dima and proposed him a game. First he puts n coins in a row, all of them are out of circulation. Then Sasha chooses one of the coins out of circulation and replaces it with a coin in circulation for n times. During this process Sasha constantly asks Dima what is the hardness of ordering of the sequence. \n\nThe task is more complicated because Dima should not touch the coins and he should determine hardness of ordering in his mind. Help Dima with this task. \n\n\n-----Input-----\n\nThe first line contains single integer n (1 ≤ n ≤ 300 000) — number of coins that Sasha puts behind Dima.\n\nSecond line contains n distinct integers p_1, p_2, ..., p_{n} (1 ≤ p_{i} ≤ n) — positions that Sasha puts coins in circulation to. At first Sasha replaces coin located at position p_1, then coin located at position p_2 and so on. 
Coins are numbered from left to right.\n\n\n-----Output-----\n\nPrint n + 1 numbers a_0, a_1, ..., a_{n}, where a_0 is a hardness of ordering at the beginning, a_1 is a hardness of ordering after the first replacement and so on. \n\n\n-----Examples-----\nInput\n4\n1 3 4 2\n\nOutput\n1 2 3 2 1\n\nInput\n8\n6 8 3 4 7 2 1 5\n\nOutput\n1 2 2 3 4 3 4 5 1\n\n\n\n-----Note-----\n\nLet's denote as O coin out of circulation, and as X — coin is circulation.\n\nAt the first sample, initially in row there are coins that are not in circulation, so Dima will look through them from left to right and won't make any exchanges.\n\nAfter replacement of the first coin with a coin in circulation, Dima will exchange this coin with next three times and after that he will finally look through the coins and finish the process.\n\nXOOO → OOOX\n\nAfter replacement of the third coin, Dima's actions look this way:\n\nXOXO → OXOX → OOXX\n\nAfter replacement of the fourth coin, Dima's actions look this way:\n\nXOXX → OXXX\n\nFinally, after replacement of the second coin, row becomes consisting of coins that are in circulation and Dima will look through coins from left to right without any exchanges.\n \"\"\"\n",
"canonical_solution": "\ndef WUbvY():\n n = int(input())\n a = list(map(int, input().split()))\n p = [0] * (n + 1)\n ans = [1] * (n + 1)\n ind = n\n for i in range(n):\n p[a[i] - 1] = 1\n while ind > 0 and p[ind - 1] == 1:\n ind -= 1\n ans[i + 1] = 1 + (i + 1) - (n - ind)\n print(' '.join(map(str, ans)))",
"inputs": [
"4\n1 3 4 2\n",
"8\n6 8 3 4 7 2 1 5\n",
"1\n1\n"
],
"outputs": [
"1 2 3 2 1\n",
"1 2 2 3 4 3 4 5 1\n",
"1 1\n"
],
"starter_code": "\ndef WUbvY():\n",
"scope": [
[
"Function Body",
2,
13
],
[
"For Loop Body",
8,
12
],
[
"While Loop Body",
10,
11
]
],
"difficulty": "competition",
"id": 2
},
{
"prompt": "\ndef aRWJo():\n \"\"\"There are many anime that are about \"love triangles\": Alice loves Bob, and Charlie loves Bob as well, but Alice hates Charlie. You are thinking about an anime which has n characters. The characters are labeled from 1 to n. Every pair of two characters can either mutually love each other or mutually hate each other (there is no neutral state).\n\nYou hate love triangles (A-B are in love and B-C are in love, but A-C hate each other), and you also hate it when nobody is in love. So, considering any three characters, you will be happy if exactly one pair is in love (A and B love each other, and C hates both A and B), or if all three pairs are in love (A loves B, B loves C, C loves A).\n\nYou are given a list of m known relationships in the anime. You know for sure that certain pairs love each other, and certain pairs hate each other. You're wondering how many ways you can fill in the remaining relationships so you are happy with every triangle. Two ways are considered different if two characters are in love in one way but hate each other in the other. Print this count modulo 1 000 000 007.\n\n\n-----Input-----\n\nThe first line of input will contain two integers n, m (3 ≤ n ≤ 100 000, 0 ≤ m ≤ 100 000).\n\nThe next m lines will contain the description of the known relationships. The i-th line will contain three integers a_{i}, b_{i}, c_{i}. If c_{i} is 1, then a_{i} and b_{i} are in love, otherwise, they hate each other (1 ≤ a_{i}, b_{i} ≤ n, a_{i} ≠ b_{i}, $c_{i} \\in \\{0,1 \\}$).\n\nEach pair of people will be described no more than once.\n\n\n-----Output-----\n\nPrint a single integer equal to the number of ways to fill in the remaining pairs so that you are happy with every triangle modulo 1 000 000 007. 
\n\n\n-----Examples-----\nInput\n3 0\n\nOutput\n4\n\nInput\n4 4\n1 2 1\n2 3 1\n3 4 0\n4 1 0\n\nOutput\n1\n\nInput\n4 4\n1 2 1\n2 3 1\n3 4 0\n4 1 1\n\nOutput\n0\n\n\n\n-----Note-----\n\nIn the first sample, the four ways are to: Make everyone love each other Make 1 and 2 love each other, and 3 hate 1 and 2 (symmetrically, we get 3 ways from this). \n\nIn the second sample, the only possible solution is to make 1 and 3 love each other and 2 and 4 hate each other.\n \"\"\"\n",
"canonical_solution": "\ndef aRWJo():\n class DisjointSet(object):\n def __init__(self, n):\n self.parent = list(range(n))\n self.rank = [0] * n\n self.num = n # number of disjoint sets\n \n def union(self, x, y):\n self._link(self.find_set(x), self.find_set(y))\n \n def _link(self, x, y):\n if x == y:\n return\n self.num -= 1\n if self.rank[x] > self.rank[y]:\n self.parent[y] = x\n else:\n self.parent[x] = y\n if self.rank[x] == self.rank[y]:\n self.rank[y] += 1\n \n def find_set(self, x):\n xp = self.parent[x]\n if xp != x:\n self.parent[x] = self.find_set(xp)\n return self.parent[x]\n \n \n def solve():\n n, m = list(map(int, input().split()))\n ds = DisjointSet(n * 2)\n for i in range(m):\n a, b, c = list(map(int, input().split()))\n a -= 1\n b -= 1\n aA = a * 2\n aB = aA + 1\n bA = b * 2\n bB = bA + 1\n if c == 0:\n if ds.find_set(aA) == ds.find_set(bA):\n return 0\n ds.union(aA, bB)\n ds.union(aB, bA)\n else:\n if ds.find_set(aA) == ds.find_set(bB):\n return 0\n ds.union(aA, bA)\n ds.union(aB, bB)\n return pow(2, (ds.num // 2) - 1, 10**9 + 7)\n \n \n print(solve())\n ",
"inputs": [
"3 0\n",
"4 4\n1 2 1\n2 3 1\n3 4 0\n4 1 0\n",
"4 4\n1 2 1\n2 3 1\n3 4 0\n4 1 1\n"
],
"outputs": [
"4\n",
"1\n",
"0\n"
],
"starter_code": "\ndef aRWJo():\n",
"scope": [
[
"Function Body",
2,
54
],
[
"Class Body",
3,
27
],
[
"Function Body",
4,
7
],
[
"Function Body",
9,
10
],
[
"Function Body",
12,
21
],
[
"If Statement Body",
13,
14
],
[
"If Statement Body",
16,
21
],
[
"If Statement Body",
20,
21
],
[
"Function Body",
23,
27
],
[
"If Statement Body",
25,
26
],
[
"Function Body",
30,
51
],
[
"For Loop Body",
33,
50
],
[
"If Statement Body",
41,
50
],
[
"If Statement Body",
42,
43
],
[
"If Statement Body",
47,
48
]
],
"difficulty": "competition",
"id": 3
},
{
"prompt": "\ndef aOSQR():\n \"\"\"One common way of digitalizing sound is to record sound intensity at particular time moments. For each time moment intensity is recorded as a non-negative integer. Thus we can represent a sound file as an array of $n$ non-negative integers.\n\nIf there are exactly $K$ distinct values in the array, then we need $k = \\lceil \\log_{2} K \\rceil$ bits to store each value. It then takes $nk$ bits to store the whole file.\n\nTo reduce the memory consumption we need to apply some compression. One common way is to reduce the number of possible intensity values. We choose two integers $l \\le r$, and after that all intensity values are changed in the following way: if the intensity value is within the range $[l;r]$, we don't change it. If it is less than $l$, we change it to $l$; if it is greater than $r$, we change it to $r$. You can see that we lose some low and some high intensities.\n\nYour task is to apply this compression in such a way that the file fits onto a disk of size $I$ bytes, and the number of changed elements in the array is minimal possible.\n\nWe remind you that $1$ byte contains $8$ bits.\n\n$k = \\lceil log_{2} K \\rceil$ is the smallest integer such that $K \\le 2^{k}$. In particular, if $K = 1$, then $k = 0$.\n\n\n-----Input-----\n\nThe first line contains two integers $n$ and $I$ ($1 \\le n \\le 4 \\cdot 10^{5}$, $1 \\le I \\le 10^{8}$) — the length of the array and the size of the disk in bytes, respectively.\n\nThe next line contains $n$ integers $a_{i}$ ($0 \\le a_{i} \\le 10^{9}$) — the array denoting the sound file.\n\n\n-----Output-----\n\nPrint a single integer — the minimal possible number of changed elements.\n\n\n-----Examples-----\nInput\n6 1\n2 1 2 3 4 3\n\nOutput\n2\n\nInput\n6 2\n2 1 2 3 4 3\n\nOutput\n0\n\nInput\n6 1\n1 1 2 2 3 3\n\nOutput\n2\n\n\n\n-----Note-----\n\nIn the first example we can choose $l=2, r=3$. 
The array becomes 2 2 2 3 3 3, the number of distinct elements is $K=2$, and the sound file fits onto the disk. Only two values are changed.\n\nIn the second example the disk is larger, so the initial file fits it and no changes are required.\n\nIn the third example we have to change both 1s or both 3s.\n \"\"\"\n",
"canonical_solution": "\ndef aOSQR():\n N, I = list(map(int, input().split()))\n A = sorted([int(a) for a in input().split()])\n B = []\n j = 0\n for i in range(N):\n if i == 0 or A[i] == A[i-1]:\n B.append(j)\n else:\n j += 1\n B.append(j)\n \n def calc(k):\n K = 1<<k\n i = 0\n j = 0\n ma = 0\n while j < N:\n if B[j]-B[i] <= K - 1:\n ma = max(ma, j-i+1)\n j += 1\n else:\n i += 1\n return N-ma\n \n ans = 10**100\n for i in range(31):\n if i * N <= 8 * I:\n ans = min(ans, calc(i))\n \n print(ans)\n \n \n ",
"inputs": [
"6 1\n2 1 2 3 4 3\n",
"6 2\n2 1 2 3 4 3\n",
"6 1\n1 1 2 2 3 3\n"
],
"outputs": [
"2\n",
"0\n",
"2\n"
],
"starter_code": "\ndef aOSQR():\n",
"scope": [
[
"Function Body",
2,
32
],
[
"List Comprehension",
4,
4
],
[
"For Loop Body",
7,
12
],
[
"If Statement Body",
8,
12
],
[
"Function Body",
14,
25
],
[
"While Loop Body",
19,
24
],
[
"If Statement Body",
20,
24
],
[
"For Loop Body",
28,
30
],
[
"If Statement Body",
29,
30
]
],
"difficulty": "competition",
"id": 4
},
{
"prompt": "\ndef CnMlu():\n \"\"\"Bob recently read about bitwise operations used in computers: AND, OR and XOR. He have studied their properties and invented a new game.\n\nInitially, Bob chooses integer m, bit depth of the game, which means that all numbers in the game will consist of m bits. Then he asks Peter to choose some m-bit number. After that, Bob computes the values of n variables. Each variable is assigned either a constant m-bit number or result of bitwise operation. Operands of the operation may be either variables defined before, or the number, chosen by Peter. After that, Peter's score equals to the sum of all variable values.\n\nBob wants to know, what number Peter needs to choose to get the minimum possible score, and what number he needs to choose to get the maximum possible score. In both cases, if there are several ways to get the same score, find the minimum number, which he can choose.\n\n\n-----Input-----\n\nThe first line contains two integers n and m, the number of variables and bit depth, respectively (1 ≤ n ≤ 5000; 1 ≤ m ≤ 1000). \n\nThe following n lines contain descriptions of the variables. Each line describes exactly one variable. Description has the following format: name of a new variable, space, sign \":=\", space, followed by one of: Binary number of exactly m bits. The first operand, space, bitwise operation (\"AND\", \"OR\" or \"XOR\"), space, the second operand. Each operand is either the name of variable defined before or symbol '?', indicating the number chosen by Peter. \n\nVariable names are strings consisting of lowercase Latin letters with length at most 10. All variable names are different.\n\n\n-----Output-----\n\nIn the first line output the minimum number that should be chosen by Peter, to make the sum of all variable values minimum possible, in the second line output the minimum number that should be chosen by Peter, to make the sum of all variable values maximum possible. 
Both numbers should be printed as m-bit binary numbers.\n\n\n-----Examples-----\nInput\n3 3\na := 101\nb := 011\nc := ? XOR b\n\nOutput\n011\n100\n\nInput\n5 1\na := 1\nbb := 0\ncx := ? OR a\nd := ? XOR ?\ne := d AND bb\n\nOutput\n0\n0\n\n\n\n-----Note-----\n\nIn the first sample if Peter chooses a number 011_2, then a = 101_2, b = 011_2, c = 000_2, the sum of their values is 8. If he chooses the number 100_2, then a = 101_2, b = 011_2, c = 111_2, the sum of their values is 15.\n\nFor the second test, the minimum and maximum sum of variables a, bb, cx, d and e is 2, and this sum doesn't depend on the number chosen by Peter, so the minimum Peter can choose is 0.\n \"\"\"\n",
"canonical_solution": "import sys\ndef CnMlu():\n def calc(b0, b1, q):\n if q == 0:\n return b0 ^ b1\n if q == 1:\n return b0 | b1\n if q == 2:\n return b0 & b1\n n, m = list(map(int,sys.stdin.readline().split()))\n arr1 = {}\n opt = ['XOR', 'OR', 'AND']\n arr2 = []\n for j in range(n):\n a, b = list(map(str,sys.stdin.readline().split(\" := \")))\n b = b.split()\n if len(b) == 1:\n s = b[0]\n arr1[a] = s\n else:\n c = b[0]\n d = b[2]\n q = opt.index(b[1])\n arr2.append((a, c, d, q))\n \n mins = ''\n maxs = ''\n d0 = {'?':0}\n d1 = {'?':1}\n for i in range(m):\n for a, b in list(arr1.items()):\n d0[a] = int(b[i])\n d1[a] = int(b[i])\n s0 = 0\n s1 = 0\n for a, c, d, q in arr2:\n b00 = d0[c]\n b01 = d0[d]\n b10 = d1[c]\n b11 = d1[d]\n c0 = calc(b00, b01, q)\n c1 = calc(b10, b11, q)\n s0 += (1 if c0 else 0)\n s1 += (1 if c1 else 0)\n d0[a] = c0\n d1[a] = c1\n if s1 < s0:\n mins += \"1\"\n else:\n mins += \"0\"\n if s1 > s0:\n maxs += \"1\"\n else:\n maxs += \"0\"\n sys.stdout.write(\"{0}\\n{1}\".format(mins,maxs))\n ",
"inputs": [
"3 3\na := 101\nb := 011\nc := ? XOR b\n",
"5 1\na := 1\nbb := 0\ncx := ? OR a\nd := ? XOR ?\ne := d AND bb\n",
"2 10\nb := 0100101101\na := ? XOR b\n"
],
"outputs": [
"011\n100\n",
"0\n0\n",
"0100101101\n1011010010\n"
],
"starter_code": "\ndef CnMlu():\n",
"scope": [
[
"Function Body",
2,
55
],
[
"Function Body",
3,
9
],
[
"If Statement Body",
4,
5
],
[
"If Statement Body",
6,
7
],
[
"If Statement Body",
8,
9
],
[
"For Loop Body",
14,
24
],
[
"If Statement Body",
17,
24
],
[
"For Loop Body",
30,
54
],
[
"For Loop Body",
31,
33
],
[
"For Loop Body",
36,
46
],
[
"If Statement Body",
47,
50
],
[
"If Statement Body",
51,
54
]
],
"difficulty": "competition",
"id": 5
},
{
"prompt": "\ndef MpKNJ():\n \"\"\"You are given an integer sequence A of length N and an integer K.\nYou will perform the following operation on this sequence Q times:\n - Choose a contiguous subsequence of length K, then remove the smallest element among the K elements contained in the chosen subsequence (if there are multiple such elements, choose one of them as you like).\nLet X and Y be the values of the largest and smallest element removed in the Q operations. You would like X-Y to be as small as possible.\nFind the smallest possible value of X-Y when the Q operations are performed optimally.\n\n-----Constraints-----\n - 1 \\leq N \\leq 2000\n - 1 \\leq K \\leq N\n - 1 \\leq Q \\leq N-K+1\n - 1 \\leq A_i \\leq 10^9\n - All values in input are integers.\n\n-----Input-----\nInput is given from Standard Input in the following format:\nN K Q\nA_1 A_2 ... A_N\n\n-----Output-----\nPrint the smallest possible value of X-Y.\n\n-----Sample Input-----\n5 3 2\n4 3 1 5 2\n\n-----Sample Output-----\n1\n\nIn the first operation, whichever contiguous subsequence of length 3 we choose, the minimum element in it is 1.\nThus, the first operation removes A_3=1 and now we have A=(4,3,5,2).\nIn the second operation, it is optimal to choose (A_2,A_3,A_4)=(3,5,2) as the contiguous subsequence of length 3 and remove A_4=2.\nIn this case, the largest element removed is 2, and the smallest is 1, so their difference is 2-1=1.\n \"\"\"\n",
"canonical_solution": "from collections import defaultdict,deque\nfrom heapq import heappush, heappop\nimport sys\nimport math\nimport bisect\nimport random\ndef MpKNJ():\n #!usr/bin/env python3\n def LI(): return [int(x) for x in sys.stdin.readline().split()]\n def I(): return int(sys.stdin.readline())\n def LS():return [list(x) for x in sys.stdin.readline().split()]\n def S():\n res = list(sys.stdin.readline())\n if res[-1] == \"\\n\":\n return res[:-1]\n return res\n def IR(n):\n return [I() for i in range(n)]\n def LIR(n):\n return [LI() for i in range(n)]\n def SR(n):\n return [S() for i in range(n)]\n def LSR(n):\n return [LS() for i in range(n)]\n sys.setrecursionlimit(1000000)\n mod = 1000000007\n def solve():\n n,k,q = LI()\n a = LI()\n b = [[a[i],i] for i in range(n)]\n b.sort()\n ans = b[q-1][0]-b[0][0]\n l = [-1,n]\n for i in range(1,n):\n l.append(b[i-1][1])\n l.sort()\n if b[i-1][0] == b[i][0]:\n continue\n s = [a[l[i]+1:l[i+1]] for i in range(i+1)]\n c = []\n for si in s:\n si.sort()\n for j in range(len(si)-k+1):\n c.append(si[j])\n if len(c) < q:\n continue\n c.sort()\n m = c[q-1]-c[0]\n if m < ans:\n ans = m\n print(ans)\n return\n #Solve\n def __starting_point():\n solve()\n __starting_point()",
"inputs": [
"5 3 2\n4 3 1 5 2\n",
"10 1 6\n1 1 2 3 5 8 13 21 34 55\n",
"11 7 5\n24979445 861648772 623690081 433933447 476190629 262703497 211047202 971407775 628894325 731963982 822804784\n"
],
"outputs": [
"1\n",
"7\n",
"451211184\n"
],
"starter_code": "\ndef MpKNJ():\n",
"scope": [
[
"Function Body",
7,
56
],
[
"Function Body",
9,
9
],
[
"List Comprehension",
9,
9
],
[
"Function Body",
10,
10
],
[
"Function Body",
11,
11
],
[
"List Comprehension",
11,
11
],
[
"Function Body",
12,
16
],
[
"If Statement Body",
14,
15
],
[
"Function Body",
17,
18
],
[
"List Comprehension",
18,
18
],
[
"Function Body",
19,
20
],
[
"List Comprehension",
20,
20
],
[
"Function Body",
21,
22
],
[
"List Comprehension",
22,
22
],
[
"Function Body",
23,
24
],
[
"List Comprehension",
24,
24
],
[
"Function Body",
27,
52
],
[
"List Comprehension",
30,
30
],
[
"For Loop Body",
34,
50
],
[
"If Statement Body",
37,
38
],
[
"List Comprehension",
39,
39
],
[
"For Loop Body",
41,
44
],
[
"For Loop Body",
43,
44
],
[
"If Statement Body",
45,
46
],
[
"If Statement Body",
49,
50
],
[
"Function Body",
54,
55
]
],
"difficulty": "competition",
"id": 6
},
{
"prompt": "\ndef FYnjM():\n \"\"\"Snuke is buying a lamp.\nThe light of the lamp can be adjusted to m levels of brightness, represented by integers from 1 through m, by the two buttons on the remote control.\nThe first button is a \"forward\" button. When this button is pressed, the brightness level is increased by 1, except when the brightness level is m, in which case the brightness level becomes 1.\nThe second button is a \"favorite\" button. When this button is pressed, the brightness level becomes the favorite brightness level x, which is set when the lamp is purchased.\nSnuke is thinking of setting the favorite brightness level x so that he can efficiently adjust the brightness.\nHe is planning to change the brightness n-1 times. In the i-th change, the brightness level is changed from a_i to a_{i+1}. The initial brightness level is a_1.\nFind the number of times Snuke needs to press the buttons when x is set to minimize this number.\n\n-----Constraints-----\n - 2 \\leq n,m \\leq 10^5\n - 1 \\leq a_i\\leq m\n - a_i \\neq a_{i+1}\n - n, m and a_i are integers.\n\n-----Input-----\nInput is given from Standard Input in the following format:\nn m\na_1 a_2 … a_n\n\n-----Output-----\nPrint the minimum number of times Snuke needs to press the buttons.\n\n-----Sample Input-----\n4 6\n1 5 1 4\n\n-----Sample Output-----\n5\n\nWhen the favorite brightness level is set to 1, 2, 3, 4, 5 and 6, Snuke needs to press the buttons 8, 9, 7, 5, 6 and 9 times, respectively.\nThus, Snuke should set the favorite brightness level to 4.\nIn this case, the brightness is adjusted as follows:\n - In the first change, press the favorite button once, then press the forward button once.\n - In the second change, press the forward button twice.\n - In the third change, press the favorite button once.\n \"\"\"\n",
"canonical_solution": "\ndef FYnjM():\n n,m=map(int,input().split())\n A=[int(i)-1 for i in input().split()]\n ds=[0]*m\n de=[[] for i in range(m)]\n h,dec=0,0\n for i in range(n-1):\n if A[i+1]-A[i]>0:\n h+=A[i+1]-A[i]\n else:\n h+=A[i+1]+1\n dec+=1\n de[A[i+1]].append((i,(A[i+1]-A[i])%m))\n for i in range(m):\n for a in de[i]:\n ds[(i-a[1]+1)%m]+=1\n ans=float(\"inf\")\n for i in range(m):\n for a in de[i]:\n h+=a[1]-1\n dec-=1\n h-=dec\n ans=min(h,ans)\n if i<=m-2:\n dec+=ds[i+1]\n \n print(ans)",
"inputs": [
"4 6\n1 5 1 4\n",
"10 10\n10 9 8 7 6 5 4 3 2 1\n",
"35 2\n1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1\n"
],
"outputs": [
"5\n",
"45\n",
"34\n"
],
"starter_code": "\ndef FYnjM():\n",
"scope": [
[
"Function Body",
2,
28
],
[
"List Comprehension",
4,
4
],
[
"List Comprehension",
6,
6
],
[
"For Loop Body",
8,
14
],
[
"If Statement Body",
9,
13
],
[
"For Loop Body",
15,
17
],
[
"For Loop Body",
16,
17
],
[
"For Loop Body",
19,
26
],
[
"For Loop Body",
20,
22
],
[
"If Statement Body",
25,
26
]
],
"difficulty": "competition",
"id": 7
},
{
"prompt": "\ndef WUotq():\n \"\"\"You invited $n$ guests to dinner! You plan to arrange one or more circles of chairs. Each chair is going to be either occupied by one guest, or be empty. You can make any number of circles. \n\nYour guests happen to be a little bit shy, so the $i$-th guest wants to have a least $l_i$ free chairs to the left of his chair, and at least $r_i$ free chairs to the right. The \"left\" and \"right\" directions are chosen assuming all guests are going to be seated towards the center of the circle. Note that when a guest is the only one in his circle, the $l_i$ chairs to his left and $r_i$ chairs to his right may overlap.\n\nWhat is smallest total number of chairs you have to use?\n\n\n-----Input-----\n\nFirst line contains one integer $n$ — number of guests, ($1 \\leqslant n \\leqslant 10^5$). \n\nNext $n$ lines contain $n$ pairs of space-separated integers $l_i$ and $r_i$ ($0 \\leqslant l_i, r_i \\leqslant 10^9$).\n\n\n-----Output-----\n\nOutput a single integer — the smallest number of chairs you have to use.\n\n\n-----Examples-----\nInput\n3\n1 1\n1 1\n1 1\n\nOutput\n6\n\nInput\n4\n1 2\n2 1\n3 5\n5 3\n\nOutput\n15\n\nInput\n1\n5 6\n\nOutput\n7\n\n\n\n-----Note-----\n\nIn the second sample the only optimal answer is to use two circles: a circle with $5$ chairs accomodating guests $1$ and $2$, and another one with $10$ chairs accomodationg guests $3$ and $4$.\n\nIn the third sample, you have only one circle with one person. The guest should have at least five free chairs to his left, and at least six free chairs to his right to the next person, which is in this case the guest herself. So, overall number of chairs should be at least 6+1=7.\n \"\"\"\n",
"canonical_solution": "import sys\ndef WUotq():\n input = sys.stdin.readline\n n=int(input())\n lr=[list(map(int,input().split())) for i in range(n)]\n L=[lr[i][0] for i in range(n)]\n R=[lr[i][1] for i in range(n)]\n L.sort()\n R.sort()\n ANS=0\n for i in range(n):\n ANS+=max(L[i],R[i])\n print(ANS+n)",
"inputs": [
"3\n1 1\n1 1\n1 1\n",
"4\n1 2\n2 1\n3 5\n5 3\n",
"1\n5 6\n"
],
"outputs": [
"6\n",
"15\n",
"7\n"
],
"starter_code": "\ndef WUotq():\n",
"scope": [
[
"Function Body",
2,
13
],
[
"List Comprehension",
5,
5
],
[
"List Comprehension",
6,
6
],
[
"List Comprehension",
7,
7
],
[
"For Loop Body",
11,
12
]
],
"difficulty": "competition",
"id": 8
},
{
"prompt": "\ndef uZhtH():\n \"\"\"Appleman has a tree with n vertices. Some of the vertices (at least one) are colored black and other vertices are colored white.\n\nConsider a set consisting of k (0 ≤ k < n) edges of Appleman's tree. If Appleman deletes these edges from the tree, then it will split into (k + 1) parts. Note, that each part will be a tree with colored vertices.\n\nNow Appleman wonders, what is the number of sets splitting the tree in such a way that each resulting part will have exactly one black vertex? Find this number modulo 1000000007 (10^9 + 7).\n\n\n-----Input-----\n\nThe first line contains an integer n (2 ≤ n ≤ 10^5) — the number of tree vertices. \n\nThe second line contains the description of the tree: n - 1 integers p_0, p_1, ..., p_{n} - 2 (0 ≤ p_{i} ≤ i). Where p_{i} means that there is an edge connecting vertex (i + 1) of the tree and vertex p_{i}. Consider tree vertices are numbered from 0 to n - 1.\n\nThe third line contains the description of the colors of the vertices: n integers x_0, x_1, ..., x_{n} - 1 (x_{i} is either 0 or 1). If x_{i} is equal to 1, vertex i is colored black. Otherwise, vertex i is colored white.\n\n\n-----Output-----\n\nOutput a single integer — the number of ways to split the tree modulo 1000000007 (10^9 + 7).\n\n\n-----Examples-----\nInput\n3\n0 0\n0 1 1\n\nOutput\n2\n\nInput\n6\n0 1 1 0 4\n1 1 0 0 1 0\n\nOutput\n1\n\nInput\n10\n0 1 2 1 4 4 4 0 8\n0 0 0 1 0 1 1 0 0 1\n\nOutput\n27\n \"\"\"\n",
"canonical_solution": "\ndef uZhtH():\n MOD = 1000000007\n \n n = int(input())\n p = [int(x) for x in input().split()]\n x = [int(x) for x in input().split()]\n \n children = [[] for x in range(n)]\n \n for i in range(1,n):\n children[p[i-1]].append(i)\n \n #print(children)\n \n count = [(0,0) for i in range(n)]\n for i in reversed(list(range(n))):\n prod = 1\n for ch in children[i]:\n prod *= count[ch][0]+count[ch][1]\n if x[i]:\n count[i] = (0,prod % MOD)\n else:\n tot = 0\n for ch in children[i]:\n cur = count[ch][1]*prod // (count[ch][0]+count[ch][1])\n tot += cur\n count[i] = (prod % MOD, tot % MOD)\n \n print(count[0][1])\n ",
"inputs": [
"3\n0 0\n0 1 1\n",
"6\n0 1 1 0 4\n1 1 0 0 1 0\n",
"10\n0 1 2 1 4 4 4 0 8\n0 0 0 1 0 1 1 0 0 1\n"
],
"outputs": [
"2\n",
"1\n",
"27\n"
],
"starter_code": "\ndef uZhtH():\n",
"scope": [
[
"Function Body",
2,
30
],
[
"List Comprehension",
6,
6
],
[
"List Comprehension",
7,
7
],
[
"List Comprehension",
9,
9
],
[
"For Loop Body",
11,
12
],
[
"List Comprehension",
16,
16
],
[
"For Loop Body",
17,
28
],
[
"For Loop Body",
19,
20
],
[
"If Statement Body",
21,
28
],
[
"For Loop Body",
25,
27
]
],
"difficulty": "competition",
"id": 9
},
{
"prompt": "\ndef cWmVr():\n \"\"\"Consider a tree $T$ (that is, a connected graph without cycles) with $n$ vertices labelled $1$ through $n$. We start the following process with $T$: while $T$ has more than one vertex, do the following:\n\n\n\n choose a random edge of $T$ equiprobably;\n\n shrink the chosen edge: if the edge was connecting vertices $v$ and $u$, erase both $v$ and $u$ and create a new vertex adjacent to all vertices previously adjacent to either $v$ or $u$. The new vertex is labelled either $v$ or $u$ equiprobably.\n\nAt the end of the process, $T$ consists of a single vertex labelled with one of the numbers $1, \\ldots, n$. For each of the numbers, what is the probability of this number becoming the label of the final vertex?\n\n\n-----Input-----\n\nThe first line contains a single integer $n$ ($1 \\leq n \\leq 50$).\n\nThe following $n - 1$ lines describe the tree edges. Each of these lines contains two integers $u_i, v_i$ — labels of vertices connected by the respective edge ($1 \\leq u_i, v_i \\leq n$, $u_i \\neq v_i$). It is guaranteed that the given graph is a tree.\n\n\n-----Output-----\n\nPrint $n$ floating numbers — the desired probabilities for labels $1, \\ldots, n$ respectively. All numbers should be correct up to $10^{-6}$ relative or absolute precision.\n\n\n-----Examples-----\nInput\n4\n1 2\n1 3\n1 4\n\nOutput\n0.1250000000\n0.2916666667\n0.2916666667\n0.2916666667\n\nInput\n7\n1 2\n1 3\n2 4\n2 5\n3 6\n3 7\n\nOutput\n0.0850694444\n0.0664062500\n0.0664062500\n0.1955295139\n0.1955295139\n0.1955295139\n0.1955295139\n\n\n\n-----Note-----\n\nIn the first sample, the resulting vertex has label 1 if and only if for all three edges the label 1 survives, hence the probability is $1/2^3 = 1/8$. All other labels have equal probability due to symmetry, hence each of them has probability $(1 - 1/8) / 3 = 7/24$.\n \"\"\"\n",
"canonical_solution": "\ndef cWmVr():\n maxn=50+10\n g=[None]*maxn\n dp=[None]*maxn\n c=[None]*maxn\n size=[0]*maxn\n \n for i in range(0,maxn):\n c[i]=[0]*maxn\n c[i][0]=1\n for j in range(1,i+1):\n c[i][j]=c[i-1][j-1]+c[i-1][j]\n \n n=int(input())\n for i in range(1,n+1):\n g[i]=[]\n for i in range(1,n):\n u,v=input().split()\n u=int(u)\n v=int(v)\n g[u].append(v)\n g[v].append(u)\n \n def mul(a,b,x,y):\n tmp=[0]*(x+y+1)\n for i in range(0,x+1):\n for j in range(0,y+1):\n tmp[i+j]+=a[i]*b[j]*c[i+j][i]*c[x+y-i-j][x-i]\n return tmp\n def dfs(pos,fa):\n nonlocal dp\n nonlocal size\n dp[pos]=[1]\n size[pos]=0\n for ch in g[pos]:\n if ch != fa:\n dfs(pos=ch,fa=pos)\n dp[pos]=mul(dp[pos],dp[ch],size[pos],size[ch])\n size[pos]+=size[ch]\n if fa:\n size[pos]+=1\n tmp=[0]*(size[pos]+1)\n for i in range(0,size[pos]+1):\n for j in range(0,size[pos]):\n if j<i:\n tmp[i]+=dp[pos][i-1]\n else:\n tmp[i]+=dp[pos][j]*0.5\n dp[pos]=tmp\n \n for i in range(1,n+1):\n dfs(pos=i,fa=0)\n tmp=dp[i][0]\n for j in range(1,n):\n tmp/=j\n print(tmp)",
"inputs": [
"4\n1 2\n1 3\n1 4\n",
"7\n1 2\n1 3\n2 4\n2 5\n3 6\n3 7\n",
"1\n"
],
"outputs": [
"0.125\n0.2916666666666667\n0.2916666666666667\n0.2916666666666667\n",
"0.08506944444444446\n0.06640625\n0.06640625\n0.19552951388888892\n0.19552951388888892\n0.19552951388888892\n0.19552951388888892\n",
"1\n"
],
"starter_code": "\ndef cWmVr():\n",
"scope": [
[
"Function Body",
2,
57
],
[
"For Loop Body",
9,
13
],
[
"For Loop Body",
12,
13
],
[
"For Loop Body",
16,
17
],
[
"For Loop Body",
18,
23
],
[
"Function Body",
25,
30
],
[
"For Loop Body",
27,
29
],
[
"For Loop Body",
28,
29
],
[
"Function Body",
31,
50
],
[
"For Loop Body",
36,
40
],
[
"If Statement Body",
37,
40
],
[
"If Statement Body",
41,
50
],
[
"For Loop Body",
44,
49
],
[
"For Loop Body",
45,
49
],
[
"If Statement Body",
46,
49
],
[
"For Loop Body",
52,
57
],
[
"For Loop Body",
55,
56
]
],
"difficulty": "competition",
"id": 10
},
{
"prompt": "\ndef OLPcD():\n \"\"\"We have a tree with N vertices. Vertex 1 is the root of the tree, and the parent of Vertex i (2 \\leq i \\leq N) is Vertex P_i.\nTo each vertex in the tree, Snuke will allocate a color, either black or white, and a non-negative integer weight.\nSnuke has a favorite integer sequence, X_1, X_2, ..., X_N, so he wants to allocate colors and weights so that the following condition is satisfied for all v.\n - The total weight of the vertices with the same color as v among the vertices contained in the subtree whose root is v, is X_v.\nHere, the subtree whose root is v is the tree consisting of Vertex v and all of its descendants.\nDetermine whether it is possible to allocate colors and weights in this way.\n\n-----Constraints-----\n - 1 \\leq N \\leq 1 000\n - 1 \\leq P_i \\leq i - 1\n - 0 \\leq X_i \\leq 5 000\n\n-----Inputs-----\nInput is given from Standard Input in the following format:\nN\nP_2 P_3 ... P_N\nX_1 X_2 ... X_N\n\n-----Outputs-----\nIf it is possible to allocate colors and weights to the vertices so that the condition is satisfied, print POSSIBLE; otherwise, print IMPOSSIBLE.\n\n-----Sample Input-----\n3\n1 1\n4 3 2\n\n-----Sample Output-----\nPOSSIBLE\n\nFor example, the following allocation satisfies the condition:\n - Set the color of Vertex 1 to white and its weight to 2.\n - Set the color of Vertex 2 to black and its weight to 3.\n - Set the color of Vertex 3 to white and its weight to 2.\nThere are also other possible allocations.\n \"\"\"\n",
"canonical_solution": "\ndef OLPcD():\n N=int(input())\n P=[-1]+[int(i)-1 for i in input().split()]\n X=[int(i) for i in input().split()]\n Q=[[] for i in range(N)]\n for i in range(1,N):\n Q[P[i]].append(i)\n dp=[0 for i in range(N)]\n INF=10**9+7\n def solve(i):\n cur=[INF for j in range(X[i]+1)]\n cur[0]=0\n for j in Q[i]:\n solve(j)\n prv=[k for k in cur]\n cur=[INF for k in range(X[i]+1)]\n for acc in range(len(prv)):\n if prv[acc]<INF:\n if acc+X[j]<=X[i]:\n cur[acc+X[j]]=min(cur[acc+X[j]],prv[acc]+dp[j])\n if acc+dp[j]<=X[i]:\n cur[acc+dp[j]]=min(cur[acc+dp[j]],prv[acc]+X[j])\n dp[i]=min(cur)\n solve(0)\n if dp[0]<INF:\n print(\"POSSIBLE\")\n else:\n print(\"IMPOSSIBLE\")\n ",
"inputs": [
"3\n1 1\n4 3 2\n",
"3\n1 2\n1 2 3\n",
"8\n1 1 1 3 4 5 5\n4 1 6 2 2 1 3 3\n"
],
"outputs": [
"POSSIBLE\n",
"IMPOSSIBLE\n",
"POSSIBLE\n"
],
"starter_code": "\ndef OLPcD():\n",
"scope": [
[
"Function Body",
2,
29
],
[
"List Comprehension",
4,
4
],
[
"List Comprehension",
5,
5
],
[
"List Comprehension",
6,
6
],
[
"For Loop Body",
7,
8
],
[
"List Comprehension",
9,
9
],
[
"Function Body",
11,
24
],
[
"List Comprehension",
12,
12
],
[
"For Loop Body",
14,
23
],
[
"List Comprehension",
16,
16
],
[
"List Comprehension",
17,
17
],
[
"For Loop Body",
18,
23
],
[
"If Statement Body",
19,
23
],
[
"If Statement Body",
20,
21
],
[
"If Statement Body",
22,
23
],
[
"If Statement Body",
26,
29
]
],
"difficulty": "competition",
"id": 11
},
{
"prompt": "\ndef ZHhKf():\n \"\"\"A bracket sequence is a string containing only characters \"(\" and \")\". A regular bracket sequence is a bracket sequence that can be transformed into a correct arithmetic expression by inserting characters \"1\" and \"+\" between the original characters of the sequence. For example, bracket sequences \"()()\" and \"(())\" are regular (the resulting expressions are: \"(1)+(1)\" and \"((1+1)+1)\"), and \")(\", \"(\" and \")\" are not.\n\nSubsequence is a sequence that can be derived from another sequence by deleting some elements without changing the order of the remaining elements.\n\nYou are given a regular bracket sequence $s$ and an integer number $k$. Your task is to find a regular bracket sequence of length exactly $k$ such that it is also a subsequence of $s$.\n\nIt is guaranteed that such sequence always exists.\n\n\n-----Input-----\n\nThe first line contains two integers $n$ and $k$ ($2 \\le k \\le n \\le 2 \\cdot 10^5$, both $n$ and $k$ are even) — the length of $s$ and the length of the sequence you are asked to find.\n\nThe second line is a string $s$ — regular bracket sequence of length $n$.\n\n\n-----Output-----\n\nPrint a single string — a regular bracket sequence of length exactly $k$ such that it is also a subsequence of $s$.\n\nIt is guaranteed that such sequence always exists.\n\n\n-----Examples-----\nInput\n6 4\n()(())\n\nOutput\n()()\n\nInput\n8 8\n(()(()))\n\nOutput\n(()(()))\n \"\"\"\n",
"canonical_solution": "\ndef ZHhKf():\n n, k = map(int, input().split())\n a = [0] * n\n b = ['0'] * n\n c = []\n s = input()\n for i in range(n):\n if k != 0:\n if s[i] == '(':\n c.append(i)\n else:\n d = c.pop()\n a[i] = 1\n a[d] = 1\n k -= 2\n for i in range(n):\n if a[i] == 1:\n print(s[i], end = '')\n ",
"inputs": [
"6 4\n()(())\n",
"8 8\n(()(()))\n",
"20 10\n((()))()((()()(())))\n"
],
"outputs": [
"()()\n",
"(()(()))\n",
"((()))()()\n"
],
"starter_code": "\ndef ZHhKf():\n",
"scope": [
[
"Function Body",
2,
19
],
[
"For Loop Body",
8,
16
],
[
"If Statement Body",
9,
16
],
[
"If Statement Body",
10,
16
],
[
"For Loop Body",
17,
19
],
[
"If Statement Body",
18,
19
]
],
"difficulty": "competition",
"id": 12
},
{
"prompt": "\ndef HPpdB():\n \"\"\"You are given a string A. Find a string B, where B is a palindrome and A is a subsequence of B.\n\nA subsequence of a string is a string that can be derived from it by deleting some (not necessarily consecutive) characters without changing the order of the remaining characters. For example, \"cotst\" is a subsequence of \"contest\".\n\nA palindrome is a string that reads the same forward or backward.\n\nThe length of string B should be at most 10^4. It is guaranteed that there always exists such string.\n\nYou do not need to find the shortest answer, the only restriction is that the length of string B should not exceed 10^4.\n\n\n-----Input-----\n\nFirst line contains a string A (1 ≤ |A| ≤ 10^3) consisting of lowercase Latin letters, where |A| is a length of A.\n\n\n-----Output-----\n\nOutput single line containing B consisting of only lowercase Latin letters. You do not need to find the shortest answer, the only restriction is that the length of string B should not exceed 10^4. If there are many possible B, print any of them.\n\n\n-----Examples-----\nInput\naba\n\nOutput\naba\nInput\nab\n\nOutput\naabaa\n\n\n-----Note-----\n\nIn the first example, \"aba\" is a subsequence of \"aba\" which is a palindrome.\n\nIn the second example, \"ab\" is a subsequence of \"aabaa\" which is a palindrome.\n \"\"\"\n",
"canonical_solution": "\ndef HPpdB():\n a = input()\n b = a[::-1]\n print(a + b)",
"inputs": [
"aba\n",
"ab\n",
"abcab\n"
],
"outputs": [
"abaaba",
"abba",
"abcabbacba"
],
"starter_code": "\ndef HPpdB():\n",
"scope": [
[
"Function Body",
2,
5
]
],
"difficulty": "competition",
"id": 13
},
{
"prompt": "\ndef ARLCD():\n \"\"\"During the last Sereja's Codesecrof round the server crashed many times, so the round was decided to be made unrated for some participants. \n\nLet's assume that n people took part in the contest. Let's assume that the participant who got the first place has rating a_1, the second place participant has rating a_2, ..., the n-th place participant has rating a_{n}. Then changing the rating on the Codesecrof site is calculated by the formula $d_{i} = \\sum_{j = 1}^{i - 1}(a_{j} \\cdot(j - 1) -(n - i) \\cdot a_{i})$.\n\nAfter the round was over, the Codesecrof management published the participants' results table. They decided that if for a participant d_{i} < k, then the round can be considered unrated for him. But imagine the management's surprise when they found out that the participants' rating table is dynamic. In other words, when some participant is removed from the rating, he is removed from the results' table and the rating is recalculated according to the new table. And of course, all applications for exclusion from the rating are considered in view of the current table.\n\nWe know that among all the applications for exclusion from the rating the first application to consider is from the participant with the best rank (the rank with the minimum number), for who d_{i} < k. We also know that the applications for exclusion from rating were submitted by all participants.\n\nNow Sereja wonders, what is the number of participants to be excluded from the contest rating, and the numbers of the participants in the original table in the order of their exclusion from the rating. Pay attention to the analysis of the first test case for a better understanding of the statement.\n\n\n-----Input-----\n\nThe first line contains two integers n, k (1 ≤ n ≤ 2·10^5, - 10^9 ≤ k ≤ 0). 
The second line contains n space-separated integers a_1, a_2, ..., a_{n} (1 ≤ a_{i} ≤ 10^9) — ratings of the participants in the initial table.\n\n\n-----Output-----\n\nPrint the numbers of participants in the order in which they were removed from the table. Print the initial numbers of the participants, that is, the numbers that the participants had in the initial table.\n\n\n-----Examples-----\nInput\n5 0\n5 3 4 1 2\n\nOutput\n2\n3\n4\n\nInput\n10 -10\n5 5 1 7 5 1 2 4 9 2\n\nOutput\n2\n4\n5\n7\n8\n9\n\n\n\n-----Note-----\n\nConsider the first test sample. \n\n\n\n Initially the sequence of the contest participants' ratings equals [5, 3, 4, 1, 2]. You can use this sequence to calculate the sequence of rating changes: [0, -9, -13, 8, 14]. According to the problem statement, the application of the participant who won the second place will be considered first.\n\n As soon as the second place winner is out from the ratings, the participants' rating sequence will equal [5, 4, 1, 2]. By this sequence you can count the new sequence of rating changes: [0, -8, 2, 6]. According to the problem statement, the application of the participant who won the second place will be considered. Initially this participant won third place.\n\n The new rating sequence equals [5, 1, 2], the new sequence of rating changes equals [0, -1, 1]. The second place participant's application is taken into consideration, initially this participant won the fourth place.\n\n The new rating sequence equals [5, 2], the new sequence of rating changes equals [0, 0]. No more applications will be considered. \n\nThus, you should print 2, 3, 4.\n \"\"\"\n",
"canonical_solution": "\ndef ARLCD():\n n, k = list(map(int, input().split()))\n c, m, l, r = 0, 0, [], 0\n for e in [int(i) for i in input().split()]:\n d = m - c * (n - c - 1) * e\n r+= 1\n if d < k:\n n -= 1\n l += [r]\n else:\n m += c * e\n c += 1\n l.sort()\n for e in l: print(e)\n \n \n ",
"inputs": [
"5 0\n5 3 4 1 2\n",
"10 -10\n5 5 1 7 5 1 2 4 9 2\n"
],
"outputs": [
"2\n3\n4\n",
"2\n4\n5\n7\n8\n9\n"
],
"starter_code": "\ndef ARLCD():\n",
"scope": [
[
"Function Body",
2,
15
],
[
"For Loop Body",
5,
13
],
[
"List Comprehension",
5,
5
],
[
"If Statement Body",
8,
13
],
[
"For Loop Body",
15,
15
]
],
"difficulty": "competition",
"id": 14
},
{
"prompt": "\ndef JPrlk():\n \"\"\"Little Petya likes permutations a lot. Recently his mom has presented him permutation q_1, q_2, ..., q_{n} of length n.\n\nA permutation a of length n is a sequence of integers a_1, a_2, ..., a_{n} (1 ≤ a_{i} ≤ n), all integers there are distinct. \n\nThere is only one thing Petya likes more than permutations: playing with little Masha. As it turns out, Masha also has a permutation of length n. Petya decided to get the same permutation, whatever the cost may be. For that, he devised a game with the following rules: Before the beginning of the game Petya writes permutation 1, 2, ..., n on the blackboard. After that Petya makes exactly k moves, which are described below. During a move Petya tosses a coin. If the coin shows heads, he performs point 1, if the coin shows tails, he performs point 2. Let's assume that the board contains permutation p_1, p_2, ..., p_{n} at the given moment. Then Petya removes the written permutation p from the board and writes another one instead: p_{q}_1, p_{q}_2, ..., p_{q}_{n}. In other words, Petya applies permutation q (which he has got from his mother) to permutation p. All actions are similar to point 1, except that Petya writes permutation t on the board, such that: t_{q}_{i} = p_{i} for all i from 1 to n. In other words, Petya applies a permutation that is inverse to q to permutation p. \n\nWe know that after the k-th move the board contained Masha's permutation s_1, s_2, ..., s_{n}. Besides, we know that throughout the game process Masha's permutation never occurred on the board before the k-th move. Note that the game has exactly k moves, that is, throughout the game the coin was tossed exactly k times.\n\nYour task is to determine whether the described situation is possible or else state that Petya was mistaken somewhere. See samples and notes to them for a better understanding.\n\n\n-----Input-----\n\nThe first line contains two integers n and k (1 ≤ n, k ≤ 100). 
The second line contains n space-separated integers q_1, q_2, ..., q_{n} (1 ≤ q_{i} ≤ n) — the permutation that Petya's got as a present. The third line contains Masha's permutation s, in the similar format.\n\nIt is guaranteed that the given sequences q and s are correct permutations.\n\n\n-----Output-----\n\nIf the situation that is described in the statement is possible, print \"YES\" (without the quotes), otherwise print \"NO\" (without the quotes).\n\n\n-----Examples-----\nInput\n4 1\n2 3 4 1\n1 2 3 4\n\nOutput\nNO\n\nInput\n4 1\n4 3 1 2\n3 4 2 1\n\nOutput\nYES\n\nInput\n4 3\n4 3 1 2\n3 4 2 1\n\nOutput\nYES\n\nInput\n4 2\n4 3 1 2\n2 1 4 3\n\nOutput\nYES\n\nInput\n4 1\n4 3 1 2\n2 1 4 3\n\nOutput\nNO\n\n\n\n-----Note-----\n\nIn the first sample Masha's permutation coincides with the permutation that was written on the board before the beginning of the game. Consequently, that violates the condition that Masha's permutation never occurred on the board before k moves were performed.\n\nIn the second sample the described situation is possible, in case if after we toss a coin, we get tails.\n\nIn the third sample the possible coin tossing sequence is: heads-tails-tails.\n\nIn the fourth sample the possible coin tossing sequence is: heads-heads.\n \"\"\"\n",
"canonical_solution": "import sys\nfrom math import *\ndef JPrlk():\n def minp():\n \treturn sys.stdin.readline().strip()\n def mint():\n \treturn int(minp())\n def mints():\n \treturn list(map(int, minp().split()))\n n, k = mints()\n q = list(mints())\n for i in range(n):\n \tq[i] -= 1\n s = list(mints())\n a = [i for i in range(1,n+1)]\n d = [0]*n\n b = [False]*(k+1)\n c = [False]*(k+1)\n e = [10000]*2\n f = [10000]*2\n for i in range(k+1):\n \t#print(a)\n \tb[i] = (a == s)\n \tif b[i]:\n \t\te[i%2] = min(e[i%2], i)\n \tfor j in range(n):\n \t\td[j] = a[q[j]]\n \ta,d = d,a\n #print('====')\n a = [i for i in range(1,n+1)]\n for i in range(k+1):\n \t#print(a)\n \tc[i] = (a == s)\n \tif c[i]:\n \t\tf[i%2] = min(f[i%2], i)\n \tfor j in range(n):\n \t\td[q[j]] = a[j]\n \ta,d = d,a\n #print('====')\n #print(e)\n #print(f)\n if e[0] == 0:\n \tprint('NO')\n elif e[1] == 1:\n \tif f[1] == 1 and k > 1:\n \t\tprint('NO')\n \telif k%2 == 1 or f[k%2] <= k:\n \t\tprint('YES')\n \telse:\n \t\tprint('NO')\n elif f[1] == 1:\n \tif k%2 == 1 or e[k%2] <= k:\n \t\tprint('YES')\n \telse:\n \t\tprint('NO')\n else:\n \tif e[k%2] <= k or f[k%2] <= k:\n \t\tprint('YES')\n \telse:\n \t\tprint('NO')",
"inputs": [
"4 1\n2 3 4 1\n1 2 3 4\n",
"4 1\n4 3 1 2\n3 4 2 1\n",
"4 3\n4 3 1 2\n3 4 2 1\n"
],
"outputs": [
"NO\n",
"YES\n",
"YES\n"
],
"starter_code": "\ndef JPrlk():\n",
"scope": [
[
"Function Body",
3,
60
],
[
"Function Body",
4,
5
],
[
"Function Body",
6,
7
],
[
"Function Body",
8,
9
],
[
"For Loop Body",
12,
13
],
[
"List Comprehension",
15,
15
],
[
"For Loop Body",
21,
28
],
[
"If Statement Body",
24,
25
],
[
"For Loop Body",
26,
27
],
[
"List Comprehension",
30,
30
],
[
"For Loop Body",
31,
38
],
[
"If Statement Body",
34,
35
],
[
"For Loop Body",
36,
37
],
[
"If Statement Body",
42,
60
],
[
"If Statement Body",
44,
60
],
[
"If Statement Body",
45,
50
],
[
"If Statement Body",
47,
50
],
[
"If Statement Body",
51,
60
],
[
"If Statement Body",
52,
55
],
[
"If Statement Body",
57,
60
]
],
"difficulty": "competition",
"id": 15
}
]
================================================
FILE: mplsandbox_for_rl/data/train_all.json
================================================
[File too large to display: 18.4 MB]
================================================
FILE: mplsandbox_for_rl/data/valid.json
================================================
[
{
"prompt": "\ndef FsxrN():\n \"\"\"\"Duel!\"\n\nBetting on the lovely princess Claris, the duel between Tokitsukaze and Quailty has started.\n\nThere are $n$ cards in a row. Each card has two sides, one of which has color. At first, some of these cards are with color sides facing up and others are with color sides facing down. Then they take turns flipping cards, in which Tokitsukaze moves first. In each move, one should choose exactly $k$ consecutive cards and flip them to the same side, which means to make their color sides all face up or all face down. If all the color sides of these $n$ cards face the same direction after one's move, the one who takes this move will win.\n\nPrincess Claris wants to know who will win the game if Tokitsukaze and Quailty are so clever that they won't make mistakes.\n\n\n-----Input-----\n\nThe first line contains two integers $n$ and $k$ ($1 \\le k \\le n \\le 10^5$).\n\nThe second line contains a single string of length $n$ that only consists of $0$ and $1$, representing the situation of these $n$ cards, where the color side of the $i$-th card faces up if the $i$-th character is $1$, or otherwise, it faces down and the $i$-th character is $0$.\n\n\n-----Output-----\n\nPrint \"once again\" (without quotes) if the total number of their moves can exceed $10^9$, which is considered a draw.\n\nIn other cases, print \"tokitsukaze\" (without quotes) if Tokitsukaze will win, or \"quailty\" (without quotes) if Quailty will win.\n\nNote that the output characters are case-sensitive, and any wrong spelling would be rejected.\n\n\n-----Examples-----\nInput\n4 2\n0101\n\nOutput\nquailty\n\nInput\n6 1\n010101\n\nOutput\nonce again\n\nInput\n6 5\n010101\n\nOutput\ntokitsukaze\n\nInput\n4 1\n0011\n\nOutput\nonce again\n\n\n\n-----Note-----\n\nIn the first example, no matter how Tokitsukaze moves, there would be three cards with color sides facing the same direction after her move, and Quailty can flip the last card to this direction and win.\n\nIn 
the second example, no matter how Tokitsukaze moves, Quailty can choose the same card and flip back to the initial situation, which can allow the game to end in a draw.\n\nIn the third example, Tokitsukaze can win by flipping the leftmost five cards up or flipping the rightmost five cards down.\n\nThe fourth example can be explained in the same way as the second example does.\n \"\"\"\n",
"canonical_solution": "import sys\nimport copy\ndef FsxrN():\n input = sys.stdin.readline\n n,k=list(map(int,input().split()))\n C=list(input().strip())\n def JUDGE(C):\n ANS_one=0\n ANS_zero=0\n for c in C:\n if c==\"0\":\n ANS_zero+=1\n else:\n break\n for c in C[::-1]:\n if c==\"0\":\n ANS_zero+=1\n else:\n break\n for c in C:\n if c==\"1\":\n ANS_one+=1\n else:\n break\n for c in C[::-1]:\n if c==\"1\":\n ANS_one+=1\n else:\n break\n if ANS_zero>=n-k or ANS_one>=n-k:\n return 1\n else:\n return 0\n if JUDGE(C)==1:\n print(\"tokitsukaze\")\n return\n if k>=n-1:\n print(\"quailty\")\n return\n if k<n/2:\n print(\"once again\")\n return\n \n CAN1=copy.copy(C)\n CAN2=copy.copy(C)\n if C[0]==\"0\":\n for i in range(1,k+1):\n CAN1[i]=\"1\"\n else:\n for i in range(1,k+1):\n CAN1[i]=\"0\"\n if C[-1]==\"0\":\n for i in range(n-1,n-k-1,-1):\n CAN2[i]=\"1\"\n else:\n for i in range(n-2,n-k-2,-1):\n CAN2[i]=\"0\"\n if JUDGE(CAN1)==1 and JUDGE(CAN2)==1:\n print(\"quailty\")\n return\n else:\n print(\"once again\")\n return\n \n \n ",
"inputs": [
"4 2\n0101\n",
"6 1\n010101\n",
"6 5\n010101\n"
],
"outputs": [
"quailty\n",
"once again\n",
"tokitsukaze\n"
],
"starter_code": "\ndef FsxrN():\n",
"scope": [
[
"Function Body",
3,
63
],
[
"Function Body",
7,
33
],
[
"For Loop Body",
10,
14
],
[
"If Statement Body",
11,
14
],
[
"For Loop Body",
15,
19
],
[
"If Statement Body",
16,
19
],
[
"For Loop Body",
20,
24
],
[
"If Statement Body",
21,
24
],
[
"For Loop Body",
25,
29
],
[
"If Statement Body",
26,
29
],
[
"If Statement Body",
30,
33
],
[
"If Statement Body",
34,
36
],
[
"If Statement Body",
37,
39
],
[
"If Statement Body",
40,
42
],
[
"If Statement Body",
46,
51
],
[
"For Loop Body",
47,
48
],
[
"For Loop Body",
50,
51
],
[
"If Statement Body",
52,
57
],
[
"For Loop Body",
53,
54
],
[
"For Loop Body",
56,
57
],
[
"If Statement Body",
58,
63
]
],
"difficulty": "competition",
"id": 0
},
{
"prompt": "\ndef STBHE():\n \"\"\"We have a graph with N vertices and M edges, and there are two people on the graph: Takahashi and Aoki.\nThe i-th edge connects Vertex U_i and Vertex V_i.\nThe time it takes to traverse this edge is D_i minutes, regardless of direction and who traverses the edge (Takahashi or Aoki).\nTakahashi departs Vertex S and Aoki departs Vertex T at the same time. Takahashi travels to Vertex T and Aoki travels to Vertex S, both in the shortest time possible.\nFind the number of the pairs of ways for Takahashi and Aoki to choose their shortest paths such that they never meet (at a vertex or on an edge) during the travel, modulo 10^9 + 7.\n\n-----Constraints-----\n - 1 \\leq N \\leq 100 000\n - 1 \\leq M \\leq 200 000\n - 1 \\leq S, T \\leq N\n - S \\neq T\n - 1 \\leq U_i, V_i \\leq N (1 \\leq i \\leq M)\n - 1 \\leq D_i \\leq 10^9 (1 \\leq i \\leq M)\n - If i \\neq j, then (U_i, V_i) \\neq (U_j, V_j) and (U_i, V_i) \\neq (V_j, U_j).\n - U_i \\neq V_i (1 \\leq i \\leq M)\n - D_i are integers.\n - The given graph is connected.\n\n-----Input-----\nInput is given from Standard Input in the following format:\nN M\nS T\nU_1 V_1 D_1\nU_2 V_2 D_2\n:\nU_M V_M D_M\n\n-----Output-----\nPrint the answer.\n\n-----Sample Input-----\n4 4\n1 3\n1 2 1\n2 3 1\n3 4 1\n4 1 1\n\n-----Sample Output-----\n2\n\nThere are two ways to choose shortest paths that satisfies the condition:\n - Takahashi chooses the path 1 \\rightarrow 2 \\rightarrow 3, and Aoki chooses the path 3 \\rightarrow 4 \\rightarrow 1.\n - Takahashi chooses the path 1 \\rightarrow 4 \\rightarrow 3, and Aoki chooses the path 3 \\rightarrow 2 \\rightarrow 1.\n \"\"\"\n",
"canonical_solution": "\ndef STBHE():\n # ARC090E\n \n def hoge():\n M = 10**9 + 7\n import sys\n input = lambda : sys.stdin.readline().rstrip()\n \n n, m = map(int, input().split())\n s, t = map(int, input().split())\n s -= 1\n t -= 1\n from collections import defaultdict\n ns = defaultdict(set)\n for i in range(m):\n u, v, d = map(int, input().split())\n ns[u-1].add((v-1, d))\n ns[v-1].add((u-1, d))\n \n def _dijkstra(N, s, Edge):\n import heapq\n geta = 10**15\n inf = geta\n dist = [inf] * N\n dist[s] = 0\n Q = [(0, s)]\n dp = [0]*N\n dp[s] = 1\n while Q:\n dn, vn = heapq.heappop(Q)\n if dn > dist[vn]:\n continue\n for vf, df in Edge[vn]:\n if dist[vn] + df < dist[vf]:\n dist[vf] = dist[vn] + df\n dp[vf] = dp[vn]\n heapq.heappush(Q, (dn + df,vf))\n elif dist[vn] + df == dist[vf]:\n dp[vf] = (dp[vf] + dp[vn]) % M\n return dist, dp\n \n def dijkstra(start):\n import heapq\n vals = [None] * n\n nums = [None] * n\n nums[start] = 1\n h = [(0, start)] # (距離, ノード番号)\n vals[start] = 0\n while h:\n val, u = heapq.heappop(h)\n for v, d in ns[u]:\n if vals[v] is None or vals[v]>val+d:\n vals[v] = val+d\n nums[v] = nums[u]\n heapq.heappush(h, (vals[v], v))\n elif vals[v] is not None and vals[v]==val+d:\n nums[v] = (nums[v] + nums[u]) % M\n return vals, nums\n \n vals1, nums1 = dijkstra(s)\n vals2, nums2 = dijkstra(t)\n \n T = vals1[t]\n \n c1 = 0 # 頂点で衝突するペアの数\n c2 = 0 # エッジ(端点除く)で衝突するペアの数\n \n for u in range(n):\n if 2*vals1[u]==T and 2*vals2[u]==T:\n c1 = (c1 + pow((nums1[u] * nums2[u]), 2, M)) % M\n for v,d in ns[u]:\n if (vals1[u]+d+vals2[v]==T) and (2*vals1[u] < T < 2*(vals1[u] + d)):\n c2 = (c2 + (nums1[u] * nums2[v])**2) % M\n print((nums1[t]*nums2[s] - (c1+c2)) % M)\n hoge()",
"inputs": [
"4 4\n1 3\n1 2 1\n2 3 1\n3 4 1\n4 1 1\n",
"3 3\n1 3\n1 2 1\n2 3 1\n3 1 2\n",
"3 3\n1 3\n1 2 1\n2 3 1\n3 1 2\n"
],
"outputs": [
"2\n",
"2\n",
"2\n"
],
"starter_code": "\ndef STBHE():\n",
"scope": [
[
"Function Body",
2,
76
],
[
"Function Body",
5,
75
],
[
"Lambda Expression",
8,
8
],
[
"For Loop Body",
16,
19
],
[
"Function Body",
21,
41
],
[
"While Loop Body",
30,
40
],
[
"If Statement Body",
32,
33
],
[
"For Loop Body",
34,
40
],
[
"If Statement Body",
35,
40
],
[
"If Statement Body",
39,
40
],
[
"Function Body",
43,
59
],
[
"While Loop Body",
50,
58
],
[
"For Loop Body",
52,
58
],
[
"If Statement Body",
53,
58
],
[
"If Statement Body",
57,
58
],
[
"For Loop Body",
69,
74
],
[
"If Statement Body",
70,
71
],
[
"For Loop Body",
72,
74
],
[
"If Statement Body",
73,
74
]
],
"difficulty": "competition",
"id": 1
},
{
"prompt": "\ndef OwDJQ():\n \"\"\"Alyona's mother wants to present an array of n non-negative integers to Alyona. The array should be special. \n\nAlyona is a capricious girl so after she gets the array, she inspects m of its subarrays. Subarray is a set of some subsequent elements of the array. The i-th subarray is described with two integers l_{i} and r_{i}, and its elements are a[l_{i}], a[l_{i} + 1], ..., a[r_{i}].\n\nAlyona is going to find mex for each of the chosen subarrays. Among these m mexes the girl is going to find the smallest. She wants this minimum mex to be as large as possible. \n\nYou are to find an array a of n elements so that the minimum mex among those chosen by Alyona subarrays is as large as possible.\n\nThe mex of a set S is a minimum possible non-negative integer that is not in S.\n\n\n-----Input-----\n\nThe first line contains two integers n and m (1 ≤ n, m ≤ 10^5).\n\nThe next m lines contain information about the subarrays chosen by Alyona. The i-th of these lines contains two integers l_{i} and r_{i} (1 ≤ l_{i} ≤ r_{i} ≤ n), that describe the subarray a[l_{i}], a[l_{i} + 1], ..., a[r_{i}].\n\n\n-----Output-----\n\nIn the first line print single integer — the maximum possible minimum mex.\n\nIn the second line print n integers — the array a. All the elements in a should be between 0 and 10^9.\n\nIt is guaranteed that there is an optimal answer in which all the elements in a are between 0 and 10^9.\n\nIf there are multiple solutions, print any of them.\n\n\n-----Examples-----\nInput\n5 3\n1 3\n2 5\n4 5\n\nOutput\n2\n1 0 2 1 0\n\nInput\n4 2\n1 4\n2 4\n\nOutput\n3\n5 2 0 1\n\n\n-----Note-----\n\nThe first example: the mex of the subarray (1, 3) is equal to 3, the mex of the subarray (2, 5) is equal to 3, the mex of the subarray (4, 5) is equal to 2 as well, thus the minumal mex among the subarrays chosen by Alyona is equal to 2.\n \"\"\"\n",
"canonical_solution": "\ndef OwDJQ():\n f=lambda : list(map(int,input().split()))\n n,m=f()\n lr=lambda x: x[1]-x[0]+1\n sq=min(lr(f()) for _ in range(m))\n print(sq)\n x=' '.join([str(i%sq) for i in range(n)])\n print(x)\n ",
"inputs": [
"5 3\n1 3\n2 5\n4 5\n",
"4 2\n1 4\n2 4\n",
"1 1\n1 1\n"
],
"outputs": [
"2\n0 1 0 1 0\n",
"3\n0 1 2 0\n",
"1\n0\n"
],
"starter_code": "\ndef OwDJQ():\n",
"scope": [
[
"Function Body",
2,
9
],
[
"Lambda Expression",
3,
3
],
[
"Lambda Expression",
5,
5
],
[
"Generator Expression",
6,
6
],
[
"List Comprehension",
8,
8
]
],
"difficulty": "competition",
"id": 2
},
{
"prompt": "\ndef yMwiQ():\n \"\"\"Snuke's town has a subway system, consisting of N stations and M railway lines. The stations are numbered 1 through N. Each line is operated by a company. Each company has an identification number.\nThe i-th ( 1 \\leq i \\leq M ) line connects station p_i and q_i bidirectionally. There is no intermediate station. This line is operated by company c_i.\nYou can change trains at a station where multiple lines are available.\nThe fare system used in this subway system is a bit strange. When a passenger only uses lines that are operated by the same company, the fare is 1 yen (the currency of Japan). Whenever a passenger changes to a line that is operated by a different company from the current line, the passenger is charged an additional fare of 1 yen. In a case where a passenger who changed from some company A's line to another company's line changes to company A's line again, the additional fare is incurred again.\nSnuke is now at station 1 and wants to travel to station N by subway. Find the minimum required fare.\n\n-----Constraints-----\n - 2 \\leq N \\leq 10^5\n - 0 \\leq M \\leq 2×10^5\n - 1 \\leq p_i \\leq N (1 \\leq i \\leq M)\n - 1 \\leq q_i \\leq N (1 \\leq i \\leq M)\n - 1 \\leq c_i \\leq 10^6 (1 \\leq i \\leq M)\n - p_i \\neq q_i (1 \\leq i \\leq M)\n\n-----Input-----\nThe input is given from Standard Input in the following format:\nN M\np_1 q_1 c_1\n:\np_M q_M c_M\n\n-----Output-----\nPrint the minimum required fare. If it is impossible to get to station N by subway, print -1 instead.\n\n-----Sample Input-----\n3 3\n1 2 1\n2 3 1\n3 1 2\n\n-----Sample Output-----\n1\n\nUse company 1's lines: 1 → 2 → 3. The fare is 1 yen.\n \"\"\"\n",
"canonical_solution": "import sys\nfrom collections import deque\ndef yMwiQ():\n #!/usr/bin/env python3\n input = sys.stdin.readline\n INF = 10**9\n n, m = map(int, input().split())\n pqc = []\n seen = set()\n for i in range(n):\n seen.add((i, 0))\n for _ in range(m):\n p, q, c = map(int, input().split())\n p -= 1; q -= 1\n pqc.append((p, q, c))\n seen.add((p, c))\n seen.add((q, c))\n comp = dict()\n for i, node in enumerate(seen):\n comp[node] = i\n edge = [[] for _ in range(len(comp))]\n for key in comp.keys():\n v, c = key\n if c != 0:\n frm = comp[(v, c)]\n too = comp[(v, 0)]\n edge[frm].append((too, 0))\n edge[too].append((frm, 1))\n for p, q, c in pqc:\n frm = comp[(p, c)]\n too = comp[(q, c)]\n edge[frm].append((too, 0))\n edge[too].append((frm, 0))\n class BFS:\n def __init__(self, adj):\n self.adj = adj\n self.dist = [INF] * len(adj)\n self.q = deque()\n def calc(self, start):\n self.dist[start] = 0\n self.q.append((0, start))\n while len(self.q) != 0:\n prov_cost, src = self.q.popleft()\n if self.dist[src] < prov_cost:\n continue\n for dest, cost in self.adj[src]:\n if self.dist[dest] > self.dist[src] + cost:\n self.dist[dest] = self.dist[src] + cost\n if cost == 1:\n self.q.append((self.dist[dest], dest))\n else:\n self.q.appendleft((self.dist[dest], dest))\n return self.dist\n bfs = BFS(edge)\n bfs.calc(comp[(0, 0)])\n ans = bfs.dist[comp[(n-1, 0)]]\n if ans == INF:\n print(-1)\n else:\n print(ans)",
"inputs": [
"3 3\n1 2 1\n2 3 1\n3 1 2\n",
"8 11\n1 3 1\n1 4 2\n2 3 1\n2 5 1\n3 4 3\n3 6 3\n3 7 3\n4 8 4\n5 6 1\n6 7 5\n7 8 5\n",
"2 0\n"
],
"outputs": [
"1\n",
"2\n",
"-1\n"
],
"starter_code": "\ndef yMwiQ():\n",
"scope": [
[
"Function Body",
3,
60
],
[
"For Loop Body",
10,
11
],
[
"For Loop Body",
12,
17
],
[
"For Loop Body",
19,
20
],
[
"List Comprehension",
21,
21
],
[
"For Loop Body",
22,
28
],
[
"If Statement Body",
24,
28
],
[
"For Loop Body",
29,
33
],
[
"Class Body",
34,
53
],
[
"Function Body",
35,
38
],
[
"Function Body",
39,
53
],
[
"While Loop Body",
42,
52
],
[
"If Statement Body",
44,
45
],
[
"For Loop Body",
46,
52
],
[
"If Statement Body",
47,
52
],
[
"If Statement Body",
49,
52
],
[
"If Statement Body",
57,
60
]
],
"difficulty": "competition",
"id": 3
},
{
"prompt": "\ndef MotpB():\n \"\"\"On the xy-plane, Snuke is going to travel from the point (x_s, y_s) to the point (x_t, y_t).\nHe can move in arbitrary directions with speed 1.\nHere, we will consider him as a point without size.\nThere are N circular barriers deployed on the plane.\nThe center and the radius of the i-th barrier are (x_i, y_i) and r_i, respectively.\nThe barriers may overlap or contain each other.\nA point on the plane is exposed to cosmic rays if the point is not within any of the barriers.\nSnuke wants to avoid exposure to cosmic rays as much as possible during the travel.\nFind the minimum possible duration of time he is exposed to cosmic rays during the travel.\n\n-----Constraints-----\n - All input values are integers.\n - -10^9 ≤ x_s, y_s, x_t, y_t ≤ 10^9\n - (x_s, y_s) ≠ (x_t, y_t)\n - 1≤N≤1,000\n - -10^9 ≤ x_i, y_i ≤ 10^9\n - 1 ≤ r_i ≤ 10^9\n\n-----Input-----\nThe input is given from Standard Input in the following format:\nx_s y_s x_t y_t\nN\nx_1 y_1 r_1\nx_2 y_2 r_2\n:\nx_N y_N r_N\n\n-----Output-----\nPrint the minimum possible duration of time Snuke is exposed to cosmic rays during the travel.\nThe output is considered correct if the absolute or relative error is at most 10^{-9}.\n\n-----Sample Input-----\n-2 -2 2 2\n1\n0 0 1\n\n-----Sample Output-----\n3.6568542495\n\nAn optimal route is as follows:\n \"\"\"\n",
"canonical_solution": "\ndef MotpB():\n def main():\n import sys\n input = sys.stdin.readline\n \n import heapq\n def dijkstra_heap(s,g,edge):\n #始点sから各頂点への最短距離\n d = [10**20] * (n+2)\n used = [True] * (n+2) #True:未確定\n d[s] = 0\n used[s] = False\n edgelist = []\n sx,sy,sr=edge[s][0],edge[s][1],edge[s][2]\n for i in range(n+2):\n x,y,r=edge[i][0],edge[i][1],edge[i][2]\n dist=((x-sx)**2+(y-sy)**2)**(1/2)\n heapq.heappush(edgelist,(max(dist-r-sr,0),i))\n while len(edgelist):\n minedge = heapq.heappop(edgelist)\n #まだ使われてない頂点の中から最小の距離のものを探す\n v = minedge[1]\n if not used[v]:\n continue\n d[v] = minedge[0]\n used[v] = False\n bx,by,br=edge[v][0],edge[v][1],edge[v][2]\n for i in range(n+2):\n x,y,r=edge[i][0],edge[i][1],edge[i][2]\n dist=((x-bx)**2+(y-by)**2)**(1/2)\n if used[i]:\n heapq.heappush(edgelist,(max(dist-r-br,0)+d[v],i))\n if not used[g]:\n break\n return d[g]\n \n sx,sy,gx,gy = map(int,input().split()) #n:頂点数 w:辺の数\n n=int(input())\n edge=[(sx,sy,0),(gx,gy,0)]\n for i in range(2,n+2):\n x,y,r=map(int,input().split())\n edge.append((x,y,r))\n print(dijkstra_heap(0,1,edge))\n \n def __starting_point():\n main()\n __starting_point()",
"inputs": [
"-2 -2 2 2\n1\n0 0 1\n",
"-2 0 2 0\n2\n-1 0 2\n1 0 2\n",
"4 -2 -2 4\n3\n0 0 2\n4 0 1\n0 4 1\n"
],
"outputs": [
"3.6568542494923806\n",
"0\n",
"4.0\n"
],
"starter_code": "\ndef MotpB():\n",
"scope": [
[
"Function Body",
2,
48
],
[
"Function Body",
3,
44
],
[
"Function Body",
8,
36
],
[
"For Loop Body",
16,
19
],
[
"While Loop Body",
20,
35
],
[
"If Statement Body",
24,
25
],
[
"For Loop Body",
29,
33
],
[
"If Statement Body",
32,
33
],
[
"If Statement Body",
34,
35
],
[
"For Loop Body",
41,
43
],
[
"Function Body",
46,
47
]
],
"difficulty": "competition",
"id": 4
},
{
"prompt": "\ndef nFNyU():\n \"\"\"Polycarp is making a quest for his friends. He has already made n tasks, for each task the boy evaluated how interesting it is as an integer q_{i}, and the time t_{i} in minutes needed to complete the task. \n\nAn interesting feature of his quest is: each participant should get the task that is best suited for him, depending on his preferences. The task is chosen based on an interactive quiz that consists of some questions. The player should answer these questions with \"yes\" or \"no\". Depending on the answer to the question, the participant either moves to another question or goes to one of the tasks that are in the quest. In other words, the quest is a binary tree, its nodes contain questions and its leaves contain tasks. \n\nWe know that answering any of the questions that are asked before getting a task takes exactly one minute from the quest player. Polycarp knows that his friends are busy people and they can't participate in the quest for more than T minutes. Polycarp wants to choose some of the n tasks he made, invent the corresponding set of questions for them and use them to form an interactive quiz as a binary tree so that no matter how the player answers quiz questions, he spends at most T minutes on completing the whole quest (that is, answering all the questions and completing the task). Specifically, the quest can contain zero questions and go straight to the task. Each task can only be used once (i.e., the people who give different answers to questions should get different tasks).\n\nPolycarp wants the total \"interest\" value of the tasks involved in the quest to be as large as possible. 
Help him determine the maximum possible total interest value of the task considering that the quest should be completed in T minutes at any variant of answering questions.\n\n\n-----Input-----\n\nThe first line contains two integers n and T (1 ≤ n ≤ 1000, 1 ≤ T ≤ 100) — the number of tasks made by Polycarp and the maximum time a quest player should fit into.\n\nNext n lines contain two integers t_{i}, q_{i} (1 ≤ t_{i} ≤ T, 1 ≤ q_{i} ≤ 1000) each — the time in minutes needed to complete the i-th task and its interest value.\n\n\n-----Output-----\n\nPrint a single integer — the maximum possible total interest value of all the tasks in the quest.\n\n\n-----Examples-----\nInput\n5 5\n1 1\n1 1\n2 2\n3 3\n4 4\n\nOutput\n11\n\nInput\n5 5\n4 1\n4 2\n4 3\n4 4\n4 5\n\nOutput\n9\n\nInput\n2 2\n1 1\n2 10\n\nOutput\n10\n\n\n\n-----Note-----\n\nIn the first sample test all the five tasks can be complemented with four questions and joined into one quest.\n\nIn the second sample test it is impossible to use all the five tasks, but you can take two of them, the most interesting ones.\n\nIn the third sample test the optimal strategy is to include only the second task into the quest.\n\nHere is the picture that illustrates the answers to the sample tests. The blue circles represent the questions, the two arrows that go from every circle represent where a person goes depending on his answer to that question. The tasks are the red ovals. [Image]\n \"\"\"\n",
"canonical_solution": "from collections import defaultdict\ndef nFNyU():\n def __starting_point():\n n, T = [int(_) for _ in input().split()]\n data = defaultdict(list)\n for i in range(n):\n t, q = [int(_) for _ in input().split()]\n data[T - t].append(q)\n prev_level = []\n for level_id in range(1, T + 1):\n level = sorted(data[T - level_id] + prev_level, reverse=True) \n if T - level_id <= 10:\n max_size = 2 ** (T - level_id)\n level = level[:max_size]\n if len(level) % 2 == 1:\n level.append(0)\n prev_level = [\n level[i] + level[i + 1]\n for i in range(0, len(level), 2)\n ]\n print(prev_level[0])\n __starting_point()",
"inputs": [
"5 5\n1 1\n1 1\n2 2\n3 3\n4 4\n",
"5 5\n4 1\n4 2\n4 3\n4 4\n4 5\n",
"2 2\n1 1\n2 10\n"
],
"outputs": [
"11\n",
"9\n",
"10\n"
],
"starter_code": "\ndef nFNyU():\n",
"scope": [
[
"Function Body",
2,
22
],
[
"Function Body",
3,
21
],
[
"List Comprehension",
4,
4
],
[
"For Loop Body",
6,
8
],
[
"List Comprehension",
7,
7
],
[
"For Loop Body",
10,
20
],
[
"If Statement Body",
12,
14
],
[
"If Statement Body",
15,
16
],
[
"List Comprehension",
17,
20
]
],
"difficulty": "competition",
"id": 5
},
{
"prompt": "\ndef zltHT():\n \"\"\"Let $a_1, \\ldots, a_n$ be an array of $n$ positive integers. In one operation, you can choose an index $i$ such that $a_i = i$, and remove $a_i$ from the array (after the removal, the remaining parts are concatenated).\n\nThe weight of $a$ is defined as the maximum number of elements you can remove.\n\nYou must answer $q$ independent queries $(x, y)$: after replacing the $x$ first elements of $a$ and the $y$ last elements of $a$ by $n+1$ (making them impossible to remove), what would be the weight of $a$?\n\n\n-----Input-----\n\nThe first line contains two integers $n$ and $q$ ($1 \\le n, q \\le 3 \\cdot 10^5$) — the length of the array and the number of queries.\n\nThe second line contains $n$ integers $a_1$, $a_2$, ..., $a_n$ ($1 \\leq a_i \\leq n$) — elements of the array.\n\nThe $i$-th of the next $q$ lines contains two integers $x$ and $y$ ($x, y \\ge 0$ and $x+y < n$).\n\n\n-----Output-----\n\nPrint $q$ lines, $i$-th line should contain a single integer — the answer to the $i$-th query.\n\n\n-----Examples-----\nInput\n13 5\n2 2 3 9 5 4 6 5 7 8 3 11 13\n3 1\n0 0\n2 4\n5 0\n0 12\n\nOutput\n5\n11\n6\n1\n0\n\nInput\n5 2\n1 4 1 2 4\n0 0\n1 0\n\nOutput\n2\n0\n\n\n\n-----Note-----\n\nExplanation of the first query:\n\nAfter making first $x = 3$ and last $y = 1$ elements impossible to remove, $a$ becomes $[\\times, \\times, \\times, 9, 5, 4, 6, 5, 7, 8, 3, 11, \\times]$ (we represent $14$ as $\\times$ for clarity).\n\nHere is a strategy that removes $5$ elements (the element removed is colored in red): $[\\times, \\times, \\times, 9, \\color{red}{5}, 4, 6, 5, 7, 8, 3, 11, \\times]$ $[\\times, \\times, \\times, 9, 4, 6, 5, 7, 8, 3, \\color{red}{11}, \\times]$ $[\\times, \\times, \\times, 9, 4, \\color{red}{6}, 5, 7, 8, 3, \\times]$ $[\\times, \\times, \\times, 9, 4, 5, 7, \\color{red}{8}, 3, \\times]$ $[\\times, \\times, \\times, 9, 4, 5, \\color{red}{7}, 3, \\times]$ $[\\times, \\times, \\times, 9, 4, 5, 3, \\times]$ (final state) 
\n\nIt is impossible to remove more than $5$ elements, hence the weight is $5$.\n \"\"\"\n",
"canonical_solution": "from sys import stdin\ndef zltHT():\n def bitadd(a,w,bit):\n \n x = a\n while x <= (len(bit)-1):\n bit[x] += w\n x += x & (-1 * x)\n \n def bitsum(a,bit):\n \n ret = 0\n x = a\n while x > 0:\n ret += bit[x]\n x -= x & (-1 * x)\n return ret\n class RangeBIT:\n def __init__(self,N,indexed):\n self.bit1 = [0] * (N+2)\n
Showing preview only (318K chars total). Download the full file or copy to clipboard to get everything.
gitextract_kdapck8a/
├── LICENSE
├── README.md
├── mplsandbox/
│ ├── __init__.py
│ ├── analyzetools.py
│ ├── const.py
│ ├── sandbox.py
│ ├── tool.py
│ └── utils.py
├── mplsandbox_for_rl/
│ ├── README.md
│ ├── config.py
│ ├── config.yaml
│ ├── data/
│ │ ├── add_index.py
│ │ ├── train.json
│ │ ├── train_all.json
│ │ ├── valid.json
│ │ └── valid_all.json
│ ├── data_helper.py
│ ├── generate_utils.py
│ ├── generation_config.json
│ ├── llama/
│ │ ├── __init__.py
│ │ ├── llama_model.py
│ │ ├── llama_trainer.py
│ │ ├── modeling_moe.py
│ │ └── reward/
│ │ ├── __init__.py
│ │ └── llama_reward_model.py
│ ├── log/
│ │ └── mplsandbox_for_ppo50_beta005_rollout1_0508_debug.log
│ ├── metric.py
│ ├── metric_utils.py
│ ├── ppo/
│ │ ├── __init__.py
│ │ ├── ppo_datahelper.py
│ │ ├── ppo_trainer.py
│ │ └── ppo_utils.py
│ ├── requirements.txt
│ ├── scheduler.py
│ ├── tensorboard_log/
│ │ └── ppo/
│ │ ├── GoReturn_evalstep50_beta005_rollout1_0508_debug/
│ │ │ ├── events.out.tfevents.1724743396.llm1.271880.0
│ │ │ ├── events.out.tfevents.1724927232.llm1.394070.0
│ │ │ ├── events.out.tfevents.1724932911.llm1.405634.0
│ │ │ ├── events.out.tfevents.1724982924.llm1.419045.0
│ │ │ ├── events.out.tfevents.1725000736.llm1.430015.0
│ │ │ ├── events.out.tfevents.1725001088.llm1.432277.0
│ │ │ ├── events.out.tfevents.1725178222.llm1.478675.0
│ │ │ ├── events.out.tfevents.1725180164.llm1.483165.0
│ │ │ ├── events.out.tfevents.1725181196.llm1.486635.0
│ │ │ ├── events.out.tfevents.1725182089.llm1.489725.0
│ │ │ ├── events.out.tfevents.1725182662.llm1.492414.0
│ │ │ ├── events.out.tfevents.1725186372.llm1.505624.0
│ │ │ ├── events.out.tfevents.1725186685.llm1.507779.0
│ │ │ ├── events.out.tfevents.1725186819.llm1.509958.0
│ │ │ ├── events.out.tfevents.1725187117.llm1.512114.0
│ │ │ ├── events.out.tfevents.1725187314.llm1.514285.0
│ │ │ ├── events.out.tfevents.1725241169.llm1.533455.0
│ │ │ ├── events.out.tfevents.1725242595.llm1.538369.0
│ │ │ ├── events.out.tfevents.1725258146.llm1.1526176.0
│ │ │ ├── events.out.tfevents.1725266674.llm1.2196706.0
│ │ │ ├── events.out.tfevents.1725267306.llm1.2247374.0
│ │ │ ├── events.out.tfevents.1725268298.llm1.2318320.0
│ │ │ ├── events.out.tfevents.1725269766.llm1.2321657.0
│ │ │ ├── events.out.tfevents.1725282491.llm1.2710308.0
│ │ │ ├── events.out.tfevents.1725283474.llm1.2793280.0
│ │ │ ├── events.out.tfevents.1725287656.llm1.3115365.0
│ │ │ ├── events.out.tfevents.1725288391.llm1.3177375.0
│ │ │ ├── events.out.tfevents.1725346126.llm1.888255.0
│ │ │ ├── events.out.tfevents.1725354538.llm1.1599322.0
│ │ │ ├── events.out.tfevents.1744640674.n211.1164140.0
│ │ │ ├── events.out.tfevents.1744641932.n211.1171336.0
│ │ │ ├── events.out.tfevents.1744642057.n211.1173796.0
│ │ │ ├── events.out.tfevents.1744644164.n211.1188180.0
│ │ │ ├── events.out.tfevents.1744647751.n211.1208086.0
│ │ │ ├── events.out.tfevents.1744649599.n211.1223334.0
│ │ │ ├── events.out.tfevents.1744650450.n211.1230053.0
│ │ │ ├── events.out.tfevents.1744652152.n211.1240128.0
│ │ │ ├── events.out.tfevents.1744652296.n211.1241825.0
│ │ │ ├── events.out.tfevents.1744653799.n211.1252570.0
│ │ │ ├── events.out.tfevents.1744653913.n211.1253768.0
│ │ │ ├── events.out.tfevents.1744654033.n211.1255378.0
│ │ │ ├── events.out.tfevents.1744654092.n211.1256279.0
│ │ │ ├── events.out.tfevents.1744655044.n211.1262359.0
│ │ │ ├── events.out.tfevents.1744692411.n211.1452941.0
│ │ │ ├── events.out.tfevents.1744704887.n211.1517461.0
│ │ │ ├── events.out.tfevents.1744704949.n211.1518406.0
│ │ │ ├── events.out.tfevents.1744705307.n211.1521301.0
│ │ │ ├── events.out.tfevents.1744705581.n211.1523626.0
│ │ │ ├── events.out.tfevents.1744706082.n211.1527175.0
│ │ │ ├── events.out.tfevents.1744706273.n211.1528838.0
│ │ │ ├── events.out.tfevents.1744706473.n211.1530754.0
│ │ │ ├── events.out.tfevents.1744706726.n211.1533256.0
│ │ │ ├── events.out.tfevents.1744709157.n211.1549899.0
│ │ │ ├── events.out.tfevents.1744709846.n211.1554540.0
│ │ │ ├── events.out.tfevents.1744719922.n211.1610921.0
│ │ │ ├── events.out.tfevents.1744720643.n211.1615790.0
│ │ │ └── events.out.tfevents.1744720998.n211.1618813.0
│ │ └── mplsandbox_for_ppo50_beta005_rollout1_0508_debug/
│ │ ├── events.out.tfevents.1744721546.n211.1623949.0
│ │ ├── events.out.tfevents.1744722319.n211.1631146.0
│ │ └── events.out.tfevents.1744722598.n211.1634201.0
│ ├── tmp/
│ │ └── GoReturn_evalstep50_beta005_rollout1_0508_debug/
│ │ └── experiences/
│ │ └── experiences_0.json
│ ├── tokenizer.py
│ ├── train_ppo.py
│ ├── train_ppo.sh
│ ├── trainer.py
│ ├── transformers/
│ │ ├── .circleci/
│ │ │ ├── TROUBLESHOOT.md
│ │ │ ├── config.yml
│ │ │ ├── create_circleci_config.py
│ │ │ └── parse_test_outputs.py
│ │ ├── .coveragerc
│ │ ├── .gitattributes
│ │ ├── .github/
│ │ │ ├── ISSUE_TEMPLATE/
│ │ │ │ ├── bug-report.yml
│ │ │ │ ├── config.yml
│ │ │ │ ├── feature-request.yml
│ │ │ │ ├── i18n.md
│ │ │ │ ├── migration.yml
│ │ │ │ └── new-model-addition.yml
│ │ │ ├── PULL_REQUEST_TEMPLATE.md
│ │ │ ├── conda/
│ │ │ │ ├── build.sh
│ │ │ │ └── meta.yaml
│ │ │ └── workflows/
│ │ │ ├── TROUBLESHOOT.md
│ │ │ ├── add-model-like.yml
│ │ │ ├── benchmark.yml
│ │ │ ├── build-ci-docker-images.yml
│ │ │ ├── build-docker-images.yml
│ │ │ ├── build-nightly-ci-docker-images.yml
│ │ │ ├── build-past-ci-docker-images.yml
│ │ │ ├── build_documentation.yml
│ │ │ ├── build_pr_documentation.yml
│ │ │ ├── check_tiny_models.yml
│ │ │ ├── doctest_job.yml
│ │ │ ├── doctests.yml
│ │ │ ├── model_jobs.yml
│ │ │ ├── push-important-models.yml
│ │ │ ├── release-conda.yml
│ │ │ ├── self-nightly-caller.yml
│ │ │ ├── self-nightly-past-ci-caller.yml
│ │ │ ├── self-past-caller.yml
│ │ │ ├── self-pr-slow-ci.yml
│ │ │ ├── self-push-amd-mi210-caller.yml
│ │ │ ├── self-push-amd-mi250-caller.yml
│ │ │ ├── self-push-amd-mi300-caller.yml
│ │ │ ├── self-push-amd.yml
│ │ │ ├── self-push-caller.yml
│ │ │ ├── self-push.yml
│ │ │ ├── self-scheduled-amd-caller.yml
│ │ │ ├── self-scheduled-amd-mi210-caller.yml
│ │ │ ├── self-scheduled-amd-mi250-caller.yml
│ │ │ ├── self-scheduled-amd-mi300-caller.yml
│ │ │ ├── self-scheduled-amd.yml
│ │ │ ├── self-scheduled-caller.yml
│ │ │ ├── self-scheduled.yml
│ │ │ ├── slack-report.yml
│ │ │ ├── ssh-runner.yml
│ │ │ ├── stale.yml
│ │ │ ├── trufflehog.yml
│ │ │ ├── update_metdata.yml
│ │ │ └── upload_pr_documentation.yml
│ │ ├── .gitignore
│ │ ├── CITATION.cff
│ │ ├── CODE_OF_CONDUCT.md
│ │ ├── CONTRIBUTING.md
│ │ ├── ISSUES.md
│ │ ├── LICENSE
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── SECURITY.md
│ │ ├── awesome-transformers.md
│ │ ├── benchmark/
│ │ │ ├── __init__.py
│ │ │ ├── benchmark.py
│ │ │ ├── config/
│ │ │ │ └── generation.yaml
│ │ │ └── optimum_benchmark_wrapper.py
│ │ ├── conftest.py
│ │ ├── docker/
│ │ │ ├── consistency.dockerfile
│ │ │ ├── custom-tokenizers.dockerfile
│ │ │ ├── examples-tf.dockerfile
│ │ │ ├── examples-torch.dockerfile
│ │ │ ├── exotic-models.dockerfile
│ │ │ ├── jax-light.dockerfile
│ │ │ ├── pipeline-tf.dockerfile
│ │ │ ├── pipeline-torch.dockerfile
│ │ │ ├── quality.dockerfile
│ │ │ ├── tf-light.dockerfile
│ │ │ ├── torch-jax-light.dockerfile
│ │ │ ├── torch-light.dockerfile
│ │ │ ├── torch-tf-light.dockerfile
│ │ │ ├── transformers-all-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-doc-builder/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-past-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-amd-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-amd-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-deepspeed-nightly-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-gpu/
│ │ │ │ └── Dockerfile
│ │ │ ├── transformers-pytorch-tpu/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── bert-base-cased.jsonnet
│ │ │ │ ├── dataset.yaml
│ │ │ │ └── docker-entrypoint.sh
│ │ │ ├── transformers-quantization-latest-gpu/
│ │ │ │ └── Dockerfile
│ │ │ └── transformers-tensorflow-gpu/
│ │ │ └── Dockerfile
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── TRANSLATING.md
│ │ │ └── source/
│ │ │ ├── _config.py
│ │ │ ├── de/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── training.md
│ │ │ │ └── transformers_agents.md
│ │ │ ├── en/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _redirects.yml
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── agents.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── benchmarks.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── conversations.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── fsdp.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── gguf.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── internal/
│ │ │ │ │ ├── audio_utils.md
│ │ │ │ │ ├── file_utils.md
│ │ │ │ │ ├── generation_utils.md
│ │ │ │ │ ├── image_processing_utils.md
│ │ │ │ │ ├── modeling_utils.md
│ │ │ │ │ ├── pipelines_utils.md
│ │ │ │ │ ├── time_series_utils.md
│ │ │ │ │ ├── tokenization_utils.md
│ │ │ │ │ └── trainer_utils.md
│ │ │ │ ├── kv_cache.md
│ │ │ │ ├── llm_optims.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── llm_tutorial_optimization.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ ├── agent.md
│ │ │ │ │ ├── backbones.md
│ │ │ │ │ ├── callback.md
│ │ │ │ │ ├── configuration.md
│ │ │ │ │ ├── data_collator.md
│ │ │ │ │ ├── deepspeed.md
│ │ │ │ │ ├── feature_extractor.md
│ │ │ │ │ ├── image_processor.md
│ │ │ │ │ ├── keras_callbacks.md
│ │ │ │ │ ├── logging.md
│ │ │ │ │ ├── model.md
│ │ │ │ │ ├── onnx.md
│ │ │ │ │ ├── optimizer_schedules.md
│ │ │ │ │ ├── output.md
│ │ │ │ │ ├── pipelines.md
│ │ │ │ │ ├── processors.md
│ │ │ │ │ ├── quantization.md
│ │ │ │ │ ├── text_generation.md
│ │ │ │ │ ├── tokenizer.md
│ │ │ │ │ └── trainer.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── albert.md
│ │ │ │ │ ├── align.md
│ │ │ │ │ ├── altclip.md
│ │ │ │ │ ├── audio-spectrogram-transformer.md
│ │ │ │ │ ├── auto.md
│ │ │ │ │ ├── autoformer.md
│ │ │ │ │ ├── bark.md
│ │ │ │ │ ├── bart.md
│ │ │ │ │ ├── barthez.md
│ │ │ │ │ ├── bartpho.md
│ │ │ │ │ ├── beit.md
│ │ │ │ │ ├── bert-generation.md
│ │ │ │ │ ├── bert-japanese.md
│ │ │ │ │ ├── bert.md
│ │ │ │ │ ├── bertweet.md
│ │ │ │ │ ├── big_bird.md
│ │ │ │ │ ├── bigbird_pegasus.md
│ │ │ │ │ ├── biogpt.md
│ │ │ │ │ ├── bit.md
│ │ │ │ │ ├── blenderbot-small.md
│ │ │ │ │ ├── blenderbot.md
│ │ │ │ │ ├── blip-2.md
│ │ │ │ │ ├── blip.md
│ │ │ │ │ ├── bloom.md
│ │ │ │ │ ├── bort.md
│ │ │ │ │ ├── bridgetower.md
│ │ │ │ │ ├── bros.md
│ │ │ │ │ ├── byt5.md
│ │ │ │ │ ├── camembert.md
│ │ │ │ │ ├── canine.md
│ │ │ │ │ ├── chameleon.md
│ │ │ │ │ ├── chinese_clip.md
│ │ │ │ │ ├── clap.md
│ │ │ │ │ ├── clip.md
│ │ │ │ │ ├── clipseg.md
│ │ │ │ │ ├── clvp.md
│ │ │ │ │ ├── code_llama.md
│ │ │ │ │ ├── codegen.md
│ │ │ │ │ ├── cohere.md
│ │ │ │ │ ├── conditional_detr.md
│ │ │ │ │ ├── convbert.md
│ │ │ │ │ ├── convnext.md
│ │ │ │ │ ├── convnextv2.md
│ │ │ │ │ ├── cpm.md
│ │ │ │ │ ├── cpmant.md
│ │ │ │ │ ├── ctrl.md
│ │ │ │ │ ├── cvt.md
│ │ │ │ │ ├── dac.md
│ │ │ │ │ ├── data2vec.md
│ │ │ │ │ ├── dbrx.md
│ │ │ │ │ ├── deberta-v2.md
│ │ │ │ │ ├── deberta.md
│ │ │ │ │ ├── decision_transformer.md
│ │ │ │ │ ├── deformable_detr.md
│ │ │ │ │ ├── deit.md
│ │ │ │ │ ├── deplot.md
│ │ │ │ │ ├── depth_anything.md
│ │ │ │ │ ├── depth_anything_v2.md
│ │ │ │ │ ├── deta.md
│ │ │ │ │ ├── detr.md
│ │ │ │ │ ├── dialogpt.md
│ │ │ │ │ ├── dinat.md
│ │ │ │ │ ├── dinov2.md
│ │ │ │ │ ├── distilbert.md
│ │ │ │ │ ├── dit.md
│ │ │ │ │ ├── donut.md
│ │ │ │ │ ├── dpr.md
│ │ │ │ │ ├── dpt.md
│ │ │ │ │ ├── efficientformer.md
│ │ │ │ │ ├── efficientnet.md
│ │ │ │ │ ├── electra.md
│ │ │ │ │ ├── encodec.md
│ │ │ │ │ ├── encoder-decoder.md
│ │ │ │ │ ├── ernie.md
│ │ │ │ │ ├── ernie_m.md
│ │ │ │ │ ├── esm.md
│ │ │ │ │ ├── falcon.md
│ │ │ │ │ ├── falcon_mamba.md
│ │ │ │ │ ├── fastspeech2_conformer.md
│ │ │ │ │ ├── flan-t5.md
│ │ │ │ │ ├── flan-ul2.md
│ │ │ │ │ ├── flaubert.md
│ │ │ │ │ ├── flava.md
│ │ │ │ │ ├── fnet.md
│ │ │ │ │ ├── focalnet.md
│ │ │ │ │ ├── fsmt.md
│ │ │ │ │ ├── funnel.md
│ │ │ │ │ ├── fuyu.md
│ │ │ │ │ ├── gemma.md
│ │ │ │ │ ├── gemma2.md
│ │ │ │ │ ├── git.md
│ │ │ │ │ ├── glpn.md
│ │ │ │ │ ├── gpt-sw3.md
│ │ │ │ │ ├── gpt2.md
│ │ │ │ │ ├── gpt_bigcode.md
│ │ │ │ │ ├── gpt_neo.md
│ │ │ │ │ ├── gpt_neox.md
│ │ │ │ │ ├── gpt_neox_japanese.md
│ │ │ │ │ ├── gptj.md
│ │ │ │ │ ├── gptsan-japanese.md
│ │ │ │ │ ├── graphormer.md
│ │ │ │ │ ├── grounding-dino.md
│ │ │ │ │ ├── groupvit.md
│ │ │ │ │ ├── herbert.md
│ │ │ │ │ ├── hiera.md
│ │ │ │ │ ├── hubert.md
│ │ │ │ │ ├── ibert.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── idefics2.md
│ │ │ │ │ ├── imagegpt.md
│ │ │ │ │ ├── informer.md
│ │ │ │ │ ├── instructblip.md
│ │ │ │ │ ├── instructblipvideo.md
│ │ │ │ │ ├── jamba.md
│ │ │ │ │ ├── jetmoe.md
│ │ │ │ │ ├── jukebox.md
│ │ │ │ │ ├── kosmos-2.md
│ │ │ │ │ ├── layoutlm.md
│ │ │ │ │ ├── layoutlmv2.md
│ │ │ │ │ ├── layoutlmv3.md
│ │ │ │ │ ├── layoutxlm.md
│ │ │ │ │ ├── led.md
│ │ │ │ │ ├── levit.md
│ │ │ │ │ ├── lilt.md
│ │ │ │ │ ├── llama.md
│ │ │ │ │ ├── llama2.md
│ │ │ │ │ ├── llama3.md
│ │ │ │ │ ├── llava.md
│ │ │ │ │ ├── llava_next.md
│ │ │ │ │ ├── llava_next_video.md
│ │ │ │ │ ├── longformer.md
│ │ │ │ │ ├── longt5.md
│ │ │ │ │ ├── luke.md
│ │ │ │ │ ├── lxmert.md
│ │ │ │ │ ├── m2m_100.md
│ │ │ │ │ ├── madlad-400.md
│ │ │ │ │ ├── mamba.md
│ │ │ │ │ ├── mamba2.md
│ │ │ │ │ ├── marian.md
│ │ │ │ │ ├── markuplm.md
│ │ │ │ │ ├── mask2former.md
│ │ │ │ │ ├── maskformer.md
│ │ │ │ │ ├── matcha.md
│ │ │ │ │ ├── mbart.md
│ │ │ │ │ ├── mctct.md
│ │ │ │ │ ├── mega.md
│ │ │ │ │ ├── megatron-bert.md
│ │ │ │ │ ├── megatron_gpt2.md
│ │ │ │ │ ├── mgp-str.md
│ │ │ │ │ ├── mistral.md
│ │ │ │ │ ├── mixtral.md
│ │ │ │ │ ├── mluke.md
│ │ │ │ │ ├── mms.md
│ │ │ │ │ ├── mobilebert.md
│ │ │ │ │ ├── mobilenet_v1.md
│ │ │ │ │ ├── mobilenet_v2.md
│ │ │ │ │ ├── mobilevit.md
│ │ │ │ │ ├── mobilevitv2.md
│ │ │ │ │ ├── mpnet.md
│ │ │ │ │ ├── mpt.md
│ │ │ │ │ ├── mra.md
│ │ │ │ │ ├── mt5.md
│ │ │ │ │ ├── musicgen.md
│ │ │ │ │ ├── musicgen_melody.md
│ │ │ │ │ ├── mvp.md
│ │ │ │ │ ├── nat.md
│ │ │ │ │ ├── nemotron.md
│ │ │ │ │ ├── nezha.md
│ │ │ │ │ ├── nllb-moe.md
│ │ │ │ │ ├── nllb.md
│ │ │ │ │ ├── nougat.md
│ │ │ │ │ ├── nystromformer.md
│ │ │ │ │ ├── olmo.md
│ │ │ │ │ ├── oneformer.md
│ │ │ │ │ ├── open-llama.md
│ │ │ │ │ ├── openai-gpt.md
│ │ │ │ │ ├── opt.md
│ │ │ │ │ ├── owlv2.md
│ │ │ │ │ ├── owlvit.md
│ │ │ │ │ ├── paligemma.md
│ │ │ │ │ ├── patchtsmixer.md
│ │ │ │ │ ├── patchtst.md
│ │ │ │ │ ├── pegasus.md
│ │ │ │ │ ├── pegasus_x.md
│ │ │ │ │ ├── perceiver.md
│ │ │ │ │ ├── persimmon.md
│ │ │ │ │ ├── phi.md
│ │ │ │ │ ├── phi3.md
│ │ │ │ │ ├── phobert.md
│ │ │ │ │ ├── pix2struct.md
│ │ │ │ │ ├── plbart.md
│ │ │ │ │ ├── poolformer.md
│ │ │ │ │ ├── pop2piano.md
│ │ │ │ │ ├── prophetnet.md
│ │ │ │ │ ├── pvt.md
│ │ │ │ │ ├── pvt_v2.md
│ │ │ │ │ ├── qdqbert.md
│ │ │ │ │ ├── qwen2.md
│ │ │ │ │ ├── qwen2_audio.md
│ │ │ │ │ ├── qwen2_moe.md
│ │ │ │ │ ├── rag.md
│ │ │ │ │ ├── realm.md
│ │ │ │ │ ├── recurrent_gemma.md
│ │ │ │ │ ├── reformer.md
│ │ │ │ │ ├── regnet.md
│ │ │ │ │ ├── rembert.md
│ │ │ │ │ ├── resnet.md
│ │ │ │ │ ├── retribert.md
│ │ │ │ │ ├── roberta-prelayernorm.md
│ │ │ │ │ ├── roberta.md
│ │ │ │ │ ├── roc_bert.md
│ │ │ │ │ ├── roformer.md
│ │ │ │ │ ├── rt_detr.md
│ │ │ │ │ ├── rwkv.md
│ │ │ │ │ ├── sam.md
│ │ │ │ │ ├── seamless_m4t.md
│ │ │ │ │ ├── seamless_m4t_v2.md
│ │ │ │ │ ├── segformer.md
│ │ │ │ │ ├── seggpt.md
│ │ │ │ │ ├── sew-d.md
│ │ │ │ │ ├── sew.md
│ │ │ │ │ ├── siglip.md
│ │ │ │ │ ├── speech-encoder-decoder.md
│ │ │ │ │ ├── speech_to_text.md
│ │ │ │ │ ├── speech_to_text_2.md
│ │ │ │ │ ├── speecht5.md
│ │ │ │ │ ├── splinter.md
│ │ │ │ │ ├── squeezebert.md
│ │ │ │ │ ├── stablelm.md
│ │ │ │ │ ├── starcoder2.md
│ │ │ │ │ ├── superpoint.md
│ │ │ │ │ ├── swiftformer.md
│ │ │ │ │ ├── swin.md
│ │ │ │ │ ├── swin2sr.md
│ │ │ │ │ ├── swinv2.md
│ │ │ │ │ ├── switch_transformers.md
│ │ │ │ │ ├── t5.md
│ │ │ │ │ ├── t5v1.1.md
│ │ │ │ │ ├── table-transformer.md
│ │ │ │ │ ├── tapas.md
│ │ │ │ │ ├── tapex.md
│ │ │ │ │ ├── time_series_transformer.md
│ │ │ │ │ ├── timesformer.md
│ │ │ │ │ ├── trajectory_transformer.md
│ │ │ │ │ ├── transfo-xl.md
│ │ │ │ │ ├── trocr.md
│ │ │ │ │ ├── tvlt.md
│ │ │ │ │ ├── tvp.md
│ │ │ │ │ ├── udop.md
│ │ │ │ │ ├── ul2.md
│ │ │ │ │ ├── umt5.md
│ │ │ │ │ ├── unispeech-sat.md
│ │ │ │ │ ├── unispeech.md
│ │ │ │ │ ├── univnet.md
│ │ │ │ │ ├── upernet.md
│ │ │ │ │ ├── van.md
│ │ │ │ │ ├── video_llava.md
│ │ │ │ │ ├── videomae.md
│ │ │ │ │ ├── vilt.md
│ │ │ │ │ ├── vipllava.md
│ │ │ │ │ ├── vision-encoder-decoder.md
│ │ │ │ │ ├── vision-text-dual-encoder.md
│ │ │ │ │ ├── visual_bert.md
│ │ │ │ │ ├── vit.md
│ │ │ │ │ ├── vit_hybrid.md
│ │ │ │ │ ├── vit_mae.md
│ │ │ │ │ ├── vit_msn.md
│ │ │ │ │ ├── vitdet.md
│ │ │ │ │ ├── vitmatte.md
│ │ │ │ │ ├── vits.md
│ │ │ │ │ ├── vivit.md
│ │ │ │ │ ├── wav2vec2-bert.md
│ │ │ │ │ ├── wav2vec2-conformer.md
│ │ │ │ │ ├── wav2vec2.md
│ │ │ │ │ ├── wav2vec2_phoneme.md
│ │ │ │ │ ├── wavlm.md
│ │ │ │ │ ├── whisper.md
│ │ │ │ │ ├── xclip.md
│ │ │ │ │ ├── xglm.md
│ │ │ │ │ ├── xlm-prophetnet.md
│ │ │ │ │ ├── xlm-roberta-xl.md
│ │ │ │ │ ├── xlm-roberta.md
│ │ │ │ │ ├── xlm-v.md
│ │ │ │ │ ├── xlm.md
│ │ │ │ │ ├── xlnet.md
│ │ │ │ │ ├── xls_r.md
│ │ │ │ │ ├── xlsr_wav2vec2.md
│ │ │ │ │ ├── xmod.md
│ │ │ │ │ ├── yolos.md
│ │ │ │ │ ├── yoso.md
│ │ │ │ │ └── zoedepth.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── notebooks.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_torch_compile.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_gpu_one.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quantization/
│ │ │ │ │ ├── aqlm.md
│ │ │ │ │ ├── awq.md
│ │ │ │ │ ├── bitsandbytes.md
│ │ │ │ │ ├── contribute.md
│ │ │ │ │ ├── eetq.md
│ │ │ │ │ ├── fbgemm_fp8.md
│ │ │ │ │ ├── gptq.md
│ │ │ │ │ ├── hqq.md
│ │ │ │ │ ├── optimum.md
│ │ │ │ │ ├── overview.md
│ │ │ │ │ ├── quanto.md
│ │ │ │ │ └── torchao.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_feature_extraction.md
│ │ │ │ │ ├── image_text_to_text.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── mask_generation.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── text-to-speech.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ ├── training.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── es/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ └── summarization.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ └── training.md
│ │ │ ├── fr/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── in_translation.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts_fr.md
│ │ │ │ └── tutoriel_pipeline.md
│ │ │ ├── hi/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── pipeline_tutorial.md
│ │ │ ├── it/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── community.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── migration.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_many.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_infer_special.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ └── training.md
│ │ │ ├── ja/
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── benchmarks.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── custom_tools.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── glossary.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── internal/
│ │ │ │ │ ├── audio_utils.md
│ │ │ │ │ ├── file_utils.md
│ │ │ │ │ ├── generation_utils.md
│ │ │ │ │ ├── image_processing_utils.md
│ │ │ │ │ ├── modeling_utils.md
│ │ │ │ │ ├── pipelines_utils.md
│ │ │ │ │ ├── time_series_utils.md
│ │ │ │ │ ├── tokenization_utils.md
│ │ │ │ │ └── trainer_utils.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ ├── agent.md
│ │ │ │ │ ├── callback.md
│ │ │ │ │ ├── configuration.md
│ │ │ │ │ ├── data_collator.md
│ │ │ │ │ ├── deepspeed.md
│ │ │ │ │ ├── feature_extractor.md
│ │ │ │ │ ├── image_processor.md
│ │ │ │ │ ├── keras_callbacks.md
│ │ │ │ │ ├── logging.md
│ │ │ │ │ ├── model.md
│ │ │ │ │ ├── onnx.md
│ │ │ │ │ ├── optimizer_schedules.md
│ │ │ │ │ ├── output.md
│ │ │ │ │ ├── pipelines.md
│ │ │ │ │ ├── processors.md
│ │ │ │ │ ├── quantization.md
│ │ │ │ │ ├── text_generation.md
│ │ │ │ │ ├── tokenizer.md
│ │ │ │ │ └── trainer.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── albert.md
│ │ │ │ │ ├── align.md
│ │ │ │ │ ├── altclip.md
│ │ │ │ │ ├── audio-spectrogram-transformer.md
│ │ │ │ │ ├── auto.md
│ │ │ │ │ ├── autoformer.md
│ │ │ │ │ ├── bark.md
│ │ │ │ │ ├── bart.md
│ │ │ │ │ ├── barthez.md
│ │ │ │ │ ├── bartpho.md
│ │ │ │ │ ├── beit.md
│ │ │ │ │ ├── bert-generation.md
│ │ │ │ │ ├── bert-japanese.md
│ │ │ │ │ ├── bert.md
│ │ │ │ │ ├── bertweet.md
│ │ │ │ │ ├── big_bird.md
│ │ │ │ │ ├── bigbird_pegasus.md
│ │ │ │ │ ├── biogpt.md
│ │ │ │ │ ├── bit.md
│ │ │ │ │ ├── blenderbot-small.md
│ │ │ │ │ ├── blenderbot.md
│ │ │ │ │ ├── blip-2.md
│ │ │ │ │ ├── blip.md
│ │ │ │ │ ├── bloom.md
│ │ │ │ │ ├── bort.md
│ │ │ │ │ ├── bridgetower.md
│ │ │ │ │ ├── bros.md
│ │ │ │ │ ├── byt5.md
│ │ │ │ │ ├── camembert.md
│ │ │ │ │ ├── canine.md
│ │ │ │ │ ├── chinese_clip.md
│ │ │ │ │ ├── clap.md
│ │ │ │ │ ├── clip.md
│ │ │ │ │ ├── clipseg.md
│ │ │ │ │ ├── clvp.md
│ │ │ │ │ ├── code_llama.md
│ │ │ │ │ ├── codegen.md
│ │ │ │ │ ├── conditional_detr.md
│ │ │ │ │ ├── convbert.md
│ │ │ │ │ ├── convnext.md
│ │ │ │ │ ├── convnextv2.md
│ │ │ │ │ ├── cpm.md
│ │ │ │ │ ├── cpmant.md
│ │ │ │ │ ├── ctrl.md
│ │ │ │ │ ├── cvt.md
│ │ │ │ │ ├── data2vec.md
│ │ │ │ │ ├── deberta-v2.md
│ │ │ │ │ ├── deberta.md
│ │ │ │ │ ├── decision_transformer.md
│ │ │ │ │ ├── deformable_detr.md
│ │ │ │ │ ├── deit.md
│ │ │ │ │ ├── deplot.md
│ │ │ │ │ ├── deta.md
│ │ │ │ │ ├── detr.md
│ │ │ │ │ ├── dialogpt.md
│ │ │ │ │ └── dinat.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_many.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_infer_special.md
│ │ │ │ ├── perf_torch_compile.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_gpu_one.md
│ │ │ │ ├── perf_train_special.md
│ │ │ │ ├── perf_train_tpu.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── knowledge_distillation_for_image_classification.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── text-to-speech.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── training.md
│ │ │ │ ├── transformers_agents.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── ko/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── add_new_model.md
│ │ │ │ ├── add_new_pipeline.md
│ │ │ │ ├── attention.md
│ │ │ │ ├── autoclass_tutorial.md
│ │ │ │ ├── bertology.md
│ │ │ │ ├── big_models.md
│ │ │ │ ├── chat_templating.md
│ │ │ │ ├── community.md
│ │ │ │ ├── contributing.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── debugging.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── fsdp.md
│ │ │ │ ├── generation_strategies.md
│ │ │ │ ├── hpo_train.md
│ │ │ │ ├── in_translation.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── llm_tutorial.md
│ │ │ │ ├── llm_tutorial_optimization.md
│ │ │ │ ├── main_classes/
│ │ │ │ │ └── agent.md
│ │ │ │ ├── model_doc/
│ │ │ │ │ ├── llama.md
│ │ │ │ │ ├── llama2.md
│ │ │ │ │ └── whisper.md
│ │ │ │ ├── model_memory_anatomy.md
│ │ │ │ ├── model_sharing.md
│ │ │ │ ├── model_summary.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pad_truncation.md
│ │ │ │ ├── peft.md
│ │ │ │ ├── perf_hardware.md
│ │ │ │ ├── perf_infer_cpu.md
│ │ │ │ ├── perf_infer_gpu_one.md
│ │ │ │ ├── perf_train_cpu.md
│ │ │ │ ├── perf_train_cpu_many.md
│ │ │ │ ├── perf_train_gpu_many.md
│ │ │ │ ├── perf_train_tpu_tf.md
│ │ │ │ ├── performance.md
│ │ │ │ ├── perplexity.md
│ │ │ │ ├── philosophy.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── pipeline_webserver.md
│ │ │ │ ├── pr_checks.md
│ │ │ │ ├── preprocessing.md
│ │ │ │ ├── quantization/
│ │ │ │ │ ├── awq.md
│ │ │ │ │ ├── bitsandbytes.md
│ │ │ │ │ ├── eetq.md
│ │ │ │ │ ├── gptq.md
│ │ │ │ │ └── quanto.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── sagemaker.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── task_summary.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── asr.md
│ │ │ │ │ ├── audio_classification.md
│ │ │ │ │ ├── document_question_answering.md
│ │ │ │ │ ├── idefics.md
│ │ │ │ │ ├── image_captioning.md
│ │ │ │ │ ├── image_classification.md
│ │ │ │ │ ├── image_feature_extraction.md
│ │ │ │ │ ├── image_to_image.md
│ │ │ │ │ ├── language_modeling.md
│ │ │ │ │ ├── mask_generation.md
│ │ │ │ │ ├── masked_language_modeling.md
│ │ │ │ │ ├── monocular_depth_estimation.md
│ │ │ │ │ ├── multiple_choice.md
│ │ │ │ │ ├── object_detection.md
│ │ │ │ │ ├── prompting.md
│ │ │ │ │ ├── question_answering.md
│ │ │ │ │ ├── semantic_segmentation.md
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ ├── summarization.md
│ │ │ │ │ ├── token_classification.md
│ │ │ │ │ ├── translation.md
│ │ │ │ │ ├── video_classification.md
│ │ │ │ │ ├── visual_question_answering.md
│ │ │ │ │ ├── zero_shot_image_classification.md
│ │ │ │ │ └── zero_shot_object_detection.md
│ │ │ │ ├── tasks_explained.md
│ │ │ │ ├── testing.md
│ │ │ │ ├── tf_xla.md
│ │ │ │ ├── tflite.md
│ │ │ │ ├── tokenizer_summary.md
│ │ │ │ ├── torchscript.md
│ │ │ │ ├── trainer.md
│ │ │ │ ├── training.md
│ │ │ │ ├── transformers_agents.md
│ │ │ │ └── troubleshooting.md
│ │ │ ├── ms/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── index.md
│ │ │ ├── pt/
│ │ │ │ ├── _config.py
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── accelerate.md
│ │ │ │ ├── converting_tensorflow_models.md
│ │ │ │ ├── create_a_model.md
│ │ │ │ ├── custom_models.md
│ │ │ │ ├── fast_tokenizers.md
│ │ │ │ ├── index.md
│ │ │ │ ├── installation.md
│ │ │ │ ├── multilingual.md
│ │ │ │ ├── pipeline_tutorial.md
│ │ │ │ ├── quicktour.md
│ │ │ │ ├── run_scripts.md
│ │ │ │ ├── serialization.md
│ │ │ │ ├── tasks/
│ │ │ │ │ ├── sequence_classification.md
│ │ │ │ │ └── token_classification.md
│ │ │ │ └── training.md
│ │ │ ├── te/
│ │ │ │ ├── _toctree.yml
│ │ │ │ ├── index.md
│ │ │ │ └── quicktour.md
│ │ │ ├── tr/
│ │ │ │ ├── _toctree.yml
│ │ │ │ └── index.md
│ │ │ └── zh/
│ │ │ ├── _toctree.yml
│ │ │ ├── accelerate.md
│ │ │ ├── add_new_pipeline.md
│ │ │ ├── autoclass_tutorial.md
│ │ │ ├── big_models.md
│ │ │ ├── chat_templating.md
│ │ │ ├── contributing.md
│ │ │ ├── create_a_model.md
│ │ │ ├── custom_models.md
│ │ │ ├── debugging.md
│ │ │ ├── fast_tokenizers.md
│ │ │ ├── fsdp.md
│ │ │ ├── hpo_train.md
│ │ │ ├── index.md
│ │ │ ├── installation.md
│ │ │ ├── internal/
│ │ │ │ ├── audio_utils.md
│ │ │ │ ├── file_utils.md
│ │ │ │ ├── generation_utils.md
│ │ │ │ ├── image_processing_utils.md
│ │ │ │ ├── modeling_utils.md
│ │ │ │ ├── pipelines_utils.md
│ │ │ │ ├── time_series_utils.md
│ │ │ │ ├── tokenization_utils.md
│ │ │ │ └── trainer_utils.md
│ │ │ ├── llm_tutorial.md
│ │ │ ├── main_classes/
│ │ │ │ ├── agent.md
│ │ │ │ ├── callback.md
│ │ │ │ ├── configuration.md
│ │ │ │ ├── data_collator.md
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── feature_extractor.md
│ │ │ │ ├── image_processor.md
│ │ │ │ ├── keras_callbacks.md
│ │ │ │ ├── logging.md
│ │ │ │ ├── model.md
│ │ │ │ ├── onnx.md
│ │ │ │ ├── optimizer_schedules.md
│ │ │ │ ├── output.md
│ │ │ │ ├── pipelines.md
│ │ │ │ ├── processors.md
│ │ │ │ ├── quantization.md
│ │ │ │ ├── text_generation.md
│ │ │ │ ├── tokenizer.md
│ │ │ │ └── trainer.md
│ │ │ ├── model_sharing.md
│ │ │ ├── multilingual.md
│ │ │ ├── peft.md
│ │ │ ├── perf_hardware.md
│ │ │ ├── perf_torch_compile.md
│ │ │ ├── performance.md
│ │ │ ├── philosophy.md
│ │ │ ├── pipeline_tutorial.md
│ │ │ ├── preprocessing.md
│ │ │ ├── quicktour.md
│ │ │ ├── run_scripts.md
│ │ │ ├── serialization.md
│ │ │ ├── task_summary.md
│ │ │ ├── tasks/
│ │ │ │ └── asr.md
│ │ │ ├── tf_xla.md
│ │ │ ├── tflite.md
│ │ │ ├── tokenizer_summary.md
│ │ │ ├── torchscript.md
│ │ │ ├── training.md
│ │ │ └── transformers_agents.md
│ │ ├── examples/
│ │ │ ├── README.md
│ │ │ ├── diff-conversion/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_examples.sh
│ │ │ │ ├── diff_dummy.py
│ │ │ │ ├── diff_my_new_model.py
│ │ │ │ ├── diff_my_new_model2.py
│ │ │ │ ├── diff_new_model.py
│ │ │ │ └── diff_super.py
│ │ │ ├── flax/
│ │ │ │ ├── README.md
│ │ │ │ ├── _tests_requirements.txt
│ │ │ │ ├── conftest.py
│ │ │ │ ├── image-captioning/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── create_model_from_encoder_decoder_models.py
│ │ │ │ │ └── run_image_captioning_flax.py
│ │ │ │ ├── language-modeling/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_bart_dlm_flax.py
│ │ │ │ │ ├── run_clm_flax.py
│ │ │ │ │ ├── run_mlm_flax.py
│ │ │ │ │ ├── run_t5_mlm_flax.py
│ │ │ │ │ └── t5_tokenizer_model.py
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── speech-recognition/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_speech_recognition_seq2seq.py
│ │ │ │ ├── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_summarization_flax.py
│ │ │ │ ├── test_flax_examples.py
│ │ │ │ ├── text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_glue.py
│ │ │ │ ├── token-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_flax_ner.py
│ │ │ │ └── vision/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_image_classification.py
│ │ │ ├── legacy/
│ │ │ │ ├── README.md
│ │ │ │ ├── benchmarking/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── plot_csv_file.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_benchmark.py
│ │ │ │ ├── multiple_choice/
│ │ │ │ │ ├── run_multiple_choice.py
│ │ │ │ │ └── utils_multiple_choice.py
│ │ │ │ ├── pytorch-lightning/
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue.py
│ │ │ │ │ ├── run_glue.sh
│ │ │ │ │ ├── run_ner.py
│ │ │ │ │ ├── run_ner.sh
│ │ │ │ │ └── run_pos.sh
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_squad.py
│ │ │ │ │ └── run_squad_trainer.py
│ │ │ │ ├── run_camembert.py
│ │ │ │ ├── run_chinese_ref.py
│ │ │ │ ├── run_language_modeling.py
│ │ │ │ ├── run_openai_gpt.py
│ │ │ │ ├── run_swag.py
│ │ │ │ ├── run_transfo_xl.py
│ │ │ │ ├── seq2seq/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_model_to_fp16.py
│ │ │ │ │ ├── download_wmt.py
│ │ │ │ │ ├── finetune.sh
│ │ │ │ │ ├── finetune_tpu.sh
│ │ │ │ │ ├── finetune_trainer.py
│ │ │ │ │ ├── minify_dataset.py
│ │ │ │ │ ├── old_test_calculate_rouge.py
│ │ │ │ │ ├── old_test_datasets.py
│ │ │ │ │ ├── old_test_fsmt_bleu_score.py
│ │ │ │ │ ├── old_test_seq2seq_examples.py
│ │ │ │ │ ├── old_test_seq2seq_examples_multi_gpu.py
│ │ │ │ │ ├── old_test_tatoeba_conversion.py
│ │ │ │ │ ├── pack_dataset.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── romanian_postprocessing.md
│ │ │ │ │ ├── rouge_cli.py
│ │ │ │ │ ├── run_distributed_eval.py
│ │ │ │ │ ├── run_eval.py
│ │ │ │ │ ├── run_eval_search.py
│ │ │ │ │ ├── save_len_file.py
│ │ │ │ │ ├── save_randomly_initialized_model.py
│ │ │ │ │ ├── sentence_splitter.py
│ │ │ │ │ ├── seq2seq_trainer.py
│ │ │ │ │ ├── seq2seq_training_args.py
│ │ │ │ │ ├── test_data/
│ │ │ │ │ │ ├── fsmt/
│ │ │ │ │ │ │ ├── build-eval-data.py
│ │ │ │ │ │ │ └── fsmt_val_data.json
│ │ │ │ │ │ └── wmt_en_ro/
│ │ │ │ │ │ ├── test.source
│ │ │ │ │ │ ├── test.target
│ │ │ │ │ │ ├── train.len
│ │ │ │ │ │ ├── train.source
│ │ │ │ │ │ ├── train.target
│ │ │ │ │ │ ├── val.len
│ │ │ │ │ │ ├── val.source
│ │ │ │ │ │ └── val.target
│ │ │ │ │ ├── train_distil_marian_enro.sh
│ │ │ │ │ ├── train_distil_marian_enro_tpu.sh
│ │ │ │ │ ├── train_distilbart_cnn.sh
│ │ │ │ │ ├── train_mbart_cc25_enro.sh
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── xla_spawn.py
│ │ │ │ └── token-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── run.sh
│ │ │ │ ├── run_chunk.sh
│ │ │ │ ├── run_ner.py
│ │ │ │ ├── run_pos.sh
│ │ │ │ ├── scripts/
│ │ │ │ │ └── preprocess.py
│ │ │ │ ├── tasks.py
│ │ │ │ └── utils_ner.py
│ │ │ ├── pytorch/
│ │ │ │ ├── README.md
│ │ │ │ ├── _tests_requirements.txt
│ │ │ │ ├── audio-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_audio_classification.py
│ │ │ │ ├── conftest.py
│ │ │ │ ├── contrastive-image-text/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_clip.py
│ │ │ │ ├── image-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_image_classification.py
│ │ │ │ │ └── run_image_classification_no_trainer.py
│ │ │ │ ├── image-pretraining/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_mae.py
│ │ │ │ │ ├── run_mim.py
│ │ │ │ │ └── run_mim_no_trainer.py
│ │ │ │ ├── instance-segmentation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_instance_segmentation.py
│ │ │ │ │ └── run_instance_segmentation_no_trainer.py
│ │ │ │ ├── language-modeling/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_clm.py
│ │ │ │ │ ├── run_clm_no_trainer.py
│ │ │ │ │ ├── run_fim.py
│ │ │ │ │ ├── run_fim_no_trainer.py
│ │ │ │ │ ├── run_mlm.py
│ │ │ │ │ ├── run_mlm_no_trainer.py
│ │ │ │ │ └── run_plm.py
│ │ │ │ ├── multiple-choice/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_no_trainer.sh
│ │ │ │ │ ├── run_swag.py
│ │ │ │ │ └── run_swag_no_trainer.py
│ │ │ │ ├── object-detection/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_object_detection.py
│ │ │ │ │ └── run_object_detection_no_trainer.py
│ │ │ │ ├── old_test_xla_examples.py
│ │ │ │ ├── question-answering/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_qa.py
│ │ │ │ │ ├── run_qa_beam_search.py
│ │ │ │ │ ├── run_qa_beam_search_no_trainer.py
│ │ │ │ │ ├── run_qa_no_trainer.py
│ │ │ │ │ ├── run_seq2seq_qa.py
│ │ │ │ │ ├── trainer_qa.py
│ │ │ │ │ ├── trainer_seq2seq_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── semantic-segmentation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_semantic_segmentation.py
│ │ │ │ │ └── run_semantic_segmentation_no_trainer.py
│ │ │ │ ├── speech-pretraining/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_wav2vec2_pretraining_no_trainer.py
│ │ │ │ ├── speech-recognition/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_speech_recognition_ctc.py
│ │ │ │ │ ├── run_speech_recognition_ctc_adapter.py
│ │ │ │ │ └── run_speech_recognition_seq2seq.py
│ │ │ │ ├── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_summarization.py
│ │ │ │ │ └── run_summarization_no_trainer.py
│ │ │ │ ├── test_accelerate_examples.py
│ │ │ │ ├── test_pytorch_examples.py
│ │ │ │ ├── text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_classification.py
│ │ │ │ │ ├── run_glue.py
│ │ │ │ │ ├── run_glue_no_trainer.py
│ │ │ │ │ └── run_xnli.py
│ │ │ │ ├── text-generation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_generation.py
│ │ │ │ │ └── run_generation_contrastive_search.py
│ │ │ │ ├── token-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run.sh
│ │ │ │ │ ├── run_ner.py
│ │ │ │ │ ├── run_ner_no_trainer.py
│ │ │ │ │ └── run_no_trainer.sh
│ │ │ │ ├── translation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_translation.py
│ │ │ │ │ └── run_translation_no_trainer.py
│ │ │ │ └── xla_spawn.py
│ │ │ ├── research_projects/
│ │ │ │ ├── README.md
│ │ │ │ ├── adversarial/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_hans.py
│ │ │ │ │ └── utils_hans.py
│ │ │ │ ├── bert-loses-patience/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pabee/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── modeling_pabee_albert.py
│ │ │ │ │ │ └── modeling_pabee_bert.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue_with_pabee.py
│ │ │ │ │ └── test_run_glue_with_pabee.py
│ │ │ │ ├── bertabs/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bertabs.py
│ │ │ │ │ ├── convert_bertabs_original_pytorch_checkpoint.py
│ │ │ │ │ ├── modeling_bertabs.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_summarization.py
│ │ │ │ │ ├── test_utils_summarization.py
│ │ │ │ │ └── utils_summarization.py
│ │ │ │ ├── bertology/
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_bertology.py
│ │ │ │ │ └── run_prune_gpt.py
│ │ │ │ ├── codeparrot/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── examples/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ └── train_complexity_predictor.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── scripts/
│ │ │ │ │ ├── arguments.py
│ │ │ │ │ ├── bpe_training.py
│ │ │ │ │ ├── codeparrot_training.py
│ │ │ │ │ ├── human_eval.py
│ │ │ │ │ ├── initialize_model.py
│ │ │ │ │ ├── minhash_deduplication.py
│ │ │ │ │ ├── preprocessing.py
│ │ │ │ │ ├── pretokenizing.py
│ │ │ │ │ ├── tests/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── test_deduplicate.py
│ │ │ │ │ └── validation_loss.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_decision_transformer.py
│ │ │ │ ├── deebert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── entropy_eval.sh
│ │ │ │ │ ├── eval_deebert.sh
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_glue_deebert.py
│ │ │ │ │ ├── src/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── modeling_highway_bert.py
│ │ │ │ │ │ └── modeling_highway_roberta.py
│ │ │ │ │ ├── test_glue_deebert.py
│ │ │ │ │ └── train_deebert.sh
│ │ │ │ ├── distillation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── distiller.py
│ │ │ │ │ ├── grouped_batch_sampler.py
│ │ │ │ │ ├── lm_seqs_dataset.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_squad_w_distillation.py
│ │ │ │ │ ├── scripts/
│ │ │ │ │ │ ├── binarized_data.py
│ │ │ │ │ │ ├── extract.py
│ │ │ │ │ │ ├── extract_distilbert.py
│ │ │ │ │ │ └── token_counts.py
│ │ │ │ │ ├── train.py
│ │ │ │ │ ├── training_configs/
│ │ │ │ │ │ ├── distilbert-base-cased.json
│ │ │ │ │ │ ├── distilbert-base-multilingual-cased.json
│ │ │ │ │ │ ├── distilbert-base-uncased.json
│ │ │ │ │ │ ├── distilgpt2.json
│ │ │ │ │ │ └── distilroberta-base.json
│ │ │ │ │ └── utils.py
│ │ │ │ ├── fsner/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pyproject.toml
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── setup.py
│ │ │ │ │ └── src/
│ │ │ │ │ └── fsner/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── model.py
│ │ │ │ │ └── tokenizer_utils.py
│ │ │ │ ├── information-gain-filtration/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── igf/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── igf.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_clm_igf.py
│ │ │ │ ├── jax-projects/
│ │ │ │ │ ├── HOW_TO_PROPOSE_PROJECT.md
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── big_bird/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── bigbird_flax.py
│ │ │ │ │ │ ├── evaluate.py
│ │ │ │ │ │ ├── prepare_natural_questions.py
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ ├── sweep_flax.yaml
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── dataset-streaming/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ └── run_mlm_flax_stream.py
│ │ │ │ │ ├── hybrid_clip/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── configuration_hybrid_clip.py
│ │ │ │ │ │ ├── modeling_hybrid_clip.py
│ │ │ │ │ │ ├── requirements.txt
│ │ │ │ │ │ └── run_hybrid_clip.py
│ │ │ │ │ ├── model_parallel/
│ │ │ │ │ │ ├── README.md
│ │ │ │ │ │ ├── partitions.py
│ │ │ │ │ │ └── run_clm_mp.py
│ │ │ │ │ └── wav2vec2/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── run_wav2vec2_pretrain_flax.py
│ │ │ │ ├── layoutlmv3/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_funsd_cord.py
│ │ │ │ ├── longform-qa/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── eli5_app.py
│ │ │ │ │ ├── eli5_utils.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── luke/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── luke_utils.py
│ │ │ │ │ └── run_luke_ner_no_trainer.py
│ │ │ │ ├── lxmert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── demo.ipynb
│ │ │ │ │ ├── extracting_data.py
│ │ │ │ │ ├── modeling_frcnn.py
│ │ │ │ │ ├── processing_image.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── visualizing_image.py
│ │ │ │ ├── mlm_wwm/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_chinese_ref.py
│ │ │ │ │ └── run_mlm_wwm.py
│ │ │ │ ├── mm-imdb/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_mmimdb.py
│ │ │ │ │ └── utils_mmimdb.py
│ │ │ │ ├── movement-pruning/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── Saving_PruneBERT.ipynb
│ │ │ │ │ ├── bertarize.py
│ │ │ │ │ ├── counts_parameters.py
│ │ │ │ │ ├── emmental/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_bert_masked.py
│ │ │ │ │ │ ├── modeling_bert_masked.py
│ │ │ │ │ │ └── modules/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── binarizer.py
│ │ │ │ │ │ └── masked_nn.py
│ │ │ │ │ ├── masked_run_glue.py
│ │ │ │ │ ├── masked_run_squad.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── onnx/
│ │ │ │ │ └── summarization/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── bart_onnx/
│ │ │ │ │ │ ├── generation_onnx.py
│ │ │ │ │ │ └── reduce_onnx_size.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_onnx_exporter.py
│ │ │ │ ├── performer/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── full_script.sh
│ │ │ │ │ ├── modeling_flax_performer.py
│ │ │ │ │ ├── modeling_flax_performer_utils.py
│ │ │ │ │ ├── run_mlm_performer.py
│ │ │ │ │ └── sanity_script.sh
│ │ │ │ ├── pplm/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── pplm_classification_head.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_pplm.py
│ │ │ │ │ └── run_pplm_discrim_train.py
│ │ │ │ ├── quantization-qdqbert/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── evaluate-hf-trt-qa.py
│ │ │ │ │ ├── ort-infer-benchmark.py
│ │ │ │ │ ├── quant_trainer.py
│ │ │ │ │ ├── run_quant_qa.py
│ │ │ │ │ ├── trainer_quant_qa.py
│ │ │ │ │ └── utils_qa.py
│ │ │ │ ├── rag/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── _test_finetune_rag.py
│ │ │ │ │ ├── callbacks_rag.py
│ │ │ │ │ ├── consolidate_rag_checkpoint.py
│ │ │ │ │ ├── distributed_pytorch_retriever.py
│ │ │ │ │ ├── distributed_ray_retriever.py
│ │ │ │ │ ├── eval_rag.py
│ │ │ │ │ ├── finetune_rag.py
│ │ │ │ │ ├── finetune_rag.sh
│ │ │ │ │ ├── finetune_rag_ray.sh
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── parse_dpr_relevance_data.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── test_data/
│ │ │ │ │ │ └── my_knowledge_dataset.csv
│ │ │ │ │ ├── test_distributed_retriever.py
│ │ │ │ │ ├── use_own_knowledge_dataset.py
│ │ │ │ │ └── utils_rag.py
│ │ │ │ ├── rag-end2end-retriever/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── callbacks_rag.py
│ │ │ │ │ ├── distributed_ray_retriever.py
│ │ │ │ │ ├── eval_rag.py
│ │ │ │ │ ├── finetune_rag.py
│ │ │ │ │ ├── finetune_rag_ray_end2end.sh
│ │ │ │ │ ├── kb_encode_utils.py
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── test_run/
│ │ │ │ │ │ ├── dummy-kb/
│ │ │ │ │ │ │ └── my_knowledge_dataset.csv
│ │ │ │ │ │ ├── dummy-train-data/
│ │ │ │ │ │ │ ├── test.source
│ │ │ │ │ │ │ ├── test.target
│ │ │ │ │ │ │ ├── train.source
│ │ │ │ │ │ │ ├── train.target
│ │ │ │ │ │ │ ├── val.source
│ │ │ │ │ │ │ └── val.target
│ │ │ │ │ │ ├── test_finetune.sh
│ │ │ │ │ │ └── test_rag_new_features.sh
│ │ │ │ │ ├── use_own_knowledge_dataset.py
│ │ │ │ │ └── utils_rag.py
│ │ │ │ ├── robust-speech-event/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── run_speech_recognition_ctc_bnb.py
│ │ │ │ │ └── run_speech_recognition_ctc_streaming.py
│ │ │ │ ├── self-training-text-classification/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── finetuning.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run.sh
│ │ │ │ │ └── selftraining.py
│ │ │ │ ├── seq2seq-distillation/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── _test_bash_script.py
│ │ │ │ │ ├── _test_make_student.py
│ │ │ │ │ ├── _test_seq2seq_examples.py
│ │ │ │ │ ├── _test_seq2seq_examples_multi_gpu.py
│ │ │ │ │ ├── callbacks.py
│ │ │ │ │ ├── convert_pl_checkpoint_to_hf.py
│ │ │ │ │ ├── distil_marian_enro_teacher.sh
│ │ │ │ │ ├── distil_marian_no_teacher.sh
│ │ │ │ │ ├── distillation.py
│ │ │ │ │ ├── dynamic_bs_example.sh
│ │ │ │ │ ├── finetune.py
│ │ │ │ │ ├── finetune.sh
│ │ │ │ │ ├── finetune_bart_tiny.sh
│ │ │ │ │ ├── finetune_pegasus_xsum.sh
│ │ │ │ │ ├── finetune_t5.sh
│ │ │ │ │ ├── lightning_base.py
│ │ │ │ │ ├── make_student.py
│ │ │ │ │ ├── precomputed_pseudo_labels.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_eval.py
│ │ │ │ │ ├── sentence_splitter.py
│ │ │ │ │ ├── train_distilbart_cnn.sh
│ │ │ │ │ ├── train_distilbart_xsum.sh
│ │ │ │ │ ├── train_mbart_cc25_enro.sh
│ │ │ │ │ └── utils.py
│ │ │ │ ├── tapex/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_tabfact_with_tapex.py
│ │ │ │ │ ├── run_wikisql_with_tapex.py
│ │ │ │ │ ├── run_wikitablequestions_with_tapex.py
│ │ │ │ │ └── wikisql_utils.py
│ │ │ │ ├── token-healing/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── run_token_healing.py
│ │ │ │ ├── visual_bert/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── demo.ipynb
│ │ │ │ │ ├── extracting_data.py
│ │ │ │ │ ├── modeling_frcnn.py
│ │ │ │ │ ├── processing_image.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── visualizing_image.py
│ │ │ │ ├── vqgan-clip/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── VQGAN_CLIP.py
│ │ │ │ │ ├── img_processing.py
│ │ │ │ │ ├── loaders.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── utils.py
│ │ │ │ ├── wav2vec2/
│ │ │ │ │ ├── FINE_TUNE_XLSR_WAV2VEC2.md
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── alignment.py
│ │ │ │ │ ├── ds_config_wav2vec2_zero2.json
│ │ │ │ │ ├── ds_config_wav2vec2_zero3.json
│ │ │ │ │ ├── finetune_base_100.sh
│ │ │ │ │ ├── finetune_base_timit_asr.sh
│ │ │ │ │ ├── finetune_large_lv60_100.sh
│ │ │ │ │ ├── finetune_large_lv60_timit_asr.sh
│ │ │ │ │ ├── finetune_large_xlsr_53_arabic_speech_corpus.sh
│ │ │ │ │ ├── finetune_wav2vec2_xlsr_turkish.sh
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ ├── run_alignment.sh
│ │ │ │ │ ├── run_asr.py
│ │ │ │ │ ├── run_common_voice.py
│ │ │ │ │ ├── run_pretrain.py
│ │ │ │ │ ├── test_wav2vec2_deepspeed.py
│ │ │ │ │ └── vocab/
│ │ │ │ │ └── buckwalter.json
│ │ │ │ ├── xtreme-s/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── run_xtreme_s.py
│ │ │ │ └── zero-shot-distillation/
│ │ │ │ ├── README.md
│ │ │ │ └── distill_classifier.py
│ │ │ ├── run_on_remote.py
│ │ │ └── tensorflow/
│ │ │ ├── README.md
│ │ │ ├── _tests_requirements.txt
│ │ │ ├── benchmarking/
│ │ │ │ ├── README.md
│ │ │ │ ├── plot_csv_file.py
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_benchmark_tf.py
│ │ │ ├── contrastive-image-text/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_clip.py
│ │ │ ├── image-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_image_classification.py
│ │ │ ├── language-modeling/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_clm.py
│ │ │ │ └── run_mlm.py
│ │ │ ├── language-modeling-tpu/
│ │ │ │ ├── README.md
│ │ │ │ ├── prepare_tfrecord_shards.py
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_mlm.py
│ │ │ │ └── train_unigram.py
│ │ │ ├── multiple-choice/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_swag.py
│ │ │ ├── question-answering/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_qa.py
│ │ │ │ └── utils_qa.py
│ │ │ ├── summarization/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_summarization.py
│ │ │ ├── test_tensorflow_examples.py
│ │ │ ├── text-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run_glue.py
│ │ │ │ └── run_text_classification.py
│ │ │ ├── token-classification/
│ │ │ │ ├── README.md
│ │ │ │ ├── requirements.txt
│ │ │ │ └── run_ner.py
│ │ │ └── translation/
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── run_translation.py
│ │ ├── hubconf.py
│ │ ├── i18n/
│ │ │ ├── README_de.md
│ │ │ ├── README_es.md
│ │ │ ├── README_fr.md
│ │ │ ├── README_hd.md
│ │ │ ├── README_ja.md
│ │ │ ├── README_ko.md
│ │ │ ├── README_pt-br.md
│ │ │ ├── README_ru.md
│ │ │ ├── README_te.md
│ │ │ ├── README_vi.md
│ │ │ ├── README_zh-hans.md
│ │ │ └── README_zh-hant.md
│ │ ├── model_cards/
│ │ │ └── README.md
│ │ ├── notebooks/
│ │ │ └── README.md
│ │ ├── pyproject.toml
│ │ ├── scripts/
│ │ │ ├── benchmark/
│ │ │ │ └── trainer-benchmark.py
│ │ │ ├── check_tokenizers.py
│ │ │ ├── distributed/
│ │ │ │ └── torch-distributed-gpu-test.py
│ │ │ ├── fsmt/
│ │ │ │ ├── convert-allenai-wmt16.sh
│ │ │ │ ├── convert-allenai-wmt19.sh
│ │ │ │ ├── convert-facebook-wmt19.sh
│ │ │ │ ├── eval-allenai-wmt16.sh
│ │ │ │ ├── eval-allenai-wmt19.sh
│ │ │ │ ├── eval-facebook-wmt19.sh
│ │ │ │ ├── fsmt-make-super-tiny-model.py
│ │ │ │ ├── fsmt-make-tiny-model.py
│ │ │ │ ├── gen-card-allenai-wmt16.py
│ │ │ │ ├── gen-card-allenai-wmt19.py
│ │ │ │ ├── gen-card-facebook-wmt19.py
│ │ │ │ ├── s3-move.sh
│ │ │ │ └── tests-to-run.sh
│ │ │ ├── pegasus/
│ │ │ │ └── build_test_sample_spm_no_bos.py
│ │ │ ├── stale.py
│ │ │ └── tatoeba/
│ │ │ ├── README.md
│ │ │ └── upload_models.sh
│ │ ├── setup.py
│ │ ├── src/
│ │ │ └── transformers/
│ │ │ ├── __init__.py
│ │ │ ├── activations.py
│ │ │ ├── activations_tf.py
│ │ │ ├── agents/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── agent_types.py
│ │ │ │ ├── agents.py
│ │ │ │ ├── default_tools.py
│ │ │ │ ├── document_question_answering.py
│ │ │ │ ├── evaluate_agent.py
│ │ │ │ ├── image_question_answering.py
│ │ │ │ ├── llm_engine.py
│ │ │ │ ├── monitoring.py
│ │ │ │ ├── prompts.py
│ │ │ │ ├── python_interpreter.py
│ │ │ │ ├── speech_to_text.py
│ │ │ │ ├── text_to_speech.py
│ │ │ │ ├── tools.py
│ │ │ │ └── translation.py
│ │ │ ├── audio_utils.py
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark.py
│ │ │ │ ├── benchmark_args.py
│ │ │ │ ├── benchmark_args_tf.py
│ │ │ │ ├── benchmark_args_utils.py
│ │ │ │ ├── benchmark_tf.py
│ │ │ │ └── benchmark_utils.py
│ │ │ ├── cache_utils.py
│ │ │ ├── commands/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── add_new_model_like.py
│ │ │ │ ├── convert.py
│ │ │ │ ├── download.py
│ │ │ │ ├── env.py
│ │ │ │ ├── lfs.py
│ │ │ │ ├── pt_to_tf.py
│ │ │ │ ├── run.py
│ │ │ │ ├── serving.py
│ │ │ │ ├── train.py
│ │ │ │ ├── transformers_cli.py
│ │ │ │ └── user.py
│ │ │ ├── configuration_utils.py
│ │ │ ├── convert_graph_to_onnx.py
│ │ │ ├── convert_pytorch_checkpoint_to_tf2.py
│ │ │ ├── convert_slow_tokenizer.py
│ │ │ ├── convert_slow_tokenizers_checkpoints_to_fast.py
│ │ │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data_collator.py
│ │ │ │ ├── datasets/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── glue.py
│ │ │ │ │ ├── language_modeling.py
│ │ │ │ │ └── squad.py
│ │ │ │ ├── metrics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── squad_metrics.py
│ │ │ │ └── processors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── glue.py
│ │ │ │ ├── squad.py
│ │ │ │ ├── utils.py
│ │ │ │ └── xnli.py
│ │ │ ├── debug_utils.py
│ │ │ ├── deepspeed.py
│ │ │ ├── dependency_versions_check.py
│ │ │ ├── dependency_versions_table.py
│ │ │ ├── dynamic_module_utils.py
│ │ │ ├── feature_extraction_sequence_utils.py
│ │ │ ├── feature_extraction_utils.py
│ │ │ ├── file_utils.py
│ │ │ ├── generation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── beam_constraints.py
│ │ │ │ ├── beam_search.py
│ │ │ │ ├── candidate_generator.py
│ │ │ │ ├── configuration_utils.py
│ │ │ │ ├── flax_logits_process.py
│ │ │ │ ├── flax_utils.py
│ │ │ │ ├── logits_process.py
│ │ │ │ ├── stopping_criteria.py
│ │ │ │ ├── streamers.py
│ │ │ │ ├── tf_logits_process.py
│ │ │ │ ├── tf_utils.py
│ │ │ │ ├── utils.py
│ │ │ │ └── watermarking.py
│ │ │ ├── hf_argparser.py
│ │ │ ├── hyperparameter_search.py
│ │ │ ├── image_processing_base.py
│ │ │ ├── image_processing_utils.py
│ │ │ ├── image_processing_utils_fast.py
│ │ │ ├── image_transforms.py
│ │ │ ├── image_utils.py
│ │ │ ├── integrations/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aqlm.py
│ │ │ │ ├── awq.py
│ │ │ │ ├── bitsandbytes.py
│ │ │ │ ├── deepspeed.py
│ │ │ │ ├── eetq.py
│ │ │ │ ├── fbgemm_fp8.py
│ │ │ │ ├── ggml.py
│ │ │ │ ├── hqq.py
│ │ │ │ ├── integration_utils.py
│ │ │ │ ├── peft.py
│ │ │ │ ├── quanto.py
│ │ │ │ └── tpu.py
│ │ │ ├── keras_callbacks.py
│ │ │ ├── kernels/
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── cpu/
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ └── ms_deform_attn_cpu.h
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cuh
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── deta/
│ │ │ │ │ ├── cpu/
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ └── ms_deform_attn_cpu.h
│ │ │ │ │ ├── cuda/
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cuh
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── mra/
│ │ │ │ │ ├── cuda_kernel.cu
│ │ │ │ │ ├── cuda_kernel.h
│ │ │ │ │ ├── cuda_launch.cu
│ │ │ │ │ ├── cuda_launch.h
│ │ │ │ │ └── torch_extension.cpp
│ │ │ │ ├── rwkv/
│ │ │ │ │ ├── wkv_cuda.cu
│ │ │ │ │ ├── wkv_cuda_bf16.cu
│ │ │ │ │ └── wkv_op.cpp
│ │ │ │ └── yoso/
│ │ │ │ ├── common.h
│ │ │ │ ├── common_cuda.h
│ │ │ │ ├── common_cuda_device.h
│ │ │ │ ├── fast_lsh_cumulation.cu
│ │ │ │ ├── fast_lsh_cumulation.h
│ │ │ │ ├── fast_lsh_cumulation_cuda.cu
│ │ │ │ ├── fast_lsh_cumulation_cuda.h
│ │ │ │ └── fast_lsh_cumulation_torch.cpp
│ │ │ ├── modelcard.py
│ │ │ ├── modeling_attn_mask_utils.py
│ │ │ ├── modeling_flash_attention_utils.py
│ │ │ ├── modeling_flax_outputs.py
│ │ │ ├── modeling_flax_pytorch_utils.py
│ │ │ ├── modeling_flax_utils.py
│ │ │ ├── modeling_gguf_pytorch_utils.py
│ │ │ ├── modeling_outputs.py
│ │ │ ├── modeling_rope_utils.py
│ │ │ ├── modeling_tf_outputs.py
│ │ │ ├── modeling_tf_pytorch_utils.py
│ │ │ ├── modeling_tf_utils.py
│ │ │ ├── modeling_utils.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── albert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_albert.py
│ │ │ │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_albert.py
│ │ │ │ │ ├── modeling_flax_albert.py
│ │ │ │ │ ├── modeling_tf_albert.py
│ │ │ │ │ ├── tokenization_albert.py
│ │ │ │ │ └── tokenization_albert_fast.py
│ │ │ │ ├── align/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_align.py
│ │ │ │ │ ├── convert_align_tf_to_hf.py
│ │ │ │ │ ├── modeling_align.py
│ │ │ │ │ └── processing_align.py
│ │ │ │ ├── altclip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_altclip.py
│ │ │ │ │ ├── modeling_altclip.py
│ │ │ │ │ └── processing_altclip.py
│ │ │ │ ├── audio_spectrogram_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_audio_spectrogram_transformer.py
│ │ │ │ │ ├── convert_audio_spectrogram_transformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_audio_spectrogram_transformer.py
│ │ │ │ │ └── modeling_audio_spectrogram_transformer.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_factory.py
│ │ │ │ │ ├── configuration_auto.py
│ │ │ │ │ ├── feature_extraction_auto.py
│ │ │ │ │ ├── image_processing_auto.py
│ │ │ │ │ ├── modeling_auto.py
│ │ │ │ │ ├── modeling_flax_auto.py
│ │ │ │ │ ├── modeling_tf_auto.py
│ │ │ │ │ ├── processing_auto.py
│ │ │ │ │ └── tokenization_auto.py
│ │ │ │ ├── autoformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_autoformer.py
│ │ │ │ │ └── modeling_autoformer.py
│ │ │ │ ├── bark/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bark.py
│ │ │ │ │ ├── convert_suno_to_hf.py
│ │ │ │ │ ├── generation_configuration_bark.py
│ │ │ │ │ ├── modeling_bark.py
│ │ │ │ │ └── processing_bark.py
│ │ │ │ ├── bart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bart.py
│ │ │ │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bart.py
│ │ │ │ │ ├── modeling_flax_bart.py
│ │ │ │ │ ├── modeling_tf_bart.py
│ │ │ │ │ ├── tokenization_bart.py
│ │ │ │ │ └── tokenization_bart_fast.py
│ │ │ │ ├── barthez/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_barthez.py
│ │ │ │ │ └── tokenization_barthez_fast.py
│ │ │ │ ├── bartpho/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bartpho.py
│ │ │ │ ├── beit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_beit.py
│ │ │ │ │ ├── convert_beit_unilm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_beit.py
│ │ │ │ │ ├── image_processing_beit.py
│ │ │ │ │ ├── modeling_beit.py
│ │ │ │ │ └── modeling_flax_beit.py
│ │ │ │ ├── bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bert.py
│ │ │ │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py
│ │ │ │ │ ├── convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bert.py
│ │ │ │ │ ├── modeling_flax_bert.py
│ │ │ │ │ ├── modeling_tf_bert.py
│ │ │ │ │ ├── tokenization_bert.py
│ │ │ │ │ ├── tokenization_bert_fast.py
│ │ │ │ │ └── tokenization_bert_tf.py
│ │ │ │ ├── bert_generation/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bert_generation.py
│ │ │ │ │ ├── modeling_bert_generation.py
│ │ │ │ │ └── tokenization_bert_generation.py
│ │ │ │ ├── bert_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bert_japanese.py
│ │ │ │ ├── bertweet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_bertweet.py
│ │ │ │ ├── big_bird/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_big_bird.py
│ │ │ │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_big_bird.py
│ │ │ │ │ ├── modeling_flax_big_bird.py
│ │ │ │ │ ├── tokenization_big_bird.py
│ │ │ │ │ └── tokenization_big_bird_fast.py
│ │ │ │ ├── bigbird_pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bigbird_pegasus.py
│ │ │ │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py
│ │ │ │ │ └── modeling_bigbird_pegasus.py
│ │ │ │ ├── biogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_biogpt.py
│ │ │ │ │ ├── convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_biogpt.py
│ │ │ │ │ └── tokenization_biogpt.py
│ │ │ │ ├── bit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bit.py
│ │ │ │ │ ├── convert_bit_to_pytorch.py
│ │ │ │ │ ├── image_processing_bit.py
│ │ │ │ │ └── modeling_bit.py
│ │ │ │ ├── blenderbot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blenderbot.py
│ │ │ │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_blenderbot.py
│ │ │ │ │ ├── modeling_flax_blenderbot.py
│ │ │ │ │ ├── modeling_tf_blenderbot.py
│ │ │ │ │ ├── tokenization_blenderbot.py
│ │ │ │ │ └── tokenization_blenderbot_fast.py
│ │ │ │ ├── blenderbot_small/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blenderbot_small.py
│ │ │ │ │ ├── modeling_blenderbot_small.py
│ │ │ │ │ ├── modeling_flax_blenderbot_small.py
│ │ │ │ │ ├── modeling_tf_blenderbot_small.py
│ │ │ │ │ ├── tokenization_blenderbot_small.py
│ │ │ │ │ └── tokenization_blenderbot_small_fast.py
│ │ │ │ ├── blip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blip.py
│ │ │ │ │ ├── convert_blip_original_pytorch_to_hf.py
│ │ │ │ │ ├── image_processing_blip.py
│ │ │ │ │ ├── modeling_blip.py
│ │ │ │ │ ├── modeling_blip_text.py
│ │ │ │ │ ├── modeling_tf_blip.py
│ │ │ │ │ ├── modeling_tf_blip_text.py
│ │ │ │ │ └── processing_blip.py
│ │ │ │ ├── blip_2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_blip_2.py
│ │ │ │ │ ├── convert_blip_2_original_to_pytorch.py
│ │ │ │ │ ├── modeling_blip_2.py
│ │ │ │ │ └── processing_blip_2.py
│ │ │ │ ├── bloom/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bloom.py
│ │ │ │ │ ├── convert_bloom_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_bloom.py
│ │ │ │ │ ├── modeling_flax_bloom.py
│ │ │ │ │ └── tokenization_bloom_fast.py
│ │ │ │ ├── bridgetower/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bridgetower.py
│ │ │ │ │ ├── image_processing_bridgetower.py
│ │ │ │ │ ├── modeling_bridgetower.py
│ │ │ │ │ └── processing_bridgetower.py
│ │ │ │ ├── bros/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_bros.py
│ │ │ │ │ ├── convert_bros_to_pytorch.py
│ │ │ │ │ ├── modeling_bros.py
│ │ │ │ │ └── processing_bros.py
│ │ │ │ ├── byt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ └── tokenization_byt5.py
│ │ │ │ ├── camembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_camembert.py
│ │ │ │ │ ├── modeling_camembert.py
│ │ │ │ │ ├── modeling_tf_camembert.py
│ │ │ │ │ ├── tokenization_camembert.py
│ │ │ │ │ └── tokenization_camembert_fast.py
│ │ │ │ ├── canine/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_canine.py
│ │ │ │ │ ├── convert_canine_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_canine.py
│ │ │ │ │ └── tokenization_canine.py
│ │ │ │ ├── chameleon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_chameleon.py
│ │ │ │ │ ├── convert_chameleon_weights_to_hf.py
│ │ │ │ │ ├── image_processing_chameleon.py
│ │ │ │ │ ├── modeling_chameleon.py
│ │ │ │ │ └── processing_chameleon.py
│ │ │ │ ├── chinese_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_chinese_clip.py
│ │ │ │ │ ├── convert_chinese_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_chinese_clip.py
│ │ │ │ │ ├── image_processing_chinese_clip.py
│ │ │ │ │ ├── modeling_chinese_clip.py
│ │ │ │ │ └── processing_chinese_clip.py
│ │ │ │ ├── clap/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clap.py
│ │ │ │ │ ├── convert_clap_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_clap.py
│ │ │ │ │ ├── modeling_clap.py
│ │ │ │ │ └── processing_clap.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clip.py
│ │ │ │ │ ├── convert_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_clip.py
│ │ │ │ │ ├── image_processing_clip.py
│ │ │ │ │ ├── modeling_clip.py
│ │ │ │ │ ├── modeling_flax_clip.py
│ │ │ │ │ ├── modeling_tf_clip.py
│ │ │ │ │ ├── processing_clip.py
│ │ │ │ │ ├── tokenization_clip.py
│ │ │ │ │ └── tokenization_clip_fast.py
│ │ │ │ ├── clipseg/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clipseg.py
│ │ │ │ │ ├── convert_clipseg_original_pytorch_to_hf.py
│ │ │ │ │ ├── modeling_clipseg.py
│ │ │ │ │ └── processing_clipseg.py
│ │ │ │ ├── clvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_clvp.py
│ │ │ │ │ ├── convert_clvp_to_hf.py
│ │ │ │ │ ├── feature_extraction_clvp.py
│ │ │ │ │ ├── modeling_clvp.py
│ │ │ │ │ ├── number_normalizer.py
│ │ │ │ │ ├── processing_clvp.py
│ │ │ │ │ └── tokenization_clvp.py
│ │ │ │ ├── code_llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_code_llama.py
│ │ │ │ │ └── tokenization_code_llama_fast.py
│ │ │ │ ├── codegen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_codegen.py
│ │ │ │ │ ├── modeling_codegen.py
│ │ │ │ │ ├── tokenization_codegen.py
│ │ │ │ │ └── tokenization_codegen_fast.py
│ │ │ │ ├── cohere/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cohere.py
│ │ │ │ │ ├── modeling_cohere.py
│ │ │ │ │ └── tokenization_cohere_fast.py
│ │ │ │ ├── conditional_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_conditional_detr.py
│ │ │ │ │ ├── convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_conditional_detr.py
│ │ │ │ │ ├── image_processing_conditional_detr.py
│ │ │ │ │ └── modeling_conditional_detr.py
│ │ │ │ ├── convbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convbert.py
│ │ │ │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py
│ │ │ │ │ ├── modeling_convbert.py
│ │ │ │ │ ├── modeling_tf_convbert.py
│ │ │ │ │ ├── tokenization_convbert.py
│ │ │ │ │ └── tokenization_convbert_fast.py
│ │ │ │ ├── convnext/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convnext.py
│ │ │ │ │ ├── convert_convnext_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_convnext.py
│ │ │ │ │ ├── image_processing_convnext.py
│ │ │ │ │ ├── modeling_convnext.py
│ │ │ │ │ └── modeling_tf_convnext.py
│ │ │ │ ├── convnextv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_convnextv2.py
│ │ │ │ │ ├── convert_convnextv2_to_pytorch.py
│ │ │ │ │ ├── modeling_convnextv2.py
│ │ │ │ │ └── modeling_tf_convnextv2.py
│ │ │ │ ├── cpm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_cpm.py
│ │ │ │ │ └── tokenization_cpm_fast.py
│ │ │ │ ├── cpmant/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cpmant.py
│ │ │ │ │ ├── modeling_cpmant.py
│ │ │ │ │ └── tokenization_cpmant.py
│ │ │ │ ├── ctrl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ctrl.py
│ │ │ │ │ ├── modeling_ctrl.py
│ │ │ │ │ ├── modeling_tf_ctrl.py
│ │ │ │ │ └── tokenization_ctrl.py
│ │ │ │ ├── cvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_cvt.py
│ │ │ │ │ ├── convert_cvt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_cvt.py
│ │ │ │ │ └── modeling_tf_cvt.py
│ │ │ │ ├── dac/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dac.py
│ │ │ │ │ ├── convert_dac_checkpoint.py
│ │ │ │ │ ├── feature_extraction_dac.py
│ │ │ │ │ └── modeling_dac.py
│ │ │ │ ├── data2vec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_data2vec_audio.py
│ │ │ │ │ ├── configuration_data2vec_text.py
│ │ │ │ │ ├── configuration_data2vec_vision.py
│ │ │ │ │ ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_data2vec_audio.py
│ │ │ │ │ ├── modeling_data2vec_text.py
│ │ │ │ │ ├── modeling_data2vec_vision.py
│ │ │ │ │ └── modeling_tf_data2vec_vision.py
│ │ │ │ ├── dbrx/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dbrx.py
│ │ │ │ │ └── modeling_dbrx.py
│ │ │ │ ├── deberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deberta.py
│ │ │ │ │ ├── modeling_deberta.py
│ │ │ │ │ ├── modeling_tf_deberta.py
│ │ │ │ │ ├── tokenization_deberta.py
│ │ │ │ │ └── tokenization_deberta_fast.py
│ │ │ │ ├── deberta_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deberta_v2.py
│ │ │ │ │ ├── modeling_deberta_v2.py
│ │ │ │ │ ├── modeling_tf_deberta_v2.py
│ │ │ │ │ ├── tokenization_deberta_v2.py
│ │ │ │ │ └── tokenization_deberta_v2_fast.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_decision_transformer.py
│ │ │ │ │ └── modeling_decision_transformer.py
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deformable_detr.py
│ │ │ │ │ ├── convert_deformable_detr_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_deformable_detr.py
│ │ │ │ │ ├── image_processing_deformable_detr.py
│ │ │ │ │ ├── load_custom.py
│ │ │ │ │ └── modeling_deformable_detr.py
│ │ │ │ ├── deit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_deit.py
│ │ │ │ │ ├── convert_deit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_deit.py
│ │ │ │ │ ├── image_processing_deit.py
│ │ │ │ │ ├── modeling_deit.py
│ │ │ │ │ └── modeling_tf_deit.py
│ │ │ │ ├── deprecated/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── bort/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py
│ │ │ │ │ ├── deta/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_deta.py
│ │ │ │ │ │ ├── convert_deta_resnet_to_pytorch.py
│ │ │ │ │ │ ├── convert_deta_swin_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_deta.py
│ │ │ │ │ │ └── modeling_deta.py
│ │ │ │ │ ├── efficientformer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_efficientformer.py
│ │ │ │ │ │ ├── convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_efficientformer.py
│ │ │ │ │ │ ├── modeling_efficientformer.py
│ │ │ │ │ │ └── modeling_tf_efficientformer.py
│ │ │ │ │ ├── ernie_m/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_ernie_m.py
│ │ │ │ │ │ ├── modeling_ernie_m.py
│ │ │ │ │ │ └── tokenization_ernie_m.py
│ │ │ │ │ ├── gptsan_japanese/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_gptsan_japanese.py
│ │ │ │ │ │ ├── convert_gptsan_tf_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── modeling_gptsan_japanese.py
│ │ │ │ │ │ └── tokenization_gptsan_japanese.py
│ │ │ │ │ ├── graphormer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── algos_graphormer.pyx
│ │ │ │ │ │ ├── collating_graphormer.py
│ │ │ │ │ │ ├── configuration_graphormer.py
│ │ │ │ │ │ └── modeling_graphormer.py
│ │ │ │ │ ├── jukebox/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_jukebox.py
│ │ │ │ │ │ ├── convert_jukebox.py
│ │ │ │ │ │ ├── modeling_jukebox.py
│ │ │ │ │ │ └── tokenization_jukebox.py
│ │ │ │ │ ├── mctct/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mctct.py
│ │ │ │ │ │ ├── feature_extraction_mctct.py
│ │ │ │ │ │ ├── modeling_mctct.py
│ │ │ │ │ │ └── processing_mctct.py
│ │ │ │ │ ├── mega/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mega.py
│ │ │ │ │ │ ├── convert_mega_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ └── modeling_mega.py
│ │ │ │ │ ├── mmbt/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_mmbt.py
│ │ │ │ │ │ └── modeling_mmbt.py
│ │ │ │ │ ├── nat/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_nat.py
│ │ │ │ │ │ └── modeling_nat.py
│ │ │ │ │ ├── nezha/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_nezha.py
│ │ │ │ │ │ └── modeling_nezha.py
│ │ │ │ │ ├── open_llama/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_open_llama.py
│ │ │ │ │ │ └── modeling_open_llama.py
│ │ │ │ │ ├── qdqbert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_qdqbert.py
│ │ │ │ │ │ └── modeling_qdqbert.py
│ │ │ │ │ ├── realm/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_realm.py
│ │ │ │ │ │ ├── modeling_realm.py
│ │ │ │ │ │ ├── retrieval_realm.py
│ │ │ │ │ │ ├── tokenization_realm.py
│ │ │ │ │ │ └── tokenization_realm_fast.py
│ │ │ │ │ ├── retribert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_retribert.py
│ │ │ │ │ │ ├── modeling_retribert.py
│ │ │ │ │ │ ├── tokenization_retribert.py
│ │ │ │ │ │ └── tokenization_retribert_fast.py
│ │ │ │ │ ├── speech_to_text_2/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_speech_to_text_2.py
│ │ │ │ │ │ ├── modeling_speech_to_text_2.py
│ │ │ │ │ │ ├── processing_speech_to_text_2.py
│ │ │ │ │ │ └── tokenization_speech_to_text_2.py
│ │ │ │ │ ├── tapex/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── tokenization_tapex.py
│ │ │ │ │ ├── trajectory_transformer/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_trajectory_transformer.py
│ │ │ │ │ │ ├── convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ │ └── modeling_trajectory_transformer.py
│ │ │ │ │ ├── transfo_xl/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_transfo_xl.py
│ │ │ │ │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ │ ├── modeling_tf_transfo_xl.py
│ │ │ │ │ │ ├── modeling_tf_transfo_xl_utilities.py
│ │ │ │ │ │ ├── modeling_transfo_xl.py
│ │ │ │ │ │ ├── modeling_transfo_xl_utilities.py
│ │ │ │ │ │ └── tokenization_transfo_xl.py
│ │ │ │ │ ├── tvlt/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_tvlt.py
│ │ │ │ │ │ ├── feature_extraction_tvlt.py
│ │ │ │ │ │ ├── image_processing_tvlt.py
│ │ │ │ │ │ ├── modeling_tvlt.py
│ │ │ │ │ │ └── processing_tvlt.py
│ │ │ │ │ ├── van/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_van.py
│ │ │ │ │ │ ├── convert_van_to_pytorch.py
│ │ │ │ │ │ └── modeling_van.py
│ │ │ │ │ ├── vit_hybrid/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── configuration_vit_hybrid.py
│ │ │ │ │ │ ├── convert_vit_hybrid_timm_to_pytorch.py
│ │ │ │ │ │ ├── image_processing_vit_hybrid.py
│ │ │ │ │ │ └── modeling_vit_hybrid.py
│ │ │ │ │ └── xlm_prophetnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_prophetnet.py
│ │ │ │ │ ├── modeling_xlm_prophetnet.py
│ │ │ │ │ └── tokenization_xlm_prophetnet.py
│ │ │ │ ├── depth_anything/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_depth_anything.py
│ │ │ │ │ ├── convert_depth_anything_to_hf.py
│ │ │ │ │ └── modeling_depth_anything.py
│ │ │ │ ├── detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_detr.py
│ │ │ │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_detr_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_detr.py
│ │ │ │ │ ├── image_processing_detr.py
│ │ │ │ │ └── modeling_detr.py
│ │ │ │ ├── dialogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ ├── dinat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dinat.py
│ │ │ │ │ └── modeling_dinat.py
│ │ │ │ ├── dinov2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dinov2.py
│ │ │ │ │ ├── convert_dinov2_to_hf.py
│ │ │ │ │ ├── modeling_dinov2.py
│ │ │ │ │ └── modeling_flax_dinov2.py
│ │ │ │ ├── distilbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_distilbert.py
│ │ │ │ │ ├── modeling_distilbert.py
│ │ │ │ │ ├── modeling_flax_distilbert.py
│ │ │ │ │ ├── modeling_tf_distilbert.py
│ │ │ │ │ ├── tokenization_distilbert.py
│ │ │ │ │ └── tokenization_distilbert_fast.py
│ │ │ │ ├── dit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── convert_dit_unilm_to_pytorch.py
│ │ │ │ ├── donut/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_donut_swin.py
│ │ │ │ │ ├── convert_donut_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_donut.py
│ │ │ │ │ ├── image_processing_donut.py
│ │ │ │ │ ├── modeling_donut_swin.py
│ │ │ │ │ └── processing_donut.py
│ │ │ │ ├── dpr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dpr.py
│ │ │ │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_dpr.py
│ │ │ │ │ ├── modeling_tf_dpr.py
│ │ │ │ │ ├── tokenization_dpr.py
│ │ │ │ │ └── tokenization_dpr_fast.py
│ │ │ │ ├── dpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_dpt.py
│ │ │ │ │ ├── convert_dinov2_depth_to_hf.py
│ │ │ │ │ ├── convert_dpt_beit_to_hf.py
│ │ │ │ │ ├── convert_dpt_hybrid_to_pytorch.py
│ │ │ │ │ ├── convert_dpt_swinv2_to_hf.py
│ │ │ │ │ ├── convert_dpt_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_dpt.py
│ │ │ │ │ ├── image_processing_dpt.py
│ │ │ │ │ └── modeling_dpt.py
│ │ │ │ ├── efficientnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_efficientnet.py
│ │ │ │ │ ├── convert_efficientnet_to_pytorch.py
│ │ │ │ │ ├── image_processing_efficientnet.py
│ │ │ │ │ └── modeling_efficientnet.py
│ │ │ │ ├── electra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_electra.py
│ │ │ │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_electra.py
│ │ │ │ │ ├── modeling_flax_electra.py
│ │ │ │ │ ├── modeling_tf_electra.py
│ │ │ │ │ ├── tokenization_electra.py
│ │ │ │ │ └── tokenization_electra_fast.py
│ │ │ │ ├── encodec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_encodec.py
│ │ │ │ │ ├── convert_encodec_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_encodec.py
│ │ │ │ │ └── modeling_encodec.py
│ │ │ │ ├── encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_encoder_decoder.py
│ │ │ │ │ ├── modeling_encoder_decoder.py
│ │ │ │ │ ├── modeling_flax_encoder_decoder.py
│ │ │ │ │ └── modeling_tf_encoder_decoder.py
│ │ │ │ ├── ernie/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ernie.py
│ │ │ │ │ └── modeling_ernie.py
│ │ │ │ ├── esm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_esm.py
│ │ │ │ │ ├── convert_esm.py
│ │ │ │ │ ├── modeling_esm.py
│ │ │ │ │ ├── modeling_esmfold.py
│ │ │ │ │ ├── modeling_tf_esm.py
│ │ │ │ │ ├── openfold_utils/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── chunk_utils.py
│ │ │ │ │ │ ├── data_transforms.py
│ │ │ │ │ │ ├── feats.py
│ │ │ │ │ │ ├── loss.py
│ │ │ │ │ │ ├── protein.py
│ │ │ │ │ │ ├── residue_constants.py
│ │ │ │ │ │ ├── rigid_utils.py
│ │ │ │ │ │ └── tensor_utils.py
│ │ │ │ │ └── tokenization_esm.py
│ │ │ │ ├── falcon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_falcon.py
│ │ │ │ │ ├── convert_custom_code_checkpoint.py
│ │ │ │ │ └── modeling_falcon.py
│ │ │ │ ├── falcon_mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_falcon_mamba.py
│ │ │ │ │ └── modeling_falcon_mamba.py
│ │ │ │ ├── fastspeech2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fastspeech2_conformer.py
│ │ │ │ │ ├── convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hifigan.py
│ │ │ │ │ ├── convert_model_with_hifigan.py
│ │ │ │ │ ├── modeling_fastspeech2_conformer.py
│ │ │ │ │ └── tokenization_fastspeech2_conformer.py
│ │ │ │ ├── flaubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_flaubert.py
│ │ │ │ │ ├── modeling_flaubert.py
│ │ │ │ │ ├── modeling_tf_flaubert.py
│ │ │ │ │ └── tokenization_flaubert.py
│ │ │ │ ├── flava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_flava.py
│ │ │ │ │ ├── convert_dalle_to_flava_codebook.py
│ │ │ │ │ ├── convert_flava_original_pytorch_to_hf.py
│ │ │ │ │ ├── feature_extraction_flava.py
│ │ │ │ │ ├── image_processing_flava.py
│ │ │ │ │ ├── modeling_flava.py
│ │ │ │ │ └── processing_flava.py
│ │ │ │ ├── fnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fnet.py
│ │ │ │ │ ├── convert_fnet_original_flax_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_fnet.py
│ │ │ │ │ ├── tokenization_fnet.py
│ │ │ │ │ └── tokenization_fnet_fast.py
│ │ │ │ ├── focalnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_focalnet.py
│ │ │ │ │ ├── convert_focalnet_to_hf_format.py
│ │ │ │ │ └── modeling_focalnet.py
│ │ │ │ ├── fsmt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fsmt.py
│ │ │ │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_fsmt.py
│ │ │ │ │ └── tokenization_fsmt.py
│ │ │ │ ├── funnel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_funnel.py
│ │ │ │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_funnel.py
│ │ │ │ │ ├── modeling_tf_funnel.py
│ │ │ │ │ ├── tokenization_funnel.py
│ │ │ │ │ └── tokenization_funnel_fast.py
│ │ │ │ ├── fuyu/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_fuyu.py
│ │ │ │ │ ├── convert_fuyu_model_weights_to_hf.py
│ │ │ │ │ ├── image_processing_fuyu.py
│ │ │ │ │ ├── modeling_fuyu.py
│ │ │ │ │ └── processing_fuyu.py
│ │ │ │ ├── gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gemma.py
│ │ │ │ │ ├── convert_gemma_weights_to_hf.py
│ │ │ │ │ ├── diff_gemma.py
│ │ │ │ │ ├── modeling_flax_gemma.py
│ │ │ │ │ ├── modeling_gemma.py
│ │ │ │ │ ├── tokenization_gemma.py
│ │ │ │ │ └── tokenization_gemma_fast.py
│ │ │ │ ├── gemma2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gemma2.py
│ │ │ │ │ ├── convert_gemma2_weights_to_hf.py
│ │ │ │ │ ├── diff_gemma2.py
│ │ │ │ │ └── modeling_gemma2.py
│ │ │ │ ├── git/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_git.py
│ │ │ │ │ ├── convert_git_to_pytorch.py
│ │ │ │ │ ├── modeling_git.py
│ │ │ │ │ └── processing_git.py
│ │ │ │ ├── glpn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_glpn.py
│ │ │ │ │ ├── convert_glpn_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_glpn.py
│ │ │ │ │ ├── image_processing_glpn.py
│ │ │ │ │ └── modeling_glpn.py
│ │ │ │ ├── gpt2/
│ │ │ │ │ ├── CONVERSION.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt2.py
│ │ │ │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_gpt2.py
│ │ │ │ │ ├── modeling_gpt2.py
│ │ │ │ │ ├── modeling_tf_gpt2.py
│ │ │ │ │ ├── tokenization_gpt2.py
│ │ │ │ │ ├── tokenization_gpt2_fast.py
│ │ │ │ │ └── tokenization_gpt2_tf.py
│ │ │ │ ├── gpt_bigcode/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_bigcode.py
│ │ │ │ │ └── modeling_gpt_bigcode.py
│ │ │ │ ├── gpt_neo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neo.py
│ │ │ │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_gpt_neo.py
│ │ │ │ │ └── modeling_gpt_neo.py
│ │ │ │ ├── gpt_neox/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neox.py
│ │ │ │ │ ├── modeling_gpt_neox.py
│ │ │ │ │ └── tokenization_gpt_neox_fast.py
│ │ │ │ ├── gpt_neox_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gpt_neox_japanese.py
│ │ │ │ │ ├── modeling_gpt_neox_japanese.py
│ │ │ │ │ └── tokenization_gpt_neox_japanese.py
│ │ │ │ ├── gpt_sw3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_megatron_to_pytorch.py
│ │ │ │ │ └── tokenization_gpt_sw3.py
│ │ │ │ ├── gptj/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_gptj.py
│ │ │ │ │ ├── modeling_flax_gptj.py
│ │ │ │ │ ├── modeling_gptj.py
│ │ │ │ │ └── modeling_tf_gptj.py
│ │ │ │ ├── grounding_dino/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_grounding_dino.py
│ │ │ │ │ ├── convert_grounding_dino_to_hf.py
│ │ │ │ │ ├── image_processing_grounding_dino.py
│ │ │ │ │ ├── modeling_grounding_dino.py
│ │ │ │ │ └── processing_grounding_dino.py
│ │ │ │ ├── groupvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_groupvit.py
│ │ │ │ │ ├── convert_groupvit_nvlab_to_hf.py
│ │ │ │ │ ├── modeling_groupvit.py
│ │ │ │ │ └── modeling_tf_groupvit.py
│ │ │ │ ├── herbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_herbert.py
│ │ │ │ │ └── tokenization_herbert_fast.py
│ │ │ │ ├── hiera/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_hiera.py
│ │ │ │ │ ├── convert_hiera_to_hf.py
│ │ │ │ │ └── modeling_hiera.py
│ │ │ │ ├── hubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_hubert.py
│ │ │ │ │ ├── convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_hubert.py
│ │ │ │ │ └── modeling_tf_hubert.py
│ │ │ │ ├── ibert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_ibert.py
│ │ │ │ │ ├── modeling_ibert.py
│ │ │ │ │ └── quant_modules.py
│ │ │ │ ├── idefics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_idefics.py
│ │ │ │ │ ├── image_processing_idefics.py
│ │ │ │ │ ├── modeling_idefics.py
│ │ │ │ │ ├── modeling_tf_idefics.py
│ │ │ │ │ ├── perceiver.py
│ │ │ │ │ ├── perceiver_tf.py
│ │ │ │ │ ├── processing_idefics.py
│ │ │ │ │ ├── vision.py
│ │ │ │ │ └── vision_tf.py
│ │ │ │ ├── idefics2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_idefics2.py
│ │ │ │ │ ├── convert_idefics2_weights_to_hf.py
│ │ │ │ │ ├── image_processing_idefics2.py
│ │ │ │ │ ├── modeling_idefics2.py
│ │ │ │ │ └── processing_idefics2.py
│ │ │ │ ├── imagegpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_imagegpt.py
│ │ │ │ │ ├── convert_imagegpt_original_tf2_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_imagegpt.py
│ │ │ │ │ ├── image_processing_imagegpt.py
│ │ │ │ │ └── modeling_imagegpt.py
│ │ │ │ ├── informer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_informer.py
│ │ │ │ │ └── modeling_informer.py
│ │ │ │ ├── instructblip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_instructblip.py
│ │ │ │ │ ├── convert_instructblip_original_to_pytorch.py
│ │ │ │ │ ├── modeling_instructblip.py
│ │ │ │ │ └── processing_instructblip.py
│ │ │ │ ├── instructblipvideo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_instructblipvideo.py
│ │ │ │ │ ├── convert_instructblipvideo_original_to_pytorch.py
│ │ │ │ │ ├── diff_instructblipvideo.py
│ │ │ │ │ ├── image_processing_instructblipvideo.py
│ │ │ │ │ ├── modeling_instructblipvideo.py
│ │ │ │ │ └── processing_instructblipvideo.py
│ │ │ │ ├── jamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_jamba.py
│ │ │ │ │ └── modeling_jamba.py
│ │ │ │ ├── jetmoe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_jetmoe.py
│ │ │ │ │ └── modeling_jetmoe.py
│ │ │ │ ├── kosmos2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_kosmos2.py
│ │ │ │ │ ├── convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_kosmos2.py
│ │ │ │ │ └── processing_kosmos2.py
│ │ │ │ ├── layoutlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlm.py
│ │ │ │ │ ├── modeling_layoutlm.py
│ │ │ │ │ ├── modeling_tf_layoutlm.py
│ │ │ │ │ ├── tokenization_layoutlm.py
│ │ │ │ │ └── tokenization_layoutlm_fast.py
│ │ │ │ ├── layoutlmv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlmv2.py
│ │ │ │ │ ├── feature_extraction_layoutlmv2.py
│ │ │ │ │ ├── image_processing_layoutlmv2.py
│ │ │ │ │ ├── modeling_layoutlmv2.py
│ │ │ │ │ ├── processing_layoutlmv2.py
│ │ │ │ │ ├── tokenization_layoutlmv2.py
│ │ │ │ │ └── tokenization_layoutlmv2_fast.py
│ │ │ │ ├── layoutlmv3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_layoutlmv3.py
│ │ │ │ │ ├── feature_extraction_layoutlmv3.py
│ │ │ │ │ ├── image_processing_layoutlmv3.py
│ │ │ │ │ ├── modeling_layoutlmv3.py
│ │ │ │ │ ├── modeling_tf_layoutlmv3.py
│ │ │ │ │ ├── processing_layoutlmv3.py
│ │ │ │ │ ├── tokenization_layoutlmv3.py
│ │ │ │ │ └── tokenization_layoutlmv3_fast.py
│ │ │ │ ├── layoutxlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── processing_layoutxlm.py
│ │ │ │ │ ├── tokenization_layoutxlm.py
│ │ │ │ │ └── tokenization_layoutxlm_fast.py
│ │ │ │ ├── led/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_led.py
│ │ │ │ │ ├── modeling_led.py
│ │ │ │ │ ├── modeling_tf_led.py
│ │ │ │ │ ├── tokenization_led.py
│ │ │ │ │ └── tokenization_led_fast.py
│ │ │ │ ├── levit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_levit.py
│ │ │ │ │ ├── convert_levit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_levit.py
│ │ │ │ │ ├── image_processing_levit.py
│ │ │ │ │ └── modeling_levit.py
│ │ │ │ ├── lilt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_lilt.py
│ │ │ │ │ └── modeling_lilt.py
│ │ │ │ ├── llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llama.py
│ │ │ │ │ ├── convert_llama_weights_to_hf.py
│ │ │ │ │ ├── modeling_flax_llama.py
│ │ │ │ │ ├── modeling_llama.py
│ │ │ │ │ ├── tokenization_llama.py
│ │ │ │ │ └── tokenization_llama_fast.py
│ │ │ │ ├── llava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava.py
│ │ │ │ │ ├── convert_llava_weights_to_hf.py
│ │ │ │ │ ├── modeling_llava.py
│ │ │ │ │ └── processing_llava.py
│ │ │ │ ├── llava_next/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava_next.py
│ │ │ │ │ ├── convert_llava_next_weights_to_hf.py
│ │ │ │ │ ├── image_processing_llava_next.py
│ │ │ │ │ ├── modeling_llava_next.py
│ │ │ │ │ └── processing_llava_next.py
│ │ │ │ ├── llava_next_video/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_llava_next_video.py
│ │ │ │ │ ├── convert_llava_next_video_weights_to_hf.py
│ │ │ │ │ ├── diff_llava_next_video.py
│ │ │ │ │ ├── image_processing_llava_next_video.py
│ │ │ │ │ ├── modeling_llava_next_video.py
│ │ │ │ │ └── processing_llava_next_video.py
│ │ │ │ ├── longformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_longformer.py
│ │ │ │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│ │ │ │ │ ├── modeling_longformer.py
│ │ │ │ │ ├── modeling_tf_longformer.py
│ │ │ │ │ ├── tokenization_longformer.py
│ │ │ │ │ └── tokenization_longformer_fast.py
│ │ │ │ ├── longt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_longt5.py
│ │ │ │ │ ├── convert_longt5x_checkpoint_to_flax.py
│ │ │ │ │ ├── modeling_flax_longt5.py
│ │ │ │ │ └── modeling_longt5.py
│ │ │ │ ├── luke/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_luke.py
│ │ │ │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_luke.py
│ │ │ │ │ └── tokenization_luke.py
│ │ │ │ ├── lxmert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_lxmert.py
│ │ │ │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_lxmert.py
│ │ │ │ │ ├── modeling_tf_lxmert.py
│ │ │ │ │ ├── tokenization_lxmert.py
│ │ │ │ │ └── tokenization_lxmert_fast.py
│ │ │ │ ├── m2m_100/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_m2m_100.py
│ │ │ │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_m2m_100.py
│ │ │ │ │ └── tokenization_m2m_100.py
│ │ │ │ ├── mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mamba.py
│ │ │ │ │ ├── convert_mamba_ssm_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_mamba.py
│ │ │ │ ├── mamba2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mamba2.py
│ │ │ │ │ ├── convert_mamba2_ssm_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_mamba2.py
│ │ │ │ ├── marian/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_marian.py
│ │ │ │ │ ├── convert_marian_tatoeba_to_pytorch.py
│ │ │ │ │ ├── convert_marian_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_marian.py
│ │ │ │ │ ├── modeling_marian.py
│ │ │ │ │ ├── modeling_tf_marian.py
│ │ │ │ │ └── tokenization_marian.py
│ │ │ │ ├── markuplm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_markuplm.py
│ │ │ │ │ ├── feature_extraction_markuplm.py
│ │ │ │ │ ├── modeling_markuplm.py
│ │ │ │ │ ├── processing_markuplm.py
│ │ │ │ │ ├── tokenization_markuplm.py
│ │ │ │ │ └── tokenization_markuplm_fast.py
│ │ │ │ ├── mask2former/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mask2former.py
│ │ │ │ │ ├── convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── image_processing_mask2former.py
│ │ │ │ │ └── modeling_mask2former.py
│ │ │ │ ├── maskformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_maskformer.py
│ │ │ │ │ ├── configuration_maskformer_swin.py
│ │ │ │ │ ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_maskformer_resnet_to_pytorch.py
│ │ │ │ │ ├── convert_maskformer_swin_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_maskformer.py
│ │ │ │ │ ├── image_processing_maskformer.py
│ │ │ │ │ ├── modeling_maskformer.py
│ │ │ │ │ └── modeling_maskformer_swin.py
│ │ │ │ ├── mbart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mbart.py
│ │ │ │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_mbart.py
│ │ │ │ │ ├── modeling_mbart.py
│ │ │ │ │ ├── modeling_tf_mbart.py
│ │ │ │ │ ├── tokenization_mbart.py
│ │ │ │ │ └── tokenization_mbart_fast.py
│ │ │ │ ├── mbart50/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_mbart50.py
│ │ │ │ │ └── tokenization_mbart50_fast.py
│ │ │ │ ├── megatron_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_megatron_bert.py
│ │ │ │ │ ├── convert_megatron_bert_checkpoint.py
│ │ │ │ │ └── modeling_megatron_bert.py
│ │ │ │ ├── megatron_gpt2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── checkpoint_reshaping_and_interoperability.py
│ │ │ │ │ └── convert_megatron_gpt2_checkpoint.py
│ │ │ │ ├── mgp_str/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mgp_str.py
│ │ │ │ │ ├── modeling_mgp_str.py
│ │ │ │ │ ├── processing_mgp_str.py
│ │ │ │ │ └── tokenization_mgp_str.py
│ │ │ │ ├── mistral/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mistral.py
│ │ │ │ │ ├── convert_mistral_weights_to_hf.py
│ │ │ │ │ ├── modeling_flax_mistral.py
│ │ │ │ │ ├── modeling_mistral.py
│ │ │ │ │ └── modeling_tf_mistral.py
│ │ │ │ ├── mixtral/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mixtral.py
│ │ │ │ │ ├── convert_mixtral_weights_to_hf.py
│ │ │ │ │ └── modeling_mixtral.py
│ │ │ │ ├── mluke/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── tokenization_mluke.py
│ │ │ │ ├── mobilebert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilebert.py
│ │ │ │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_mobilebert.py
│ │ │ │ │ ├── modeling_tf_mobilebert.py
│ │ │ │ │ ├── tokenization_mobilebert.py
│ │ │ │ │ └── tokenization_mobilebert_fast.py
│ │ │ │ ├── mobilenet_v1/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilenet_v1.py
│ │ │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilenet_v1.py
│ │ │ │ │ ├── image_processing_mobilenet_v1.py
│ │ │ │ │ └── modeling_mobilenet_v1.py
│ │ │ │ ├── mobilenet_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilenet_v2.py
│ │ │ │ │ ├── convert_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilenet_v2.py
│ │ │ │ │ ├── image_processing_mobilenet_v2.py
│ │ │ │ │ └── modeling_mobilenet_v2.py
│ │ │ │ ├── mobilevit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilevit.py
│ │ │ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_mobilevit.py
│ │ │ │ │ ├── image_processing_mobilevit.py
│ │ │ │ │ ├── modeling_mobilevit.py
│ │ │ │ │ └── modeling_tf_mobilevit.py
│ │ │ │ ├── mobilevitv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mobilevitv2.py
│ │ │ │ │ ├── convert_mlcvnets_to_pytorch.py
│ │ │ │ │ └── modeling_mobilevitv2.py
│ │ │ │ ├── mpnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mpnet.py
│ │ │ │ │ ├── modeling_mpnet.py
│ │ │ │ │ ├── modeling_tf_mpnet.py
│ │ │ │ │ ├── tokenization_mpnet.py
│ │ │ │ │ └── tokenization_mpnet_fast.py
│ │ │ │ ├── mpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mpt.py
│ │ │ │ │ └── modeling_mpt.py
│ │ │ │ ├── mra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mra.py
│ │ │ │ │ ├── convert_mra_pytorch_to_pytorch.py
│ │ │ │ │ └── modeling_mra.py
│ │ │ │ ├── mt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mt5.py
│ │ │ │ │ ├── modeling_flax_mt5.py
│ │ │ │ │ ├── modeling_mt5.py
│ │ │ │ │ └── modeling_tf_mt5.py
│ │ │ │ ├── musicgen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_musicgen.py
│ │ │ │ │ ├── convert_musicgen_transformers.py
│ │ │ │ │ ├── modeling_musicgen.py
│ │ │ │ │ └── processing_musicgen.py
│ │ │ │ ├── musicgen_melody/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_musicgen_melody.py
│ │ │ │ │ ├── convert_musicgen_melody_transformers.py
│ │ │ │ │ ├── feature_extraction_musicgen_melody.py
│ │ │ │ │ ├── modeling_musicgen_melody.py
│ │ │ │ │ └── processing_musicgen_melody.py
│ │ │ │ ├── mvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_mvp.py
│ │ │ │ │ ├── modeling_mvp.py
│ │ │ │ │ ├── tokenization_mvp.py
│ │ │ │ │ └── tokenization_mvp_fast.py
│ │ │ │ ├── nemotron/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nemotron.py
│ │ │ │ │ ├── convert_nemotron_nemo_to_hf.py
│ │ │ │ │ └── modeling_nemotron.py
│ │ │ │ ├── nllb/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── tokenization_nllb.py
│ │ │ │ │ └── tokenization_nllb_fast.py
│ │ │ │ ├── nllb_moe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nllb_moe.py
│ │ │ │ │ ├── convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_nllb_moe.py
│ │ │ │ ├── nougat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convert_nougat_to_hf.py
│ │ │ │ │ ├── image_processing_nougat.py
│ │ │ │ │ ├── processing_nougat.py
│ │ │ │ │ └── tokenization_nougat_fast.py
│ │ │ │ ├── nystromformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_nystromformer.py
│ │ │ │ │ ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_nystromformer.py
│ │ │ │ ├── olmo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_olmo.py
│ │ │ │ │ ├── convert_olmo_weights_to_hf.py
│ │ │ │ │ └── modeling_olmo.py
│ │ │ │ ├── oneformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_oneformer.py
│ │ │ │ │ ├── convert_to_hf_oneformer.py
│ │ │ │ │ ├── image_processing_oneformer.py
│ │ │ │ │ ├── modeling_oneformer.py
│ │ │ │ │ └── processing_oneformer.py
│ │ │ │ ├── openai/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_openai.py
│ │ │ │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_openai.py
│ │ │ │ │ ├── modeling_tf_openai.py
│ │ │ │ │ ├── tokenization_openai.py
│ │ │ │ │ └── tokenization_openai_fast.py
│ │ │ │ ├── opt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_opt.py
│ │ │ │ │ ├── convert_opt_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_opt.py
│ │ │ │ │ ├── modeling_opt.py
│ │ │ │ │ └── modeling_tf_opt.py
│ │ │ │ ├── owlv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_owlv2.py
│ │ │ │ │ ├── convert_owlv2_to_hf.py
│ │ │ │ │ ├── image_processing_owlv2.py
│ │ │ │ │ ├── modeling_owlv2.py
│ │ │ │ │ └── processing_owlv2.py
│ │ │ │ ├── owlvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_owlvit.py
│ │ │ │ │ ├── convert_owlvit_original_flax_to_hf.py
│ │ │ │ │ ├── feature_extraction_owlvit.py
│ │ │ │ │ ├── image_processing_owlvit.py
│ │ │ │ │ ├── modeling_owlvit.py
│ │ │ │ │ └── processing_owlvit.py
│ │ │ │ ├── paligemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_paligemma.py
│ │ │ │ │ ├── convert_paligemma_weights_to_hf.py
│ │ │ │ │ ├── modeling_paligemma.py
│ │ │ │ │ └── processing_paligemma.py
│ │ │ │ ├── patchtsmixer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_patchtsmixer.py
│ │ │ │ │ └── modeling_patchtsmixer.py
│ │ │ │ ├── patchtst/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_patchtst.py
│ │ │ │ │ └── modeling_patchtst.py
│ │ │ │ ├── pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pegasus.py
│ │ │ │ │ ├── convert_pegasus_tf_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_pegasus.py
│ │ │ │ │ ├── modeling_pegasus.py
│ │ │ │ │ ├── modeling_tf_pegasus.py
│ │ │ │ │ ├── tokenization_pegasus.py
│ │ │ │ │ └── tokenization_pegasus_fast.py
│ │ │ │ ├── pegasus_x/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pegasus_x.py
│ │ │ │ │ └── modeling_pegasus_x.py
│ │ │ │ ├── perceiver/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_perceiver.py
│ │ │ │ │ ├── convert_perceiver_haiku_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_perceiver.py
│ │ │ │ │ ├── image_processing_perceiver.py
│ │ │ │ │ ├── modeling_perceiver.py
│ │ │ │ │ └── tokenization_perceiver.py
│ │ │ │ ├── persimmon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_persimmon.py
│ │ │ │ │ ├── convert_persimmon_weights_to_hf.py
│ │ │ │ │ └── modeling_persimmon.py
│ │ │ │ ├── phi/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_phi.py
│ │ │ │ │ ├── convert_phi_weights_to_hf.py
│ │ │ │ │ └── modeling_phi.py
│ │ │ │ ├── phi3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_phi3.py
│ │ │ │ │ └── modeling_phi3.py
│ │ │ │ ├── phobert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_phobert.py
│ │ │ │ ├── pix2struct/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pix2struct.py
│ │ │ │ │ ├── convert_pix2struct_original_pytorch_to_hf.py
│ │ │ │ │ ├── image_processing_pix2struct.py
│ │ │ │ │ ├── modeling_pix2struct.py
│ │ │ │ │ └── processing_pix2struct.py
│ │ │ │ ├── plbart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_plbart.py
│ │ │ │ │ ├── convert_plbart_original_checkpoint_to_torch.py
│ │ │ │ │ ├── modeling_plbart.py
│ │ │ │ │ └── tokenization_plbart.py
│ │ │ │ ├── poolformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_poolformer.py
│ │ │ │ │ ├── convert_poolformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_poolformer.py
│ │ │ │ │ ├── image_processing_poolformer.py
│ │ │ │ │ └── modeling_poolformer.py
│ │ │ │ ├── pop2piano/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pop2piano.py
│ │ │ │ │ ├── convert_pop2piano_weights_to_hf.py
│ │ │ │ │ ├── feature_extraction_pop2piano.py
│ │ │ │ │ ├── modeling_pop2piano.py
│ │ │ │ │ ├── processing_pop2piano.py
│ │ │ │ │ └── tokenization_pop2piano.py
│ │ │ │ ├── prophetnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_prophetnet.py
│ │ │ │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_prophetnet.py
│ │ │ │ │ └── tokenization_prophetnet.py
│ │ │ │ ├── pvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pvt.py
│ │ │ │ │ ├── convert_pvt_to_pytorch.py
│ │ │ │ │ ├── image_processing_pvt.py
│ │ │ │ │ └── modeling_pvt.py
│ │ │ │ ├── pvt_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_pvt_v2.py
│ │ │ │ │ ├── convert_pvt_v2_to_pytorch.py
│ │ │ │ │ └── modeling_pvt_v2.py
│ │ │ │ ├── qwen2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2.py
│ │ │ │ │ ├── modeling_qwen2.py
│ │ │ │ │ ├── tokenization_qwen2.py
│ │ │ │ │ └── tokenization_qwen2_fast.py
│ │ │ │ ├── qwen2_audio/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2_audio.py
│ │ │ │ │ ├── modeling_qwen2_audio.py
│ │ │ │ │ └── processing_qwen2_audio.py
│ │ │ │ ├── qwen2_moe/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_qwen2_moe.py
│ │ │ │ │ └── modeling_qwen2_moe.py
│ │ │ │ ├── rag/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rag.py
│ │ │ │ │ ├── modeling_rag.py
│ │ │ │ │ ├── modeling_tf_rag.py
│ │ │ │ │ ├── retrieval_rag.py
│ │ │ │ │ └── tokenization_rag.py
│ │ │ │ ├── recurrent_gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_recurrent_gemma.py
│ │ │ │ │ ├── convert_recurrent_gemma_to_hf.py
│ │ │ │ │ └── modeling_recurrent_gemma.py
│ │ │ │ ├── reformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_reformer.py
│ │ │ │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_reformer.py
│ │ │ │ │ ├── tokenization_reformer.py
│ │ │ │ │ └── tokenization_reformer_fast.py
│ │ │ │ ├── regnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_regnet.py
│ │ │ │ │ ├── convert_regnet_seer_10b_to_pytorch.py
│ │ │ │ │ ├── convert_regnet_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_regnet.py
│ │ │ │ │ ├── modeling_regnet.py
│ │ │ │ │ └── modeling_tf_regnet.py
│ │ │ │ ├── rembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rembert.py
│ │ │ │ │ ├── convert_rembert_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_rembert.py
│ │ │ │ │ ├── modeling_tf_rembert.py
│ │ │ │ │ ├── tokenization_rembert.py
│ │ │ │ │ └── tokenization_rembert_fast.py
│ │ │ │ ├── resnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_resnet.py
│ │ │ │ │ ├── convert_resnet_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_resnet.py
│ │ │ │ │ ├── modeling_resnet.py
│ │ │ │ │ └── modeling_tf_resnet.py
│ │ │ │ ├── roberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roberta.py
│ │ │ │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roberta.py
│ │ │ │ │ ├── modeling_roberta.py
│ │ │ │ │ ├── modeling_tf_roberta.py
│ │ │ │ │ ├── tokenization_roberta.py
│ │ │ │ │ └── tokenization_roberta_fast.py
│ │ │ │ ├── roberta_prelayernorm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roberta_prelayernorm.py
│ │ │ │ │ ├── convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roberta_prelayernorm.py
│ │ │ │ │ ├── modeling_roberta_prelayernorm.py
│ │ │ │ │ └── modeling_tf_roberta_prelayernorm.py
│ │ │ │ ├── roc_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roc_bert.py
│ │ │ │ │ ├── modeling_roc_bert.py
│ │ │ │ │ └── tokenization_roc_bert.py
│ │ │ │ ├── roformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_roformer.py
│ │ │ │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_roformer.py
│ │ │ │ │ ├── modeling_roformer.py
│ │ │ │ │ ├── modeling_tf_roformer.py
│ │ │ │ │ ├── tokenization_roformer.py
│ │ │ │ │ ├── tokenization_roformer_fast.py
│ │ │ │ │ └── tokenization_utils.py
│ │ │ │ ├── rt_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rt_detr.py
│ │ │ │ │ ├── configuration_rt_detr_resnet.py
│ │ │ │ │ ├── convert_rt_detr_original_pytorch_checkpoint_to_hf.py
│ │ │ │ │ ├── image_processing_rt_detr.py
│ │ │ │ │ ├── modeling_rt_detr.py
│ │ │ │ │ └── modeling_rt_detr_resnet.py
│ │ │ │ ├── rwkv/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_rwkv.py
│ │ │ │ │ ├── convert_rwkv_checkpoint_to_hf.py
│ │ │ │ │ └── modeling_rwkv.py
│ │ │ │ ├── sam/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sam.py
│ │ │ │ │ ├── convert_sam_to_hf.py
│ │ │ │ │ ├── image_processing_sam.py
│ │ │ │ │ ├── modeling_sam.py
│ │ │ │ │ ├── modeling_tf_sam.py
│ │ │ │ │ └── processing_sam.py
│ │ │ │ ├── seamless_m4t/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seamless_m4t.py
│ │ │ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ │ │ ├── feature_extraction_seamless_m4t.py
│ │ │ │ │ ├── modeling_seamless_m4t.py
│ │ │ │ │ ├── processing_seamless_m4t.py
│ │ │ │ │ ├── tokenization_seamless_m4t.py
│ │ │ │ │ └── tokenization_seamless_m4t_fast.py
│ │ │ │ ├── seamless_m4t_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seamless_m4t_v2.py
│ │ │ │ │ ├── convert_fairseq2_to_hf.py
│ │ │ │ │ └── modeling_seamless_m4t_v2.py
│ │ │ │ ├── segformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_segformer.py
│ │ │ │ │ ├── convert_segformer_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_segformer.py
│ │ │ │ │ ├── image_processing_segformer.py
│ │ │ │ │ ├── modeling_segformer.py
│ │ │ │ │ └── modeling_tf_segformer.py
│ │ │ │ ├── seggpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_seggpt.py
│ │ │ │ │ ├── convert_seggpt_to_hf.py
│ │ │ │ │ ├── image_processing_seggpt.py
│ │ │ │ │ └── modeling_seggpt.py
│ │ │ │ ├── sew/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sew.py
│ │ │ │ │ ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_sew.py
│ │ │ │ ├── sew_d/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_sew_d.py
│ │ │ │ │ ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_sew_d.py
│ │ │ │ ├── siglip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_siglip.py
│ │ │ │ │ ├── convert_siglip_to_hf.py
│ │ │ │ │ ├── image_processing_siglip.py
│ │ │ │ │ ├── modeling_siglip.py
│ │ │ │ │ ├── processing_siglip.py
│ │ │ │ │ └── tokenization_siglip.py
│ │ │ │ ├── speech_encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speech_encoder_decoder.py
│ │ │ │ │ ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ │ │ ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
│ │ │ │ │ ├── modeling_flax_speech_encoder_decoder.py
│ │ │ │ │ └── modeling_speech_encoder_decoder.py
│ │ │ │ ├── speech_to_text/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speech_to_text.py
│ │ │ │ │ ├── convert_s2t_fairseq_to_tfms.py
│ │ │ │ │ ├── feature_extraction_speech_to_text.py
│ │ │ │ │ ├── modeling_speech_to_text.py
│ │ │ │ │ ├── modeling_tf_speech_to_text.py
│ │ │ │ │ ├── processing_speech_to_text.py
│ │ │ │ │ └── tokenization_speech_to_text.py
│ │ │ │ ├── speecht5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_speecht5.py
│ │ │ │ │ ├── convert_hifigan.py
│ │ │ │ │ ├── convert_speecht5_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_speecht5.py
│ │ │ │ │ ├── modeling_speecht5.py
│ │ │ │ │ ├── number_normalizer.py
│ │ │ │ │ ├── processing_speecht5.py
│ │ │ │ │ └── tokenization_speecht5.py
│ │ │ │ ├── splinter/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_splinter.py
│ │ │ │ │ ├── modeling_splinter.py
│ │ │ │ │ ├── tokenization_splinter.py
│ │ │ │ │ └── tokenization_splinter_fast.py
│ │ │ │ ├── squeezebert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_squeezebert.py
│ │ │ │ │ ├── modeling_squeezebert.py
│ │ │ │ │ ├── tokenization_squeezebert.py
│ │ │ │ │ └── tokenization_squeezebert_fast.py
│ │ │ │ ├── stablelm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_stablelm.py
│ │ │ │ │ └── modeling_stablelm.py
│ │ │ │ ├── starcoder2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_starcoder2.py
│ │ │ │ │ └── modeling_starcoder2.py
│ │ │ │ ├── superpoint/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_superpoint.py
│ │ │ │ │ ├── convert_superpoint_to_pytorch.py
│ │ │ │ │ ├── image_processing_superpoint.py
│ │ │ │ │ └── modeling_superpoint.py
│ │ │ │ ├── swiftformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swiftformer.py
│ │ │ │ │ ├── convert_swiftformer_original_to_hf.py
│ │ │ │ │ ├── modeling_swiftformer.py
│ │ │ │ │ └── modeling_tf_swiftformer.py
│ │ │ │ ├── swin/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swin.py
│ │ │ │ │ ├── convert_swin_simmim_to_pytorch.py
│ │ │ │ │ ├── convert_swin_timm_to_pytorch.py
│ │ │ │ │ ├── modeling_swin.py
│ │ │ │ │ └── modeling_tf_swin.py
│ │ │ │ ├── swin2sr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swin2sr.py
│ │ │ │ │ ├── convert_swin2sr_original_to_pytorch.py
│ │ │ │ │ ├── image_processing_swin2sr.py
│ │ │ │ │ └── modeling_swin2sr.py
│ │ │ │ ├── swinv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_swinv2.py
│ │ │ │ │ ├── convert_swinv2_timm_to_pytorch.py
│ │ │ │ │ └── modeling_swinv2.py
│ │ │ │ ├── switch_transformers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_switch_transformers.py
│ │ │ │ │ ├── convert_big_switch.py
│ │ │ │ │ ├── convert_switch_transformers_original_flax_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_switch_transformers.py
│ │ │ │ ├── t5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_t5.py
│ │ │ │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_t5x_checkpoint_to_flax.py
│ │ │ │ │ ├── convert_t5x_checkpoint_to_pytorch.py
│ │ │ │ │ ├── download_from_gcp.sh
│ │ │ │ │ ├── modeling_flax_t5.py
│ │ │ │ │ ├── modeling_t5.py
│ │ │ │ │ ├── modeling_tf_t5.py
│ │ │ │ │ ├── tokenization_t5.py
│ │ │ │ │ └── tokenization_t5_fast.py
│ │ │ │ ├── table_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_table_transformer.py
│ │ │ │ │ ├── convert_table_transformer_to_hf.py
│ │ │ │ │ ├── convert_table_transformer_to_hf_no_timm.py
│ │ │ │ │ └── modeling_table_transformer.py
│ │ │ │ ├── tapas/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_tapas.py
│ │ │ │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tapas.py
│ │ │ │ │ ├── modeling_tf_tapas.py
│ │ │ │ │ └── tokenization_tapas.py
│ │ │ │ ├── time_series_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_time_series_transformer.py
│ │ │ │ │ └── modeling_time_series_transformer.py
│ │ │ │ ├── timesformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_timesformer.py
│ │ │ │ │ ├── convert_timesformer_to_pytorch.py
│ │ │ │ │ └── modeling_timesformer.py
│ │ │ │ ├── timm_backbone/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_timm_backbone.py
│ │ │ │ │ └── modeling_timm_backbone.py
│ │ │ │ ├── trocr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_trocr.py
│ │ │ │ │ ├── convert_trocr_unilm_to_pytorch.py
│ │ │ │ │ ├── modeling_trocr.py
│ │ │ │ │ └── processing_trocr.py
│ │ │ │ ├── tvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_tvp.py
│ │ │ │ │ ├── image_processing_tvp.py
│ │ │ │ │ ├── modeling_tvp.py
│ │ │ │ │ └── processing_tvp.py
│ │ │ │ ├── udop/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_udop.py
│ │ │ │ │ ├── convert_udop_to_hf.py
│ │ │ │ │ ├── modeling_udop.py
│ │ │ │ │ ├── processing_udop.py
│ │ │ │ │ ├── tokenization_udop.py
│ │ │ │ │ └── tokenization_udop_fast.py
│ │ │ │ ├── umt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_umt5.py
│ │ │ │ │ ├── convert_umt5_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_umt5.py
│ │ │ │ ├── unispeech/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_unispeech.py
│ │ │ │ │ ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_unispeech.py
│ │ │ │ ├── unispeech_sat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_unispeech_sat.py
│ │ │ │ │ ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_unispeech_sat.py
│ │ │ │ ├── univnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_univnet.py
│ │ │ │ │ ├── convert_univnet.py
│ │ │ │ │ ├── feature_extraction_univnet.py
│ │ │ │ │ └── modeling_univnet.py
│ │ │ │ ├── upernet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_upernet.py
│ │ │ │ │ ├── convert_convnext_upernet_to_pytorch.py
│ │ │ │ │ ├── convert_swin_upernet_to_pytorch.py
│ │ │ │ │ └── modeling_upernet.py
│ │ │ │ ├── video_llava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_video_llava.py
│ │ │ │ │ ├── convert_video_llava_weights_to_hf.py
│ │ │ │ │ ├── image_processing_video_llava.py
│ │ │ │ │ ├── modeling_video_llava.py
│ │ │ │ │ └── processing_video_llava.py
│ │ │ │ ├── videomae/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_videomae.py
│ │ │ │ │ ├── convert_videomae_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_videomae.py
│ │ │ │ │ ├── image_processing_videomae.py
│ │ │ │ │ └── modeling_videomae.py
│ │ │ │ ├── vilt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vilt.py
│ │ │ │ │ ├── convert_vilt_original_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_vilt.py
│ │ │ │ │ ├── image_processing_vilt.py
│ │ │ │ │ ├── modeling_vilt.py
│ │ │ │ │ └── processing_vilt.py
│ │ │ │ ├── vipllava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vipllava.py
│ │ │ │ │ ├── convert_vipllava_weights_to_hf.py
│ │ │ │ │ └── modeling_vipllava.py
│ │ │ │ ├── vision_encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vision_encoder_decoder.py
│ │ │ │ │ ├── modeling_flax_vision_encoder_decoder.py
│ │ │ │ │ ├── modeling_tf_vision_encoder_decoder.py
│ │ │ │ │ └── modeling_vision_encoder_decoder.py
│ │ │ │ ├── vision_text_dual_encoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_flax_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_tf_vision_text_dual_encoder.py
│ │ │ │ │ ├── modeling_vision_text_dual_encoder.py
│ │ │ │ │ └── processing_vision_text_dual_encoder.py
│ │ │ │ ├── visual_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_visual_bert.py
│ │ │ │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_visual_bert.py
│ │ │ │ ├── vit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit.py
│ │ │ │ │ ├── convert_dino_to_pytorch.py
│ │ │ │ │ ├── convert_vit_timm_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_vit.py
│ │ │ │ │ ├── image_processing_vit.py
│ │ │ │ │ ├── image_processing_vit_fast.py
│ │ │ │ │ ├── modeling_flax_vit.py
│ │ │ │ │ ├── modeling_tf_vit.py
│ │ │ │ │ └── modeling_vit.py
│ │ │ │ ├── vit_mae/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit_mae.py
│ │ │ │ │ ├── convert_vit_mae_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_vit_mae.py
│ │ │ │ │ └── modeling_vit_mae.py
│ │ │ │ ├── vit_msn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vit_msn.py
│ │ │ │ │ ├── convert_msn_to_pytorch.py
│ │ │ │ │ └── modeling_vit_msn.py
│ │ │ │ ├── vitdet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vitdet.py
│ │ │ │ │ └── modeling_vitdet.py
│ │ │ │ ├── vitmatte/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vitmatte.py
│ │ │ │ │ ├── convert_vitmatte_to_hf.py
│ │ │ │ │ ├── image_processing_vitmatte.py
│ │ │ │ │ └── modeling_vitmatte.py
│ │ │ │ ├── vits/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vits.py
│ │ │ │ │ ├── convert_original_checkpoint.py
│ │ │ │ │ ├── modeling_vits.py
│ │ │ │ │ └── tokenization_vits.py
│ │ │ │ ├── vivit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_vivit.py
│ │ │ │ │ ├── convert_vivit_flax_to_pytorch.py
│ │ │ │ │ ├── image_processing_vivit.py
│ │ │ │ │ └── modeling_vivit.py
│ │ │ │ ├── wav2vec2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2.py
│ │ │ │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_wav2vec2.py
│ │ │ │ │ ├── modeling_flax_wav2vec2.py
│ │ │ │ │ ├── modeling_tf_wav2vec2.py
│ │ │ │ │ ├── modeling_wav2vec2.py
│ │ │ │ │ ├── processing_wav2vec2.py
│ │ │ │ │ └── tokenization_wav2vec2.py
│ │ │ │ ├── wav2vec2_bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2_bert.py
│ │ │ │ │ ├── convert_wav2vec2_seamless_checkpoint.py
│ │ │ │ │ ├── modeling_wav2vec2_bert.py
│ │ │ │ │ └── processing_wav2vec2_bert.py
│ │ │ │ ├── wav2vec2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wav2vec2_conformer.py
│ │ │ │ │ ├── convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_wav2vec2_conformer.py
│ │ │ │ ├── wav2vec2_phoneme/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── tokenization_wav2vec2_phoneme.py
│ │ │ │ ├── wav2vec2_with_lm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── processing_wav2vec2_with_lm.py
│ │ │ │ ├── wavlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_wavlm.py
│ │ │ │ │ ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_wavlm.py
│ │ │ │ ├── whisper/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_whisper.py
│ │ │ │ │ ├── convert_openai_to_hf.py
│ │ │ │ │ ├── english_normalizer.py
│ │ │ │ │ ├── feature_extraction_whisper.py
│ │ │ │ │ ├── generation_whisper.py
│ │ │ │ │ ├── modeling_flax_whisper.py
│ │ │ │ │ ├── modeling_tf_whisper.py
│ │ │ │ │ ├── modeling_whisper.py
│ │ │ │ │ ├── processing_whisper.py
│ │ │ │ │ ├── tokenization_whisper.py
│ │ │ │ │ └── tokenization_whisper_fast.py
│ │ │ │ ├── x_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_x_clip.py
│ │ │ │ │ ├── convert_x_clip_original_pytorch_to_hf.py
│ │ │ │ │ ├── modeling_x_clip.py
│ │ │ │ │ └── processing_x_clip.py
│ │ │ │ ├── xglm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xglm.py
│ │ │ │ │ ├── convert_xglm_original_ckpt_to_trfms.py
│ │ │ │ │ ├── modeling_flax_xglm.py
│ │ │ │ │ ├── modeling_tf_xglm.py
│ │ │ │ │ ├── modeling_xglm.py
│ │ │ │ │ ├── tokenization_xglm.py
│ │ │ │ │ └── tokenization_xglm_fast.py
│ │ │ │ ├── xlm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm.py
│ │ │ │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_xlm.py
│ │ │ │ │ ├── modeling_xlm.py
│ │ │ │ │ └── tokenization_xlm.py
│ │ │ │ ├── xlm_roberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_roberta.py
│ │ │ │ │ ├── modeling_flax_xlm_roberta.py
│ │ │ │ │ ├── modeling_tf_xlm_roberta.py
│ │ │ │ │ ├── modeling_xlm_roberta.py
│ │ │ │ │ ├── tokenization_xlm_roberta.py
│ │ │ │ │ └── tokenization_xlm_roberta_fast.py
│ │ │ │ ├── xlm_roberta_xl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlm_roberta_xl.py
│ │ │ │ │ ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_xlm_roberta_xl.py
│ │ │ │ ├── xlnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xlnet.py
│ │ │ │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│ │ │ │ │ ├── modeling_tf_xlnet.py
│ │ │ │ │ ├── modeling_xlnet.py
│ │ │ │ │ ├── tokenization_xlnet.py
│ │ │ │ │ └── tokenization_xlnet_fast.py
│ │ │ │ ├── xmod/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_xmod.py
│ │ │ │ │ ├── convert_xmod_original_pytorch_checkpoint_to_pytorch.py
│ │ │ │ │ └── modeling_xmod.py
│ │ │ │ ├── yolos/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_yolos.py
│ │ │ │ │ ├── convert_yolos_to_pytorch.py
│ │ │ │ │ ├── feature_extraction_yolos.py
│ │ │ │ │ ├── image_processing_yolos.py
│ │ │ │ │ └── modeling_yolos.py
│ │ │ │ ├── yoso/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── configuration_yoso.py
│ │ │ │ │ ├── convert_yoso_pytorch_to_pytorch.py
│ │ │ │ │ └── modeling_yoso.py
│ │ │ │ └── zoedepth/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_zoedepth.py
│ │ │ │ ├── convert_zoedepth_to_hf.py
│ │ │ │ ├── image_processing_zoedepth.py
│ │ │ │ └── modeling_zoedepth.py
│ │ │ ├── onnx/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __main__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── convert.py
│ │ │ │ ├── features.py
│ │ │ │ └── utils.py
│ │ │ ├── optimization.py
│ │ │ ├── optimization_tf.py
│ │ │ ├── pipelines/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── audio_classification.py
│ │ │ │ ├── audio_utils.py
│ │ │ │ ├── automatic_speech_recognition.py
│ │ │ │ ├── base.py
│ │ │ │ ├── depth_estimation.py
│ │ │ │ ├── document_question_answering.py
│ │ │ │ ├── feature_extraction.py
│ │ │ │ ├── fill_mask.py
│ │ │ │ ├── image_classification.py
│ │ │ │ ├── image_feature_extraction.py
│ │ │ │ ├── image_segmentation.py
│ │ │ │ ├── image_to_image.py
│ │ │ │ ├── image_to_text.py
│ │ │ │ ├── mask_generation.py
│ │ │ │ ├── object_detection.py
│ │ │ │ ├── pt_utils.py
│ │ │ │ ├── question_answering.py
│ │ │ │ ├── table_question_answering.py
│ │ │ │ ├── text2text_generation.py
│ │ │ │ ├── text_classification.py
│ │ │ │ ├── text_generation.py
│ │ │ │ ├── text_to_audio.py
│ │ │ │ ├── token_classification.py
│ │ │ │ ├── video_classification.py
│ │ │ │ ├── visual_question_answering.py
│ │ │ │ ├── zero_shot_audio_classification.py
│ │ │ │ ├── zero_shot_classification.py
│ │ │ │ ├── zero_shot_image_classification.py
│ │ │ │ └── zero_shot_object_detection.py
│ │ │ ├── processing_utils.py
│ │ │ ├── pytorch_utils.py
│ │ │ ├── quantizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto.py
│ │ │ │ ├── base.py
│ │ │ │ ├── quantizer_aqlm.py
│ │ │ │ ├── quantizer_awq.py
│ │ │ │ ├── quantizer_bnb_4bit.py
│ │ │ │ ├── quantizer_bnb_8bit.py
│ │ │ │ ├── quantizer_eetq.py
│ │ │ │ ├── quantizer_fbgemm_fp8.py
│ │ │ │ ├── quantizer_gptq.py
│ │ │ │ ├── quantizer_hqq.py
│ │ │ │ ├── quantizer_quanto.py
│ │ │ │ ├── quantizer_torchao.py
│ │ │ │ └── quantizers_utils.py
│ │ │ ├── safetensors_conversion.py
│ │ │ ├── sagemaker/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer_sm.py
│ │ │ │ └── training_args_sm.py
│ │ │ ├── testing_utils.py
│ │ │ ├── tf_utils.py
│ │ │ ├── time_series_utils.py
│ │ │ ├── tokenization_utils.py
│ │ │ ├── tokenization_utils_base.py
│ │ │ ├── tokenization_utils_fast.py
│ │ │ ├── trainer.py
│ │ │ ├── trainer_callback.py
│ │ │ ├── trainer_pt_utils.py
│ │ │ ├── trainer_seq2seq.py
│ │ │ ├── trainer_utils.py
│ │ │ ├── training_args.py
│ │ │ ├── training_args_seq2seq.py
│ │ │ ├── training_args_tf.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── backbone_utils.py
│ │ │ ├── bitsandbytes.py
│ │ │ ├── chat_template_utils.py
│ │ │ ├── constants.py
│ │ │ ├── deprecation.py
│ │ │ ├── doc.py
│ │ │ ├── dummy_detectron2_objects.py
│ │ │ ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
│ │ │ ├── dummy_flax_objects.py
│ │ │ ├── dummy_keras_nlp_objects.py
│ │ │ ├── dummy_music_objects.py
│ │ │ ├── dummy_pt_objects.py
│ │ │ ├── dummy_sentencepiece_and_tokenizers_objects.py
│ │ │ ├── dummy_sentencepiece_objects.py
│ │ │ ├── dummy_speech_objects.py
│ │ │ ├── dummy_tensorflow_text_objects.py
│ │ │ ├── dummy_tf_objects.py
│ │ │ ├── dummy_tokenizers_objects.py
│ │ │ ├── dummy_torchaudio_objects.py
│ │ │ ├── dummy_torchvision_objects.py
│ │ │ ├── dummy_vision_objects.py
│ │ │ ├── fx.py
│ │ │ ├── generic.py
│ │ │ ├── hp_naming.py
│ │ │ ├── hub.py
│ │ │ ├── import_utils.py
│ │ │ ├── logging.py
│ │ │ ├── model_parallel_utils.py
│ │ │ ├── notebook.py
│ │ │ ├── peft_utils.py
│ │ │ ├── quantization_config.py
│ │ │ ├── sentencepiece_model_pb2.py
│ │ │ ├── sentencepiece_model_pb2_new.py
│ │ │ └── versions.py
│ │ ├── templates/
│ │ │ ├── adding_a_missing_tokenization_test/
│ │ │ │ ├── README.md
│ │ │ │ ├── cookiecutter-template-{{cookiecutter.modelname}}/
│ │ │ │ │ └── test_tokenization_{{cookiecutter.lowercase_modelname}}.py
│ │ │ │ └── cookiecutter.json
│ │ │ ├── adding_a_new_example_script/
│ │ │ │ ├── README.md
│ │ │ │ ├── cookiecutter.json
│ │ │ │ └── {{cookiecutter.directory_name}}/
│ │ │ │ └── run_{{cookiecutter.example_shortcut}}.py
│ │ │ └── adding_a_new_model/
│ │ │ ├── ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md
│ │ │ ├── README.md
│ │ │ └── open_model_proposals/
│ │ │ ├── ADD_BIG_BIRD.md
│ │ │ └── README.md
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── agents/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_agent_types.py
│ │ │ │ ├── test_agents.py
│ │ │ │ ├── test_document_question_answering.py
│ │ │ │ ├── test_final_answer.py
│ │ │ │ ├── test_image_question_answering.py
│ │ │ │ ├── test_python_interpreter.py
│ │ │ │ ├── test_speech_to_text.py
│ │ │ │ ├── test_text_to_speech.py
│ │ │ │ ├── test_tools_common.py
│ │ │ │ └── test_translation.py
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_benchmark.py
│ │ │ │ └── test_benchmark_tf.py
│ │ │ ├── bettertransformer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_integration.py
│ │ │ ├── deepspeed/
│ │ │ │ ├── ds_config_zero2.json
│ │ │ │ ├── ds_config_zero3.json
│ │ │ │ ├── test_deepspeed.py
│ │ │ │ ├── test_model_zoo.py
│ │ │ │ └── vit_feature_extractor.json
│ │ │ ├── extended/
│ │ │ │ └── test_trainer_ext.py
│ │ │ ├── fixtures/
│ │ │ │ ├── add_distilbert_like_config.json
│ │ │ │ ├── dummy-config.json
│ │ │ │ ├── dummy_feature_extractor_config.json
│ │ │ │ ├── empty.txt
│ │ │ │ ├── input.txt
│ │ │ │ ├── merges.txt
│ │ │ │ ├── preprocessor_config.json
│ │ │ │ ├── sample_text.txt
│ │ │ │ ├── sample_text_no_unicode.txt
│ │ │ │ ├── spiece.model
│ │ │ │ ├── test_entity_vocab.json
│ │ │ │ ├── test_sentencepiece.model
│ │ │ │ ├── test_sentencepiece_bpe.model
│ │ │ │ ├── test_sentencepiece_bpe_char.model
│ │ │ │ ├── test_sentencepiece_no_bos.model
│ │ │ │ ├── test_sentencepiece_with_bytefallback.model
│ │ │ │ ├── tests_samples/
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── COCO/
│ │ │ │ │ │ ├── coco_annotations.txt
│ │ │ │ │ │ └── coco_panoptic_annotations.txt
│ │ │ │ │ ├── GermEval/
│ │ │ │ │ │ ├── dev.txt
│ │ │ │ │ │ ├── labels.txt
│ │ │ │ │ │ └── train.txt
│ │ │ │ │ ├── MRPC/
│ │ │ │ │ │ ├── dev.csv
│ │ │ │ │ │ ├── dev.tsv
│ │ │ │ │ │ ├── train.csv
│ │ │ │ │ │ └── train.tsv
│ │ │ │ │ ├── SQUAD/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── STS-B/
│ │ │ │ │ │ ├── dev.tsv
│ │ │ │ │ │ └── train.tsv
│ │ │ │ │ ├── conll/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── swag/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── wiki_text/
│ │ │ │ │ │ └── wiki_00
│ │ │ │ │ ├── wmt16/
│ │ │ │ │ │ └── sample.json
│ │ │ │ │ ├── wmt_en_ro/
│ │ │ │ │ │ ├── test.json
│ │ │ │ │ │ ├── train.json
│ │ │ │ │ │ └── val.json
│ │ │ │ │ └── xsum/
│ │ │ │ │ └── sample.json
│ │ │ │ ├── vocab.json
│ │ │ │ └── vocab.txt
│ │ │ ├── fsdp/
│ │ │ │ └── test_fsdp.py
│ │ │ ├── generation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_beam_constraints.py
│ │ │ │ ├── test_beam_search.py
│ │ │ │ ├── test_configuration_utils.py
│ │ │ │ ├── test_flax_logits_process.py
│ │ │ │ ├── test_flax_utils.py
│ │ │ │ ├── test_framework_agnostic.py
│ │ │ │ ├── test_logits_process.py
│ │ │ │ ├── test_stopping_criteria.py
│ │ │ │ ├── test_streamers.py
│ │ │ │ ├── test_tf_logits_process.py
│ │ │ │ ├── test_tf_utils.py
│ │ │ │ └── test_utils.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── albert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_albert.py
│ │ │ │ │ ├── test_modeling_flax_albert.py
│ │ │ │ │ ├── test_modeling_tf_albert.py
│ │ │ │ │ └── test_tokenization_albert.py
│ │ │ │ ├── align/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_align.py
│ │ │ │ │ └── test_processor_align.py
│ │ │ │ ├── altclip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_altclip.py
│ │ │ │ ├── audio_spectrogram_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_audio_spectrogram_transformer.py
│ │ │ │ │ └── test_modeling_audio_spectrogram_transformer.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_configuration_auto.py
│ │ │ │ │ ├── test_feature_extraction_auto.py
│ │ │ │ │ ├── test_image_processing_auto.py
│ │ │ │ │ ├── test_modeling_auto.py
│ │ │ │ │ ├── test_modeling_flax_auto.py
│ │ │ │ │ ├── test_modeling_tf_auto.py
│ │ │ │ │ ├── test_modeling_tf_pytorch.py
│ │ │ │ │ ├── test_processor_auto.py
│ │ │ │ │ └── test_tokenization_auto.py
│ │ │ │ ├── autoformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_autoformer.py
│ │ │ │ ├── bark/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bark.py
│ │ │ │ │ └── test_processor_bark.py
│ │ │ │ ├── bart/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bart.py
│ │ │ │ │ ├── test_modeling_flax_bart.py
│ │ │ │ │ ├── test_modeling_tf_bart.py
│ │ │ │ │ └── test_tokenization_bart.py
│ │ │ │ ├── barthez/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_barthez.py
│ │ │ │ ├── bartpho/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bartpho.py
│ │ │ │ ├── beit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_beit.py
│ │ │ │ │ ├── test_modeling_beit.py
│ │ │ │ │ └── test_modeling_flax_beit.py
│ │ │ │ ├── bert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bert.py
│ │ │ │ │ ├── test_modeling_flax_bert.py
│ │ │ │ │ ├── test_modeling_tf_bert.py
│ │ │ │ │ ├── test_tokenization_bert.py
│ │ │ │ │ └── test_tokenization_bert_tf.py
│ │ │ │ ├── bert_generation/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bert_generation.py
│ │ │ │ │ └── test_tokenization_bert_generation.py
│ │ │ │ ├── bert_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bert_japanese.py
│ │ │ │ ├── bertweet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_bertweet.py
│ │ │ │ ├── big_bird/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_big_bird.py
│ │ │ │ │ ├── test_modeling_flax_big_bird.py
│ │ │ │ │ └── test_tokenization_big_bird.py
│ │ │ │ ├── bigbird_pegasus/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bigbird_pegasus.py
│ │ │ │ ├── biogpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_biogpt.py
│ │ │ │ │ └── test_tokenization_biogpt.py
│ │ │ │ ├── bit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bit.py
│ │ │ │ ├── blenderbot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blenderbot.py
│ │ │ │ │ ├── test_modeling_flax_blenderbot.py
│ │ │ │ │ ├── test_modeling_tf_blenderbot.py
│ │ │ │ │ └── test_tokenization_blenderbot.py
│ │ │ │ ├── blenderbot_small/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blenderbot_small.py
│ │ │ │ │ ├── test_modeling_flax_blenderbot_small.py
│ │ │ │ │ ├── test_modeling_tf_blenderbot_small.py
│ │ │ │ │ └── test_tokenization_blenderbot_small.py
│ │ │ │ ├── blip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_blip.py
│ │ │ │ │ ├── test_modeling_blip.py
│ │ │ │ │ ├── test_modeling_blip_text.py
│ │ │ │ │ ├── test_modeling_tf_blip.py
│ │ │ │ │ ├── test_modeling_tf_blip_text.py
│ │ │ │ │ └── test_processor_blip.py
│ │ │ │ ├── blip_2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_blip_2.py
│ │ │ │ │ └── test_processor_blip_2.py
│ │ │ │ ├── bloom/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_bloom.py
│ │ │ │ │ ├── test_modeling_flax_bloom.py
│ │ │ │ │ └── test_tokenization_bloom.py
│ │ │ │ ├── bridgetower/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_bridgetower.py
│ │ │ │ │ └── test_modeling_bridgetower.py
│ │ │ │ ├── bros/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_bros.py
│ │ │ │ ├── byt5/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_byt5.py
│ │ │ │ ├── camembert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_camembert.py
│ │ │ │ │ ├── test_modeling_tf_camembert.py
│ │ │ │ │ └── test_tokenization_camembert.py
│ │ │ │ ├── canine/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_canine.py
│ │ │ │ │ └── test_tokenization_canine.py
│ │ │ │ ├── chameleon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_chameleon.py
│ │ │ │ │ └── test_modeling_chameleon.py
│ │ │ │ ├── chinese_clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_chinese_clip.py
│ │ │ │ │ ├── test_modeling_chinese_clip.py
│ │ │ │ │ └── test_processor_chinese_clip.py
│ │ │ │ ├── clap/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_clap.py
│ │ │ │ │ ├── test_modeling_clap.py
│ │ │ │ │ └── test_processor_clap.py
│ │ │ │ ├── clip/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_clip.py
│ │ │ │ │ ├── test_modeling_clip.py
│ │ │ │ │ ├── test_modeling_flax_clip.py
│ │ │ │ │ ├── test_modeling_tf_clip.py
│ │ │ │ │ ├── test_processor_clip.py
│ │ │ │ │ └── test_tokenization_clip.py
│ │ │ │ ├── clipseg/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_clipseg.py
│ │ │ │ │ └── test_processor_clipseg.py
│ │ │ │ ├── clvp/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_clvp.py
│ │ │ │ │ ├── test_modeling_clvp.py
│ │ │ │ │ ├── test_processor_clvp.py
│ │ │ │ │ └── test_tokenization_clvp.py
│ │ │ │ ├── code_llama/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_code_llama.py
│ │ │ │ ├── codegen/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_codegen.py
│ │ │ │ │ └── test_tokenization_codegen.py
│ │ │ │ ├── cohere/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cohere.py
│ │ │ │ │ └── test_tokenization_cohere.py
│ │ │ │ ├── conditional_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_conditional_detr.py
│ │ │ │ │ └── test_modeling_conditional_detr.py
│ │ │ │ ├── convbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_convbert.py
│ │ │ │ │ └── test_modeling_tf_convbert.py
│ │ │ │ ├── convnext/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_convnext.py
│ │ │ │ │ ├── test_modeling_convnext.py
│ │ │ │ │ └── test_modeling_tf_convnext.py
│ │ │ │ ├── convnextv2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_convnextv2.py
│ │ │ │ │ └── test_modeling_tf_convnextv2.py
│ │ │ │ ├── cpm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_cpm.py
│ │ │ │ ├── cpmant/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cpmant.py
│ │ │ │ │ └── test_tokenization_cpmant.py
│ │ │ │ ├── ctrl/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_ctrl.py
│ │ │ │ │ ├── test_modeling_tf_ctrl.py
│ │ │ │ │ └── test_tokenization_ctrl.py
│ │ │ │ ├── cvt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_cvt.py
│ │ │ │ │ └── test_modeling_tf_cvt.py
│ │ │ │ ├── dac/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_dac.py
│ │ │ │ │ └── test_modeling_dac.py
│ │ │ │ ├── data2vec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_data2vec_audio.py
│ │ │ │ │ ├── test_modeling_data2vec_text.py
│ │ │ │ │ ├── test_modeling_data2vec_vision.py
│ │ │ │ │ └── test_modeling_tf_data2vec_vision.py
│ │ │ │ ├── dbrx/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dbrx.py
│ │ │ │ ├── deberta/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_deberta.py
│ │ │ │ │ ├── test_modeling_tf_deberta.py
│ │ │ │ │ └── test_tokenization_deberta.py
│ │ │ │ ├── deberta_v2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_deberta_v2.py
│ │ │ │ │ ├── test_modeling_tf_deberta_v2.py
│ │ │ │ │ └── test_tokenization_deberta_v2.py
│ │ │ │ ├── decision_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_decision_transformer.py
│ │ │ │ ├── deformable_detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_deformable_detr.py
│ │ │ │ │ └── test_modeling_deformable_detr.py
│ │ │ │ ├── deit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_deit.py
│ │ │ │ │ ├── test_modeling_deit.py
│ │ │ │ │ └── test_modeling_tf_deit.py
│ │ │ │ ├── depth_anything/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_depth_anything.py
│ │ │ │ ├── detr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_detr.py
│ │ │ │ │ └── test_modeling_detr.py
│ │ │ │ ├── dinat/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dinat.py
│ │ │ │ ├── dinov2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_dinov2.py
│ │ │ │ │ └── test_modeling_flax_dinov2.py
│ │ │ │ ├── distilbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_distilbert.py
│ │ │ │ │ ├── test_modeling_flax_distilbert.py
│ │ │ │ │ ├── test_modeling_tf_distilbert.py
│ │ │ │ │ └── test_tokenization_distilbert.py
│ │ │ │ ├── dit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_dit.py
│ │ │ │ ├── donut/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_donut.py
│ │ │ │ │ ├── test_modeling_donut_swin.py
│ │ │ │ │ └── test_processing_donut.py
│ │ │ │ ├── dpr/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_dpr.py
│ │ │ │ │ ├── test_modeling_tf_dpr.py
│ │ │ │ │ └── test_tokenization_dpr.py
│ │ │ │ ├── dpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_dpt.py
│ │ │ │ │ ├── test_modeling_dpt.py
│ │ │ │ │ ├── test_modeling_dpt_auto_backbone.py
│ │ │ │ │ └── test_modeling_dpt_hybrid.py
│ │ │ │ ├── efficientnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_efficientnet.py
│ │ │ │ │ └── test_modeling_efficientnet.py
│ │ │ │ ├── electra/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_electra.py
│ │ │ │ │ ├── test_modeling_flax_electra.py
│ │ │ │ │ ├── test_modeling_tf_electra.py
│ │ │ │ │ └── test_tokenization_electra.py
│ │ │ │ ├── encodec/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_feature_extraction_encodec.py
│ │ │ │ │ └── test_modeling_encodec.py
│ │ │ │ ├── encoder_decoder/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_encoder_decoder.py
│ │ │ │ │ ├── test_modeling_flax_encoder_decoder.py
│ │ │ │ │ └── test_modeling_tf_encoder_decoder.py
│ │ │ │ ├── ernie/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_ernie.py
│ │ │ │ ├── esm/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_esm.py
│ │ │ │ │ ├── test_modeling_esmfold.py
│ │ │ │ │ ├── test_modeling_tf_esm.py
│ │ │ │ │ └── test_tokenization_esm.py
│ │ │ │ ├── falcon/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_falcon.py
│ │ │ │ ├── falcon_mamba/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_falcon_mamba.py
│ │ │ │ ├── fastspeech2_conformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fastspeech2_conformer.py
│ │ │ │ │ └── test_tokenization_fastspeech2_conformer.py
│ │ │ │ ├── flaubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flaubert.py
│ │ │ │ │ ├── test_modeling_tf_flaubert.py
│ │ │ │ │ └── test_tokenization_flaubert.py
│ │ │ │ ├── flava/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_flava.py
│ │ │ │ │ ├── test_modeling_flava.py
│ │ │ │ │ └── test_processor_flava.py
│ │ │ │ ├── fnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fnet.py
│ │ │ │ │ └── test_tokenization_fnet.py
│ │ │ │ ├── focalnet/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_focalnet.py
│ │ │ │ ├── fsmt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_fsmt.py
│ │ │ │ │ └── test_tokenization_fsmt.py
│ │ │ │ ├── funnel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_funnel.py
│ │ │ │ │ ├── test_modeling_tf_funnel.py
│ │ │ │ │ └── test_tokenization_funnel.py
│ │ │ │ ├── fuyu/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_fuyu.py
│ │ │ │ │ ├── test_modeling_fuyu.py
│ │ │ │ │ └── test_processing_fuyu.py
│ │ │ │ ├── gemma/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gemma.py
│ │ │ │ │ ├── test_modeling_gemma.py
│ │ │ │ │ └── test_tokenization_gemma.py
│ │ │ │ ├── gemma2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gemma2.py
│ │ │ │ ├── git/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_git.py
│ │ │ │ │ └── test_processor_git.py
│ │ │ │ ├── glpn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_glpn.py
│ │ │ │ │ └── test_modeling_glpn.py
│ │ │ │ ├── gpt2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gpt2.py
│ │ │ │ │ ├── test_modeling_gpt2.py
│ │ │ │ │ ├── test_modeling_tf_gpt2.py
│ │ │ │ │ ├── test_tokenization_gpt2.py
│ │ │ │ │ └── test_tokenization_gpt2_tf.py
│ │ │ │ ├── gpt_bigcode/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gpt_bigcode.py
│ │ │ │ ├── gpt_neo/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gpt_neo.py
│ │ │ │ │ └── test_modeling_gpt_neo.py
│ │ │ │ ├── gpt_neox/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_gpt_neox.py
│ │ │ │ ├── gpt_neox_japanese/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_gpt_neox_japanese.py
│ │ │ │ │ └── test_tokenization_gpt_neox_japanese.py
│ │ │ │ ├── gpt_sw3/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_gpt_sw3.py
│ │ │ │ ├── gptj/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_flax_gptj.py
│ │ │ │ │ ├── test_modeling_gptj.py
│ │ │ │ │ └── test_modeling_tf_gptj.py
│ │ │ │ ├── grounding_dino/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_grounding_dino.py
│ │ │ │ │ ├── test_modeling_grounding_dino.py
│ │ │ │ │ └── test_processor_grounding_dino.py
│ │ │ │ ├── groupvit/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_groupvit.py
│ │ │ │ │ └── test_modeling_tf_groupvit.py
│ │ │ │ ├── herbert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_tokenization_herbert.py
│ │ │ │ ├── hiera/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_hiera.py
│ │ │ │ ├── hubert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_modeling_hubert.py
│ │ │ │ │ └── test_modeling_tf_hubert.py
│ │ │ │ ├── ibert/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── test_modeling_ibert.py
│ │ │ │ ├── idefics/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── test_image_processing_idefics.py
│ │ │ │ │ ├── test_modeling_idefics.py
│ │ │ │ │ ├── test_modeling_tf_ide
Showing preview only (5,298K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (60948 symbols across 2502 files)
FILE: mplsandbox/analyzetools.py
class AnalyzeTools (line 29) | class AnalyzeTools:
method __init__ (line 30) | def __init__(
method __enter__ (line 52) | def __enter__(self):
method __exit__ (line 56) | def __exit__(self, *args, **kwargs):
method _validate_inputs (line 59) | def _validate_inputs(self, image, dockerfile, lang):
method _create_docker_client (line 67) | def _create_docker_client(self):
method open (line 72) | def open(self):
method _build_image_from_dockerfile (line 80) | def _build_image_from_dockerfile(self):
method _pull_image_if_needed (line 96) | def _pull_image_if_needed(self):
method _get_existing_container (line 111) | def _get_existing_container(self):
method _check_container_exists (line 117) | def _check_container_exists(self, container_id):
method _run_container (line 124) | def _run_container(self):
method close (line 144) | def close(self):
method _commit_container (line 148) | def _commit_container(self):
method _remove_container (line 152) | def _remove_container(self):
method _remove_image_if_needed (line 156) | def _remove_image_if_needed(self):
method _remove_image (line 166) | def _remove_image(self):
method _build_sh (line 174) | def _build_sh(self, code_dest_file, unit_input):
method run (line 186) | def run(self, code: str, unit_input: str = None, libraries: Optional[L...
method _ensure_session_is_open (line 205) | def _ensure_session_is_open(self):
method _install_libraries_if_needed (line 211) | def _install_libraries_if_needed(self, libraries):
method _install_libraries (line 219) | def _install_libraries(self, libraries):
method _prepare_go_environment (line 228) | def _prepare_go_environment(self):
method _prepare_code_file (line 233) | def _prepare_code_file(self, code):
method _prepare_sh_file (line 241) | def _prepare_sh_file(self, sh):
method _copy_code_to_container (line 248) | def _copy_code_to_container(self, src, dest):
method _execute_code_in_container (line 251) | def _execute_code_in_container(self, code_dest_file, unit_input=None):
method _execute_sh_in_container (line 267) | def _execute_sh_in_container(self, sh_dest_file):
method copy_from_runtime (line 281) | def copy_from_runtime(self, src: str, dest: str):
method _extract_file_from_container (line 287) | def _extract_file_from_container(self, src, dest):
method copy_to_runtime (line 295) | def copy_to_runtime(self, src: str, dest: str):
method _create_directory_if_needed (line 300) | def _create_directory_if_needed(self, dest):
method _create_directory_if_needed_tmp (line 309) | def _create_directory_if_needed_tmp(self, dest):
method _copy_file_to_container (line 316) | def _copy_file_to_container(self, src, dest):
method _add_directory_to_tar (line 323) | def _add_directory_to_tar(self,tar, path, arcname):
method copy_directory_to_container (line 332) | def copy_directory_to_container(self, src_dir):
method execute_command (line 348) | def execute_command(
method _validate_command (line 357) | def _validate_command(self, command):
method _run_command_in_container (line 361) | def _run_command_in_container(self, command, workdir):
method _increase_memory_and_rerun (line 383) | def _increase_memory_and_rerun(self, code, unit_input, libraries):
method call_tool_python (line 389) | def call_tool_python(self, code, unit_inputs, analysis) -> str:
FILE: mplsandbox/const.py
class Language (line 4) | class Language:
class FILE_EXTENSION_MAPPING (line 16) | class FILE_EXTENSION_MAPPING:
class CONTAINER_LANGUAGE_MAPPING (line 29) | class CONTAINER_LANGUAGE_MAPPING:
class DefaultImage (line 52) | class DefaultImage:
class CodeType (line 63) | class CodeType:
FILE: mplsandbox/sandbox.py
class Sandbox (line 24) | class Sandbox:
method __init__ (line 25) | def __init__(
method __enter__ (line 47) | def __enter__(self):
method __exit__ (line 51) | def __exit__(self, *args, **kwargs):
method _validate_inputs (line 54) | def _validate_inputs(self, image, dockerfile, lang):
method _create_docker_client (line 62) | def _create_docker_client(self):
method open (line 67) | def open(self):
method _build_image_from_dockerfile (line 81) | def _build_image_from_dockerfile(self):
method _pull_image_if_needed (line 97) | def _pull_image_if_needed(self):
method _get_existing_container (line 112) | def _get_existing_container(self):
method _check_container_exists (line 118) | def _check_container_exists(self, container_id):
method _run_container (line 150) | def _run_container(self):
method close (line 170) | def close(self):
method _commit_container (line 178) | def _commit_container(self):
method _remove_container (line 182) | def _remove_container(self):
method _remove_image_if_needed (line 186) | def _remove_image_if_needed(self):
method _remove_image (line 204) | def _remove_image(self):
method _build_sh (line 212) | def _build_sh(self, code_dest_file, unit_input):
method run (line 239) | def run(self, code: str, unit_input: str = None, libraries: Optional[L...
method _ensure_session_is_open (line 258) | def _ensure_session_is_open(self):
method _install_libraries_if_needed (line 264) | def _install_libraries_if_needed(self, libraries):
method _install_libraries (line 272) | def _install_libraries(self, libraries):
method _prepare_go_environment (line 281) | def _prepare_go_environment(self):
method _prepare_code_file (line 286) | def _prepare_code_file(self, code):
method _prepare_sh_file (line 293) | def _prepare_sh_file(self, sh):
method _copy_code_to_container (line 300) | def _copy_code_to_container(self, src, dest):
method _execute_code_in_container (line 312) | def _execute_code_in_container(self, code_dest_file, unit_input=None):
method _execute_sh_in_container (line 328) | def _execute_sh_in_container(self, sh_dest_file):
method copy_from_runtime (line 343) | def copy_from_runtime(self, src: str, dest: str):
method _extract_file_from_container (line 349) | def _extract_file_from_container(self, src, dest):
method copy_to_runtime (line 357) | def copy_to_runtime(self, src: str, dest: str):
method _create_directory_if_needed (line 362) | def _create_directory_if_needed(self, dest):
method _copy_file_to_container (line 369) | def _copy_file_to_container(self, src, dest):
method execute_command (line 376) | def execute_command(
method _validate_command (line 385) | def _validate_command(self, command):
method _run_command_in_container (line 389) | def _run_command_in_container(self, command, workdir):
method _increase_memory_and_rerun (line 411) | def _increase_memory_and_rerun(self, code, unit_input, libraries):
FILE: mplsandbox/tool.py
class MPLSANDBOX (line 12) | class MPLSANDBOX:
method __init__ (line 13) | def __init__(self, *args, **kwargs):
method process_config (line 27) | def process_config(self):
method get_basic_info (line 54) | def get_basic_info(self, show_per_unit_feedback=False):
method code_analyze_feedback (line 98) | def code_analyze_feedback(self, analysis_type):
method run (line 146) | def run(self,analysis_type="all"):
function main (line 154) | def main():
FILE: mplsandbox/utils.py
class ConsoleOutput (line 13) | class ConsoleOutput:
method __init__ (line 14) | def __init__(self, text: str):
method text (line 18) | def text(self):
method __str__ (line 21) | def __str__(self):
function image_exists (line 24) | def image_exists(client: DockerClient, image: str) -> bool:
function get_libraries_installation_command (line 38) | def get_libraries_installation_command(lang: str, library: str) -> Optio...
function get_code_file_extension (line 59) | def get_code_file_extension(lang: str) -> str:
function get_code_execution_command (line 80) | def get_code_execution_command(lang: str, code_file: str) -> list:
function raise_error_templete (line 104) | def raise_error_templete(error_message: str, number: int, app=False):
function extract_libraries (line 110) | def extract_libraries(code: str, language: str) -> list:
function generate_install_commands (line 130) | def generate_install_commands(language: str, libraries: list) -> str:
function detect_language_via_file_extension (line 149) | def detect_language_via_file_extension(file_extension: str) -> str:
function detect_language (line 152) | def detect_language(code: str) -> str:
function remove_ansi_codes (line 159) | def remove_ansi_codes(text):
function output_answer_check (line 163) | def output_answer_check(answer, output):
function read_code_file (line 170) | def read_code_file(code_file_path):
function read_unit_file (line 174) | def read_unit_file(unit_file_path):
function read_libraries_file (line 178) | def read_libraries_file(library_file_path):
function read_question_file (line 184) | def read_question_file(question_file_path):
function get_reward (line 188) | def get_reward(output, lang, if_correct):
function get_completion_from_messages (line 208) | def get_completion_from_messages(api_key, messages, model, temperature=0):
FILE: mplsandbox_for_rl/config.py
function parse_args (line 3) | def parse_args(*args):
FILE: mplsandbox_for_rl/data_helper.py
class DummyDataset (line 13) | class DummyDataset(IterableDataset):
method __iter__ (line 14) | def __iter__(self):
method __len__ (line 18) | def __len__(self):
class BaseDataset (line 21) | class BaseDataset(IterableDataset):
method __init__ (line 22) | def __init__(self, opt: Namespace, accelerator: Accelerator, mode: str...
method tokenizer_class (line 50) | def tokenizer_class(self):
method _load_data (line 53) | def _load_data(self, dpath: str):
method _get_bot_mask (line 79) | def _get_bot_mask(self, text_vec: List[int], sep_i: int):
method _encode_sample (line 86) | def _encode_sample(self, sample: Tuple[List[str], str]) -> Dict[str, A...
method _get_sample_len (line 151) | def _get_sample_len(self, sample: Dict[str, Any]):
method _get_allowed_max_len (line 154) | def _get_allowed_max_len(self):
method _batchify (line 157) | def _batchify(self, batch_samples: List[Dict[str, Any]]) -> Dict[str, ...
method sample_generator (line 187) | def sample_generator(self) -> Generator[Dict[str, Any], None, None]:
method batch_generator (line 192) | def batch_generator(self) -> Generator[List[Dict[str, Any]], None, None]:
method dynamic_batch_generator (line 214) | def dynamic_batch_generator(self) -> Generator[List[Dict[str, Any]], N...
method final_generator (line 278) | def final_generator(self) -> Generator[Dict[str, Any], None, None]:
method __iter__ (line 289) | def __iter__(self):
class DialogDataset (line 292) | class DialogDataset(BaseDataset):
method __init__ (line 295) | def __init__(self, opt, accelerator, mode: str='train', **kwargs) -> N...
method sample_generator (line 323) | def sample_generator(self):
method __len__ (line 339) | def __len__(self):
class CodeDataset (line 342) | class CodeDataset(BaseDataset):
method __init__ (line 345) | def __init__(self, opt, accelerator, mode: str='train', **kwargs) -> N...
method _load_data (line 373) | def _load_data(self, dpath: str):
method _encode_sample (line 387) | def _encode_sample(self, sample:Dict[str, Any]) -> Dict[str, Any]:
method sample_generator (line 435) | def sample_generator(self):
method __len__ (line 451) | def __len__(self):
class ChunkDataset (line 455) | class ChunkDataset(BaseDataset):
method __init__ (line 458) | def __init__(self, opt, accelerator, mode: str='train', **kwargs) -> N...
method __len__ (line 476) | def __len__(self):
method _load_chunk (line 479) | def _load_chunk(self, chunk_i):
method sample_generator (line 483) | def sample_generator(self):
function get_dataloader (line 517) | def get_dataloader(dataset: IterableDataset, opt):
FILE: mplsandbox_for_rl/generate_utils.py
function top_p_logits (line 9) | def top_p_logits(logits: torch.Tensor, topp: float=0., filter_value: flo...
function get_banned_ngrams (line 25) | def get_banned_ngrams(prev_input_ids: torch.LongTensor, n=4, init_ngrams...
function get_blacklist_ngrams (line 37) | def get_blacklist_ngrams(blacklist_fname: str, tokenizer):
function sort_value_index (line 53) | def sort_value_index(values: torch.Tensor, indexes: torch.Tensor, descen...
class CustomizedGeneration (line 58) | class CustomizedGeneration:
method __init__ (line 59) | def __init__(self, model, debug=False) -> None:
method _cal_length_penalty (line 74) | def _cal_length_penalty(self, curr_length, beam_length_penalty):
method check_value_stability (line 84) | def check_value_stability(self, logits):
method _run_fake_forward (line 89) | def _run_fake_forward(self):
method generate (line 93) | def generate(self, batch, **kwargs):
method _generate (line 104) | def _generate(self, batch, **kwargs):
FILE: mplsandbox_for_rl/llama/llama_model.py
class Llama (line 10) | class Llama(LlamaForCausalLM):
method __init__ (line 11) | def __init__(self, config, opt: Dict[str, Any], dict: LlamaPretrainedT...
method forward (line 27) | def forward(self, decoder_input: torch.LongTensor, incr_state: torch.T...
method reorder_encoder_states (line 43) | def reorder_encoder_states(self, encoder_states, indices):
method reorder_decoder_incremental_state (line 47) | def reorder_decoder_incremental_state(self, incremental_state, inds):
method _preprocess_context (line 50) | def _preprocess_context(self, context: Union[str, List[str]], **kwargs):
method generate_sentence (line 104) | def generate_sentence(self, context: Union[str, List[str]], **kwargs):
method generate (line 123) | def generate(self, batch: Dict[str, Any], **kwargs):
FILE: mplsandbox_for_rl/llama/llama_trainer.py
class LlamaTrainer (line 13) | class LlamaTrainer(Seq2SeqTrainer):
method __init__ (line 15) | def __init__(self, opt, model: Llama, reward_model: nn.Module, acceler...
method _criterion (line 20) | def _criterion(self, model_output: Tuple[torch.Tensor, ...], batch: Di...
method _tokenizer_class (line 31) | def _tokenizer_class(self):
method _dataset_class (line 34) | def _dataset_class(self):
method _prompt_dataset_class (line 50) | def _prompt_dataset_class(self):
method _strip_pad (line 53) | def _strip_pad(self, seq: List[int]):
method _extract_context_candidates (line 56) | def _extract_context_candidates(self, context: List[List[int]], candid...
method _build_metrics (line 72) | def _build_metrics(self, mode='train'):
method evaluate (line 84) | def evaluate(self, datatype='valid', **kwargs) -> Tuple[float, List]:
method inference (line 136) | def inference(self, datatype, reward_model, **kwargs) -> Tuple[float, ...
FILE: mplsandbox_for_rl/llama/modeling_moe.py
function _make_causal_mask (line 43) | def _make_causal_mask(
function _expand_mask (line 61) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
class LlamaRMSNorm (line 75) | class LlamaRMSNorm(nn.Module):
method __init__ (line 76) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 84) | def forward(self, hidden_states):
class LlamaRotaryEmbedding (line 92) | class LlamaRotaryEmbedding(torch.nn.Module):
method __init__ (line 93) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
method forward (line 107) | def forward(self, x, seq_len=None):
function rotate_half (line 124) | def rotate_half(x):
function apply_rotary_pos_emb (line 131) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
class LlamaMLP (line 142) | class LlamaMLP(nn.Module):
method __init__ (line 143) | def __init__(
method forward (line 155) | def forward(self, x):
class LlamaAttention (line 159) | class LlamaAttention(nn.Module):
method __init__ (line 162) | def __init__(self, config: LlamaConfig):
method _shape (line 181) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
method forward (line 184) | def forward(
class LlamaDecoderLayer (line 252) | class LlamaDecoderLayer(nn.Module):
method __init__ (line 253) | def __init__(self, config: LlamaConfig):
method forward (line 265) | def forward(
class LlamaPreTrainedModel (line 341) | class LlamaPreTrainedModel(PreTrainedModel):
method _init_weights (line 349) | def _init_weights(self, module):
method _set_gradient_checkpointing (line 360) | def _set_gradient_checkpointing(self, module, value=False):
class LlamaModel (line 433) | class LlamaModel(LlamaPreTrainedModel):
method __init__ (line 441) | def __init__(self, config: LlamaConfig):
method get_input_embeddings (line 454) | def get_input_embeddings(self):
method set_input_embeddings (line 457) | def set_input_embeddings(self, value):
method _prepare_decoder_attention_mask (line 461) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,...
method forward (line 485) | def forward(
class LlamaForCausalLM (line 613) | class LlamaForCausalLM(LlamaPreTrainedModel):
method __init__ (line 614) | def __init__(self, config):
method get_input_embeddings (line 623) | def get_input_embeddings(self):
method set_input_embeddings (line 626) | def set_input_embeddings(self, value):
method get_output_embeddings (line 629) | def get_output_embeddings(self):
method set_output_embeddings (line 632) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 635) | def set_decoder(self, decoder):
method get_decoder (line 638) | def get_decoder(self):
method forward (line 643) | def forward(
method prepare_inputs_for_generation (line 729) | def prepare_inputs_for_generation(
method _reorder_cache (line 760) | def _reorder_cache(past_key_values, beam_idx):
class LlamaForSequenceClassification (line 787) | class LlamaForSequenceClassification(LlamaPreTrainedModel):
method __init__ (line 790) | def __init__(self, config):
method get_input_embeddings (line 799) | def get_input_embeddings(self):
method set_input_embeddings (line 802) | def set_input_embeddings(self, value):
method forward (line 806) | def forward(
FILE: mplsandbox_for_rl/llama/reward/llama_reward_model.py
class LlamaCriticModel (line 12) | class LlamaCriticModel(LlamaForCausalLM):
method __init__ (line 13) | def __init__(self, config, opt: Dict[str, Any], dict: LlamaPretrainedT...
method forward (line 26) | def forward(self, decoder_input: torch.LongTensor, rank_all=False):
method reward (line 45) | def reward(self, context: Union[str, List[str], List[int]], is_encoded...
method batch_reward (line 52) | def batch_reward(self, context: Union[List[str], List[List[str]], List...
class LlamaRewardModel (line 59) | class LlamaRewardModel():
method __init__ (line 60) | def __init__(self, opt: Dict[str, Any], dict: LlamaPretrainedTokenizer...
method process_answer (line 68) | def process_answer(self, text):
method write_tmp (line 89) | def write_tmp(self, code, tmp_file):
method find_test (line 124) | def find_test(self, code):
method find_test_type (line 140) | def find_test_type(self, code):
method execute_code (line 159) | def execute_code(self, code, inputs, outputs, tmp_file,has_args, has_p...
method get_reward (line 194) | def get_reward(self, resp, inputs, outputs, tmp_file):
method get_reward_via_mplsandbox (line 271) | def get_reward_via_mplsandbox(self, resp, inputs, outputs, tmp_file):
method forward (line 341) | def forward(self, resp_vec_sampled: List[List[int]], resps: List[str],...
FILE: mplsandbox_for_rl/metric.py
class Metric (line 18) | class Metric:
method __init__ (line 19) | def __init__(self) -> None:
method add (line 22) | def add(self, val):
method val (line 25) | def val(self) -> float:
method reset (line 28) | def reset(self):
method compute (line 31) | def compute(self, val: Any):
method __add__ (line 34) | def __add__(self, other):
method __radd__ (line 37) | def __radd__(self, other):
class ExponentialMovingAverageMetric (line 41) | class ExponentialMovingAverageMetric(Metric):
method __init__ (line 42) | def __init__(self, factor, val=None, denom=1):
method add (line 49) | def add(self, val):
method val (line 55) | def val(self) -> float:
method sync (line 60) | def sync(self, val):
method reset (line 64) | def reset(self):
method __add__ (line 67) | def __add__(self, other: 'ExponentialMovingAverageMetric'):
class SpanSumMetric (line 71) | class SpanSumMetric(Metric):
method __init__ (line 72) | def __init__(self, size, buffer=None, sum_=0, denom=1) -> None:
method add (line 79) | def add(self, val):
method val (line 86) | def val(self) -> float:
method reset (line 89) | def reset(self):
method __add__ (line 94) | def __add__(self, other: 'SpanSumMetric'):
class MeanMetric (line 98) | class MeanMetric(Metric):
method __init__ (line 99) | def __init__(self, num=0, denom=0):
method add (line 103) | def add(self, val: Any):
method many (line 107) | def many(self, vals: List[Any], denoms: Optional[List[int]] = None):
method val (line 116) | def val(self):
method reset (line 121) | def reset(self):
method __add__ (line 124) | def __add__(self, other: 'MeanMetric'):
class SumMetric (line 129) | class SumMetric(Metric):
method __init__ (line 130) | def __init__(self, sum_=0):
method add (line 133) | def add(self, val):
method many (line 136) | def many(self, vals: List[Any]):
method val (line 139) | def val(self):
method reset (line 142) | def reset(self):
method __add__ (line 145) | def __add__(self, other: 'SumMetric'):
class RealtimeMetric (line 149) | class RealtimeMetric(Metric):
method __init__ (line 150) | def __init__(self, val=0):
method add (line 153) | def add(self, val):
method many (line 156) | def many(self, vals: List[Any]):
method val (line 159) | def val(self):
method reset (line 162) | def reset(self):
method __add__ (line 165) | def __add__(self, other):
class CIDErDMetric (line 168) | class CIDErDMetric(Metric):
method __init__ (line 169) | def __init__(self, lang, ngram=4, sigma=15, guesses=None, answers=None...
method add (line 177) | def add(self, val: Tuple[str, List[str]]):
method reset (line 186) | def reset(self):
method val (line 190) | def val(self) -> float:
method __add__ (line 198) | def __add__(self, other: 'CIDErDMetric'):
class BleuMetric (line 204) | class BleuMetric(MeanMetric):
method __init__ (line 206) | def __init__(self, b=4, backend='sacre', lang='zh'):
method compute (line 219) | def compute(self, val: Tuple[str, List[str]]):
class CorpusBleuMetric (line 239) | class CorpusBleuMetric(Metric):
method __init__ (line 240) | def __init__(self, b=4, backend='sacre', lang='zh', guesses=None, answ...
method reset (line 257) | def reset(self):
method add (line 261) | def add(self, val: Tuple[str, List[str]]):
method val (line 266) | def val(self) -> float:
method __add__ (line 277) | def __add__(self, other: 'CorpusBleuMetric'):
class RougeMetric (line 282) | class RougeMetric(MeanMetric):
method __init__ (line 283) | def __init__(self, r, lang='zh'):
method compute (line 290) | def compute(self, val: Tuple[str, List[str]]):
class F1Metric (line 305) | class F1Metric(MeanMetric):
method __init__ (line 306) | def __init__(self, lang='zh'):
method _prec_recall_f1_score (line 311) | def _prec_recall_f1_score(pred_items, gold_items):
method compute (line 329) | def compute(self, val: Tuple[str, List[str]]):
class InterDistinctMetric (line 340) | class InterDistinctMetric(Metric):
method __init__ (line 341) | def __init__(self, d, lang='zh', counts: Optional[TCounter] = None):
method _ngram (line 348) | def _ngram(seq: List[str], n):
method add (line 352) | def add(self, val: Any):
method many (line 355) | def many(self, vals: List[Any]):
method compute (line 359) | def compute(self, val: str):
method val (line 363) | def val(self):
method __add__ (line 366) | def __add__(self, other: 'InterDistinctMetric'):
method reset (line 369) | def reset(self):
class PPLMetric (line 373) | class PPLMetric(MeanMetric):
method val (line 374) | def val(self):
method __add__ (line 381) | def __add__(self, other):
class Metrics (line 384) | class Metrics():
method __init__ (line 394) | def __init__(self, opt: Dict[str, Any], accelerator: Accelerator, mode...
method add_additional_metric (line 415) | def add_additional_metric(self, metric_name: str, metric_obj: Metric):
method remove_useless_metric (line 419) | def remove_useless_metric(self, metric_name: str):
method record_metric (line 423) | def record_metric(self, metric_name: str, val: Any):
method record_metric_many (line 426) | def record_metric_many(self, metric_name: str, vals: List[Any], counts...
method get_metric (line 434) | def get_metric(self, metric_name: str):
method reset (line 437) | def reset(self, no_reset: List[str] = ['total_exs']):
method all_gather_metrics (line 442) | def all_gather_metrics(self) -> Dict[str, float]:
method write_tensorboard (line 463) | def write_tensorboard(self, global_step: int, gathered_metrics: Dict[s...
method flush (line 470) | def flush(self):
method display (line 474) | def display(self, global_step: int, data_size: Optional[int] = None, g...
method _get_metric_obj (line 493) | def _get_metric_obj(self, metric_name: str):
function normalize_answer (line 522) | def normalize_answer(s, lang: str) -> str:
function add (line 539) | def add(a, b):
FILE: mplsandbox_for_rl/metric_utils.py
function precook (line 12) | def precook(s, n=4, out=False):
function cook_refs (line 29) | def cook_refs(refs, n=4): ## lhuang: oracle will call with "average"
function cook_test (line 39) | def cook_test(test, n=4):
function sim (line 48) | def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref, n=...
class CiderScorer (line 77) | class CiderScorer(object):
method copy (line 81) | def copy(self):
method copy_empty (line 88) | def copy_empty(self):
method __init__ (line 95) | def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=...
method clear (line 110) | def clear(self):
method cook_append (line 114) | def cook_append(self, test, refs):
method size (line 124) | def size(self):
method __iadd__ (line 128) | def __iadd__(self, other):
method compute_doc_freq (line 139) | def compute_doc_freq(self):
method counts2vec (line 152) | def counts2vec(self, cnts):
method compute_cider (line 178) | def compute_cider(self):
method compute_score (line 207) | def compute_score(self, option=None, verbose=0):
method my_get_cider (line 222) | def my_get_cider(self, gts, res):
method my_get_self_cider (line 242) | def my_get_self_cider(self, res):
class CiderD (line 261) | class CiderD:
method __init__ (line 266) | def __init__(self, n=4, sigma=6.0, df="corpus"):
method compute_score (line 275) | def compute_score(self, gts, res):
method my_compute_score (line 302) | def my_compute_score(self, gts, res, avg_refs=True):
method my_self_cider (line 325) | def my_self_cider(self, res):
method method (line 338) | def method(self):
FILE: mplsandbox_for_rl/ppo/ppo_datahelper.py
class Cell (line 9) | class Cell:
class Archive (line 14) | class Archive():
method __init__ (line 15) | def __init__(self):
method add (line 21) | def add(self, state: str, score: float):
method update (line 29) | def update(self, state: str, score: float):
class DialogPromptDataset (line 46) | class DialogPromptDataset(DialogDataset):
method __init__ (line 47) | def __init__(self, opt, accelerator, mode: str = 'train', **kwargs) ->...
method _load_data (line 57) | def _load_data(self, dpath: str):
method _encode_sample (line 71) | def _encode_sample(self, sample:Dict[str, Any]) -> Dict[str, Any]:
method _get_allowed_max_len (line 129) | def _get_allowed_max_len(self):
method _get_sample_len (line 132) | def _get_sample_len(self, sample: Dict[str, Any]):
method batch_generator (line 135) | def batch_generator(self) -> Generator[List[Dict[str, Any]], None, None]:
method dynamic_batch_generator (line 143) | def dynamic_batch_generator(self) -> Generator[List[Dict[str, Any]], N...
method _batchify (line 146) | def _batchify(self, batch_samples: List[Dict[str, Any]]) -> Dict[str, ...
class DialogReplayDataset (line 163) | class DialogReplayDataset(DialogDataset):
method __init__ (line 164) | def __init__(self, data: List[Dict[str, Any]], opt, accelerator, archi...
method _load_data (line 174) | def _load_data(self, dpath: str):
method _get_advantages_and_returns (line 177) | def _get_advantages_and_returns(self, rewards: List[float], values: Li...
method _encode_sample (line 195) | def _encode_sample(self, sample: Dict[str, Any]) -> Dict[str, Any]:
method _get_allowed_max_len (line 237) | def _get_allowed_max_len(self):
method _get_sample_len (line 240) | def _get_sample_len(self, sample: Dict[str, Any]):
method batch_generator (line 243) | def batch_generator(self) -> Generator[List[Dict[str, Any]], None, None]:
method dynamic_batch_generator (line 247) | def dynamic_batch_generator(self) -> Generator[List[Dict[str, Any]], N...
method _batchify (line 250) | def _batchify(self, batch_samples: List[Dict[str, Any]]) -> Dict[str, ...
FILE: mplsandbox_for_rl/ppo/ppo_trainer.py
class RLHFTrainableModelWrapper (line 18) | class RLHFTrainableModelWrapper(nn.Module):
method __init__ (line 19) | def __init__(self, policy_model: nn.Module, critic_model: nn.Module, r...
method forward (line 34) | def forward(self, inputs, **kwargs):
method train (line 37) | def train(self, mode=True):
method eval (line 41) | def eval(self):
class PPOTrainer (line 50) | class PPOTrainer(Seq2SeqTrainer):
method __init__ (line 51) | def __init__(self, opt, policy_model: nn.Module, ref_model: nn.Module,...
method _build_metrics (line 120) | def _build_metrics(self, mode='train'):
method _group_optim_params (line 149) | def _group_optim_params(self, no_decay_name_list=["bias", "LayerNorm.w...
method _build_optimizer (line 179) | def _build_optimizer(self):
method _build_dataloader (line 184) | def _build_dataloader(self, mode):
method _prompt_dataset_class (line 187) | def _prompt_dataset_class(self):
method _replay_dataset_class (line 190) | def _replay_dataset_class(self):
method _strip_pad (line 193) | def _strip_pad(self, seq: List[int]):
method _run_reward_forward (line 196) | def _run_reward_forward(self, resp_vec_sampled, resps, batch, bsz, **k...
method _run_policy_forward (line 199) | def _run_policy_forward(self, inputs, **kwargs):
method _run_ref_forward (line 205) | def _run_ref_forward(self, inputs, **kwargs):
method _run_critic_forward (line 208) | def _run_critic_forward(self, inputs, **kwargs):
method _run_forward (line 211) | def _run_forward(self, batch: Dict[str, Any], **kwargs):
method _extract_context_candidates (line 214) | def _extract_context_candidates(self, context: List[List[int]], candid...
method _record_batch_info (line 229) | def _record_batch_info(self, batch, mode):
method _save_checkpoint (line 234) | def _save_checkpoint(self, is_best: bool, total_steps: int, **kwargs):
method _format_experience_log (line 278) | def _format_experience_log(self, log):
method make_experiences (line 287) | def make_experiences(self):
method _criterion (line 382) | def _criterion(self, model_output: Tuple[torch.Tensor, ...], batch: Di...
method evaluate (line 446) | def evaluate(self, datatype='valid', **kwargs) -> Tuple[float, List]:
method _pre_epoch (line 497) | def _pre_epoch(self):
method _post_epoch (line 506) | def _post_epoch(self):
method train (line 512) | def train(self):
FILE: mplsandbox_for_rl/ppo/ppo_utils.py
function get_global_statistics (line 8) | def get_global_statistics(accelerator: Accelerator, xs: torch.Tensor, ma...
function logprobs_from_logits (line 24) | def logprobs_from_logits(logits, labels):
function whiten (line 31) | def whiten(xs: torch.Tensor, mask: torch.BoolTensor, shift_mean=True, ac...
class RunningMoments (line 44) | class RunningMoments:
method __init__ (line 45) | def __init__(self, accelerator: Accelerator):
method update (line 57) | def update(self, xs: torch.Tensor) -> Tuple[float, float]:
FILE: mplsandbox_for_rl/scheduler.py
function invsqrt_scheduler (line 4) | def invsqrt_scheduler(warmup_steps: int):
function calculate_noam_lr (line 14) | def calculate_noam_lr(dmodel: int, step: int, factor=1.):
function noam_scheduler (line 18) | def noam_scheduler(dmodel: int, warmup_steps: int, factor=1.):
function warmup_scheduler (line 31) | def warmup_scheduler(warmup_steps: int, min_factor=0.):
FILE: mplsandbox_for_rl/tokenizer.py
class HFPretrainedTokenizer (line 5) | class HFPretrainedTokenizer():
method __init__ (line 6) | def __init__(self, opt):
method gmask_token (line 29) | def gmask_token(self):
method gmask_token_id (line 33) | def gmask_token_id(self):
method null_token (line 37) | def null_token(self):
method end_token (line 41) | def end_token(self):
method unk_token (line 45) | def unk_token(self):
method start_token (line 49) | def start_token(self):
method null_token_id (line 53) | def null_token_id(self):
method end_token_id (line 57) | def end_token_id(self):
method unk_token_id (line 61) | def unk_token_id(self):
method start_token_id (line 65) | def start_token_id(self):
method override_special_tokens (line 69) | def override_special_tokens(self):
method txt2vec (line 72) | def txt2vec(self, text: str):
method vec2txt (line 77) | def vec2txt(self, vector: List[int], skip_special=False):
FILE: mplsandbox_for_rl/train_ppo.py
function additional_args (line 13) | def additional_args(parser):
FILE: mplsandbox_for_rl/trainer.py
class CustomTrainerStates (line 20) | class CustomTrainerStates:
method __init__ (line 21) | def __init__(self) -> None:
method state_dict (line 27) | def state_dict(self):
method load_state_dict (line 35) | def load_state_dict(self, state_dict):
class Seq2SeqTrainer (line 42) | class Seq2SeqTrainer():
method __init__ (line 43) | def __init__(self, opt, model: nn.Module, accelerator: Accelerator, ev...
method post_init (line 97) | def post_init(self):
method _load_checkpoint (line 100) | def _load_checkpoint(self, load_path, strict=True, target_model: nn.Mo...
method _save_checkpoint (line 131) | def _save_checkpoint(self, is_best: bool, total_steps: int):
method _dataset_class (line 164) | def _dataset_class(self):
method _tokenizer_class (line 170) | def _tokenizer_class(self):
method _custom_states_class (line 173) | def _custom_states_class(self):
method _build_dataloader (line 176) | def _build_dataloader(self, mode='train'):
method _build_metrics (line 181) | def _build_metrics(self, mode='train'):
method _optimizer_class (line 185) | def _optimizer_class(self):
method _build_optimizer (line 191) | def _build_optimizer(self):
method _build_scheduler (line 202) | def _build_scheduler(self):
method _build_loss_fn (line 229) | def _build_loss_fn(self):
method _get_metric_obj (line 232) | def _get_metric_obj(self, training=True):
method _record_batch_info (line 238) | def _record_batch_info(self, batch, mode='train'):
method _criterion (line 255) | def _criterion(self, model_output: Tuple[torch.Tensor, ...], batch: Di...
method _run_forward (line 286) | def _run_forward(self, batch: Dict[str, Any], **kwargs):
method _run_fake_forward (line 290) | def _run_fake_forward(self):
method _train_step (line 294) | def _train_step(self, batch: Dict[str, Any], **kwargs):
method generate_sentence (line 319) | def generate_sentence(self, context: Union[str, List[str]], **kwargs):
method _on_stop_train (line 322) | def _on_stop_train(self):
method _pre_epoch (line 325) | def _pre_epoch(self):
method _post_epoch (line 329) | def _post_epoch(self):
method train (line 333) | def train(self):
method get_generate_batch (line 395) | def get_generate_batch(self, batch: Dict[str, Any]):
method evaluate (line 401) | def evaluate(self, datatype='valid', **kwargs) -> Tuple[float, List]:
method _get_extra_inference_info (line 464) | def _get_extra_inference_info(self, batch: Dict[str, Any]) -> Tuple[Li...
method _parse_extra_inference_info (line 468) | def _parse_extra_inference_info(self, others: List[Any]) -> Tuple[Any,...
method inference (line 472) | def inference(self, datatype='test', num_examples: int=999999) -> List:
FILE: mplsandbox_for_rl/transformers/.circleci/create_circleci_config.py
class EmptyJob (line 39) | class EmptyJob:
method to_dict (line 42) | def to_dict(self):
class CircleCIJob (line 50) | class CircleCIJob:
method __post_init__ (line 66) | def __post_init__(self):
method to_dict (line 90) | def to_dict(self):
method job_name (line 222) | def job_name(self):
function create_circleci_config (line 447) | def create_circleci_config(folder=None):
FILE: mplsandbox_for_rl/transformers/.circleci/parse_test_outputs.py
function parse_pytest_output (line 4) | def parse_pytest_output(file_path):
function parse_pytest_failure_output (line 18) | def parse_pytest_failure_output(file_path):
function parse_pytest_errors_output (line 34) | def parse_pytest_errors_output(file_path):
function main (line 51) | def main():
FILE: mplsandbox_for_rl/transformers/benchmark/benchmark.py
function checkout_commit (line 45) | def checkout_commit(repo: Repo, commit_id: str):
function summarize (line 62) | def summarize(run_dir, metrics, expand_metrics=False):
function combine_summaries (line 151) | def combine_summaries(summaries):
function list_str (line 202) | def list_str(values):
FILE: mplsandbox_for_rl/transformers/benchmark/optimum_benchmark_wrapper.py
function main (line 5) | def main(config_dir, config_name, args):
FILE: mplsandbox_for_rl/transformers/conftest.py
function pytest_configure (line 87) | def pytest_configure(config):
function pytest_collection_modifyitems (line 101) | def pytest_collection_modifyitems(items):
function pytest_addoption (line 107) | def pytest_addoption(parser):
function pytest_terminal_summary (line 113) | def pytest_terminal_summary(terminalreporter):
function pytest_sessionfinish (line 121) | def pytest_sessionfinish(session, exitstatus):
class CustomOutputChecker (line 133) | class CustomOutputChecker(OutputChecker):
method check_output (line 134) | def check_output(self, want, got, optionflags):
FILE: mplsandbox_for_rl/transformers/examples/diff-conversion/diff_dummy.py
function _pre_process_input (line 11) | def _pre_process_input(input_ids):
class DummyModel (line 17) | class DummyModel(LlamaModel):
method forward (line 18) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/diff-conversion/diff_my_new_model.py
class MyNewModelConfig (line 6) | class MyNewModelConfig(LlamaConfig):
method __init__ (line 11) | def __init__(self, mlp_bias=True, new_param=0, **super_kwargs):
FILE: mplsandbox_for_rl/transformers/examples/diff-conversion/diff_my_new_model2.py
class MyNewModel2Config (line 6) | class MyNewModel2Config(LlamaConfig):
class MyNewModel2ForSequenceClassification (line 30) | class MyNewModel2ForSequenceClassification(GemmaForSequenceClassification):
FILE: mplsandbox_for_rl/transformers/examples/diff-conversion/diff_new_model.py
class NewModelConfig (line 6) | class NewModelConfig(GemmaConfig):
method __init__ (line 7) | def __init__(
FILE: mplsandbox_for_rl/transformers/examples/diff-conversion/diff_super.py
class SuperModel (line 11) | class SuperModel(LlamaModel):
method forward (line 12) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/flax/conftest.py
function pytest_addoption (line 34) | def pytest_addoption(parser):
function pytest_terminal_summary (line 40) | def pytest_terminal_summary(terminalreporter):
FILE: mplsandbox_for_rl/transformers/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py
class ModelArguments (line 29) | class ModelArguments:
function main (line 61) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/image-captioning/run_image_captioning_flax.py
function shift_tokens_right (line 73) | def shift_tokens_right(input_ids: np.ndarray, pad_token_id: int, decoder...
class TrainingArguments (line 86) | class TrainingArguments:
method __post_init__ (line 140) | def __post_init__(self):
method to_dict (line 144) | def to_dict(self):
class ModelArguments (line 161) | class ModelArguments:
class DataTrainingArguments (line 207) | class DataTrainingArguments:
method __post_init__ (line 305) | def __post_init__(self):
class TrainState (line 326) | class TrainState(train_state.TrainState):
method replicate (line 329) | def replicate(self):
function data_loader (line 333) | def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: i...
function write_metric (line 360) | def write_metric(summary_writer, metrics, train_time, step, metric_key_p...
function create_learning_rate_fn (line 375) | def create_learning_rate_fn(
function main (line 389) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/language-modeling/run_bart_dlm_flax.py
class TrainingArguments (line 70) | class TrainingArguments:
method __post_init__ (line 111) | def __post_init__(self):
method to_dict (line 115) | def to_dict(self):
class ModelArguments (line 132) | class ModelArguments:
class DataTrainingArguments (line 183) | class DataTrainingArguments:
method __post_init__ (line 249) | def __post_init__(self):
class FlaxDataCollatorForBartDenoisingLM (line 264) | class FlaxDataCollatorForBartDenoisingLM:
method __post_init__ (line 290) | def __post_init__(self):
method __call__ (line 297) | def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncod...
method permute_sentences (line 323) | def permute_sentences(self, input_ids):
method span_mask_tokens (line 357) | def span_mask_tokens(self, input_ids, labels, do_permute):
function generate_batch_splits (line 432) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop...
function write_train_metric (line 448) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 458) | def write_eval_metric(summary_writer, eval_metrics, step):
function main (line 463) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/language-modeling/run_clm_flax.py
class TrainingArguments (line 71) | class TrainingArguments:
method __post_init__ (line 112) | def __post_init__(self):
method to_dict (line 116) | def to_dict(self):
class ModelArguments (line 133) | class ModelArguments:
class DataTrainingArguments (line 194) | class DataTrainingArguments:
method __post_init__ (line 255) | def __post_init__(self):
class TrainState (line 269) | class TrainState(train_state.TrainState):
method replicate (line 272) | def replicate(self):
function data_loader (line 276) | def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: i...
function write_train_metric (line 302) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 312) | def write_eval_metric(summary_writer, eval_metrics, step):
function create_learning_rate_fn (line 317) | def create_learning_rate_fn(
function main (line 331) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/language-modeling/run_mlm_flax.py
class TrainingArguments (line 71) | class TrainingArguments:
method __post_init__ (line 118) | def __post_init__(self):
method to_dict (line 122) | def to_dict(self):
class ModelArguments (line 139) | class ModelArguments:
class DataTrainingArguments (line 200) | class DataTrainingArguments:
method __post_init__ (line 263) | def __post_init__(self):
class FlaxDataCollatorForLanguageModeling (line 276) | class FlaxDataCollatorForLanguageModeling:
method __post_init__ (line 298) | def __post_init__(self):
method __call__ (line 305) | def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multi...
method mask_tokens (line 317) | def mask_tokens(
function generate_batch_splits (line 347) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop...
function write_train_metric (line 363) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 373) | def write_eval_metric(summary_writer, eval_metrics, step):
function main (line 378) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/language-modeling/run_t5_mlm_flax.py
class TrainingArguments (line 71) | class TrainingArguments:
method __post_init__ (line 112) | def __post_init__(self):
method to_dict (line 116) | def to_dict(self):
class ModelArguments (line 133) | class ModelArguments:
class DataTrainingArguments (line 184) | class DataTrainingArguments:
method __post_init__ (line 248) | def __post_init__(self):
function compute_input_and_target_lengths (line 260) | def compute_input_and_target_lengths(inputs_length, noise_density, mean_...
class FlaxDataCollatorForT5MLM (line 308) | class FlaxDataCollatorForT5MLM:
method __call__ (line 341) | def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEnco...
method create_sentinel_ids (line 378) | def create_sentinel_ids(self, mask_indices):
method filter_input_ids (line 393) | def filter_input_ids(self, input_ids, sentinel_ids):
method random_spans_noise_mask (line 409) | def random_spans_noise_mask(self, length):
function generate_batch_splits (line 474) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop...
function write_train_metric (line 490) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 500) | def write_eval_metric(summary_writer, eval_metrics, step):
function main (line 505) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/language-modeling/t5_tokenizer_model.py
class SentencePieceUnigramTokenizer (line 11) | class SentencePieceUnigramTokenizer(BaseTokenizer):
method __init__ (line 19) | def __init__(
method train (line 73) | def train(
method train_from_iterator (line 93) | def train_from_iterator(
method add_unk_id (line 111) | def add_unk_id(self):
FILE: mplsandbox_for_rl/transformers/examples/flax/question-answering/run_qa.py
class TrainingArguments (line 73) | class TrainingArguments:
method __post_init__ (line 115) | def __post_init__(self):
method to_dict (line 119) | def to_dict(self):
class ModelArguments (line 136) | class ModelArguments:
class DataTrainingArguments (line 189) | class DataTrainingArguments:
method __post_init__ (line 292) | def __post_init__(self):
function create_train_state (line 316) | def create_train_state(
function create_learning_rate_fn (line 383) | def create_learning_rate_fn(
function train_data_collator (line 401) | def train_data_collator(rng: PRNGKey, dataset: Dataset, batch_size: int):
function eval_data_collator (line 420) | def eval_data_collator(dataset: Dataset, batch_size: int):
function main (line 438) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/question-answering/utils_qa.py
function postprocess_qa_predictions (line 32) | def postprocess_qa_predictions(
function postprocess_qa_predictions_with_beam_search (line 253) | def postprocess_qa_predictions_with_beam_search(
FILE: mplsandbox_for_rl/transformers/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py
class ModelArguments (line 71) | class ModelArguments:
class DataTrainingArguments (line 128) | class DataTrainingArguments:
function shift_tokens_right (line 251) | def shift_tokens_right(label_ids: np.array, decoder_start_token_id: int)...
class FlaxDataCollatorSpeechSeq2SeqWithPadding (line 263) | class FlaxDataCollatorSpeechSeq2SeqWithPadding:
method __call__ (line 306) | def __call__(self, features: List[Dict[str, Union[List[int], np.ndarra...
class TrainState (line 351) | class TrainState(train_state.TrainState):
method replicate (line 354) | def replicate(self):
function write_metric (line 358) | def write_metric(summary_writer, train_metrics, eval_metrics, train_time...
function create_learning_rate_fn (line 371) | def create_learning_rate_fn(
function main (line 383) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/summarization/run_summarization_flax.py
class TrainingArguments (line 80) | class TrainingArguments:
method __post_init__ (line 131) | def __post_init__(self):
method to_dict (line 135) | def to_dict(self):
class ModelArguments (line 152) | class ModelArguments:
class DataTrainingArguments (line 213) | class DataTrainingArguments:
method __post_init__ (line 320) | def __post_init__(self):
class TrainState (line 357) | class TrainState(train_state.TrainState):
method replicate (line 360) | def replicate(self):
function data_loader (line 364) | def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: i...
function write_metric (line 390) | def write_metric(summary_writer, train_metrics, eval_metrics, train_time...
function create_learning_rate_fn (line 403) | def create_learning_rate_fn(
function main (line 417) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/test_flax_examples.py
function get_setup_file (line 57) | def get_setup_file():
function get_results (line 64) | def get_results(output_dir, split="eval"):
class ExamplesTests (line 76) | class ExamplesTests(TestCasePlus):
method test_run_glue (line 77) | def test_run_glue(self):
method test_run_clm (line 100) | def test_run_clm(self):
method test_run_summarization (line 124) | def test_run_summarization(self):
method test_run_mlm (line 154) | def test_run_mlm(self):
method test_run_t5_mlm (line 178) | def test_run_t5_mlm(self):
method test_run_ner (line 202) | def test_run_ner(self):
method test_run_qa (line 232) | def test_run_qa(self):
method test_run_flax_speech_recognition_seq2seq (line 259) | def test_run_flax_speech_recognition_seq2seq(self):
FILE: mplsandbox_for_rl/transformers/examples/flax/text-classification/run_flax_glue.py
class ModelArguments (line 80) | class ModelArguments:
class DataTrainingArguments (line 134) | class DataTrainingArguments:
method __post_init__ (line 206) | def __post_init__(self):
function create_train_state (line 219) | def create_train_state(
function create_learning_rate_fn (line 290) | def create_learning_rate_fn(
function glue_train_data_collator (line 304) | def glue_train_data_collator(rng: PRNGKey, dataset: Dataset, batch_size:...
function glue_eval_data_collator (line 319) | def glue_eval_data_collator(dataset: Dataset, batch_size: int):
function main (line 333) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/token-classification/run_flax_ner.py
class TrainingArguments (line 70) | class TrainingArguments:
method __post_init__ (line 111) | def __post_init__(self):
method to_dict (line 115) | def to_dict(self):
class ModelArguments (line 132) | class ModelArguments:
class DataTrainingArguments (line 182) | class DataTrainingArguments:
method __post_init__ (line 268) | def __post_init__(self):
function create_train_state (line 281) | def create_train_state(
function create_learning_rate_fn (line 342) | def create_learning_rate_fn(
function train_data_collator (line 356) | def train_data_collator(rng: PRNGKey, dataset: Dataset, batch_size: int):
function eval_data_collator (line 371) | def eval_data_collator(dataset: Dataset, batch_size: int):
function main (line 385) | def main():
FILE: mplsandbox_for_rl/transformers/examples/flax/vision/run_image_classification.py
class TrainingArguments (line 68) | class TrainingArguments:
method __post_init__ (line 109) | def __post_init__(self):
method to_dict (line 113) | def to_dict(self):
class ModelArguments (line 130) | class ModelArguments:
class DataTrainingArguments (line 184) | class DataTrainingArguments:
class TrainState (line 223) | class TrainState(train_state.TrainState):
method replicate (line 226) | def replicate(self):
function write_metric (line 230) | def write_metric(summary_writer, train_metrics, eval_metrics, train_time...
function create_learning_rate_fn (line 243) | def create_learning_rate_fn(
function main (line 257) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/benchmarking/plot_csv_file.py
function list_field (line 27) | def list_field(default=None, metadata=None):
class PlotArguments (line 32) | class PlotArguments:
function can_convert_to_int (line 67) | def can_convert_to_int(string):
function can_convert_to_float (line 75) | def can_convert_to_float(string):
class Plot (line 83) | class Plot:
method __init__ (line 84) | def __init__(self, args):
method plot (line 105) | def plot(self):
function main (line 170) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/benchmarking/run_benchmark.py
function main (line 22) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/multiple_choice/run_multiple_choice.py
function simple_accuracy (line 44) | def simple_accuracy(preds, labels):
class ModelArguments (line 49) | class ModelArguments:
class DataTrainingArguments (line 70) | class DataTrainingArguments:
function main (line 91) | def main():
function _mp_fn (line 238) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/legacy/multiple_choice/utils_multiple_choice.py
class InputExample (line 37) | class InputExample:
class InputFeatures (line 58) | class InputFeatures:
class Split (line 71) | class Split(Enum):
class MultipleChoiceDataset (line 81) | class MultipleChoiceDataset(Dataset):
method __init__ (line 89) | def __init__(
method __len__ (line 136) | def __len__(self):
method __getitem__ (line 139) | def __getitem__(self, i) -> InputFeatures:
class TFMultipleChoiceDataset (line 146) | class TFMultipleChoiceDataset:
method __init__ (line 154) | def __init__(
method get_dataset (line 219) | def get_dataset(self):
method __len__ (line 224) | def __len__(self):
method __getitem__ (line 227) | def __getitem__(self, i) -> InputFeatures:
class DataProcessor (line 231) | class DataProcessor:
method get_train_examples (line 234) | def get_train_examples(self, data_dir):
method get_dev_examples (line 238) | def get_dev_examples(self, data_dir):
method get_test_examples (line 242) | def get_test_examples(self, data_dir):
method get_labels (line 246) | def get_labels(self):
class RaceProcessor (line 251) | class RaceProcessor(DataProcessor):
method get_train_examples (line 254) | def get_train_examples(self, data_dir):
method get_dev_examples (line 263) | def get_dev_examples(self, data_dir):
method get_test_examples (line 272) | def get_test_examples(self, data_dir):
method get_labels (line 281) | def get_labels(self):
method _read_txt (line 285) | def _read_txt(self, input_dir):
method _create_examples (line 295) | def _create_examples(self, lines, set_type):
class SynonymProcessor (line 318) | class SynonymProcessor(DataProcessor):
method get_train_examples (line 321) | def get_train_examples(self, data_dir):
method get_dev_examples (line 326) | def get_dev_examples(self, data_dir):
method get_test_examples (line 331) | def get_test_examples(self, data_dir):
method get_labels (line 337) | def get_labels(self):
method _read_csv (line 341) | def _read_csv(self, input_file):
method _create_examples (line 345) | def _create_examples(self, lines: List[List[str]], type: str):
class SwagProcessor (line 364) | class SwagProcessor(DataProcessor):
method get_train_examples (line 367) | def get_train_examples(self, data_dir):
method get_dev_examples (line 372) | def get_dev_examples(self, data_dir):
method get_test_examples (line 377) | def get_test_examples(self, data_dir):
method get_labels (line 386) | def get_labels(self):
method _read_csv (line 390) | def _read_csv(self, input_file):
method _create_examples (line 394) | def _create_examples(self, lines: List[List[str]], type: str):
class ArcProcessor (line 415) | class ArcProcessor(DataProcessor):
method get_train_examples (line 418) | def get_train_examples(self, data_dir):
method get_dev_examples (line 423) | def get_dev_examples(self, data_dir):
method get_test_examples (line 428) | def get_test_examples(self, data_dir):
method get_labels (line 432) | def get_labels(self):
method _read_json (line 436) | def _read_json(self, input_file):
method _create_examples (line 441) | def _create_examples(self, lines, type):
function convert_examples_to_features (line 506) | def convert_examples_to_features(
FILE: mplsandbox_for_rl/transformers/examples/legacy/pytorch-lightning/lightning_base.py
class BaseTransformer (line 63) | class BaseTransformer(pl.LightningModule):
method __init__ (line 64) | def __init__(
method load_hf_checkpoint (line 118) | def load_hf_checkpoint(self, *args, **kwargs):
method get_lr_scheduler (line 121) | def get_lr_scheduler(self):
method configure_optimizers (line 129) | def configure_optimizers(self):
method test_step (line 158) | def test_step(self, batch, batch_nb):
method test_epoch_end (line 161) | def test_epoch_end(self, outputs):
method total_steps (line 164) | def total_steps(self) -> int:
method setup (line 170) | def setup(self, mode):
method get_dataloader (line 177) | def get_dataloader(self, type_path: str, batch_size: int, shuffle: boo...
method train_dataloader (line 180) | def train_dataloader(self):
method val_dataloader (line 183) | def val_dataloader(self):
method test_dataloader (line 186) | def test_dataloader(self):
method _feature_file (line 189) | def _feature_file(self, mode):
method on_save_checkpoint (line 200) | def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
method add_model_specific_args (line 207) | def add_model_specific_args(parser, root_dir):
class LoggingCallback (line 269) | class LoggingCallback(pl.Callback):
method on_batch_end (line 270) | def on_batch_end(self, trainer, pl_module):
method on_validation_end (line 275) | def on_validation_end(self, trainer: pl.Trainer, pl_module: pl.Lightni...
method on_test_end (line 283) | def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModu...
function add_generic_args (line 295) | def add_generic_args(parser, root_dir) -> None:
function generic_train (line 341) | def generic_train(
FILE: mplsandbox_for_rl/transformers/examples/legacy/pytorch-lightning/run_glue.py
class GLUETransformer (line 22) | class GLUETransformer(BaseTransformer):
method __init__ (line 25) | def __init__(self, hparams):
method forward (line 33) | def forward(self, **inputs):
method training_step (line 36) | def training_step(self, batch, batch_idx):
method prepare_data (line 49) | def prepare_data(self):
method get_dataloader (line 76) | def get_dataloader(self, mode: str, batch_size: int, shuffle: bool = F...
method validation_step (line 99) | def validation_step(self, batch, batch_idx):
method _eval_end (line 112) | def _eval_end(self, outputs) -> tuple:
method validation_epoch_end (line 131) | def validation_epoch_end(self, outputs: list) -> dict:
method test_epoch_end (line 136) | def test_epoch_end(self, outputs) -> dict:
method add_model_specific_args (line 143) | def add_model_specific_args(parser, root_dir):
function main (line 176) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/pytorch-lightning/run_ner.py
class NERTransformer (line 20) | class NERTransformer(BaseTransformer):
method __init__ (line 27) | def __init__(self, hparams):
method forward (line 43) | def forward(self, **inputs):
method training_step (line 46) | def training_step(self, batch, batch_num):
method prepare_data (line 59) | def prepare_data(self):
method get_dataloader (line 88) | def get_dataloader(self, mode: int, batch_size: int, shuffle: bool = F...
method validation_step (line 105) | def validation_step(self, batch, batch_nb):
method _eval_end (line 118) | def _eval_end(self, outputs):
method validation_epoch_end (line 147) | def validation_epoch_end(self, outputs):
method test_epoch_end (line 153) | def test_epoch_end(self, outputs):
method add_model_specific_args (line 165) | def add_model_specific_args(parser, root_dir):
FILE: mplsandbox_for_rl/transformers/examples/legacy/question-answering/run_squad.py
function set_seed (line 63) | def set_seed(args):
function to_list (line 71) | def to_list(tensor):
function train (line 75) | def train(args, train_dataset, model, tokenizer):
function evaluate (line 268) | def evaluate(args, model, tokenizer, prefix=""):
function load_and_cache_examples (line 402) | def load_and_cache_examples(args, tokenizer, evaluate=False, output_exam...
function main (line 472) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/question-answering/run_squad_trainer.py
class ModelArguments (line 43) | class ModelArguments:
function main (line 66) | def main():
function _mp_fn (line 180) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_camembert.py
function fill_mask (line 7) | def fill_mask(masked_input, model, tokenizer, topk=5):
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_chinese_ref.py
function _is_chinese_char (line 11) | def _is_chinese_char(cp):
function is_chinese (line 36) | def is_chinese(word: str):
function get_chinese_word (line 45) | def get_chinese_word(tokens: List[str]):
function add_sub_symbol (line 56) | def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
function prepare_ref (line 80) | def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: Be...
function main (line 117) | def main(args):
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_language_modeling.py
class ModelArguments (line 62) | class ModelArguments:
class DataTrainingArguments (line 93) | class DataTrainingArguments:
function get_dataset (line 162) | def get_dataset(
function main (line 198) | def main():
function _mp_fn (line 368) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_openai_gpt.py
function accuracy (line 59) | def accuracy(out, labels):
function load_rocstories_dataset (line 64) | def load_rocstories_dataset(dataset_path):
function pre_process_datasets (line 75) | def pre_process_datasets(encoded_datasets, input_len, cap_length, start_...
function main (line 106) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_swag.py
class SwagExample (line 55) | class SwagExample(object):
method __init__ (line 58) | def __init__(self, swag_id, context_sentence, start_ending, ending_0, ...
method __str__ (line 70) | def __str__(self):
method __repr__ (line 73) | def __repr__(self):
class InputFeatures (line 90) | class InputFeatures(object):
method __init__ (line 91) | def __init__(self, example_id, choices_features, label):
function read_swag_examples (line 100) | def read_swag_examples(input_file, is_training=True):
function convert_examples_to_features (line 126) | def convert_examples_to_features(examples, tokenizer, max_seq_length, is...
function _truncate_seq_pair (line 198) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function accuracy (line 215) | def accuracy(out, labels):
function select_field (line 220) | def select_field(features, field):
function set_seed (line 224) | def set_seed(args):
function load_and_cache_examples (line 232) | def load_and_cache_examples(args, tokenizer, evaluate=False, output_exam...
function train (line 277) | def train(args, train_dataset, model, tokenizer):
function evaluate (line 415) | def evaluate(args, model, tokenizer, prefix=""):
function main (line 475) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/run_transfo_xl.py
function main (line 40) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/convert_model_to_fp16.py
function convert (line 23) | def convert(src_path: str, map_location: str = "cpu", save_path: Union[s...
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/download_wmt.py
function download_wmt_dataset (line 22) | def download_wmt_dataset(src_lang="ro", tgt_lang="en", dataset="wmt16", ...
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/finetune_trainer.py
class ModelArguments (line 56) | class ModelArguments:
class DataTrainingArguments (line 79) | class DataTrainingArguments:
function handle_metrics (line 141) | def handle_metrics(split, metrics, output_dir):
function main (line 157) | def main():
function _mp_fn (line 369) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/minify_dataset.py
function minify (line 21) | def minify(src_dir: str, dest_dir: str, n: int):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_calculate_rouge.py
function test_disaggregated_scores_are_determinstic (line 52) | def test_disaggregated_scores_are_determinstic():
function test_newline_cnn_improvement (line 62) | def test_newline_cnn_improvement():
function test_newline_irrelevant_for_other_metrics (line 69) | def test_newline_irrelevant_for_other_metrics():
function test_single_sent_scores_dont_depend_on_newline_sep (line 76) | def test_single_sent_scores_dont_depend_on_newline_sep():
function test_pegasus_newline (line 89) | def test_pegasus_newline():
function test_rouge_cli (line 102) | def test_rouge_cli():
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_datasets.py
function _dump_articles (line 41) | def _dump_articles(path: Path, articles: list):
function make_test_data_dir (line 46) | def make_test_data_dir(tmp_dir):
class TestAll (line 53) | class TestAll(TestCasePlus):
method test_seq2seq_dataset_truncation (line 64) | def test_seq2seq_dataset_truncation(self, tok_name):
method test_legacy_dataset_truncation (line 103) | def test_legacy_dataset_truncation(self, tok):
method test_pack_dataset (line 127) | def test_pack_dataset(self):
method test_dynamic_batch_size (line 145) | def test_dynamic_batch_size(self):
method test_sortish_sampler_reduces_padding (line 169) | def test_sortish_sampler_reduces_padding(self):
method _get_dataset (line 186) | def _get_dataset(self, n_obs=1000, max_len=128):
method test_distributed_sortish_sampler_splits_indices_between_procs (line 208) | def test_distributed_sortish_sampler_splits_indices_between_procs(self):
method test_dataset_kwargs (line 223) | def test_dataset_kwargs(self, tok_name):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_fsmt_bleu_score.py
class ModelEvalTester (line 33) | class ModelEvalTester(unittest.TestCase):
method get_tokenizer (line 34) | def get_tokenizer(self, mname):
method get_model (line 37) | def get_model(self, mname):
method test_bleu_scores (line 52) | def test_bleu_scores(self, pair, min_bleu_score):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_seq2seq_examples.py
function _dump_articles (line 33) | def _dump_articles(path: Path, articles: list):
class TestTheRest (line 47) | class TestTheRest(TestCasePlus):
method run_eval_tester (line 48) | def run_eval_tester(self, model):
method test_run_eval (line 75) | def test_run_eval(self):
method test_run_eval_slow (line 81) | def test_run_eval_slow(self, model):
method test_run_eval_search (line 87) | def test_run_eval_search(self, model):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_seq2seq_examples_multi_gpu.py
class TestSummarizationDistillerMultiGPU (line 25) | class TestSummarizationDistillerMultiGPU(TestCasePlus):
method setUpClass (line 27) | def setUpClass(cls):
method test_distributed_eval (line 32) | def test_distributed_eval(self):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/old_test_tatoeba_conversion.py
class TatoebaConversionTester (line 25) | class TatoebaConversionTester(unittest.TestCase):
method resolver (line 27) | def resolver(self):
method test_resolver (line 32) | def test_resolver(self):
method test_model_card (line 36) | def test_model_card(self):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/pack_dataset.py
function pack_examples (line 31) | def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
function pack_data_dir (line 58) | def pack_data_dir(tok, data_dir: Path, max_tokens, save_path):
function packer_cli (line 75) | def packer_cli():
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/rouge_cli.py
function calculate_rouge_path (line 20) | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/run_distributed_eval.py
function eval_data_dir (line 46) | def eval_data_dir(
function run_generate (line 119) | def run_generate():
function combine_partial_results (line 230) | def combine_partial_results(partial_results) -> List:
function gather_results_from_each_node (line 240) | def gather_results_from_each_node(num_replicas, save_dir, timeout) -> Li...
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/run_eval.py
function generate_summaries_or_translations (line 38) | def generate_summaries_or_translations(
function datetime_now (line 82) | def datetime_now():
function run_generate (line 86) | def run_generate(verbose=True):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/run_eval_search.py
function parse_search_arg (line 35) | def parse_search_arg(search):
function run_search (line 44) | def run_search():
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/save_len_file.py
function save_len_file (line 24) | def save_len_file(
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/save_randomly_initialized_model.py
function save_randomly_initialized_version (line 21) | def save_randomly_initialized_version(config_name: str, save_dir: str, *...
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/sentence_splitter.py
function add_newline_to_end_of_each_sentence (line 31) | def add_newline_to_end_of_each_sentence(x: str) -> str:
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/seq2seq_trainer.py
class Seq2SeqTrainer (line 50) | class Seq2SeqTrainer(Trainer):
method __init__ (line 51) | def __init__(self, config=None, data_args=None, *args, **kwargs):
method create_optimizer_and_scheduler (line 86) | def create_optimizer_and_scheduler(self, num_training_steps: int):
method _get_lr_scheduler (line 123) | def _get_lr_scheduler(self, num_training_steps):
method _get_train_sampler (line 135) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
method _compute_loss (line 153) | def _compute_loss(self, model, inputs, labels):
method compute_loss (line 169) | def compute_loss(self, model, inputs):
method prediction_step (line 174) | def prediction_step(
method _pad_tensors_to_max_len (line 236) | def _pad_tensors_to_max_len(self, tensor, max_length):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/seq2seq_training_args.py
class Seq2SeqTrainingArguments (line 28) | class Seq2SeqTrainingArguments(TrainingArguments):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py
function get_all_data (line 18) | def get_all_data(pairs, n_objs):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/utils.py
function label_smoothed_nll_loss (line 49) | def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=-100):
function lmap (line 70) | def lmap(f: Callable, x: Iterable) -> List:
function calculate_bleu (line 75) | def calculate_bleu(output_lns, refs_lns, **kwargs) -> dict:
function build_compute_metrics_fn (line 80) | def build_compute_metrics_fn(task_name: str, tokenizer: PreTrainedTokeni...
function trim_batch (line 112) | def trim_batch(
class AbstractSeq2SeqDataset (line 125) | class AbstractSeq2SeqDataset(Dataset):
method __init__ (line 126) | def __init__(
method __len__ (line 159) | def __len__(self):
method get_char_lens (line 163) | def get_char_lens(data_file):
method tgt_lens (line 167) | def tgt_lens(self):
method make_sortish_sampler (line 171) | def make_sortish_sampler(self, batch_size, distributed=False, shuffle=...
method make_dynamic_sampler (line 177) | def make_dynamic_sampler(self, max_tokens_per_batch=1024, **kwargs):
method __getitem__ (line 202) | def __getitem__(self, item):
method collate_fn (line 205) | def collate_fn(self, batch):
class LegacySeq2SeqDataset (line 209) | class LegacySeq2SeqDataset(AbstractSeq2SeqDataset):
method __getitem__ (line 210) | def __getitem__(self, index) -> Dict[str, torch.Tensor]:
method encode_line (line 229) | def encode_line(self, tokenizer, line, max_length, pad_to_max_length=T...
method collate_fn (line 240) | def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
class Seq2SeqDataset (line 255) | class Seq2SeqDataset(AbstractSeq2SeqDataset):
method __getitem__ (line 258) | def __getitem__(self, index) -> Dict[str, str]:
method collate_fn (line 266) | def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
class Seq2SeqDataCollator (line 280) | class Seq2SeqDataCollator:
method __init__ (line 281) | def __init__(self, tokenizer, data_args, decoder_start_token_id, tpu_n...
method __call__ (line 296) | def __call__(self, batch) -> Dict[str, torch.Tensor]:
method _shift_right_t5 (line 325) | def _shift_right_t5(self, input_ids):
method _encode (line 332) | def _encode(self, batch) -> Dict[str, torch.Tensor]:
class SortishSampler (line 345) | class SortishSampler(Sampler):
method __init__ (line 348) | def __init__(self, data, batch_size, shuffle=True):
method __len__ (line 351) | def __len__(self) -> int:
method __iter__ (line 354) | def __iter__(self):
function sortish_sampler_indices (line 358) | def sortish_sampler_indices(data: List, bs: int, shuffle=True) -> np.array:
class DistributedSortishSampler (line 379) | class DistributedSortishSampler(Sampler):
method __init__ (line 382) | def __init__(self, dataset, batch_size, num_replicas=None, rank=None, ...
method __iter__ (line 405) | def __iter__(self) -> Iterable:
method available_indices (line 416) | def available_indices(self) -> np.array:
method __len__ (line 425) | def __len__(self):
method set_epoch (line 428) | def set_epoch(self, epoch):
function use_task_specific_params (line 435) | def use_task_specific_params(model, task):
function pickle_load (line 446) | def pickle_load(path):
function pickle_save (line 452) | def pickle_save(obj, path):
function flatten_list (line 458) | def flatten_list(summary_ids: List[List]):
function save_git_info (line 462) | def save_git_info(folder_path: str) -> None:
function save_json (line 468) | def save_json(content, path, indent=4, **json_dump_kwargs):
function load_json (line 473) | def load_json(path):
function get_git_info (line 478) | def get_git_info():
function extract_rouge_mid_statistics (line 500) | def extract_rouge_mid_statistics(dct):
function calculate_rouge (line 508) | def calculate_rouge(
function freeze_params (line 559) | def freeze_params(model: nn.Module):
function freeze_embeds (line 565) | def freeze_embeds(model):
function grad_status (line 584) | def grad_status(model: nn.Module) -> Iterable:
function any_requires_grad (line 588) | def any_requires_grad(model: nn.Module) -> bool:
function assert_all_frozen (line 592) | def assert_all_frozen(model):
function assert_not_all_frozen (line 599) | def assert_not_all_frozen(model):
function parse_numeric_n_bool_cl_kwargs (line 605) | def parse_numeric_n_bool_cl_kwargs(unparsed_args: List[str]) -> Dict[str...
function write_txt_file (line 630) | def write_txt_file(ordered_tgt, path):
function chunks (line 637) | def chunks(lst, n):
function check_output_dir (line 643) | def check_output_dir(args, expected_items=0):
FILE: mplsandbox_for_rl/transformers/examples/legacy/seq2seq/xla_spawn.py
function parse_args (line 34) | def parse_args():
function main (line 66) | def main():
FILE: mplsandbox_for_rl/transformers/examples/legacy/token-classification/run_ner.py
class ModelArguments (line 49) | class ModelArguments:
class DataTrainingArguments (line 76) | class DataTrainingArguments:
function main (line 102) | def main():
function _mp_fn (line 319) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/legacy/token-classification/tasks.py
class NER (line 12) | class NER(TokenClassificationTask):
method __init__ (line 13) | def __init__(self, label_idx=-1):
method read_examples_from_file (line 17) | def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -...
method write_predictions_to_file (line 45) | def write_predictions_to_file(self, writer: TextIO, test_input_reader:...
method get_labels (line 58) | def get_labels(self, path: str) -> List[str]:
class Chunk (line 69) | class Chunk(NER):
method __init__ (line 70) | def __init__(self):
method get_labels (line 74) | def get_labels(self, path: str) -> List[str]:
class POS (line 107) | class POS(TokenClassificationTask):
method read_examples_from_file (line 108) | def read_examples_from_file(self, data_dir, mode: Union[Split, str]) -...
method write_predictions_to_file (line 128) | def write_predictions_to_file(self, writer: TextIO, test_input_reader:...
method get_labels (line 139) | def get_labels(self, path: str) -> List[str]:
FILE: mplsandbox_for_rl/transformers/examples/legacy/token-classification/utils_ner.py
class InputExample (line 33) | class InputExample:
class InputFeatures (line 50) | class InputFeatures:
class Split (line 62) | class Split(Enum):
class TokenClassificationTask (line 68) | class TokenClassificationTask:
method read_examples_from_file (line 70) | def read_examples_from_file(data_dir, mode: Union[Split, str]) -> List...
method get_labels (line 74) | def get_labels(path: str) -> List[str]:
method convert_examples_to_features (line 78) | def convert_examples_to_features(
class TokenClassificationDataset (line 211) | class TokenClassificationDataset(Dataset):
method __init__ (line 222) | def __init__(
method __len__ (line 270) | def __len__(self):
method __getitem__ (line 273) | def __getitem__(self, i) -> InputFeatures:
class TFTokenClassificationDataset (line 280) | class TFTokenClassificationDataset:
method __init__ (line 291) | def __init__(
method get_dataset (line 362) | def get_dataset(self):
method __len__ (line 367) | def __len__(self):
method __getitem__ (line 370) | def __getitem__(self, i) -> InputFeatures:
FILE: mplsandbox_for_rl/transformers/examples/pytorch/audio-classification/run_audio_classification.py
function random_subsample (line 53) | def random_subsample(wav: np.ndarray, max_length: float, sample_rate: in...
class DataTrainingArguments (line 63) | class DataTrainingArguments:
class ModelArguments (line 127) | class ModelArguments:
method __post_init__ (line 182) | def __post_init__(self):
function main (line 198) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/conftest.py
function pytest_addoption (line 34) | def pytest_addoption(parser):
function pytest_terminal_summary (line 40) | def pytest_terminal_summary(terminalreporter):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/contrastive-image-text/run_clip.py
class ModelArguments (line 63) | class ModelArguments:
class DataTrainingArguments (line 117) | class DataTrainingArguments:
method __post_init__ (line 183) | def __post_init__(self):
class Transform (line 205) | class Transform(torch.nn.Module):
method __init__ (line 206) | def __init__(self, image_size, mean, std):
method forward (line 215) | def forward(self, x) -> torch.Tensor:
function collate_fn (line 222) | def collate_fn(examples):
function main (line 234) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/image-classification/run_image_classification.py
function pil_loader (line 67) | def pil_loader(path: str):
class DataTrainingArguments (line 74) | class DataTrainingArguments:
method __post_init__ (line 122) | def __post_init__(self):
class ModelArguments (line 130) | class ModelArguments:
function main (line 179) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/image-classification/run_image_classification_no_trainer.py
function parse_args (line 59) | def parse_args():
function main (line 222) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/image-pretraining/run_mae.py
class DataTrainingArguments (line 52) | class DataTrainingArguments:
method __post_init__ (line 103) | def __post_init__(self):
class ModelArguments (line 113) | class ModelArguments:
class CustomTrainingArguments (line 164) | class CustomTrainingArguments(TrainingArguments):
function collate_fn (line 170) | def collate_fn(examples):
function main (line 175) | def main():
function _mp_fn (line 402) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/image-pretraining/run_mim.py
class DataTrainingArguments (line 60) | class DataTrainingArguments:
method __post_init__ (line 106) | def __post_init__(self):
class ModelArguments (line 116) | class ModelArguments:
class MaskGenerator (line 197) | class MaskGenerator:
method __init__ (line 205) | def __init__(self, input_size=192, mask_patch_size=32, model_patch_siz...
method __call__ (line 222) | def __call__(self):
function collate_fn (line 233) | def collate_fn(examples):
function main (line 239) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/image-pretraining/run_mim_no_trainer.py
function parse_args (line 64) | def parse_args():
class MaskGenerator (line 334) | class MaskGenerator:
method __init__ (line 342) | def __init__(self, input_size=192, mask_patch_size=32, model_patch_siz...
method __call__ (line 359) | def __call__(self):
function collate_fn (line 370) | def collate_fn(examples):
function main (line 376) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/instance-segmentation/run_instance_segmentation.py
class Arguments (line 55) | class Arguments:
function augment_and_transform_batch (line 103) | def augment_and_transform_batch(
function collate_fn (line 144) | def collate_fn(examples):
class ModelOutput (line 155) | class ModelOutput:
function nested_cpu (line 160) | def nested_cpu(tensors):
class Evaluator (line 171) | class Evaluator:
method __init__ (line 176) | def __init__(
method get_metric (line 196) | def get_metric(self):
method reset_metric (line 200) | def reset_metric(self):
method postprocess_target_batch (line 203) | def postprocess_target_batch(self, target_batch) -> List[Dict[str, tor...
method get_target_sizes (line 217) | def get_target_sizes(self, post_processed_targets) -> List[List[int]]:
method postprocess_prediction_batch (line 223) | def postprocess_prediction_batch(self, prediction_batch, target_sizes)...
method __call__ (line 254) | def __call__(self, evaluation_results: EvalPrediction, compute_result:...
function setup_logging (line 300) | def setup_logging(training_args: TrainingArguments) -> None:
function find_last_checkpoint (line 320) | def find_last_checkpoint(training_args: TrainingArguments) -> Optional[s...
function main (line 342) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py
function parse_args (line 60) | def parse_args():
function augment_and_transform_batch (line 221) | def augment_and_transform_batch(
function collate_fn (line 262) | def collate_fn(examples):
function nested_cpu (line 272) | def nested_cpu(tensors):
function evaluation_loop (line 283) | def evaluation_loop(model, image_processor, accelerator: Accelerator, da...
function setup_logging (line 358) | def setup_logging(accelerator: Accelerator) -> None:
function handle_repository_creation (line 376) | def handle_repository_creation(accelerator: Accelerator, args: argparse....
function main (line 402) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_clm.py
class ModelArguments (line 70) | class ModelArguments:
method __post_init__ (line 153) | def __post_init__(self):
class DataTrainingArguments (line 161) | class DataTrainingArguments:
method __post_init__ (line 223) | def __post_init__(self):
function main (line 238) | def main():
function _mp_fn (line 651) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_clm_no_trainer.py
function parse_args (line 70) | def parse_args():
function main (line 262) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_fim.py
class ModelArguments (line 73) | class ModelArguments:
method __post_init__ (line 168) | def __post_init__(self):
class DataTrainingArguments (line 176) | class DataTrainingArguments:
method __post_init__ (line 290) | def __post_init__(self):
function main (line 305) | def main():
function _mp_fn (line 858) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_fim_no_trainer.py
function parse_args (line 73) | def parse_args():
function main (line 324) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_mlm.py
class ModelArguments (line 67) | class ModelArguments:
method __post_init__ (line 150) | def __post_init__(self):
class DataTrainingArguments (line 158) | class DataTrainingArguments:
method __post_init__ (line 232) | def __post_init__(self):
function main (line 249) | def main():
function _mp_fn (line 685) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_mlm_no_trainer.py
function parse_args (line 68) | def parse_args():
function main (line 269) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/language-modeling/run_plm.py
class ModelArguments (line 58) | class ModelArguments:
method __post_init__ (line 117) | def __post_init__(self):
class DataTrainingArguments (line 125) | class DataTrainingArguments:
method __post_init__ (line 217) | def __post_init__(self):
function main (line 229) | def main():
function _mp_fn (line 578) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/multiple-choice/run_swag.py
class ModelArguments (line 56) | class ModelArguments:
class DataTrainingArguments (line 104) | class DataTrainingArguments:
method __post_init__ (line 159) | def __post_init__(self):
class DataCollatorForMultipleChoice (line 169) | class DataCollatorForMultipleChoice:
method __call__ (line 200) | def __call__(self, features):
function main (line 225) | def main():
function _mp_fn (line 494) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/multiple-choice/run_swag_no_trainer.py
function parse_args (line 67) | def parse_args():
class DataCollatorForMultipleChoice (line 230) | class DataCollatorForMultipleChoice:
method __call__ (line 261) | def __call__(self, features):
function main (line 286) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/object-detection/run_object_detection.py
class ModelOutput (line 57) | class ModelOutput:
function format_image_annotations_as_coco (line 62) | def format_image_annotations_as_coco(
function convert_bbox_yolo_to_pascal (line 97) | def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[i...
function augment_and_transform_batch (line 119) | def augment_and_transform_batch(
function collate_fn (line 151) | def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Te...
function compute_metrics (line 161) | def compute_metrics(
class DataTrainingArguments (line 233) | class DataTrainingArguments:
class ModelArguments (line 277) | class ModelArguments:
function main (line 324) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/object-detection/run_object_detection_no_trainer.py
function format_image_annotations_as_coco (line 63) | def format_image_annotations_as_coco(
function convert_bbox_yolo_to_pascal (line 99) | def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[i...
function augment_and_transform_batch (line 122) | def augment_and_transform_batch(
function collate_fn (line 155) | def collate_fn(batch: List[BatchFeature]) -> Mapping[str, Union[torch.Te...
function nested_to_cpu (line 164) | def nested_to_cpu(objects):
function evaluation_loop (line 177) | def evaluation_loop(
function parse_args (line 228) | def parse_args():
function main (line 393) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/old_test_xla_examples.py
function get_results (line 32) | def get_results(output_dir):
class TorchXLAExamplesTests (line 48) | class TorchXLAExamplesTests(TestCasePlus):
method test_run_glue (line 49) | def test_run_glue(self):
method test_trainer_tpu (line 85) | def test_trainer_tpu(self):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/run_qa.py
class ModelArguments (line 61) | class ModelArguments:
class DataTrainingArguments (line 105) | class DataTrainingArguments:
method __post_init__ (line 208) | def __post_init__(self):
function main (line 228) | def main():
function _mp_fn (line 709) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/run_qa_beam_search.py
class ModelArguments (line 59) | class ModelArguments:
class DataTrainingArguments (line 93) | class DataTrainingArguments:
method __post_init__ (line 206) | def __post_init__(self):
function main (line 226) | def main():
function _mp_fn (line 736) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
function save_prefixed_metrics (line 66) | def save_prefixed_metrics(results, output_dir, file_name: str = "all_res...
function parse_args (line 89) | def parse_args():
function main (line 301) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/run_qa_no_trainer.py
function save_prefixed_metrics (line 70) | def save_prefixed_metrics(results, output_dir, file_name: str = "all_res...
function parse_args (line 93) | def parse_args():
function main (line 339) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/run_seq2seq_qa.py
class ModelArguments (line 57) | class ModelArguments:
class DataTrainingArguments (line 105) | class DataTrainingArguments:
method __post_init__ (line 246) | def __post_init__(self):
function main (line 273) | def main():
function _mp_fn (line 737) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/trainer_qa.py
class QuestionAnsweringTrainer (line 31) | class QuestionAnsweringTrainer(Trainer):
method __init__ (line 32) | def __init__(self, *args, eval_examples=None, post_process_function=No...
method evaluate (line 37) | def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=...
method predict (line 94) | def predict(self, predict_dataset, predict_examples, ignore_keys=None,...
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/trainer_seq2seq_qa.py
class QuestionAnsweringSeq2SeqTrainer (line 34) | class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
method __init__ (line 35) | def __init__(self, *args, eval_examples=None, post_process_function=No...
method evaluate (line 41) | def evaluate(
method predict (line 117) | def predict(
FILE: mplsandbox_for_rl/transformers/examples/pytorch/question-answering/utils_qa.py
function postprocess_qa_predictions (line 32) | def postprocess_qa_predictions(
function postprocess_qa_predictions_with_beam_search (line 253) | def postprocess_qa_predictions_with_beam_search(
FILE: mplsandbox_for_rl/transformers/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
function reduce_labels_transform (line 59) | def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
class DataTrainingArguments (line 75) | class DataTrainingArguments:
method __post_init__ (line 121) | def __post_init__(self):
class ModelArguments (line 135) | class ModelArguments:
function main (line 176) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
function reduce_labels_transform (line 60) | def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
function parse_args (line 75) | def parse_args():
function main (line 241) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
function parse_args (line 52) | def parse_args():
class DataCollatorForWav2Vec2Pretraining (line 289) | class DataCollatorForWav2Vec2Pretraining:
method __call__ (line 332) | def __call__(self, features: List[Dict[str, Union[List[int], torch.Ten...
function multiply_grads (line 377) | def multiply_grads(params, c):
function get_grad_norm (line 386) | def get_grad_norm(params, scale=1):
function main (line 397) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
function list_field (line 61) | def list_field(default=None, metadata=None):
class ModelArguments (line 66) | class ModelArguments:
class DataTrainingArguments (line 151) | class DataTrainingArguments:
class DataCollatorCTCWithPadding (line 290) | class DataCollatorCTCWithPadding:
method __call__ (line 321) | def __call__(self, features: List[Dict[str, Union[List[int], torch.Ten...
function create_vocabulary_from_data (line 353) | def create_vocabulary_from_data(
function main (line 395) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
function list_field (line 64) | def list_field(default=None, metadata=None):
class ModelArguments (line 69) | class ModelArguments:
class DataTrainingArguments (line 130) | class DataTrainingArguments:
class DataCollatorCTCWithPadding (line 273) | class DataCollatorCTCWithPadding:
method __call__ (line 303) | def __call__(self, features: List[Dict[str, Union[List[int], torch.Ten...
function create_vocabulary_from_data (line 333) | def create_vocabulary_from_data(
function main (line 375) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
class ModelArguments (line 59) | class ModelArguments:
class DataTrainingArguments (line 135) | class DataTrainingArguments:
class DataCollatorSpeechSeq2SeqWithPadding (line 234) | class DataCollatorSpeechSeq2SeqWithPadding:
method __call__ (line 250) | def __call__(self, features: List[Dict[str, Union[List[int], torch.Ten...
function main (line 277) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/summarization/run_summarization.py
class ModelArguments (line 76) | class ModelArguments:
class DataTrainingArguments (line 133) | class DataTrainingArguments:
method __post_init__ (line 274) | def __post_init__(self):
function main (line 312) | def main():
function _mp_fn (line 767) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/summarization/run_summarization_no_trainer.py
function parse_args (line 93) | def parse_args():
function main (line 324) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/test_accelerate_examples.py
function get_setup_file (line 43) | def get_setup_file():
function get_results (line 50) | def get_results(output_dir):
class ExamplesTestsNoTrainer (line 65) | class ExamplesTestsNoTrainer(TestCasePlus):
method setUpClass (line 67) | def setUpClass(cls):
method tearDownClass (line 75) | def tearDownClass(cls):
method test_run_glue_no_trainer (line 79) | def test_run_glue_no_trainer(self):
method test_run_clm_no_trainer (line 104) | def test_run_clm_no_trainer(self):
method test_run_mlm_no_trainer (line 132) | def test_run_mlm_no_trainer(self):
method test_run_ner_no_trainer (line 152) | def test_run_ner_no_trainer(self):
method test_run_squad_no_trainer (line 180) | def test_run_squad_no_trainer(self):
method test_run_swag_no_trainer (line 208) | def test_run_swag_no_trainer(self):
method test_run_summarization_no_trainer (line 231) | def test_run_summarization_no_trainer(self):
method test_run_translation_no_trainer (line 259) | def test_run_translation_no_trainer(self):
method test_run_semantic_segmentation_no_trainer (line 288) | def test_run_semantic_segmentation_no_trainer(self):
method test_run_image_classification_no_trainer (line 310) | def test_run_image_classification_no_trainer(self):
method test_run_object_detection_no_trainer (line 338) | def test_run_object_detection_no_trainer(self):
method test_run_instance_segmentation_no_trainer (line 362) | def test_run_instance_segmentation_no_trainer(self):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/test_pytorch_examples.py
function get_results (line 86) | def get_results(output_dir):
class ExamplesTests (line 101) | class ExamplesTests(TestCasePlus):
method test_run_glue (line 102) | def test_run_glue(self):
method test_run_clm (line 130) | def test_run_clm(self):
method test_run_clm_config_overrides (line 159) | def test_run_clm_config_overrides(self):
method test_run_mlm (line 184) | def test_run_mlm(self):
method test_run_ner (line 207) | def test_run_ner(self):
method test_run_squad (line 238) | def test_run_squad(self):
method test_run_squad_seq2seq (line 263) | def test_run_squad_seq2seq(self):
method test_run_swag (line 292) | def test_run_swag(self):
method test_generation (line 315) | def test_generation(self):
method test_run_summarization (line 330) | def test_run_summarization(self):
method test_run_translation (line 358) | def test_run_translation(self):
method test_run_image_classification (line 387) | def test_run_image_classification(self):
method test_run_speech_recognition_ctc (line 418) | def test_run_speech_recognition_ctc(self):
method test_run_speech_recognition_ctc_adapter (line 449) | def test_run_speech_recognition_ctc_adapter(self):
method test_run_speech_recognition_seq2seq (line 482) | def test_run_speech_recognition_seq2seq(self):
method test_run_audio_classification (line 513) | def test_run_audio_classification(self):
method test_run_wav2vec2_pretraining (line 546) | def test_run_wav2vec2_pretraining(self):
method test_run_vit_mae_pretraining (line 570) | def test_run_vit_mae_pretraining(self):
method test_run_semantic_segmentation (line 599) | def test_run_semantic_segmentation(self):
method test_run_object_detection (line 625) | def test_run_object_detection(self):
method test_run_instance_segmentation (line 653) | def test_run_instance_segmentation(self):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-classification/run_classification.py
class DataTrainingArguments (line 59) | class DataTrainingArguments:
method __post_init__ (line 194) | def __post_init__(self):
class ModelArguments (line 208) | class ModelArguments:
function get_label_list (line 259) | def get_label_list(raw_dataset, split="train") -> List[str]:
function main (line 272) | def main():
function _mp_fn (line 747) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-classification/run_glue.py
class DataTrainingArguments (line 71) | class DataTrainingArguments:
method __post_init__ (line 146) | def __post_init__(self):
class ModelArguments (line 165) | class ModelArguments:
function main (line 216) | def main():
function _mp_fn (line 631) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-classification/run_glue_no_trainer.py
function parse_args (line 71) | def parse_args():
function main (line 221) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-classification/run_xnli.py
class DataTrainingArguments (line 59) | class DataTrainingArguments:
class ModelArguments (line 119) | class ModelArguments:
function main (line 180) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-generation/run_generation.py
function prepare_ctrl_input (line 95) | def prepare_ctrl_input(args, _, tokenizer, prompt_text):
function prepare_xlm_input (line 105) | def prepare_xlm_input(args, model, tokenizer, prompt_text):
function prepare_xlnet_input (line 131) | def prepare_xlnet_input(args, _, tokenizer, prompt_text):
function prepare_transfoxl_input (line 137) | def prepare_transfoxl_input(args, _, tokenizer, prompt_text):
function adjust_length_to_model (line 151) | def adjust_length_to_model(length, max_sequence_length):
function sparse_model_config (line 161) | def sparse_model_config(model_config):
function generate_past_key_values (line 190) | def generate_past_key_values(model, batch_size, seq_len):
function prepare_jit_inputs (line 219) | def prepare_jit_inputs(inputs, model, tokenizer):
class _ModelFallbackWrapper (line 237) | class _ModelFallbackWrapper(GenerationMixin):
method __init__ (line 240) | def __init__(self, optimized, default):
method __call__ (line 244) | def __call__(self, *args, **kwargs):
method __getattr__ (line 263) | def __getattr__(self, item):
method prepare_inputs_for_generation (line 266) | def prepare_inputs_for_generation(
method _reorder_cache (line 273) | def _reorder_cache(
function main (line 284) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/text-generation/run_generation_contrastive_search.py
function main (line 39) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/token-classification/run_ner.py
class ModelArguments (line 60) | class ModelArguments:
class DataTrainingArguments (line 108) | class DataTrainingArguments:
method __post_init__ (line 204) | def __post_init__(self):
function main (line 217) | def main():
function _mp_fn (line 645) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/token-classification/run_ner_no_trainer.py
function parse_args (line 69) | def parse_args():
function main (line 273) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/translation/run_translation.py
class ModelArguments (line 66) | class ModelArguments:
class DataTrainingArguments (line 114) | class DataTrainingArguments:
method __post_init__ (line 241) | def __post_init__(self):
function main (line 261) | def main():
function _mp_fn (line 691) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/pytorch/translation/run_translation_no_trainer.py
function parse_args (line 71) | def parse_args():
function main (line 315) | def main():
FILE: mplsandbox_for_rl/transformers/examples/pytorch/xla_spawn.py
function parse_args (line 34) | def parse_args():
function main (line 66) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/adversarial/run_hans.py
class ModelArguments (line 45) | class ModelArguments:
class DataTrainingArguments (line 66) | class DataTrainingArguments:
function hans_data_collator (line 91) | def hans_data_collator(features: List[InputFeatures]) -> Dict[str, torch...
function main (line 100) | def main():
function _mp_fn (line 236) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/adversarial/utils_hans.py
class InputExample (line 42) | class InputExample:
class InputFeatures (line 65) | class InputFeatures:
class HansDataset (line 93) | class HansDataset(Dataset):
method __init__ (line 101) | def __init__(
method __len__ (line 152) | def __len__(self):
method __getitem__ (line 155) | def __getitem__(self, i) -> InputFeatures:
method get_labels (line 158) | def get_labels(self):
class TFHansDataset (line 165) | class TFHansDataset:
method __init__ (line 173) | def __init__(
method get_dataset (line 235) | def get_dataset(self):
method __len__ (line 238) | def __len__(self):
method __getitem__ (line 241) | def __getitem__(self, i) -> InputFeatures:
method get_labels (line 244) | def get_labels(self):
class HansProcessor (line 248) | class HansProcessor(DataProcessor):
method get_train_examples (line 251) | def get_train_examples(self, data_dir):
method get_dev_examples (line 255) | def get_dev_examples(self, data_dir):
method get_labels (line 259) | def get_labels(self):
method _create_examples (line 267) | def _create_examples(self, lines, set_type):
function hans_convert_examples_to_features (line 282) | def hans_convert_examples_to_features(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py
class AlbertTransformerWithPabee (line 36) | class AlbertTransformerWithPabee(AlbertTransformer):
method adaptive_forward (line 37) | def adaptive_forward(self, hidden_states, current_layer, attention_mas...
class AlbertModelWithPabee (line 62) | class AlbertModelWithPabee(AlbertModel):
method __init__ (line 63) | def __init__(self, config):
method set_regression_threshold (line 75) | def set_regression_threshold(self, threshold):
method set_patience (line 78) | def set_patience(self, patience):
method reset_stats (line 81) | def reset_stats(self):
method log_stats (line 85) | def log_stats(self):
method forward (line 94) | def forward(
class AlbertForSequenceClassificationWithPabee (line 223) | class AlbertForSequenceClassificationWithPabee(AlbertPreTrainedModel):
method __init__ (line 224) | def __init__(self, config):
method forward (line 237) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py
class BertEncoderWithPabee (line 37) | class BertEncoderWithPabee(BertEncoder):
method adaptive_forward (line 38) | def adaptive_forward(self, hidden_states, current_layer, attention_mas...
class BertModelWithPabee (line 50) | class BertModelWithPabee(BertModel):
method __init__ (line 67) | def __init__(self, config):
method set_regression_threshold (line 79) | def set_regression_threshold(self, threshold):
method set_patience (line 82) | def set_patience(self, patience):
method reset_stats (line 85) | def reset_stats(self):
method log_stats (line 89) | def log_stats(self):
method forward (line 98) | def forward(
class BertForSequenceClassificationWithPabee (line 246) | class BertForSequenceClassificationWithPabee(BertPreTrainedModel):
method __init__ (line 247) | def __init__(self, config):
method forward (line 260) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
function set_seed (line 65) | def set_seed(args):
function train (line 73) | def train(args, train_dataset, model, tokenizer):
function evaluate (line 263) | def evaluate(args, model, tokenizer, prefix="", patience=0):
function load_and_cache_examples (line 353) | def load_and_cache_examples(args, task, tokenizer, evaluate=False):
function main (line 408) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py
function get_setup_file (line 16) | def get_setup_file():
class PabeeTests (line 23) | class PabeeTests(TestCasePlus):
method test_run_glue (line 24) | def test_run_glue(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/configuration_bertabs.py
class BertAbsConfig (line 31) | class BertAbsConfig(PretrainedConfig):
method __init__ (line 67) | def __init__(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/convert_bertabs_original_pytorch_checkpoint.py
function convert_bertabs_checkpoints (line 64) | def convert_bertabs_checkpoints(path_to_checkpoints, dump_path):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/modeling_bertabs.py
class BertAbsPreTrainedModel (line 37) | class BertAbsPreTrainedModel(PreTrainedModel):
class BertAbs (line 43) | class BertAbs(BertAbsPreTrainedModel):
method __init__ (line 44) | def __init__(self, args, checkpoint=None, bert_extractive_checkpoint=N...
method init_weights (line 88) | def init_weights(self):
method forward (line 103) | def forward(
class Bert (line 122) | class Bert(nn.Module):
method __init__ (line 125) | def __init__(self):
method forward (line 130) | def forward(self, input_ids, attention_mask=None, token_type_ids=None,...
class TransformerDecoder (line 139) | class TransformerDecoder(nn.Module):
method __init__ (line 154) | def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddin...
method forward (line 173) | def forward(
method init_decoder_state (line 249) | def init_decoder_state(self, src, memory_bank, with_cache=False):
class PositionalEncoding (line 257) | class PositionalEncoding(nn.Module):
method __init__ (line 258) | def __init__(self, dropout, dim, max_len=5000):
method forward (line 270) | def forward(self, emb, step=None):
method get_emb (line 280) | def get_emb(self, emb):
class TransformerDecoderLayer (line 284) | class TransformerDecoderLayer(nn.Module):
method __init__ (line 296) | def __init__(self, d_model, heads, d_ff, dropout):
method forward (line 311) | def forward(
method _get_attn_subsequent_mask (line 368) | def _get_attn_subsequent_mask(self, size):
class MultiHeadedAttention (line 386) | class MultiHeadedAttention(nn.Module):
method __init__ (line 428) | def __init__(self, head_count, model_dim, dropout=0.1, use_final_linea...
method forward (line 445) | def forward(
class DecoderState (line 560) | class DecoderState:
method detach (line 569) | def detach(self):
method beam_update (line 574) | def beam_update(self, idx, positions, beam_size):
method map_batch_fn (line 586) | def map_batch_fn(self, fn):
class TransformerDecoderState (line 590) | class TransformerDecoderState(DecoderState):
method __init__ (line 593) | def __init__(self, src):
method _all (line 605) | def _all(self):
method detach (line 614) | def detach(self):
method update_state (line 621) | def update_state(self, new_input, previous_layer_inputs):
method _init_cache (line 627) | def _init_cache(self, memory_bank, num_layers):
method repeat_beam_size_times (line 636) | def repeat_beam_size_times(self, beam_size):
method map_batch_fn (line 640) | def map_batch_fn(self, fn):
function gelu (line 654) | def gelu(x):
class PositionwiseFeedForward (line 658) | class PositionwiseFeedForward(nn.Module):
method __init__ (line 668) | def __init__(self, d_model, d_ff, dropout=0.1):
method forward (line 677) | def forward(self, x):
function build_predictor (line 690) | def build_predictor(args, tokenizer, symbols, model, logger=None):
class GNMTGlobalScorer (line 697) | class GNMTGlobalScorer:
method __init__ (line 707) | def __init__(self, alpha, length_penalty):
method score (line 712) | def score(self, beam, logprobs):
class PenaltyBuilder (line 720) | class PenaltyBuilder:
method __init__ (line 729) | def __init__(self, length_pen):
method length_penalty (line 732) | def length_penalty(self):
method length_wu (line 744) | def length_wu(self, beam, logprobs, alpha=0.0):
method length_average (line 753) | def length_average(self, beam, logprobs, alpha=0.0):
method length_none (line 759) | def length_none(self, beam, logprobs, alpha=0.0, beta=0.0):
class Translator (line 766) | class Translator:
method __init__ (line 784) | def __init__(self, args, model, vocab, symbols, global_scorer=None, lo...
method translate (line 800) | def translate(self, batch, step, attn_debug=False):
method translate_batch (line 808) | def translate_batch(self, batch, fast=False):
method _fast_translate_batch (line 823) | def _fast_translate_batch(self, batch, max_length, min_length=0):
method from_batch (line 956) | def from_batch(self, translation_batch):
function tile (line 982) | def tile(x, count, dim=0):
class BertSumOptimizer (line 1005) | class BertSumOptimizer:
method __init__ (line 1016) | def __init__(self, model, lr, warmup_steps, beta_1=0.99, beta_2=0.999,...
method _update_rate (line 1040) | def _update_rate(self, stack):
method zero_grad (line 1043) | def zero_grad(self):
method step (line 1047) | def step(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/run_summarization.py
function evaluate (line 31) | def evaluate(args):
function save_summaries (line 100) | def save_summaries(summaries, path, original_document_name):
function format_summary (line 126) | def format_summary(translation):
function format_rouge_scores (line 145) | def format_rouge_scores(scores):
function save_rouge_scores (line 175) | def save_rouge_scores(str_scores):
function build_data_iterator (line 185) | def build_data_iterator(args, tokenizer):
function load_and_cache_examples (line 202) | def load_and_cache_examples(args, tokenizer):
function collate (line 207) | def collate(data, tokenizer, block_size, device):
function decode_summary (line 237) | def decode_summary(summary_tokens, tokenizer):
function main (line 248) | def main():
function documents_dir_is_valid (line 335) | def documents_dir_is_valid(path):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/test_utils_summarization.py
class SummarizationDataProcessingTest (line 23) | class SummarizationDataProcessingTest(unittest.TestCase):
method setUp (line 24) | def setUp(self):
method test_fit_to_block_sequence_too_small (line 27) | def test_fit_to_block_sequence_too_small(self):
method test_fit_to_block_sequence_fit_exactly (line 33) | def test_fit_to_block_sequence_fit_exactly(self):
method test_fit_to_block_sequence_too_big (line 39) | def test_fit_to_block_sequence_too_big(self):
method test_process_story_no_highlights (line 45) | def test_process_story_no_highlights(self):
method test_process_empty_story (line 53) | def test_process_empty_story(self):
method test_process_story_with_missing_period (line 60) | def test_process_story_with_missing_period(self):
method test_build_mask_no_padding (line 77) | def test_build_mask_no_padding(self):
method test_build_mask (line 82) | def test_build_mask(self):
method test_build_mask_with_padding_equal_to_one (line 87) | def test_build_mask_with_padding_equal_to_one(self):
method test_compute_token_type_ids (line 92) | def test_compute_token_type_ids(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertabs/utils_summarization.py
class CNNDMDataset (line 13) | class CNNDMDataset(Dataset):
method __init__ (line 33) | def __init__(self, path="", prefix="train"):
method __len__ (line 49) | def __len__(self):
method __getitem__ (line 53) | def __getitem__(self, idx):
function process_story (line 62) | def process_story(raw_story):
function _add_missing_period (line 96) | def _add_missing_period(line):
function truncate_or_pad (line 110) | def truncate_or_pad(sequence, block_size, pad_token_id):
function build_mask (line 121) | def build_mask(sequence, pad_token_id):
function encode_for_summarization (line 130) | def encode_for_summarization(story_lines, summary_lines, tokenizer):
function compute_token_type_ids (line 143) | def compute_token_type_ids(batch, separator_token_id):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertology/run_bertology.py
function entropy (line 53) | def entropy(p):
function print_2d_tensor (line 60) | def print_2d_tensor(tensor):
function compute_heads_importance (line 70) | def compute_heads_importance(
function mask_heads (line 156) | def mask_heads(args, model, eval_dataloader):
function prune_heads (line 207) | def prune_heads(args, model, eval_dataloader, head_mask):
function main (line 254) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/bertology/run_prune_gpt.py
function save_model (line 23) | def save_model(model, dirpath):
function entropy (line 39) | def entropy(p, unlogit=False):
function print_2d_tensor (line 49) | def print_2d_tensor(tensor):
function compute_heads_importance (line 59) | def compute_heads_importance(
function mask_heads (line 133) | def mask_heads(args, model, eval_dataloader):
function prune_heads (line 183) | def prune_heads(args, model, eval_dataloader, head_mask):
function main (line 236) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
function get_args (line 19) | def get_args():
function compute_metrics (line 38) | def compute_metrics(eval_pred):
class CustomCallback (line 44) | class CustomCallback(TrainerCallback):
method __init__ (line 45) | def __init__(self, trainer) -> None:
method on_epoch_end (line 49) | def on_epoch_end(self, args, state, control, **kwargs):
function main (line 56) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/arguments.py
class TrainingArguments (line 6) | class TrainingArguments:
class EvaluationArguments (line 57) | class EvaluationArguments:
class HumanEvalArguments (line 77) | class HumanEvalArguments:
class PreprocessingArguments (line 120) | class PreprocessingArguments:
class TokenizerTrainingArguments (line 169) | class TokenizerTrainingArguments:
class PretokenizationArguments (line 190) | class PretokenizationArguments:
class InitializationArguments (line 208) | class InitializationArguments:
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/bpe_training.py
function batch_iterator (line 10) | def batch_iterator(batch_size=10):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/codeparrot_training.py
class ConstantLengthDataset (line 23) | class ConstantLengthDataset(IterableDataset):
method __init__ (line 36) | def __init__(
method __iter__ (line 62) | def __iter__(self):
method shuffle (line 94) | def shuffle(self, buffer_size=1000):
function setup_logging (line 98) | def setup_logging(args):
function create_dataloaders (line 124) | def create_dataloaders(args):
function get_grouped_params (line 141) | def get_grouped_params(model, args, no_decay=["bias", "ln_1.weight", "ln...
function log_metrics (line 154) | def log_metrics(step, metrics):
function compute_tflops (line 160) | def compute_tflops(elapsed_time, accelerator, args):
function evaluate (line 175) | def evaluate(args):
function get_lr (line 239) | def get_lr():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/human_eval.py
class TokenizedDataset (line 23) | class TokenizedDataset(IterableDataset):
method __init__ (line 29) | def __init__(self, tokenizer, dataset, n_tasks=None, n_copies=1):
method __iter__ (line 35) | def __iter__(self):
class EndOfFunctionCriteria (line 50) | class EndOfFunctionCriteria(StoppingCriteria):
method __init__ (line 53) | def __init__(self, start_length, eof_strings, tokenizer):
method __call__ (line 58) | def __call__(self, input_ids, scores, **kwargs):
function remove_last_block (line 67) | def remove_last_block(string):
function complete_code (line 74) | def complete_code(accelerator, model, tokenizer, dataloader, n_tasks, ba...
function main (line 140) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/minhash_deduplication.py
function get_min_hash (line 20) | def get_min_hash(tokens: List[str]) -> Optional[MinHash]:
function get_tokens (line 30) | def get_tokens(code: str) -> Set[str]:
class DuplicationIndex (line 35) | class DuplicationIndex:
method __init__ (line 36) | def __init__(
method add (line 47) | def add(self, code_key: Tuple, min_hash: MinHash) -> None:
method get_duplicate_clusters (line 72) | def get_duplicate_clusters(self) -> List[List[Dict]]:
method save (line 89) | def save(self, filepath) -> None:
function _compute_min_hash (line 95) | def _compute_min_hash(element):
function minhash_iter (line 102) | def minhash_iter(dataset_iterator: Type[Dataset]):
function make_duplicate_clusters (line 113) | def make_duplicate_clusters(dataset_iterator: Type[Dataset], jaccard_thr...
function jaccard_similarity (line 129) | def jaccard_similarity(code1: str, code2: str) -> float:
function _find_cluster_extremes_shared (line 139) | def _find_cluster_extremes_shared(cluster, jaccard_threshold):
function find_extremes (line 173) | def find_extremes(cluster_list, dataset, jaccard_threshold):
function deduplicate_dataset (line 209) | def deduplicate_dataset(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/preprocessing.py
function get_hash (line 22) | def get_hash(example):
function line_stats (line 27) | def line_stats(example):
function alpha_stats (line 33) | def alpha_stats(example):
function check_uniques (line 39) | def check_uniques(example, uniques):
function is_autogenerated (line 48) | def is_autogenerated(example, scan_width=5):
function is_config_or_test (line 60) | def is_config_or_test(example, scan_width=5, coeff=0.05):
function has_no_keywords (line 86) | def has_no_keywords(example):
function has_few_assignments (line 97) | def has_few_assignments(example, minimum=4):
function char_token_ratio (line 108) | def char_token_ratio(example):
function preprocess (line 115) | def preprocess(example):
function filter (line 129) | def filter(example, uniques, args):
function compress_file (line 153) | def compress_file(file_path):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/pretokenizing.py
function tokenize (line 10) | def tokenize(example):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/tests/test_deduplicate.py
function get_dataset (line 7) | def get_dataset():
class MakeDuplicateClustersTest (line 17) | class MakeDuplicateClustersTest(TestCase):
method test_make_duplicate_clusters (line 18) | def test_make_duplicate_clusters(self):
method test_deduplicate_dataset (line 23) | def test_deduplicate_dataset(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/codeparrot/scripts/validation_loss.py
class ConstantLengthDataset (line 13) | class ConstantLengthDataset(IterableDataset):
method __init__ (line 14) | def __init__(self, tokenizer, dataset, seq_length=1024, num_of_sequenc...
method __iter__ (line 21) | def __iter__(self):
function create_dataloader (line 45) | def create_dataloader(args):
function evaluate (line 53) | def evaluate(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/decision_transformer/run_decision_transformer.py
function get_action (line 12) | def get_action(model, states, actions, rewards, returns_to_go, timesteps):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/deebert/run_glue_deebert.py
function set_seed (line 51) | def set_seed(args):
function get_wanted_result (line 59) | def get_wanted_result(result):
function train (line 73) | def train(args, train_dataset, model, tokenizer, train_highway=False):
function evaluate (line 240) | def evaluate(args, model, tokenizer, prefix="", output_layer=-1, eval_hi...
function load_and_cache_examples (line 335) | def load_and_cache_examples(args, task, tokenizer, evaluate=False):
function main (line 395) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/deebert/src/modeling_highway_bert.py
function entropy (line 16) | def entropy(x):
class DeeBertEncoder (line 24) | class DeeBertEncoder(nn.Module):
method __init__ (line 25) | def __init__(self, config):
method set_early_exit_entropy (line 34) | def set_early_exit_entropy(self, x):
method init_highway_pooler (line 41) | def init_highway_pooler(self, pooler):
method forward (line 47) | def forward(
class DeeBertModel (line 109) | class DeeBertModel(BertPreTrainedModel):
method __init__ (line 110) | def __init__(self, config):
method init_highway_pooler (line 120) | def init_highway_pooler(self):
method get_input_embeddings (line 123) | def get_input_embeddings(self):
method set_input_embeddings (line 126) | def set_input_embeddings(self, value):
method _prune_heads (line 129) | def _prune_heads(self, heads_to_prune):
method forward (line 138) | def forward(
class HighwayException (line 239) | class HighwayException(Exception):
method __init__ (line 240) | def __init__(self, message, exit_layer):
class BertHighway (line 245) | class BertHighway(nn.Module):
method __init__ (line 250) | def __init__(self, config):
method forward (line 256) | def forward(self, encoder_outputs):
class DeeBertForSequenceClassification (line 280) | class DeeBertForSequenceClassification(BertPreTrainedModel):
method __init__ (line 281) | def __init__(self, config):
method forward (line 293) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/deebert/src/modeling_highway_roberta.py
class DeeRobertaModel (line 21) | class DeeRobertaModel(DeeBertModel):
method __init__ (line 25) | def __init__(self, config):
class DeeRobertaForSequenceClassification (line 37) | class DeeRobertaForSequenceClassification(BertPreTrainedModel):
method __init__ (line 41) | def __init__(self, config):
method forward (line 51) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/deebert/test_glue_deebert.py
function get_setup_file (line 16) | def get_setup_file():
class DeeBertTests (line 23) | class DeeBertTests(TestCasePlus):
method setup (line 24) | def setup(self) -> None:
method run_and_check (line 28) | def run_and_check(self, args):
method test_glue_deebert_train (line 48) | def test_glue_deebert_train(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/distiller.py
class Distiller (line 43) | class Distiller:
method __init__ (line 44) | def __init__(
method prepare_batch_mlm (line 189) | def prepare_batch_mlm(self, batch):
method prepare_batch_clm (line 254) | def prepare_batch_clm(self, batch):
method round_batch (line 283) | def round_batch(self, x: torch.tensor, lengths: torch.tensor):
method train (line 330) | def train(self):
method step (line 372) | def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, ...
method optimize (line 466) | def optimize(self, loss):
method iter (line 500) | def iter(self):
method log_tensorboard (line 513) | def log_tensorboard(self):
method end_epoch (line 574) | def end_epoch(self):
method save_checkpoint (line 592) | def save_checkpoint(self, checkpoint_name: str = "checkpoint.pth"):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/grouped_batch_sampler.py
function _quantize (line 27) | def _quantize(x, bins):
function create_lengths_groups (line 34) | def create_lengths_groups(lengths, k=0):
class GroupedBatchSampler (line 45) | class GroupedBatchSampler(BatchSampler):
method __init__ (line 60) | def __init__(self, sampler, group_ids, batch_size):
method __iter__ (line 69) | def __iter__(self):
method __len__ (line 104) | def __len__(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/lm_seqs_dataset.py
class LmSeqsDataset (line 26) | class LmSeqsDataset(Dataset):
method __init__ (line 37) | def __init__(self, params, data):
method __getitem__ (line 50) | def __getitem__(self, index):
method __len__ (line 53) | def __len__(self):
method check (line 56) | def check(self):
method remove_long_sequences (line 63) | def remove_long_sequences(self):
method remove_empty_sequences (line 103) | def remove_empty_sequences(self):
method remove_unknown_sequences (line 114) | def remove_unknown_sequences(self):
method print_statistics (line 130) | def print_statistics(self):
method batch_sequences (line 145) | def batch_sequences(self, batch):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/run_squad_w_distillation.py
function set_seed (line 81) | def set_seed(args):
function to_list (line 89) | def to_list(tensor):
function train (line 93) | def train(args, train_dataset, model, tokenizer, teacher=None):
function evaluate (line 308) | def evaluate(args, model, tokenizer, prefix=""):
function load_and_cache_examples (line 427) | def load_and_cache_examples(args, tokenizer, evaluate=False, output_exam...
function main (line 489) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/scripts/binarized_data.py
function main (line 36) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/train.py
function sanity_checks (line 56) | def sanity_checks(args):
function freeze_pos_embeddings (line 86) | def freeze_pos_embeddings(student, args):
function freeze_token_type_embeddings (line 93) | def freeze_token_type_embeddings(student, args):
function main (line 98) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/distillation/utils.py
function git_log (line 37) | def git_log(folder_path: str):
function init_gpu_params (line 52) | def init_gpu_params(params):
function set_seed (line 127) | def set_seed(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/fsner/src/fsner/model.py
class FSNERModel (line 6) | class FSNERModel(torch.nn.Module):
method __init__ (line 13) | def __init__(self, pretrained_model_name_or_path="sayef/fsner-bert-bas...
method BERT (line 20) | def BERT(self, **inputs):
method VectorSum (line 23) | def VectorSum(self, token_embeddings):
method Atten (line 26) | def Atten(self, q_rep, S_rep, T=1):
method forward (line 29) | def forward(self, W_query, W_supports):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
class FSNERTokenizerUtils (line 6) | class FSNERTokenizerUtils:
method __init__ (line 7) | def __init__(self, pretrained_model_name_or_path):
method tokenize (line 10) | def tokenize(self, x):
method extract_entity_from_scores (line 57) | def extract_entity_from_scores(self, query, W_query, p_start, p_end, t...
FILE: mplsandbox_for_rl/transformers/examples/research_projects/information-gain-filtration/igf/igf.py
function set_seed (line 21) | def set_seed(seed):
function compute_perplexity (line 35) | def compute_perplexity(model, test_data, context_len):
function load_gpt2 (line 72) | def load_gpt2(model_name="openai-community/gpt2"):
function recopy_gpt2 (line 89) | def recopy_gpt2(orig_model, device, max_steps):
function intermittent_save (line 121) | def intermittent_save(contexts, real_perps, past_perps, filename):
function collect_objective_set (line 143) | def collect_objective_set(
function generate_datasets (line 222) | def generate_datasets(
function train_secondary_learner (line 269) | def train_secondary_learner(
class SecondaryLearner (line 364) | class SecondaryLearner(nn.Module):
method __init__ (line 369) | def __init__(self, model):
method forward (line 383) | def forward(self, context):
method from_pretrained (line 399) | def from_pretrained(cls, state_path, model):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/information-gain-filtration/run_clm_igf.py
function generate_n_pairs (line 49) | def generate_n_pairs(
function training_secondary_learner (line 100) | def training_secondary_learner(
function finetune (line 145) | def finetune(
function main (line 268) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/big_bird/bigbird_flax.py
class FlaxBigBirdForNaturalQuestionsModule (line 23) | class FlaxBigBirdForNaturalQuestionsModule(FlaxBigBirdForQuestionAnsweri...
method setup (line 34) | def setup(self):
method __call__ (line 38) | def __call__(self, *args, **kwargs):
class FlaxBigBirdForNaturalQuestions (line 44) | class FlaxBigBirdForNaturalQuestions(FlaxBigBirdForQuestionAnswering):
function calculate_loss_for_nq (line 48) | def calculate_loss_for_nq(start_logits, start_labels, end_logits, end_la...
class Args (line 71) | class Args:
method __post_init__ (line 93) | def __post_init__(self):
class DataCollator (line 100) | class DataCollator:
method __call__ (line 104) | def __call__(self, batch):
method collate_fn (line 109) | def collate_fn(self, features):
method fetch_inputs (line 120) | def fetch_inputs(self, input_ids: list):
method _fetch_inputs (line 124) | def _fetch_inputs(self, input_ids: list):
function get_batched_dataset (line 132) | def get_batched_dataset(dataset, batch_size, seed=None):
function train_step (line 141) | def train_step(state, drp_rng, **model_inputs):
function val_step (line 170) | def val_step(state, **model_inputs):
class TrainState (line 183) | class TrainState(train_state.TrainState):
class Trainer (line 188) | class Trainer:
method create_state (line 197) | def create_state(self, model, tx, num_train_steps, ckpt_dir=None):
method train (line 229) | def train(self, state, tr_dataset, val_dataset):
method evaluate (line 262) | def evaluate(self, state, dataset):
method save_checkpoint (line 274) | def save_checkpoint(self, save_dir, state):
function restore_checkpoint (line 287) | def restore_checkpoint(save_dir, state):
function scheduler_fn (line 306) | def scheduler_fn(lr, init_lr, warmup_steps, num_train_steps):
function build_tx (line 314) | def build_tx(lr, init_lr, warmup_steps, num_train_steps, weight_decay):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/big_bird/evaluate.py
function get_sub_answers (line 13) | def get_sub_answers(answers, begin=0, end=None):
function expand_to_aliases (line 17) | def expand_to_aliases(given_answers, make_sub_answers=False):
function get_best_valid_start_end_idx (line 32) | def get_best_valid_start_end_idx(start_scores, end_scores, top_k=1, max_...
function format_dataset (line 44) | def format_dataset(sample):
function main (line 90) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
function _get_single_answer (line 15) | def _get_single_answer(example):
function get_context_and_ans (line 59) | def get_context_and_ans(example, assertion=False):
function get_strided_contexts_and_ans (line 137) | def get_strided_contexts_and_ans(example, tokenizer, doc_stride=2048, ma...
function prepare_inputs (line 270) | def prepare_inputs(example, tokenizer, doc_stride=2048, max_length=4096,...
function save_to_disk (line 282) | def save_to_disk(hf_data, file_name):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
class ModelArguments (line 71) | class ModelArguments:
class DataTrainingArguments (line 113) | class DataTrainingArguments:
method __post_init__ (line 184) | def __post_init__(self):
class FlaxDataCollatorForLanguageModeling (line 197) | class FlaxDataCollatorForLanguageModeling:
method __post_init__ (line 219) | def __post_init__(self):
method __call__ (line 226) | def __call__(self, examples: List[Dict[str, np.ndarray]]) -> Dict[str,...
method mask_tokens (line 238) | def mask_tokens(
function generate_batch_splits (line 268) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int) -> n...
function advance_iter_and_group_samples (line 279) | def advance_iter_and_group_samples(train_iterator, num_samples, max_seq_...
function write_train_metric (line 310) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 320) | def write_eval_metric(summary_writer, eval_metrics, step):
function tokenize_function (line 410) | def tokenize_function(examples):
function decay_mask_fn (line 475) | def decay_mask_fn(params):
function train_step (line 494) | def train_step(state, batch, dropout_rng):
function eval_step (line 526) | def eval_step(params, batch):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/hybrid_clip/configuration_hybrid_clip.py
class HybridCLIPConfig (line 10) | class HybridCLIPConfig(PretrainedConfig):
method __init__ (line 57) | def __init__(self, projection_dim=512, **kwargs):
method from_text_vision_configs (line 89) | def from_text_vision_configs(cls, text_config: PretrainedConfig, visio...
method to_dict (line 100) | def to_dict(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/hybrid_clip/modeling_hybrid_clip.py
class FlaxHybridCLIPModule (line 33) | class FlaxHybridCLIPModule(nn.Module):
method setup (line 37) | def setup(self):
method __call__ (line 65) | def __call__(
class FlaxHybridCLIP (line 126) | class FlaxHybridCLIP(FlaxPreTrainedModel):
method __init__ (line 130) | def __init__(
method init_weights (line 144) | def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple, pa...
method __call__ (line 158) | def __call__(
method get_text_features (line 206) | def get_text_features(
method get_image_features (line 269) | def get_image_features(
method from_text_vision_pretrained (line 304) | def from_text_vision_pretrained(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py
class ModelArguments (line 73) | class ModelArguments:
class DataTrainingArguments (line 123) | class DataTrainingArguments:
method __post_init__ (line 171) | def __post_init__(self):
class Transform (line 185) | class Transform(torch.nn.Module):
method __init__ (line 186) | def __init__(self, image_size):
method forward (line 195) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class ImageTextDataset (line 201) | class ImageTextDataset(VisionDataset):
method __init__ (line 219) | def __init__(
method _load_image (line 241) | def _load_image(self, idx: int):
method _load_target (line 245) | def _load_target(self, idx):
method __getitem__ (line 248) | def __getitem__(self, index: int):
method __len__ (line 257) | def __len__(self) -> int:
class TrainState (line 261) | class TrainState(train_state.TrainState):
method replicate (line 264) | def replicate(self):
function write_metric (line 268) | def write_metric(summary_writer, train_metrics, eval_metrics, train_time...
function create_learning_rate_fn (line 281) | def create_learning_rate_fn(
function main (line 295) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/model_parallel/partitions.py
function _match (line 34) | def _match(qs, ks):
function _replacement_rules (line 45) | def _replacement_rules(rules):
function _get_partition_rules (line 57) | def _get_partition_rules():
function set_partitions (line 79) | def set_partitions(in_dict):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
class ModelArguments (line 64) | class ModelArguments:
class DataTrainingArguments (line 106) | class DataTrainingArguments:
method __post_init__ (line 164) | def __post_init__(self):
function data_loader (line 176) | def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: i...
function write_train_metric (line 197) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 207) | def write_eval_metric(summary_writer, eval_metrics, step):
function create_learning_rate_fn (line 212) | def create_learning_rate_fn(
function main (line 226) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/jax-projects/wav2vec2/run_wav2vec2_pretrain_flax.py
class ModelArguments (line 36) | class ModelArguments:
class DataTrainingArguments (line 76) | class DataTrainingArguments:
class FlaxDataCollatorForWav2Vec2Pretraining (line 137) | class FlaxDataCollatorForWav2Vec2Pretraining:
method __call__ (line 171) | def __call__(self, features: List[Dict[str, Union[List[int], np.ndarra...
function configure_logger (line 213) | def configure_logger(model_args: ModelArguments, training_args: Training...
function write_train_metric (line 225) | def write_train_metric(summary_writer, train_metrics, train_time, step):
function write_eval_metric (line 235) | def write_eval_metric(summary_writer, eval_metrics, step):
function generate_batch_splits (line 240) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int) -> n...
function compute_contrastive_loss (line 251) | def compute_contrastive_loss(
function main (line 283) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/layoutlmv3/run_funsd_cord.py
class ModelArguments (line 57) | class ModelArguments:
class DataTrainingArguments (line 92) | class DataTrainingArguments:
method __post_init__ (line 179) | def __post_init__(self):
function main (line 192) | def main():
function _mp_fn (line 527) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/longform-qa/eli5_app.py
function load_models (line 24) | def load_models():
function load_indexes (line 45) | def load_indexes():
function load_train_data (line 65) | def load_train_data():
function find_nearest_training (line 81) | def find_nearest_training(question, n_results=10):
function make_support (line 88) | def make_support(question, source="wiki40b", method="dense", n_results=10):
function answer_question (line 116) | def answer_question(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/longform-qa/eli5_utils.py
function make_es_index_snippets (line 28) | def make_es_index_snippets(es_client, passages_dset, index_name="english...
function query_es_index (line 62) | def query_es_index(question, es_client, index_name="english_wiki_kilt_sn...
class ELI5DatasetQARetriver (line 93) | class ELI5DatasetQARetriver(Dataset):
method __init__ (line 94) | def __init__(self, examples_array, extra_answer_threshold=3, min_answe...
method __len__ (line 101) | def __len__(self):
method make_example (line 104) | def make_example(self, idx):
method __getitem__ (line 116) | def __getitem__(self, idx):
class RetrievalQAEmbedder (line 120) | class RetrievalQAEmbedder(nn.Module):
method __init__ (line 121) | def __init__(self, sent_encoder, dim):
method embed_sentences_checkpointed (line 129) | def embed_sentences_checkpointed(self, input_ids, attention_mask, chec...
method embed_questions (line 167) | def embed_questions(self, q_ids, q_mask, checkpoint_batch_size=-1):
method embed_answers (line 171) | def embed_answers(self, a_ids, a_mask, checkpoint_batch_size=-1):
method forward (line 175) | def forward(self, q_ids, q_mask, a_ids, a_mask, checkpoint_batch_size=...
function make_qa_retriever_model (line 186) | def make_qa_retriever_model(model_name="google/bert_uncased_L-8_H-512_A-...
function make_qa_retriever_batch (line 202) | def make_qa_retriever_batch(qa_list, tokenizer, max_len=64, device="cuda...
function train_qa_retriever_epoch (line 218) | def train_qa_retriever_epoch(model, dataset, tokenizer, optimizer, sched...
function train_qa_retriever_joint_epoch (line 257) | def train_qa_retriever_joint_epoch(model, dataset_list, tokenizer, optim...
function evaluate_qa_retriever (line 300) | def evaluate_qa_retriever(model, dataset, tokenizer, args):
function train_qa_retriever (line 318) | def train_qa_retriever(qar_model, qar_tokenizer, qar_train_dset, qar_val...
class ELI5DatasetS2S (line 341) | class ELI5DatasetS2S(Dataset):
method __init__ (line 342) | def __init__(
method __len__ (line 361) | def __len__(self):
method make_example (line 364) | def make_example(self, idx):
method __getitem__ (line 380) | def __getitem__(self, idx):
function make_qa_s2s_model (line 384) | def make_qa_s2s_model(model_name="facebook/bart-large", from_file=None, ...
function make_qa_s2s_batch (line 393) | def make_qa_s2s_batch(qa_list, tokenizer, max_len=64, max_a_len=360, dev...
function train_qa_s2s_epoch (line 417) | def train_qa_s2s_epoch(model, dataset, tokenizer, optimizer, scheduler, ...
function eval_qa_s2s_epoch (line 459) | def eval_qa_s2s_epoch(model, dataset, tokenizer, args):
function train_qa_s2s (line 495) | def train_qa_s2s(qa_s2s_model, qa_s2s_tokenizer, s2s_train_dset, s2s_val...
function qa_s2s_generate (line 524) | def qa_s2s_generate(
function embed_passages_for_retrieval (line 568) | def embed_passages_for_retrieval(passages, tokenizer, qa_embedder, max_l...
function embed_questions_for_retrieval (line 579) | def embed_questions_for_retrieval(q_ls, tokenizer, qa_embedder, device="...
function make_qa_dense_index (line 590) | def make_qa_dense_index(
function evaluate_retriever (line 611) | def evaluate_retriever(qa_list, retriever_func, scoring_func, n_ret=10, ...
function query_qa_dense_index (line 630) | def query_qa_dense_index(
function batch_query_qa_dense_index (line 644) | def batch_query_qa_dense_index(questions, qa_embedder, tokenizer, wiki_p...
function query_qa_dense_index_nn (line 661) | def query_qa_dense_index_nn(passage, qa_embedder, tokenizer, wiki_passag...
function batch_query_qa_dense_index_nn (line 674) | def batch_query_qa_dense_index_nn(passages, qa_embedder, tokenizer, wiki...
FILE: mplsandbox_for_rl/transformers/examples/research_projects/luke/luke_utils.py
function padding_tensor (line 12) | def padding_tensor(sequences, padding_value, padding_side, sequence_leng...
function is_punctuation (line 33) | def is_punctuation(char):
class DataCollatorForLukeTokenClassification (line 44) | class DataCollatorForLukeTokenClassification(DataCollatorMixin):
method torch_call (line 81) | def torch_call(self, features):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/luke/run_luke_ner_no_trainer.py
function parse_args (line 55) | def parse_args():
function main (line 229) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/lxmert/extracting_data.py
class Extract (line 43) | class Extract:
method __init__ (line 44) | def __init__(self, argv=sys.argv[1:]):
method _vqa_file_split (line 78) | def _vqa_file_split(self, file):
method file_generator (line 84) | def file_generator(self):
method __call__ (line 98) | def __call__(self):
function tryload (line 128) | def tryload(stream):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/lxmert/modeling_frcnn.py
function norm_box (line 37) | def norm_box(boxes, raw_sizes):
function pad_list_tensors (line 47) | def pad_list_tensors(
function do_nms (line 116) | def do_nms(boxes, scores, image_shape, score_thresh, nms_thresh, mind, m...
function _clip_box (line 143) | def _clip_box(tensor, box_size: Tuple[int, int]):
function _nonempty_boxes (line 152) | def _nonempty_boxes(box, threshold: float = 0.0) -> torch.Tensor:
function get_norm (line 159) | def get_norm(norm, out_channels):
function _create_grid_offsets (line 172) | def _create_grid_offsets(size: List[int], stride: int, offset: float, de...
function build_backbone (line 195) | def build_backbone(cfg):
function find_top_rpn_proposals (line 255) | def find_top_rpn_proposals(
function subsample_labels (line 335) | def subsample_labels(labels, num_samples, positive_fraction, bg_label):
function add_ground_truth_to_proposals (line 360) | def add_ground_truth_to_proposals(gt_boxes, proposals):
function add_ground_truth_to_proposals_single_image (line 364) | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
function _fmt_box_list (line 368) | def _fmt_box_list(box_tensor, batch_index: int):
function convert_boxes_to_pooler_format (line 378) | def convert_boxes_to_pooler_format(box_lists: List[torch.Tensor]):
function assign_boxes_to_levels (line 386) | def assign_boxes_to_levels(
class _NewEmptyTensorOp (line 403) | class _NewEmptyTensorOp(torch.autograd.Function):
method forward (line 405) | def forward(ctx, x, new_shape):
method backward (line 410) | def backward(ctx, grad):
class ShapeSpec (line 415) | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width",...
method __new__ (line 416) | def __new__(cls, *, channels=None, height=None, width=None, stride=None):
class Box2BoxTransform (line 420) | class Box2BoxTransform:
method __init__ (line 427) | def __init__(self, weights: Tuple[float, float, float, float], scale_c...
method get_deltas (line 449) | def get_deltas(self, src_boxes, target_boxes):
method apply_deltas (line 483) | def apply_deltas(self, deltas, boxes):
class Matcher (line 522) | class Matcher:
method __init__ (line 536) | def __init__(
method __call__ (line 565) | def __call__(self, match_quality_matrix):
method set_low_quality_matches_ (line 603) | def set_low_quality_matches_(self, match_labels, match_quality_matrix):
class RPNOutputs (line 625) | class RPNOutputs:
method __init__ (line 626) | def __init__(
method _get_ground_truth (line 668) | def _get_ground_truth(self):
method predict_proposals (line 671) | def predict_proposals(self):
method predict_objectness_logits (line 688) | def predict_objectness_logits(self):
class Conv2d (line 702) | class Conv2d(nn.Conv2d):
method __init__ (line 703) | def __init__(self, *args, **kwargs):
method forward (line 711) | def forward(self, x):
class LastLevelMaxPool (line 742) | class LastLevelMaxPool(nn.Module):
method __init__ (line 747) | def __init__(self):
method forward (line 752) | def forward(self, x):
class LastLevelP6P7 (line 756) | class LastLevelP6P7(nn.Module):
method __init__ (line 761) | def __init__(self, in_channels, out_channels):
method forward (line 768) | def forward(self, c5):
class BasicStem (line 774) | class BasicStem(nn.Module):
method __init__ (line 775) | def __init__(self, in_channels=3, out_channels=64, norm="BN", caffe_ma...
method forward (line 789) | def forward(self, x):
method out_channels (line 799) | def out_channels(self):
method stride (line 803) | def stride(self):
class ResNetBlockBase (line 807) | class ResNetBlockBase(nn.Module):
method __init__ (line 808) | def __init__(self, in_channels, out_channels, stride):
method freeze (line 814) | def freeze(self):
class BottleneckBlock (line 820) | class BottleneckBlock(ResNetBlockBase):
method __init__ (line 821) | def __init__(
method forward (line 880) | def forward(self, x):
class Backbone (line 899) | class Backbone(nn.Module, metaclass=ABCMeta):
method __init__ (line 900) | def __init__(self):
method forward (line 904) | def forward(self):
method size_divisibility (line 908) | def size_divisibility(self):
method output_shape (line 916) | def output_shape(self):
method out_features (line 926) | def out_features(self):
method out_feature_strides (line 931) | def out_feature_strides(self):
method out_feature_channels (line 936) | def out_feature_channels(self):
class ResNet (line 941) | class ResNet(Backbone):
method __init__ (line 942) | def __init__(self, stem, stages, num_classes=None, out_features=None):
method forward (line 991) | def forward(self, x):
method output_shape (line 1007) | def output_shape(self):
method make_stage (line 1017) | def make_stage(
class ROIPooler (line 1054) | class ROIPooler(nn.Module):
method __init__ (line 1060) | def __init__(
method forward (line 1091) | def forward(self, feature_maps, boxes):
class ROIOutputs (line 1135) | class ROIOutputs:
method __init__ (line 1136) | def __init__(self, cfg, training=False):
method _predict_boxes (line 1149) | def _predict_boxes(self, proposals, box_deltas, preds_per_image):
method _predict_objs (line 1159) | def _predict_objs(self, obj_logits, preds_per_image):
method _predict_attrs (line 1164) | def _predict_attrs(self, attr_logits, preds_per_image):
method inference (line 1170) | def inference(
method training (line 1223) | def training(self, obj_logits, attr_logits, box_deltas, pred_boxes, fe...
method __call__ (line 1226) | def __call__(
class Res5ROIHeads (line 1249) | class Res5ROIHeads(nn.Module):
method __init__ (line 1256) | def __init__(self, cfg, input_shape):
method _build_res5_block (line 1309) | def _build_res5_block(self, cfg):
method _shared_roi_transform (line 1331) | def _shared_roi_transform(self, features, boxes):
method forward (line 1335) | def forward(self, features, proposal_boxes, gt_boxes=None):
class AnchorGenerator (line 1350) | class AnchorGenerator(nn.Module):
method __init__ (line 1355) | def __init__(self, cfg, input_shape: List[ShapeSpec]):
method _calculate_anchors (line 1375) | def _calculate_anchors(self, sizes, aspect_ratios):
method box_dim (line 1390) | def box_dim(self):
method num_cell_anchors (line 1394) | def num_cell_anchors(self):
method grid_anchors (line 1401) | def grid_anchors(self, grid_sizes):
method generate_cell_anchors (line 1411) | def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_...
method forward (line 1429) | def forward(self, features):
class RPNHead (line 1443) | class RPNHead(nn.Module):
method __init__ (line 1451) | def __init__(self, cfg, input_shape: List[ShapeSpec]):
method forward (line 1483) | def forward(self, features):
class RPN (line 1497) | class RPN(nn.Module):
method __init__ (line 1502) | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
method training (line 1532) | def training(self, images, image_shapes, features, gt_boxes):
method inference (line 1535) | def inference(self, outputs, images, image_shapes, features, gt_boxes=...
method forward (line 1558) | def forward(self, images, image_shapes, features, gt_boxes=None):
class FastRCNNOutputLayers (line 1591) | class FastRCNNOutputLayers(nn.Module):
method __init__ (line 1598) | def __init__(
method forward (line 1641) | def forward(self, roi_features):
class GeneralizedRCNN (line 1658) | class GeneralizedRCNN(nn.Module):
method __init__ (line 1659) | def __init__(self, cfg):
method from_pretrained (line 1670) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
method forward (line 1831) | def forward(
method inference (line 1857) | def inference(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/lxmert/processing_image.py
class ResizeShortestEdge (line 31) | class ResizeShortestEdge:
method __init__ (line 32) | def __init__(self, short_edge_length, max_size=sys.maxsize):
method __call__ (line 42) | def __call__(self, imgs):
class Preprocess (line 76) | class Preprocess:
method __init__ (line 77) | def __init__(self, cfg):
method pad (line 88) | def pad(self, images):
method __call__ (line 102) | def __call__(self, images, single_image=False):
function _scale_box (line 139) | def _scale_box(boxes, scale_yx):
function _clip_box (line 145) | def _clip_box(tensor, box_size: Tuple[int, int]):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/lxmert/utils.py
function load_labels (line 79) | def load_labels(objs=OBJECTS, attrs=ATTRIBUTES):
function load_checkpoint (line 92) | def load_checkpoint(ckp):
class Config (line 106) | class Config:
method __init__ (line 109) | def __init__(self, dictionary: dict, name: str = "root", level=0):
method __repr__ (line 125) | def __repr__(self):
method __setattr__ (line 128) | def __setattr__(self, key, val):
method to_dict (line 143) | def to_dict(self):
method dump_yaml (line 146) | def dump_yaml(self, data, file_name):
method dump_json (line 150) | def dump_json(self, data, file_name):
method load_yaml (line 155) | def load_yaml(config):
method __str__ (line 160) | def __str__(self):
method from_pretrained (line 177) | def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
method get_config_dict (line 182) | def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs):
function compare (line 225) | def compare(in_tensor):
function is_remote_url (line 240) | def is_remote_url(url_or_filename):
function hf_bucket_url (line 245) | def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
function http_get (line 254) | def http_get(
function get_from_cache (line 290) | def get_from_cache(
function url_to_filename (line 403) | def url_to_filename(url, etag=None):
function cached_path (line 419) | def cached_path(
function get_data (line 492) | def get_data(query, delim=","):
function get_image_from_url (line 512) | def get_image_from_url(url):
function load_frcnn_pkl_from_url (line 519) | def load_frcnn_pkl_from_url(url):
function get_demo_path (line 536) | def get_demo_path():
function img_tensorize (line 540) | def img_tensorize(im, input_format="RGB"):
function chunk (line 553) | def chunk(images, batch=1):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/lxmert/visualizing_image.py
class SingleImageViz (line 36) | class SingleImageViz:
method __init__ (line 37) | def __init__(
method add_box (line 89) | def add_box(self, box, color=None):
method draw_boxes (line 108) | def draw_boxes(self, boxes, obj_ids=None, obj_scores=None, attr_ids=No...
method draw_labels (line 141) | def draw_labels(self, label, box, color):
method draw_text (line 163) | def draw_text(
method save (line 195) | def save(self, saveas=None):
method _create_text_labels_attr (line 206) | def _create_text_labels_attr(self, classes, scores, attr_classes, attr...
method _create_text_labels (line 215) | def _create_text_labels(self, classes, scores):
method _random_color (line 224) | def _random_color(self, maximum=255):
method _get_buffer (line 231) | def _get_buffer(self):
method _change_color_brightness (line 259) | def _change_color_brightness(self, color, brightness_factor):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/mlm_wwm/run_chinese_ref.py
function _is_chinese_char (line 10) | def _is_chinese_char(cp):
function is_chinese (line 35) | def is_chinese(word: str):
function get_chinese_word (line 44) | def get_chinese_word(tokens: List[str]):
function add_sub_symbol (line 55) | def add_sub_symbol(bert_tokens: List[str], chinese_word_set: set()):
function prepare_ref (line 79) | def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: Be...
function main (line 116) | def main(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/mlm_wwm/run_mlm_wwm.py
class ModelArguments (line 56) | class ModelArguments:
method __post_init__ (line 110) | def __post_init__(self):
class DataTrainingArguments (line 118) | class DataTrainingArguments:
method __post_init__ (line 177) | def __post_init__(self):
function add_chinese_references (line 186) | def add_chinese_references(dataset, ref_file):
function main (line 196) | def main():
function _mp_fn (line 429) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/mm-imdb/run_mmimdb.py
function set_seed (line 57) | def set_seed(args):
function train (line 65) | def train(args, train_dataset, model, tokenizer, criterion):
function evaluate (line 234) | def evaluate(args, model, tokenizer, criterion, prefix=""):
function load_examples (line 304) | def load_examples(args, tokenizer, evaluate=False):
function main (line 312) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/mm-imdb/utils_mmimdb.py
class ImageEncoder (line 32) | class ImageEncoder(nn.Module):
method __init__ (line 33) | def __init__(self, args):
method forward (line 40) | def forward(self, x):
class JsonlDataset (line 48) | class JsonlDataset(Dataset):
method __init__ (line 49) | def __init__(self, data_path, tokenizer, transforms, labels, max_seq_l...
method __len__ (line 59) | def __len__(self):
method __getitem__ (line 62) | def __getitem__(self, index):
method get_label_frequencies (line 81) | def get_label_frequencies(self):
function collate_fn (line 88) | def collate_fn(batch):
function get_mmimdb_labels (line 107) | def get_mmimdb_labels():
function get_image_transforms (line 135) | def get_image_transforms():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/bertarize.py
function main (line 28) | def main(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/counts_parameters.py
function main (line 26) | def main(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/emmental/configuration_bert_masked.py
class MaskedBertConfig (line 27) | class MaskedBertConfig(PretrainedConfig):
method __init__ (line 34) | def __init__(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py
class BertEmbeddings (line 38) | class BertEmbeddings(nn.Module):
method __init__ (line 41) | def __init__(self, config):
method forward (line 52) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
class BertSelfAttention (line 77) | class BertSelfAttention(nn.Module):
method __init__ (line 78) | def __init__(self, config):
method transpose_for_scores (line 115) | def transpose_for_scores(self, x):
method forward (line 120) | def forward(
class BertSelfOutput (line 174) | class BertSelfOutput(nn.Module):
method __init__ (line 175) | def __init__(self, config):
method forward (line 187) | def forward(self, hidden_states, input_tensor, threshold):
class BertAttention (line 194) | class BertAttention(nn.Module):
method __init__ (line 195) | def __init__(self, config):
method prune_heads (line 201) | def prune_heads(self, heads):
method forward (line 224) | def forward(
class BertIntermediate (line 246) | class BertIntermediate(nn.Module):
method __init__ (line 247) | def __init__(self, config):
method forward (line 261) | def forward(self, hidden_states, threshold):
class BertOutput (line 267) | class BertOutput(nn.Module):
method __init__ (line 268) | def __init__(self, config):
method forward (line 280) | def forward(self, hidden_states, input_tensor, threshold):
class BertLayer (line 287) | class BertLayer(nn.Module):
method __init__ (line 288) | def __init__(self, config):
method forward (line 297) | def forward(
class BertEncoder (line 323) | class BertEncoder(nn.Module):
method __init__ (line 324) | def __init__(self, config):
method forward (line 330) | def forward(
class BertPooler (line 370) | class BertPooler(nn.Module):
method __init__ (line 371) | def __init__(self, config):
method forward (line 376) | def forward(self, hidden_states):
class MaskedBertPreTrainedModel (line 385) | class MaskedBertPreTrainedModel(PreTrainedModel):
method _init_weights (line 394) | def _init_weights(self, module):
class MaskedBertModel (line 468) | class MaskedBertModel(MaskedBertPreTrainedModel):
method __init__ (line 475) | def __init__(self, config):
method get_input_embeddings (line 486) | def get_input_embeddings(self):
method set_input_embeddings (line 489) | def set_input_embeddings(self, value):
method _prune_heads (line 492) | def _prune_heads(self, heads_to_prune):
method forward (line 501) | def forward(
class MaskedBertForSequenceClassification (line 663) | class MaskedBertForSequenceClassification(MaskedBertPreTrainedModel):
method __init__ (line 664) | def __init__(self, config):
method forward (line 675) | def forward(
class MaskedBertForMultipleChoice (line 749) | class MaskedBertForMultipleChoice(MaskedBertPreTrainedModel):
method __init__ (line 750) | def __init__(self, config):
method forward (line 760) | def forward(
class MaskedBertForTokenClassification (line 838) | class MaskedBertForTokenClassification(MaskedBertPreTrainedModel):
method __init__ (line 839) | def __init__(self, config):
method forward (line 850) | def forward(
class MaskedBertForQuestionAnswering (line 925) | class MaskedBertForQuestionAnswering(MaskedBertPreTrainedModel):
method __init__ (line 926) | def __init__(self, config):
method forward (line 936) | def forward(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/emmental/modules/binarizer.py
class ThresholdBinarizer (line 24) | class ThresholdBinarizer(autograd.Function):
method forward (line 37) | def forward(ctx, inputs: torch.tensor, threshold: float, sigmoid: bool):
method backward (line 65) | def backward(ctx, gradOutput):
class TopKBinarizer (line 69) | class TopKBinarizer(autograd.Function):
method forward (line 82) | def forward(ctx, inputs: torch.tensor, threshold: float):
method backward (line 107) | def backward(ctx, gradOutput):
class MagnitudeBinarizer (line 111) | class MagnitudeBinarizer:
method apply (line 121) | def apply(inputs: torch.tensor, threshold: float):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/emmental/modules/masked_nn.py
class MaskedLinear (line 31) | class MaskedLinear(nn.Linear):
method __init__ (line 37) | def __init__(
method init_mask (line 77) | def init_mask(self):
method forward (line 85) | def forward(self, input: torch.tensor, threshold: float):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/masked_run_glue.py
function set_seed (line 61) | def set_seed(args):
function schedule_threshold (line 69) | def schedule_threshold(
function regularization (line 92) | def regularization(model: nn.Module, mode: str):
function train (line 106) | def train(args, train_dataset, model, tokenizer, teacher=None):
function evaluate (line 415) | def evaluate(args, model, tokenizer, prefix=""):
function load_and_cache_examples (line 506) | def load_and_cache_examples(args, task, tokenizer, evaluate=False):
function main (line 561) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/movement-pruning/masked_run_squad.py
function set_seed (line 64) | def set_seed(args):
function schedule_threshold (line 72) | def schedule_threshold(
function regularization (line 95) | def regularization(model: nn.Module, mode: str):
function to_list (line 109) | def to_list(tensor):
function train (line 113) | def train(args, train_dataset, model, tokenizer, teacher=None):
function evaluate (line 436) | def evaluate(args, model, tokenizer, prefix=""):
function load_and_cache_examples (line 583) | def load_and_cache_examples(args, tokenizer, evaluate=False, output_exam...
function main (line 658) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/onnx/summarization/bart_onnx/generation_onnx.py
function _convert_past_list_to_tuple (line 12) | def _convert_past_list_to_tuple(past_key_values):
class EncoderForONNX (line 34) | class EncoderForONNX(torch.nn.Module):
method __init__ (line 35) | def __init__(self, encoder):
method forward (line 39) | def forward(self, input_ids, attention_mask):
class DecoderForONNX (line 47) | class DecoderForONNX(torch.nn.Module):
method __init__ (line 48) | def __init__(self, decoder):
method forward (line 52) | def forward(self, input_ids, encoder_state, attention_mask, past=None):
function _create_traced_encoder (line 72) | def _create_traced_encoder(encoder, input_ids, attention_mask):
function _create_traced_decoder (line 79) | def _create_traced_decoder(decoder, input_ids, encoder_state, attention_...
class BartConfigTS (line 91) | class BartConfigTS(BartConfig, torch.nn.Module):
method __init__ (line 97) | def __init__(self, config):
class MinLengthLogitsProcessorTS (line 102) | class MinLengthLogitsProcessorTS(torch.nn.Module):
method __init__ (line 113) | def __init__(self, min_length: int, eos_token_id: int):
method forward (line 125) | def forward(self, input_ids, scores) -> torch.Tensor:
class BARTGenerator (line 132) | class BARTGenerator(torch.nn.Module, GenerationMixin):
method __init__ (line 133) | def __init__(self, model):
method _trace_modules (line 143) | def _trace_modules(self, model):
method _encoder_forward (line 229) | def _encoder_forward(self, input_ids, attention_mask):
method _init_sequence_length_for_generation (line 233) | def _init_sequence_length_for_generation(
method _decoder_forward (line 242) | def _decoder_forward(self, input_ids, encoder_output, attention_mask, ...
method greedy_search (line 257) | def greedy_search(
method _prepare_decoder_input_ids_for_generation (line 299) | def _prepare_decoder_input_ids_for_generation(
method forward (line 311) | def forward(self, input_ids, attention_mask, max_length, decoder_start...
class BeamSearchScorerTS (line 340) | class BeamSearchScorerTS(torch.nn.Module):
method __init__ (line 341) | def __init__(self):
method is_done (line 358) | def is_done(self) -> torch.Tensor:
method init (line 361) | def init(
method hypo_len (line 403) | def hypo_len(self, hypo_idx: int):
method hypo_add (line 409) | def hypo_add(self, hyp: torch.Tensor, sum_logprobs: float, hypo_idx: i...
method hypo_is_done (line 434) | def hypo_is_done(self, hypo_idx: int, best_sum_logprobs: float, cur_le...
method process (line 448) | def process(
method finalize (line 523) | def finalize(
class BARTBeamSearchGenerator (line 586) | class BARTBeamSearchGenerator(BARTGenerator):
method __init__ (line 587) | def __init__(self, model):
method _expand_inputs_for_generation (line 593) | def _expand_inputs_for_generation(
method adjust_logits_during_generation (line 609) | def adjust_logits_during_generation(self, logits, cur_len: int, max_le...
method _force_token_id_to_be_generated (line 617) | def _force_token_id_to_be_generated(scores, token_id: int):
method _reorder_cache (line 623) | def _reorder_cache(self, past: List[torch.Tensor], beam_idx):
method beam_search (line 631) | def beam_search(
method forward (line 706) | def forward(self, input_ids, attention_mask, num_beams, max_length, de...
FILE: mplsandbox_for_rl/transformers/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py
function _is_equal_tensor_proto (line 11) | def _is_equal_tensor_proto(a, b):
function _node_replace_input_with (line 26) | def _node_replace_input_with(node_proto, name, new_name):
function _graph_replace_input_with (line 39) | def _graph_replace_input_with(graph_proto, name, new_name):
function _remove_dup_initializers_from_model (line 44) | def _remove_dup_initializers_from_model(model, model_without_ext, ind_to...
function remove_dup_initializers (line 61) | def remove_dup_initializers(onnx_file_path):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/onnx/summarization/run_onnx_exporter.py
function parse_args (line 46) | def parse_args():
function load_model_tokenizer (line 91) | def load_model_tokenizer(model_name, device="cpu"):
function export_and_validate_model (line 103) | def export_and_validate_model(model, tokenizer, onnx_file_path, num_beam...
function main (line 166) | def main():
FILE: mplsandbox_for_rl/transformers/examples/research_projects/performer/modeling_flax_performer.py
class FlaxPerformerLayerNorm (line 105) | class FlaxPerformerLayerNorm(nn.Module):
method __call__ (line 120) | def __call__(self, x):
class FlaxPerformerEmbedding (line 145) | class FlaxPerformerEmbedding(nn.Module):
method __call__ (line 156) | def __call__(self, inputs):
class FlaxPerformerEmbeddings (line 161) | class FlaxPerformerEmbeddings(nn.Module):
method __call__ (line 170) | def __call__(self, input_ids, token_type_ids, position_ids, attention_...
class FlaxPerformerAttention (line 191) | class FlaxPerformerAttention(nn.Module):
method __call__ (line 196) | def __call__(self, hidden_state, attention_mask):
class FlaxPerformerIntermediate (line 207) | class FlaxPerformerIntermediate(nn.Module):
method __call__ (line 212) | def __call__(self, hidden_state):
class FlaxPerformerOutput (line 218) | class FlaxPerformerOutput(nn.Module):
method __call__ (line 220) | def __call__(self, intermediate_output, attention_output):
class FlaxPerformerLayer (line 226) | class FlaxPerformerLayer(nn.Module):
method __call__ (line 233) | def __call__(self, hidden_state, attention_mask):
class FlaxPerformerLayerCollection (line 245) | class FlaxPerformerLayerCollection(nn.Module):
method __call__ (line 257) | def __call__(self, inputs, attention_mask):
class FlaxPerformerEncoder (line 272) | class FlaxPerformerEncoder(nn.Module):
method __call__ (line 280) | def __call__(self, hidden_state, attention_mask):
class FlaxPerformerPooler (line 292) | class FlaxPerformerPooler(nn.Module):
method __call__ (line 294) | def __call__(self, hidden_state):
class FlaxPerformerModule (line 300) | class FlaxPerformerModule(nn.Module):
method __call__ (line 313) | def __call__(self, input_ids, token_type_ids, position_ids, attention_...
class FlaxPerformerModel (line 340) | class FlaxPerformerModel(FlaxBertPreTrainedModel):
method convert_from_pytorch (line 353) | def convert_from_pytorch(pt_state: Dict, config: BertConfig) -> Dict:
method __init__ (line 420) | def __init__(
method module (line 439) | def module(self) -> nn.Module:
method __call__ (line 442) | def __call__(
class FlaxPerformerForMaskedLM (line 464) | class FlaxPerformerForMaskedLM(FlaxBertPreTrainedModel):
method __init__ (line 465) | def __init__(
method __call__ (line 483) | def __call__(
class FlaxPerformerForMaskedLMModule (line 513) | class FlaxPerformerForMaskedLMModule(nn.Module):
method __call__ (line 527) | def __call__(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/performer/modeling_flax_performer_utils.py
function nonnegative_softmax_kernel_feature_creator (line 40) | def nonnegative_softmax_kernel_feature_creator(
function sincos_softmax_kernel_feature_creator (line 94) | def sincos_softmax_kernel_feature_creator(
function generalized_kernel_feature_creator (line 144) | def generalized_kernel_feature_creator(
function make_fast_softmax_attention (line 182) | def make_fast_softmax_attention(
function make_fast_generalized_attention (line 243) | def make_fast_generalized_attention(
class RandomMatrix (line 287) | class RandomMatrix:
method get_2d_array (line 296) | def get_2d_array(self):
class GaussianUnstructuredRandomMatrix (line 300) | class GaussianUnstructuredRandomMatrix(RandomMatrix):
method __init__ (line 301) | def __init__(self, nb_rows, nb_columns, key):
method get_2d_array (line 306) | def get_2d_array(self):
class GaussianOrthogonalRandomMatrix (line 310) | class GaussianOrthogonalRandomMatrix(RandomMatrix):
method __init__ (line 316) | def __init__(self, nb_rows, nb_columns, key, scaling=0):
method get_2d_array (line 322) | def get_2d_array(self):
class FastAttention (line 351) | class FastAttention:
method dot_product_attention (line 360) | def dot_product_attention(
function _numerator (line 403) | def _numerator(z_slice_shape, precision, unroll=1):
function _denominator (line 442) | def _denominator(t_slice_shape, precision, unroll=1):
class FastAttentionviaLowRankDecomposition (line 478) | class FastAttentionviaLowRankDecomposition(FastAttention):
method __init__ (line 485) | def __init__(
method draw_weights (line 505) | def draw_weights(self, key):
method dot_product_attention (line 512) | def dot_product_attention(
function _invert_perm (line 654) | def _invert_perm(perm):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/performer/run_mlm_performer.py
class WandbArguments (line 78) | class WandbArguments:
class ModelArguments (line 94) | class ModelArguments:
class DataTrainingArguments (line 128) | class DataTrainingArguments:
method __post_init__ (line 187) | def __post_init__(self):
class FlaxDataCollatorForLanguageModeling (line 202) | class FlaxDataCollatorForLanguageModeling:
method __post_init__ (line 229) | def __post_init__(self):
method __call__ (line 236) | def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multi...
method mask_tokens (line 253) | def mask_tokens(
function create_learning_rate_scheduler (line 283) | def create_learning_rate_scheduler(
function compute_metrics (line 337) | def compute_metrics(logits, labels, weights, label_smoothing=0.0):
function accuracy (line 346) | def accuracy(logits, targets, weights=None):
function cross_entropy (line 366) | def cross_entropy(logits, targets, weights=None, label_smoothing=0.0):
function training_step (line 401) | def training_step(optimizer, batch, dropout_rng):
function eval_step (line 424) | def eval_step(params, batch):
function generate_batch_splits (line 437) | def generate_batch_splits(samples_idx: np.ndarray, batch_size: int) -> n...
function tokenize_function (line 577) | def tokenize_function(examples):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/pplm/pplm_classification_head.py
class ClassificationHead (line 4) | class ClassificationHead(nn.Module):
method __init__ (line 7) | def __init__(self, class_size, embed_size):
method forward (line 15) | def forward(self, hidden_state):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/pplm/run_pplm.py
function top_k_filter (line 77) | def top_k_filter(logits, k, probs=False):
function perturb_past (line 93) | def perturb_past(
function get_classifier (line 254) | def get_classifier(
function get_bag_of_words_indices (line 295) | def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str], token...
function build_bows_one_hot_vectors (line 308) | def build_bows_one_hot_vectors(bow_indices, tokenizer, device="cuda"):
function full_text_generation (line 323) | def full_text_generation(
function generate_text_pplm (line 423) | def generate_text_pplm(
function set_generic_model_params (line 575) | def set_generic_model_params(discrim_weights, discrim_meta):
function run_pplm_example (line 587) | def run_pplm_example(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/pplm/run_pplm_discrim_train.py
class Discriminator (line 45) | class Discriminator(nn.Module):
method __init__ (line 48) | def __init__(self, class_size, pretrained_model="openai-community/gpt2...
method get_classifier (line 57) | def get_classifier(self):
method train_custom (line 60) | def train_custom(self):
method avg_representation (line 65) | def avg_representation(self, x):
method forward (line 72) | def forward(self, x):
class Dataset (line 84) | class Dataset(data.Dataset):
method __init__ (line 85) | def __init__(self, X, y):
method __len__ (line 90) | def __len__(self):
method __getitem__ (line 93) | def __getitem__(self, index):
function collate_fn (line 101) | def collate_fn(data):
function cached_collate_fn (line 123) | def cached_collate_fn(data):
function train_epoch (line 134) | def train_epoch(data_loader, discriminator, optimizer, epoch=0, log_inte...
function evaluate_performance (line 161) | def evaluate_performance(data_loader, discriminator, device="cpu"):
function predict (line 184) | def predict(input_sentence, model, classes, cached=False, device="cpu"):
function get_cached_data_loader (line 198) | def get_cached_data_loader(dataset, batch_size, discriminator, shuffle=F...
function train_discriminator (line 218) | def train_discriminator(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py
function model_infer (line 213) | def model_infer(inputs, context, d_inputs, h_output0, h_output1, d_outpu...
function prepare_validation_features (line 300) | def prepare_validation_features(examples):
function post_processing_function (line 367) | def post_processing_function(examples, features, predictions, stage="eva...
function binding_nbytes (line 404) | def binding_nbytes(binding):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/quantization-qdqbert/quant_trainer.py
function add_arguments (line 35) | def add_arguments(parser):
function set_default_quantizers (line 61) | def set_default_quantizers(args):
function configure_model (line 81) | def configure_model(model, args, calib=False, eval=False):
function enable_calibration (line 116) | def enable_calibration(model):
function finish_calibration (line 130) | def finish_calibration(model, args):
function fuse_qkv (line 152) | def fuse_qkv(model, args):
function clip_gelu (line 180) | def clip_gelu(model, maxval):
function expand_amax (line 193) | def expand_amax(model):
function recalibrate_weights (line 204) | def recalibrate_weights(model):
function print_model_summary (line 222) | def print_model_summary(model, name_width=25, line_width=180, ignore=None):
function print_quant_summary (line 255) | def print_quant_summary(model):
function set_quantizer (line 266) | def set_quantizer(name, mod, quantizer, k, v):
function set_quantizers (line 277) | def set_quantizers(name, mod, which="both", **kwargs):
function set_quantizer_by_name (line 290) | def set_quantizer_by_name(model, names, **kwargs):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/quantization-qdqbert/run_quant_qa.py
class ModelArguments (line 61) | class ModelArguments:
class DataTrainingArguments (line 101) | class DataTrainingArguments:
method __post_init__ (line 204) | def __post_init__(self):
function main (line 224) | def main():
function _mp_fn (line 682) | def _mp_fn(index):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/quantization-qdqbert/trainer_quant_qa.py
class QuestionAnsweringTrainer (line 38) | class QuestionAnsweringTrainer(Trainer):
method __init__ (line 39) | def __init__(self, *args, eval_examples=None, post_process_function=No...
method get_calib_dataloader (line 46) | def get_calib_dataloader(self, calib_dataset=None):
method calibrate (line 69) | def calibrate(self, calib_dataset=None):
method evaluate (line 91) | def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=...
method predict (line 132) | def predict(self, predict_dataset, predict_examples, ignore_keys=None,...
method save_onnx (line 164) | def save_onnx(self, output_dir="./"):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/quantization-qdqbert/utils_qa.py
function postprocess_qa_predictions (line 32) | def postprocess_qa_predictions(
function postprocess_qa_predictions_with_beam_search (line 249) | def postprocess_qa_predictions_with_beam_search(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/callbacks_rag.py
function count_trainable_parameters (line 12) | def count_trainable_parameters(model):
function get_checkpoint_callback (line 21) | def get_checkpoint_callback(output_dir, metric):
function get_early_stopping_callback (line 49) | def get_early_stopping_callback(metric, patience):
class Seq2SeqLoggingCallback (line 58) | class Seq2SeqLoggingCallback(pl.Callback):
method on_batch_end (line 59) | def on_batch_end(self, trainer, pl_module):
method _write_logs (line 64) | def _write_logs(
method on_train_start (line 100) | def on_train_start(self, trainer, pl_module):
method on_test_end (line 111) | def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModu...
method on_validation_end (line 116) | def on_validation_end(self, trainer: pl.Trainer, pl_module):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/distributed_ray_retriever.py
class RayRetriever (line 13) | class RayRetriever:
method __init__ (line 14) | def __init__(self):
method create_rag_retriever (line 17) | def create_rag_retriever(self, config, question_encoder_tokenizer, gen...
method init_retrieval (line 28) | def init_retrieval(self):
method clear_object (line 31) | def clear_object(self):
method retrieve (line 36) | def retrieve(self, question_hidden_states, n_docs):
class RagRayDistributedRetriever (line 42) | class RagRayDistributedRetriever(RagRetriever):
method __init__ (line 70) | def __init__(self, config, question_encoder_tokenizer, generator_token...
method init_retrieval (line 98) | def init_retrieval(self):
method retrieve (line 113) | def retrieve(self, question_hidden_states, n_docs):
method get_tokenizers (line 145) | def get_tokenizers(cls, retriever_name_or_path, indexed_dataset=None, ...
method from_pretrained (line 149) | def from_pretrained(cls, retriever_name_or_path, actor_handles, indexe...
method re_load (line 169) | def re_load(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/eval_rag.py
function infer_model_type (line 27) | def infer_model_type(model_name_or_path):
function metric_max_over_ground_truths (line 37) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
function get_scores (line 41) | def get_scores(args, preds_path, gold_data_path):
function get_precision_at_k (line 67) | def get_precision_at_k(args, preds_path, gold_data_path):
function evaluate_batch_retrieval (line 83) | def evaluate_batch_retrieval(args, rag_model, questions):
function evaluate_batch_e2e (line 116) | def evaluate_batch_e2e(args, rag_model, questions):
function get_args (line 143) | def get_args():
function main (line 257) | def main(args):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/finetune_rag.py
class AttrDict (line 78) | class AttrDict(dict):
method __init__ (line 79) | def __init__(self, *args, **kwargs):
class GenerativeQAModule (line 84) | class GenerativeQAModule(BaseTransformer):
method __init__ (line 90) | def __init__(self, hparams, **kwargs):
method forward (line 200) | def forward(self, input_ids, **kwargs):
method ids_to_clean_text (line 203) | def ids_to_clean_text(self, generated_ids: List[int]):
method _step (line 209) | def _step(self, batch: dict) -> Tuple:
method pad (line 251) | def pad(self) -> int:
method training_step (line 254) | def training_step(self, batch, batch_idx) -> Dict:
method validation_step (line 381) | def validation_step(self, batch, batch_idx) -> Dict:
method validation_epoch_end (line 384) | def validation_epoch_end(self, outputs, prefix="val") -> Dict:
method save_metrics (line 414) | def save_metrics(self, latest_metrics, type_path) -> None:
method calc_generative_metrics (line 418) | def calc_generative_metrics(self, preds, target) -> Dict:
method _generative_step (line 421) | def _generative_step(self, batch: dict) -> dict:
method test_step (line 444) | def test_step(self, batch, batch_idx):
method test_epoch_end (line 447) | def test_epoch_end(self, outputs):
method get_dataset (line 450) | def get_dataset(self, type_path) -> Seq2SeqDataset:
method get_dataloader (line 462) | def get_dataloader(self, type_path: str, batch_size: int, shuffle: boo...
method train_dataloader (line 474) | def train_dataloader(self) -> DataLoader:
method val_dataloader (line 478) | def val_dataloader(self) -> DataLoader:
method test_dataloader (line 481) | def test_dataloader(self) -> DataLoader:
method on_save_checkpoint (line 485) | def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
method add_model_specific_args (line 503) | def add_model_specific_args(parser, root_dir):
method add_retriever_specific_args (line 609) | def add_retriever_specific_args(parser):
method add_ray_specific_args (line 665) | def add_ray_specific_args(parser):
function main (line 691) | def main(args=None, model=None) -> GenerativeQAModule:
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/kb_encode_utils.py
function split_text (line 11) | def split_text(text, n=100, character=" "):
function split_documents (line 17) | def split_documents(documents):
function embed_update (line 28) | def embed_update(ctx_encoder, total_processes, device, process_num, shar...
function add_index (line 67) | def add_index(shard_dir, index_path):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/lightning_base.py
class BaseTransformer (line 63) | class BaseTransformer(pl.LightningModule):
method __init__ (line 64) | def __init__(
method load_hf_checkpoint (line 118) | def load_hf_checkpoint(self, *args, **kwargs):
method get_lr_scheduler (line 121) | def get_lr_scheduler(self):
method configure_optimizers (line 129) | def configure_optimizers(self):
method test_step (line 160) | def test_step(self, batch, batch_nb):
method test_epoch_end (line 163) | def test_epoch_end(self, outputs):
method total_steps (line 166) | def total_steps(self) -> int:
method setup (line 172) | def setup(self, stage):
method get_dataloader (line 179) | def get_dataloader(self, type_path: str, batch_size: int, shuffle: boo...
method train_dataloader (line 182) | def train_dataloader(self):
method val_dataloader (line 185) | def val_dataloader(self):
method test_dataloader (line 188) | def test_dataloader(self):
method _feature_file (line 191) | def _feature_file(self, mode):
method on_save_checkpoint (line 202) | def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
method add_model_specific_args (line 209) | def add_model_specific_args(parser, root_dir):
class InitCallback (line 271) | class InitCallback(pl.Callback):
method on_sanity_check_start (line 274) | def on_sanity_check_start(self, trainer, pl_module):
class CheckParamCallback (line 281) | class CheckParamCallback(pl.Callback):
method on_after_backward (line 283) | def on_after_backward(self, trainer, pl_module):
class LoggingCallback (line 290) | class LoggingCallback(pl.Callback):
method on_batch_end (line 291) | def on_batch_end(self, trainer, pl_module):
method on_validation_end (line 296) | def on_validation_end(self, trainer: pl.Trainer, pl_module: pl.Lightni...
method on_test_end (line 304) | def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModu...
function add_generic_args (line 316) | def add_generic_args(parser, root_dir) -> None:
function generic_train (line 360) | def generic_train(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/use_own_knowledge_dataset.py
function split_text (line 21) | def split_text(text: str, n=100, character=" ") -> List[str]:
function split_documents (line 27) | def split_documents(documents: dict) -> dict:
function embed (line 38) | def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer...
function main (line 47) | def main(
class RagExampleArguments (line 107) | class RagExampleArguments:
class ProcessingArguments (line 136) | class ProcessingArguments:
class IndexHnswArguments (line 152) | class IndexHnswArguments:
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag-end2end-retriever/utils_rag.py
function encode_line (line 21) | def encode_line(tokenizer, line, max_length, padding_side, pad_to_max_le...
function trim_batch (line 35) | def trim_batch(
class Seq2SeqDataset (line 48) | class Seq2SeqDataset(Dataset):
method __init__ (line 49) | def __init__(
method __len__ (line 75) | def __len__(self):
method __getitem__ (line 78) | def __getitem__(self, index) -> Dict[str, torch.Tensor]:
method get_char_lens (line 109) | def get_char_lens(data_file):
method collate_fn (line 112) | def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
function flatten_list (line 139) | def flatten_list(summary_ids: List[List]):
function save_git_info (line 143) | def save_git_info(folder_path: str) -> None:
function save_json (line 149) | def save_json(content, path, indent=4, **json_dump_kwargs):
function load_json (line 154) | def load_json(path):
function get_git_info (line 159) | def get_git_info():
function lmap (line 170) | def lmap(f: Callable, x: Iterable) -> List:
function pickle_save (line 175) | def pickle_save(obj, path):
function normalize_answer (line 181) | def normalize_answer(s):
function f1_score (line 200) | def f1_score(prediction, ground_truth):
function exact_match_score (line 213) | def exact_match_score(prediction, ground_truth):
function calculate_exact_match (line 217) | def calculate_exact_match(output_lns: List[str], reference_lns: List[str...
function is_rag_model (line 227) | def is_rag_model(model_prefix):
function set_extra_model_params (line 231) | def set_extra_model_params(extra_params, hparams, config):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/_test_finetune_rag.py
class RagFinetuneExampleTests (line 26) | class RagFinetuneExampleTests(TestCasePlus):
method _create_dummy_data (line 27) | def _create_dummy_data(self, data_dir):
method _run_finetune (line 37) | def _run_finetune(self, gpus: int, distributed_retriever: str = "pytor...
method test_finetune_gpu (line 92) | def test_finetune_gpu(self):
method test_finetune_multigpu (line 97) | def test_finetune_multigpu(self):
method test_finetune_gpu_ray_retrieval (line 103) | def test_finetune_gpu_ray_retrieval(self):
method test_finetune_multigpu_ray_retrieval (line 109) | def test_finetune_multigpu_ray_retrieval(self):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/callbacks_rag.py
function count_trainable_parameters (line 12) | def count_trainable_parameters(model):
function get_checkpoint_callback (line 21) | def get_checkpoint_callback(output_dir, metric):
function get_early_stopping_callback (line 46) | def get_early_stopping_callback(metric, patience):
class Seq2SeqLoggingCallback (line 55) | class Seq2SeqLoggingCallback(pl.Callback):
method on_batch_end (line 56) | def on_batch_end(self, trainer, pl_module):
method _write_logs (line 61) | def _write_logs(
method on_train_start (line 97) | def on_train_start(self, trainer, pl_module):
method on_test_end (line 108) | def on_test_end(self, trainer: pl.Trainer, pl_module: pl.LightningModu...
method on_validation_end (line 113) | def on_validation_end(self, trainer: pl.Trainer, pl_module):
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/consolidate_rag_checkpoint.py
function consolidate (line 11) | def consolidate(
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/distributed_pytorch_retriever.py
class RagPyTorchDistributedRetriever (line 16) | class RagPyTorchDistributedRetriever(RagRetriever):
method __init__ (line 34) | def __init__(self, config, question_encoder_tokenizer, generator_token...
method init_retrieval (line 44) | def init_retrieval(self, distributed_port: int):
method _is_main (line 77) | def _is_main(self):
method _scattered (line 80) | def _scattered(self, scatter_list, target_shape, target_type=torch.flo...
method _infer_socket_ifname (line 85) | def _infer_socket_ifname(self):
method retrieve (line 91) | def retrieve(self, question_hidden_states: np.ndarray, n_docs: int) ->...
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/distributed_ray_retriever.py
class RayRetriever (line 13) | class RayRetriever:
method __init__ (line 14) | def __init__(self):
method create_rag_retriever (line 17) | def create_rag_retriever(self, config, question_encoder_tokenizer, gen...
method init_retrieval (line 28) | def init_retrieval(self):
method retrieve (line 31) | def retrieve(self, question_hidden_states, n_docs):
class RagRayDistributedRetriever (line 36) | class RagRayDistributedRetriever(RagRetriever):
method __init__ (line 64) | def __init__(self, config, question_encoder_tokenizer, generator_token...
method init_retrieval (line 88) | def init_retrieval(self):
method retrieve (line 103) | def retrieve(self, question_hidden_states, n_docs):
method get_tokenizers (line 132) | def get_tokenizers(cls, retriever_name_or_path, indexed_dataset=None, ...
method from_pretrained (line 136) | def from_pretrained(cls, retriever_name_or_path, actor_handles, indexe...
FILE: mplsandbox_for_rl/transformers/examples/research_projects/rag/eval_rag.py
function infer_model_type (line 27) | def infer_model_type(model_name_or_path):
function metric_max_over_ground_truths (line 37) | def metric_
Copy disabled (too large)
Download .json
Condensed preview — 4523 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (80,277K chars).
[
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 8909,
"preview": "# ✨ MPLSandbox\nMPLSandbox is an out-of-the-box multi-programming language sandbox designed to provide unified and compre"
},
{
"path": "mplsandbox/__init__.py",
"chars": 109,
"preview": "# from sandbox import * # noqa: F401\n# from .const import * # noqa: F401, F403\nfrom .tool import MPLSANDBOX"
},
{
"path": "mplsandbox/analyzetools.py",
"chars": 24930,
"preview": "import io\nimport os\nimport docker\nimport tarfile\nfrom typing import List, Optional\nfrom docker.models.images import Imag"
},
{
"path": "mplsandbox/const.py",
"chars": 1904,
"preview": "from dataclasses import dataclass\n\n@dataclass\nclass Language:\n PYTHON = \"python\"\n JAVA = \"java\"\n JAVASCRIPT = \""
},
{
"path": "mplsandbox/sandbox.py",
"chars": 16202,
"preview": "import io\nimport os\nimport docker\nimport tarfile\nfrom typing import List, Optional\nfrom docker.models.images import Imag"
},
{
"path": "mplsandbox/tool.py",
"chars": 9178,
"preview": "import argparse\nfrom mplsandbox.sandbox import Sandbox\nfrom mplsandbox.analyzetools import AnalyzeTools\nfrom mplsandbox."
},
{
"path": "mplsandbox/utils.py",
"chars": 7990,
"preview": "import docker\nimport docker.errors\nfrom typing import Optional\nimport json\nfrom docker import DockerClient\nimport docker"
},
{
"path": "mplsandbox_for_rl/README.md",
"chars": 1980,
"preview": "# Providing feedback signals in RL\n\nWe validated the effectiveness of MPLSandbox in providing compiler feedback by integ"
},
{
"path": "mplsandbox_for_rl/config.py",
"chars": 11319,
"preview": "import argparse\n\ndef parse_args(*args):\n parser = argparse.ArgumentParser(description='generation model config.')\n "
},
{
"path": "mplsandbox_for_rl/config.yaml",
"chars": 555,
"preview": "compute_environment: LOCAL_MACHINE \ndeepspeed_config:\n gradient_accumulation_steps: 1 \n gradient_clipping: 1.0 \n offl"
},
{
"path": "mplsandbox_for_rl/data/add_index.py",
"chars": 255,
"preview": "import json\n\nori = json.load(open('/root/StepCoder-main/data/valid.json','r'))\n\ncount = 0\ndata = ori[:16]\nfor d in data:"
},
{
"path": "mplsandbox_for_rl/data/train.json",
"chars": 68868,
"preview": "[\n {\n \"prompt\": \"\\ndef IgUhb():\\n \\\"\\\"\\\"There are n student groups at the university. During the study day,"
},
{
"path": "mplsandbox_for_rl/data/valid.json",
"chars": 76892,
"preview": "[\n {\n \"prompt\": \"\\ndef FsxrN():\\n \\\"\\\"\\\"\\\"Duel!\\\"\\n\\nBetting on the lovely princess Claris, the duel betwee"
},
{
"path": "mplsandbox_for_rl/data/valid_all.json",
"chars": 1625000,
"preview": "[\n {\n \"prompt\": \"\\ndef FsxrN():\\n \\\"\\\"\\\"\\\"Duel!\\\"\\n\\nBetting on the lovely princess Claris, the duel betwee"
},
{
"path": "mplsandbox_for_rl/data_helper.py",
"chars": 22156,
"preview": "from argparse import Namespace\nfrom accelerate import Accelerator\nfrom itertools import repeat\nfrom typing import Dict, "
},
{
"path": "mplsandbox_for_rl/generate_utils.py",
"chars": 18713,
"preview": "from typing import Tuple, List, Dict, Set, Union, Callable, Optional\nimport copy, logging, math\nimport torch\nimport torc"
},
{
"path": "mplsandbox_for_rl/generation_config.json",
"chars": 168,
"preview": "{\n \"do_sample\": true,\n \"max_new_tokens\": 1024,\n \"repetition_penalty\": 1.1,\n \"temperature\": 0.2,\n \"top_k\": 40,\n \"to"
},
{
"path": "mplsandbox_for_rl/llama/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mplsandbox_for_rl/llama/llama_model.py",
"chars": 7766,
"preview": "import torch\nimport math, time, logging\nfrom typing import Dict, Any, Union, List, Tuple\nfrom transformers.models.llama."
},
{
"path": "mplsandbox_for_rl/llama/llama_trainer.py",
"chars": 8411,
"preview": "import torch\nimport time\nfrom typing import Dict, Any, Tuple\nimport torch.nn as nn\nfrom .llama_model import Llama\nfrom d"
},
{
"path": "mplsandbox_for_rl/llama/modeling_moe.py",
"chars": 39206,
"preview": "# coding=utf-8\n# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.\n#\n# This code is based on"
},
{
"path": "mplsandbox_for_rl/llama/reward/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mplsandbox_for_rl/llama/reward/llama_reward_model.py",
"chars": 17230,
"preview": "import torch\nimport uuid, os, re, subprocess\nimport math, time, logging\nfrom typing import Dict, Any, Union, List, Tuple"
},
{
"path": "mplsandbox_for_rl/log/mplsandbox_for_ppo50_beta005_rollout1_0508_debug.log",
"chars": 45511,
"preview": "[2025-04-15 21:09:34,893] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
},
{
"path": "mplsandbox_for_rl/metric.py",
"chars": 21571,
"preview": "from typing import List, Optional, Any, Tuple, Union, Dict\nfrom typing import Counter as TCounter\nfrom sacrebleu.tokeniz"
},
{
"path": "mplsandbox_for_rl/metric_utils.py",
"chars": 12335,
"preview": "\nimport copy\nfrom collections import defaultdict\nimport numpy as np\nimport pdb\nimport math\nimport six\nfrom six.moves imp"
},
{
"path": "mplsandbox_for_rl/ppo/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mplsandbox_for_rl/ppo/ppo_datahelper.py",
"chars": 11740,
"preview": "from typing import Dict, Any, List, Tuple, Union, Generator\nimport json, logging, torch, copy\nfrom data_helper import Di"
},
{
"path": "mplsandbox_for_rl/ppo/ppo_trainer.py",
"chars": 28389,
"preview": "from collections import OrderedDict\nimport torch\nimport torch.nn as nn\nimport time, os\nfrom typing import Dict, Any, Tup"
},
{
"path": "mplsandbox_for_rl/ppo/ppo_utils.py",
"chars": 3182,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom typing import Tuple\nfrom accelerate import Accel"
},
{
"path": "mplsandbox_for_rl/requirements.txt",
"chars": 120,
"preview": "accelerate\ndeepspeed\nsetuptools==59.5.0\npandas\ncpm_kernels\nsentencepiece\njionlp==1.4.14\nsacrebleu\nnltk\nrouge\ntensorboard"
},
{
"path": "mplsandbox_for_rl/scheduler.py",
"chars": 1194,
"preview": "import torch.optim as optim\nimport math\n\ndef invsqrt_scheduler(warmup_steps: int):\n def _invsqrt_lr(step):\n re"
},
{
"path": "mplsandbox_for_rl/tmp/GoReturn_evalstep50_beta005_rollout1_0508_debug/experiences/experiences_0.json",
"chars": 85872,
"preview": "[\n {\n \"id\": 14,\n \"start_state\": \"\",\n \"context_vec\": \"[887, 526, 385, 319, 29902, 8720, 20255, 29"
},
{
"path": "mplsandbox_for_rl/tokenizer.py",
"chars": 3242,
"preview": "from transformers import AutoTokenizer\nfrom typing import List\nfrom utils import write_log_info_on_rank0\n\nclass HFPretra"
},
{
"path": "mplsandbox_for_rl/train_ppo.py",
"chars": 2919,
"preview": "from llama.llama_model import Llama\nfrom llama.reward.llama_reward_model import LlamaRewardModel, LlamaCriticModel\nfrom "
},
{
"path": "mplsandbox_for_rl/train_ppo.sh",
"chars": 1436,
"preview": "#!/bin/bash\n# Copyright (c) Microsoft Corporation.\n# SPDX-License-Identifier: Apache-2.0\n\nexport CUDA_HOME=/usr/local/cu"
},
{
"path": "mplsandbox_for_rl/trainer.py",
"chars": 24089,
"preview": "import logging\nimport time, math, json\nfrom typing import Callable, Dict, Any, Union, Callable, Optional, Tuple, List\nim"
},
{
"path": "mplsandbox_for_rl/transformers/.circleci/TROUBLESHOOT.md",
"chars": 319,
"preview": "# Troubleshooting\n\nThis is a document explaining how to deal with various issues on Circle-CI. The entries may include a"
},
{
"path": "mplsandbox_for_rl/transformers/.circleci/config.yml",
"chars": 8094,
"preview": "version: 2.1\nsetup: true\norbs:\n continuation: circleci/continuation@0.1.0\n\nparameters:\n nightly:\n type: boo"
},
{
"path": "mplsandbox_for_rl/transformers/.circleci/create_circleci_config.py",
"chars": 22079,
"preview": "# coding=utf-8\n# Copyright 2022 The HuggingFace Inc. team.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/.circleci/parse_test_outputs.py",
"chars": 2534,
"preview": "import re\nimport argparse\n\ndef parse_pytest_output(file_path):\n skipped_tests = {}\n skipped_count = 0\n with ope"
},
{
"path": "mplsandbox_for_rl/transformers/.coveragerc",
"chars": 207,
"preview": "[run]\nsource=transformers\nomit =\n # skip convertion scripts from testing for now\n */convert_*\n */__main__.py\n[r"
},
{
"path": "mplsandbox_for_rl/transformers/.gitattributes",
"chars": 51,
"preview": "*.py\teol=lf\n*.rst\teol=lf\n*.md\teol=lf\n*.mdx eol=lf"
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/bug-report.yml",
"chars": 4612,
"preview": "name: \"\\U0001F41B Bug Report\"\ndescription: Submit a bug report to help us improve transformers\nlabels: [ \"bug\" ]\nbody:\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/config.yml",
"chars": 529,
"preview": "blank_issues_enabled: true\nversion: 2.1\ncontact_links:\n - name: Model checkpoints on the Hugging Face Hub\n url: http"
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/feature-request.yml",
"chars": 1100,
"preview": "name: \"\\U0001F680 Feature request\"\ndescription: Submit a proposal/request for a new transformers feature\nlabels: [ \"Feat"
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/i18n.md",
"chars": 2845,
"preview": "---\nname: 🌐 Translating a new language?\nabout: Start a new translation effort in your language\ntitle: '[i18n-<languageCo"
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/migration.yml",
"chars": 2730,
"preview": "name: \"\\U0001F4DA Migration from pytorch-pretrained-bert or pytorch-transformers\"\ndescription: Report a problem when mig"
},
{
"path": "mplsandbox_for_rl/transformers/.github/ISSUE_TEMPLATE/new-model-addition.yml",
"chars": 1077,
"preview": "name: \"\\U0001F31F New model addition\"\ndescription: Submit a proposal/request to implement a new model\nlabels: [ \"New mod"
},
{
"path": "mplsandbox_for_rl/transformers/.github/PULL_REQUEST_TEMPLATE.md",
"chars": 3219,
"preview": "# What does this PR do?\n\n<!--\nCongratulations! You've made it this far! You're not quite done yet though.\n\nOnce merged, "
},
{
"path": "mplsandbox_for_rl/transformers/.github/conda/build.sh",
"chars": 69,
"preview": "$PYTHON setup.py install # Python command to install the script.\n"
},
{
"path": "mplsandbox_for_rl/transformers/.github/conda/meta.yaml",
"chars": 1001,
"preview": "{% set name = \"transformers\" %}\n\npackage:\n name: \"{{ name|lower }}\"\n version: \"{{ TRANSFORMERS_VERSION }}\"\n\nsource:\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/TROUBLESHOOT.md",
"chars": 382,
"preview": "# Troubleshooting\n\nThis is a document explaining how to deal with various issues on github-actions self-hosted CI. The e"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/add-model-like.yml",
"chars": 2660,
"preview": "name: Add model like runner\n\non:\n push:\n branches:\n - none # put main here when this is fixed\n #pull_request:\n"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/benchmark.yml",
"chars": 2011,
"preview": "name: Self-hosted runner (benchmark)\r\n\r\non:\r\n schedule:\r\n - cron: \"17 2 * * *\"\r\n workflow_call:\r\n\r\nenv:\r\n HF_HOME:"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build-ci-docker-images.yml",
"chars": 2718,
"preview": "name: Build pr ci-docker\n\non:\n push:\n branches:\n - push-ci-image # for now let's only build on this branch\n re"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build-docker-images.yml",
"chars": 13455,
"preview": "name: Build docker images (scheduled)\n\non:\n push:\n branches:\n - build_ci_docker_image*\n repository_dispatch:\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build-nightly-ci-docker-images.yml",
"chars": 1779,
"preview": "name: Build docker images (Nightly CI)\n\non:\n workflow_call:\n push:\n branches:\n - build_nightly_ci_docker_image"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build-past-ci-docker-images.yml",
"chars": 3207,
"preview": "name: Build docker images (Past CI)\n\non:\n push:\n branches:\n - build_past_ci_docker_image*\n\nconcurrency:\n group"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build_documentation.yml",
"chars": 566,
"preview": "name: Build documentation\n\non:\n push:\n branches:\n - main\n - doc-builder*\n - v*-release\n - use_te"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/build_pr_documentation.yml",
"chars": 519,
"preview": "name: Build PR Documentation\n\non:\n pull_request:\n\nconcurrency:\n group: ${{ github.workflow }}-${{ github.head_ref || g"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/check_tiny_models.yml",
"chars": 2800,
"preview": "name: Check Tiny Models\r\n\r\non:\r\n push:\r\n branches:\r\n - check_tiny_models*\r\n repository_dispatch:\r\n schedule:\r"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/doctest_job.yml",
"chars": 2644,
"preview": "name: Doctest job\n\non:\n workflow_call:\n inputs:\n job_splits:\n required: true\n type: string\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/doctests.yml",
"chars": 2920,
"preview": "name: Doctests\n\non:\n push:\n branches:\n - run_doctest*\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/model_jobs.yml",
"chars": 4540,
"preview": "name: model jobs\n\non:\n workflow_call:\n inputs:\n folder_slices:\n required: true\n type: string\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/push-important-models.yml",
"chars": 5584,
"preview": "name: Slow tests on important models (on Push - A10)\n\non:\n push:\n branches: [ main ]\n\nenv:\n OUTPUT_SLACK_CHANNEL_ID"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/release-conda.yml",
"chars": 1077,
"preview": "name: Release - Conda\n\non:\n push:\n tags:\n - v*\n branches:\n - conda_*\n\nenv:\n ANACONDA_API_TOKEN: ${{ se"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-nightly-caller.yml",
"chars": 1262,
"preview": "name: Self-hosted runner (nightly-ci)\n\n\non:\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * *\"\n push:\n bran"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-nightly-past-ci-caller.yml",
"chars": 4743,
"preview": "name: Self-hosted runner (nightly-past-ci-caller)\r\n\r\non:\r\n schedule:\r\n - cron: \"17 2,14 * * *\"\r\n push:\r\n branche"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-past-caller.yml",
"chars": 1124,
"preview": "name: Self-hosted runner (past-ci)\n\n\non:\n workflow_call:\n inputs:\n framework:\n required: true\n ty"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-pr-slow-ci.yml",
"chars": 5553,
"preview": "name: PR slow CI\n\non:\n pull_request:\n paths:\n - \"src/transformers/models/*/modeling_*.py\"\n - \"tests/**/tes"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push-amd-mi210-caller.yml",
"chars": 664,
"preview": "name: Self-hosted runner (AMD mi210 CI caller)\r\n\r\non:\r\n workflow_run:\r\n workflows: [\"Self-hosted runner (push-caller"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push-amd-mi250-caller.yml",
"chars": 664,
"preview": "name: Self-hosted runner (AMD mi250 CI caller)\r\n\r\non:\r\n workflow_run:\r\n workflows: [\"Self-hosted runner (push-caller"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push-amd-mi300-caller.yml",
"chars": 684,
"preview": "name: Self-hosted runner (AMD mi300 CI caller)\n\non:\n workflow_run:\n workflows: [\"Self-hosted runner (push-caller)\"]\n"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push-amd.yml",
"chars": 14828,
"preview": "name: Self-hosted runner AMD GPU (push)\n\non:\n workflow_call:\n inputs:\n gpu_flavor:\n required: true\n "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push-caller.yml",
"chars": 1408,
"preview": "# Used to trigger self-push CI\nname: Self-hosted runner (push-caller)\n\non:\n push:\n branches:\n - main\n paths:"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-push.yml",
"chars": 25794,
"preview": "name: Self-hosted runner (push)\n\non:\n workflow_run:\n workflows: [\"Self-hosted runner (push-caller)\"]\n branches: ["
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-amd-caller.yml",
"chars": 338,
"preview": "name: Self-hosted runner (AMD scheduled CI caller)\n\non:\n schedule:\n - cron: \"17 2 * * *\"\n\njobs:\n run_scheduled_amd_"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-amd-mi210-caller.yml",
"chars": 643,
"preview": "name: Self-hosted runner (AMD mi210 scheduled CI caller)\r\n\r\non:\r\n workflow_run:\r\n workflows: [\"Self-hosted runner (A"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-amd-mi250-caller.yml",
"chars": 643,
"preview": "name: Self-hosted runner (AMD mi250 scheduled CI caller)\r\n\r\non:\r\n workflow_run:\r\n workflows: [\"Self-hosted runner (A"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-amd-mi300-caller.yml",
"chars": 698,
"preview": "name: Self-hosted runner (AMD mi300 scheduled CI caller)\n\non:\n workflow_run:\n workflows: [\"Self-hosted runner (AMD s"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-amd.yml",
"chars": 19998,
"preview": "name: Self-hosted runner (scheduled-amd)\n\n# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call "
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled-caller.yml",
"chars": 2138,
"preview": "name: Self-hosted runner (scheduled)\n\n\non:\n repository_dispatch:\n schedule:\n - cron: \"17 2 * * *\"\n push:\n branc"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/self-scheduled.yml",
"chars": 18477,
"preview": "name: Self-hosted runner (scheduled)\n\n# Note that each job's dependencies go into a corresponding docker file.\n#\n# For e"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/slack-report.yml",
"chars": 4117,
"preview": "name: CI slack report\n\non:\n workflow_call:\n inputs:\n job:\n required: true\n type: string\n sla"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/ssh-runner.yml",
"chars": 2054,
"preview": "name: SSH into our runners\n\non:\n workflow_dispatch:\n inputs:\n runner_type:\n description: 'Type of runner"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/stale.yml",
"chars": 551,
"preview": "name: Stale Bot\n\non:\n schedule:\n - cron: \"0 8 * * *\"\n\njobs:\n close_stale_issues:\n name: Close Stale Issues\n i"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/trufflehog.yml",
"chars": 299,
"preview": "on:\n push:\n\nname: Secret Leaks\n\npermissions:\n contents: read\n\njobs:\n trufflehog:\n runs-on: ubuntu-latest\n steps"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/update_metdata.yml",
"chars": 593,
"preview": "name: Update Transformers metadata\n\non:\n push:\n branches:\n - main\n - update_transformers_metadata*\n\njobs:\n"
},
{
"path": "mplsandbox_for_rl/transformers/.github/workflows/upload_pr_documentation.yml",
"chars": 385,
"preview": "name: Upload PR Documentation\n\non:\n workflow_run:\n workflows: [\"Build PR Documentation\"]\n types:\n - complete"
},
{
"path": "mplsandbox_for_rl/transformers/.gitignore",
"chars": 1801,
"preview": "# Initially taken from Github's Python gitignore file\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$"
},
{
"path": "mplsandbox_for_rl/transformers/CITATION.cff",
"chars": 2331,
"preview": "cff-version: \"1.2.0\"\r\ndate-released: 2020-10\r\nmessage: \"If you use this software, please cite it using these metadata.\"\r"
},
{
"path": "mplsandbox_for_rl/transformers/CODE_OF_CONDUCT.md",
"chars": 5489,
"preview": "\n# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make particip"
},
{
"path": "mplsandbox_for_rl/transformers/CONTRIBUTING.md",
"chars": 18842,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/ISSUES.md",
"chars": 18814,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/LICENSE",
"chars": 11418,
"preview": "Copyright 2018- The Hugging Face team. All rights reserved.\n\n Apache License\n "
},
{
"path": "mplsandbox_for_rl/transformers/Makefile",
"chars": 4173,
"preview": ".PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples bench"
},
{
"path": "mplsandbox_for_rl/transformers/README.md",
"chars": 22948,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/SECURITY.md",
"chars": 2372,
"preview": "# Security Policy\n\n## Hugging Face Hub, remote artefacts, and remote code\n\nTransformers is open-source software that is "
},
{
"path": "mplsandbox_for_rl/transformers/awesome-transformers.md",
"chars": 39099,
"preview": "# Awesome projects built with Transformers\n\nThis page lists awesome projects built on top of Transformers. Transformers "
},
{
"path": "mplsandbox_for_rl/transformers/benchmark/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mplsandbox_for_rl/transformers/benchmark/benchmark.py",
"chars": 12150,
"preview": "# Copyright 2024 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "mplsandbox_for_rl/transformers/benchmark/config/generation.yaml",
"chars": 1178,
"preview": "defaults:\n - benchmark # inheriting benchmark schema\n - scenario: inference\n - launcher: process\n - backend: pytorch"
},
{
"path": "mplsandbox_for_rl/transformers/benchmark/optimum_benchmark_wrapper.py",
"chars": 626,
"preview": "import argparse\nimport subprocess\n\n\ndef main(config_dir, config_name, args):\n subprocess.run([\"optimum-benchmark\", \"-"
},
{
"path": "mplsandbox_for_rl/transformers/conftest.py",
"chars": 5201,
"preview": "# Copyright 2020 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
},
{
"path": "mplsandbox_for_rl/transformers/docker/consistency.dockerfile",
"chars": 834,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nUSER root\nARG REF=main\nRUN apt-get update && apt-get install -y time"
},
{
"path": "mplsandbox_for_rl/transformers/docker/custom-tokenizers.dockerfile",
"chars": 1320,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nUSER root\nRUN apt-get update && apt-get install -y libsndfile1-dev e"
},
{
"path": "mplsandbox_for_rl/transformers/docker/examples-tf.dockerfile",
"chars": 576,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nUSER root\nRUN apt-get update && apt-get install -y libsndfile1-dev e"
},
{
"path": "mplsandbox_for_rl/transformers/docker/examples-torch.dockerfile",
"chars": 757,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nUSER root\nRUN apt-get update && apt-get install -y --no-install-rec"
},
{
"path": "mplsandbox_for_rl/transformers/docker/exotic-models.dockerfile",
"chars": 1206,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y libs"
},
{
"path": "mplsandbox_for_rl/transformers/docker/jax-light.dockerfile",
"chars": 588,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y libs"
},
{
"path": "mplsandbox_for_rl/transformers/docker/pipeline-tf.dockerfile",
"chars": 582,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y libs"
},
{
"path": "mplsandbox_for_rl/transformers/docker/pipeline-torch.dockerfile",
"chars": 745,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "mplsandbox_for_rl/transformers/docker/quality.dockerfile",
"chars": 425,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y time"
},
{
"path": "mplsandbox_for_rl/transformers/docker/tf-light.dockerfile",
"chars": 727,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "mplsandbox_for_rl/transformers/docker/torch-jax-light.dockerfile",
"chars": 887,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y lib"
},
{
"path": "mplsandbox_for_rl/transformers/docker/torch-light.dockerfile",
"chars": 763,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nUSER root\nRUN apt-get update && apt-get install -y --n"
},
{
"path": "mplsandbox_for_rl/transformers/docker/torch-tf-light.dockerfile",
"chars": 1009,
"preview": "FROM python:3.10-slim\nENV PYTHONDONTWRITEBYTECODE=1\nARG REF=main\nRUN echo ${REF}\nUSER root\nRUN apt-get update && apt-ge"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-all-latest-gpu/Dockerfile",
"chars": 3585,
"preview": "FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\n# "
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-doc-builder/Dockerfile",
"chars": 915,
"preview": "FROM python:3.10\nLABEL maintainer=\"Hugging Face\"\n\nRUN apt update\nRUN git clone https://github.com/huggingface/transforme"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-gpu/Dockerfile",
"chars": 846,
"preview": "FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04\nLABEL maintainer=\"Hugging Face\"\nLABEL repository=\"transformers\"\n\nRUN apt "
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-past-gpu/Dockerfile",
"chars": 2801,
"preview": "ARG BASE_DOCKER_IMAGE\nFROM $BASE_DOCKER_IMAGE\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\n# Use"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-amd-gpu/Dockerfile",
"chars": 1466,
"preview": "FROM rocm/dev-ubuntu-22.04:6.0.2\n# rocm/pytorch has no version with 2.1.0\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FR"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile",
"chars": 1813,
"preview": "FROM rocm/dev-ubuntu-22.04:5.6\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\nARG PYTORCH='2.1.1'\nA"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile",
"chars": 2712,
"preview": "# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11\nFROM nvcr.io/nvidia/pyt"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile",
"chars": 3030,
"preview": "# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11\nFROM nvcr.io/nvidia/pyt"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-gpu/Dockerfile",
"chars": 1658,
"preview": "FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\nRU"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-tpu/Dockerfile",
"chars": 2877,
"preview": "FROM google/cloud-sdk:slim\n\n# Build args.\nARG GITHUB_REF=refs/heads/main\n\n# TODO: This Dockerfile installs pytorch/xla 3"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-tpu/bert-base-cased.jsonnet",
"chars": 937,
"preview": "local base = import 'templates/base.libsonnet';\nlocal tpus = import 'templates/tpus.libsonnet';\nlocal utils = import \"te"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-tpu/dataset.yaml",
"chars": 797,
"preview": "apiVersion: v1\nkind: PersistentVolume\nmetadata:\n name: huggingface-cluster-disk\nspec:\n storageClassName: \"\"\n capacity"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-pytorch-tpu/docker-entrypoint.sh",
"chars": 247,
"preview": "#!/bin/bash\nsource ~/.bashrc\necho \"running docker-entrypoint.sh\"\nconda activate container\necho $KUBE_GOOGLE_CLOUD_TPU_EN"
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-quantization-latest-gpu/Dockerfile",
"chars": 2825,
"preview": "FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\n# "
},
{
"path": "mplsandbox_for_rl/transformers/docker/transformers-tensorflow-gpu/Dockerfile",
"chars": 1068,
"preview": "FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04\nLABEL maintainer=\"Hugging Face\"\n\nARG DEBIAN_FRONTEND=noninteractive\n\nRU"
},
{
"path": "mplsandbox_for_rl/transformers/docs/README.md",
"chars": 16871,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/TRANSLATING.md",
"chars": 3289,
"preview": "### Translating the Transformers documentation into your language\n\nAs part of our mission to democratize machine learnin"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/_toctree.yml",
"chars": 1234,
"preview": "- sections:\n - local: index\n title: 🤗 Transformers\n - local: quicktour\n title: Schnellstart\n - local: installat"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/accelerate.md",
"chars": 5252,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/add_new_model.md",
"chars": 61006,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/add_new_pipeline.md",
"chars": 11928,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/autoclass_tutorial.md",
"chars": 7451,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/contributing.md",
"chars": 21358,
"preview": "<!---\nCopyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/index.md",
"chars": 59535,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/installation.md",
"chars": 10601,
"preview": "<!---\nCopyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/llm_tutorial.md",
"chars": 12914,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/model_sharing.md",
"chars": 11601,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/peft.md",
"chars": 8425,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/pipeline_tutorial.md",
"chars": 8291,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/pr_checks.md",
"chars": 13040,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/preprocessing.md",
"chars": 24066,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/quicktour.md",
"chars": 19732,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/run_scripts.md",
"chars": 18409,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/testing.md",
"chars": 48643,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/training.md",
"chars": 20753,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/de/transformers_agents.md",
"chars": 17394,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/_config.py",
"chars": 534,
"preview": "# docstyle-ignore\nINSTALL_CONTENT = \"\"\"\n# Transformers installation\n! pip install transformers datasets evaluate acceler"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/_redirects.yml",
"chars": 128,
"preview": "# Optimizing inference\n\nperf_infer_gpu_many: perf_infer_gpu_one\ntransformers_agents: agents\nquantization: quantization/o"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/_toctree.yml",
"chars": 27233,
"preview": "- sections:\n - local: index\n title: 🤗 Transformers\n - local: quicktour\n title: Quick tour\n - local: installatio"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/accelerate.md",
"chars": 4849,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/add_new_model.md",
"chars": 52100,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/add_new_pipeline.md",
"chars": 10759,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/agents.md",
"chars": 25203,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/attention.md",
"chars": 3659,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/autoclass_tutorial.md",
"chars": 9107,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/benchmarks.md",
"chars": 18133,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/bertology.md",
"chars": 2329,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/big_models.md",
"chars": 9549,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/chat_templating.md",
"chars": 39974,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/community.md",
"chars": 26214,
"preview": "<!--⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/contributing.md",
"chars": 18842,
"preview": "<!---\nCopyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/conversations.md",
"chars": 17261,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/create_a_model.md",
"chars": 19929,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/custom_models.md",
"chars": 16039,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/debugging.md",
"chars": 19597,
"preview": "<!--Copyright 2021 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/deepspeed.md",
"chars": 58033,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/fast_tokenizers.md",
"chars": 2878,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/fsdp.md",
"chars": 7734,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/generation_strategies.md",
"chars": 29757,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/gguf.md",
"chars": 3968,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/glossary.md",
"chars": 26255,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/hpo_train.md",
"chars": 5803,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/index.md",
"chars": 41809,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/installation.md",
"chars": 9703,
"preview": "<!---\nCopyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/audio_utils.md",
"chars": 1468,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/file_utils.md",
"chars": 1440,
"preview": "<!--Copyright 2021 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/generation_utils.md",
"chars": 9663,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/image_processing_utils.md",
"chars": 1556,
"preview": "<!--Copyright 2022 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/modeling_utils.md",
"chars": 2321,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/pipelines_utils.md",
"chars": 1377,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/time_series_utils.md",
"chars": 1205,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/tokenization_utils.md",
"chars": 1540,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/internal/trainer_utils.md",
"chars": 1383,
"preview": "<!--Copyright 2020 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/kv_cache.md",
"chars": 23796,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/llm_optims.md",
"chars": 23572,
"preview": "<!--Copyright 2024 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/llm_tutorial.md",
"chars": 15240,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/llm_tutorial_optimization.md",
"chars": 52726,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\nLicensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/main_classes/agent.md",
"chars": 4118,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
},
{
"path": "mplsandbox_for_rl/transformers/docs/source/en/main_classes/backbones.md",
"chars": 2250,
"preview": "<!--Copyright 2023 The HuggingFace Team. All rights reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"Lice"
}
]
// ... and 4323 more files (download for full content)
About this extraction
This page contains the full source code of the Ablustrund/MPLSandbox GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 4523 files (90.7 MB), approximately 19.3M tokens, and a symbol index with 60948 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.