gitextract_it7z4sjw/

├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── benchmarks/
│   ├── README.md
│   └── test_tipc/
│       ├── ernie/
│       │   └── dygraph/
│       │       └── hybrid_parallel/
│       │           ├── N1C1/
│       │           │   ├── ernie_bs16_fp16_DP1-MP1-PP1.sh
│       │           │   └── ernie_bs16_fp32_DP1-MP1-PP1.sh
│       │           ├── N1C8/
│       │           │   ├── ernie_bs16_fp16_DP2-MP2-PP2.sh
│       │           │   └── ernie_bs16_fp32_DP2-MP2-PP2.sh
│       │           ├── N4C32/
│       │           │   ├── ernie_bs16_fp16_DP1-MP8-PP4.sh
│       │           │   ├── ernie_bs16_fp16_DP2-MP8-PP2.sh
│       │           │   ├── ernie_bs16_fp16_DP4-MP8-PP1.sh
│       │           │   ├── ernie_bs16_fp32_DP1-MP8-PP4.sh
│       │           │   ├── ernie_bs16_fp32_DP2-MP8-PP2.sh
│       │           │   └── ernie_bs16_fp32_DP4-MP8-PP1.sh
│       │           └── benchmark_common/
│       │               ├── prepare.sh
│       │               └── run_benchmark.sh
│       ├── gpt/
│       │   ├── dygraph/
│       │   │   ├── data_parallel/
│       │   │   │   ├── N1C8/
│       │   │   │   │   ├── gpt_1024_bs64_fp16_DP8-MP1-PP1.sh
│       │   │   │   │   ├── gpt_1024_flash_bs64_fp16_DP8-MP1-PP1.sh
│       │   │   │   │   └── gpt_2048_bs64_fp16_DP8-MP1-PP1.sh
│       │   │   │   └── benchmark_common/
│       │   │   │       ├── prepare.sh
│       │   │   │       └── run_benchmark.sh
│       │   │   ├── finetune/
│       │   │   │   ├── N1C1/
│       │   │   │   │   ├── CE_gpt_finetune_CoLA_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_MRPC_acc_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_MRPC_f1_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_QNLI_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_RTE_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_SST2_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_STSB_pearson_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   ├── CE_gpt_finetune_STSB_spearman_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   └── CE_gpt_finetune_WNLI_bs32_fp16_DP1-MP1-PP1.sh
│       │   │   │   └── benchmark_common/
│       │   │   │       ├── prepare.sh
│       │   │   │       └── run_benchmark.sh
│       │   │   ├── hybrid_parallel/
│       │   │   │   ├── N1C1/
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP1-PP1.sh
│       │   │   │   │   └── gpt_bs16_fp32_DP1-MP1-PP1.sh
│       │   │   │   ├── N1C4/
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP1-PP4.sh
│       │   │   │   │   └── gpt_bs16_fp16_DP1-MP4-PP1.sh
│       │   │   │   ├── N1C8/
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP1-PP8.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP2-PP4.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP4-PP2.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP8-PP1.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP2-MP2-PP2.sh
│       │   │   │   │   ├── gpt_bs16_fp32_DP2-MP2-PP2.sh
│       │   │   │   │   ├── gpt_bs64_fp16_DP8-MP1-PP1.sh
│       │   │   │   │   ├── gpt_bs64_fp32_DP8-MP1-PP1.sh
│       │   │   │   │   ├── gpt_recompute_bs16_fp16_DP2-MP2-PP2.sh
│       │   │   │   │   └── gpt_recompute_bs16_fp32_DP2-MP2-PP2.sh
│       │   │   │   ├── N4C32/
│       │   │   │   │   ├── gpt_bs16_fp16_DP1-MP8-PP4.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP2-MP8-PP2.sh
│       │   │   │   │   ├── gpt_bs16_fp16_DP4-MP8-PP1.sh
│       │   │   │   │   ├── gpt_bs16_fp32_DP1-MP8-PP4.sh
│       │   │   │   │   ├── gpt_bs16_fp32_DP2-MP8-PP2.sh
│       │   │   │   │   └── gpt_bs16_fp32_DP4-MP8-PP1.sh
│       │   │   │   └── benchmark_common/
│       │   │   │       ├── prepare.sh
│       │   │   │       └── run_benchmark.sh
│       │   │   ├── sequence_parallel/
│       │   │   │   ├── N1C8/
│       │   │   │   │   ├── gpt_sp_False_bs8_fp16_DP1-MP8-PP1.sh
│       │   │   │   │   └── gpt_sp_True_bs8_fp16_DP1-MP8-PP1.sh
│       │   │   │   ├── N4C32/
│       │   │   │   │   ├── gpt_sp_False_bs16_fp16_DP2-MP8-PP2.sh
│       │   │   │   │   └── gpt_sp_True_bs16_fp16_DP2-MP8-PP2.sh
│       │   │   │   └── benchmark_common/
│       │   │   │       ├── prepare.sh
│       │   │   │       └── run_benchmark.sh
│       │   │   └── sharding/
│       │   │       ├── N1C2/
│       │   │       │   ├── gpt_stage2_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│       │   │       │   ├── gpt_stage3_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│       │   │       │   └── gpt_stage3_bs16_fp32_DP1-MP1-PP1-Sharding2.sh
│       │   │       ├── N2C16/
│       │   │       │   └── gpt_stage2_bs128_fp16_DP1-MP1-PP1-Sharding16.sh
│       │   │       └── benchmark_common/
│       │   │           ├── prepare.sh
│       │   │           └── run_benchmark.sh
│       │   └── static/
│       │       └── auto_parallel/
│       │           ├── N1C1/
│       │           │   └── gpt_auto_recompute_bs8_fp32_DP1-MP1-PP1.sh
│       │           └── benchmark_common/
│       │               ├── prepare.sh
│       │               └── run_benchmark.sh
│       ├── imagen/
│       │   └── dygraph/
│       │       ├── N1C1/
│       │       │   ├── imagen_397M_text2im_64_bs1_fp32_DP1-MP1-PP1.sh
│       │       │   └── imagen_SR256_bs1_fp32_DP1-MP1-PP1.sh
│       │       ├── N1C8/
│       │       │   ├── imagen_2B_text2im_64_bs8_fp32_DP1-Sharding8.sh
│       │       │   ├── imagen_397M_text2im_64_bs8_fp32_DP8-MP1-PP1.sh
│       │       │   ├── imagen_SR256_bs8_fp32_DP8-MP1-PP1.sh
│       │       │   └── imagen_text2im_64_debertav2_bs8_fp32_DP8-MP1-PP1.sh
│       │       └── benchmark_common/
│       │           ├── prepare.sh
│       │           └── run_benchmark.sh
│       └── vit/
│           └── dygraph/
│               ├── finetune/
│               │   ├── N1C8/
│               │   │   ├── ViT_large_patch16_384_ft_fused_False_bs512_fp16_DP.sh
│               │   │   └── ViT_large_patch16_384_ft_fused_True_bs512_fp16_DP.sh
│               │   └── benchmark_common/
│               │       ├── prepare.sh
│               │       └── run_benchmark.sh
│               └── pretrained/
│                   ├── N2C16/
│                   │   ├── ViT_large_patch16_224_pt_fused_False_bs128_fp16_DP.sh
│                   │   └── ViT_large_patch16_224_pt_fused_True_bs128_fp16_DP.sh
│                   └── benchmark_common/
│                       ├── prepare.sh
│                       └── run_benchmark.sh
├── codestyle/
│   ├── .gitignore
│   ├── clang_format.hook
│   ├── copyright.hook
│   ├── cpplint_pre_commit.hook
│   ├── docstring_checker.py
│   ├── pylint_pre_commit.hook
│   └── test_docstring_checker.py
├── docs/
│   ├── cluster_deployment.md
│   ├── compression.md
│   ├── deployment_faq.md
│   ├── docker_install.md
│   ├── quick_start.md
│   └── standard.md
├── examples/
│   └── transformer/
│       ├── __init__.py
│       ├── models/
│       │   └── GPT/
│       │       ├── docs/
│       │       │   ├── README.md
│       │       │   ├── hybrid_parallel.md
│       │       │   ├── hybrid_profiler.md
│       │       │   ├── inference.md
│       │       │   ├── quantization_aware_training.md
│       │       │   ├── single_card.md
│       │       │   ├── single_finetune.md
│       │       │   └── structured_pruning.md
│       │       ├── finetune/
│       │       │   ├── configs/
│       │       │   │   ├── finetune_gpt_345M_single_card_glue.yaml
│       │       │   │   └── finetune_gpt_base.yaml
│       │       │   ├── impls.py
│       │       │   ├── run.py
│       │       │   └── run_task.sh
│       │       ├── generation/
│       │       │   ├── configs/
│       │       │   │   ├── generation_gpt_345M_dp8.yaml
│       │       │   │   ├── generation_gpt_345M_single_card.yaml
│       │       │   │   ├── generation_gpt_base.yaml
│       │       │   │   ├── generation_pruned_gpt_345M_single_card.yaml
│       │       │   │   ├── generation_qat_gpt_345M_single_card.yaml
│       │       │   │   ├── generation_qat_gpt_6.7B_single_card.yaml
│       │       │   │   ├── inference_gpt_345M_dp8.yaml
│       │       │   │   └── inference_gpt_345M_single_card.yaml
│       │       │   ├── export.py
│       │       │   ├── impls.py
│       │       │   ├── inference.py
│       │       │   └── run.py
│       │       ├── offline-eval/
│       │       │   ├── configs/
│       │       │   │   ├── eval_gpt_345M_single_card.yaml
│       │       │   │   ├── eval_gpt_base.yaml
│       │       │   │   ├── eval_pruned_gpt_345M_single_card.yaml
│       │       │   │   └── eval_qat_gpt_345M_single_card.yaml
│       │       │   ├── impls.py
│       │       │   └── run.py
│       │       ├── pretrain/
│       │       │   ├── configs/
│       │       │   │   ├── export_qat_gpt_345M_single_card.yaml
│       │       │   │   ├── pretrain_gpt_1.3B_dp8.yaml
│       │       │   │   ├── pretrain_gpt_1.3B_single_card.yaml
│       │       │   │   ├── pretrain_gpt_175B_mp8_pp16.yaml
│       │       │   │   ├── pretrain_gpt_345M_single_card.yaml
│       │       │   │   ├── pretrain_gpt_6.7B_sharding16.yaml
│       │       │   │   ├── pretrain_gpt_base.yaml
│       │       │   │   ├── pretrain_gpt_cn_345M_single_card.yaml
│       │       │   │   ├── prune_gpt_345M_single_card.yaml
│       │       │   │   ├── qat_gpt_345M_mp8.yaml
│       │       │   │   ├── qat_gpt_345M_single_card.yaml
│       │       │   │   └── qat_gpt_6.7B_sharding16.yaml
│       │       │   ├── export.py
│       │       │   ├── impls.py
│       │       │   └── run.py
│       │       └── pretrain_moe/
│       │           ├── configs/
│       │           │   ├── pretrain_moe_345M_single_card.yaml
│       │           │   └── pretrain_moe_base.yaml
│       │           ├── impls.py
│       │           └── run.py
│       └── utils/
│           ├── __init__.py
│           ├── components.py
│           ├── config.py
│           └── qat.py
├── ppfleetx/
│   ├── __init__.py
│   ├── configs/
│   │   ├── multimodal/
│   │   │   └── imagen/
│   │   │       ├── imagen_397M_text2im_64x64.yaml
│   │   │       ├── imagen_base.yaml
│   │   │       ├── imagen_super_resolution_1024.yaml
│   │   │       ├── imagen_super_resolution_256.yaml
│   │   │       ├── imagen_text2im_64x64_DebertaV2.yaml
│   │   │       └── imagen_text2im_64x64_T5-11B.yaml
│   │   ├── nlp/
│   │   │   ├── ernie/
│   │   │   │   ├── auto/
│   │   │   │   │   ├── finetune_ernie_345M_single_card.yaml
│   │   │   │   │   ├── finetune_ernie_base.yaml
│   │   │   │   │   ├── pretrain_ernie_base.yaml
│   │   │   │   │   └── pretrain_ernie_base_345M_single_card.yaml
│   │   │   │   ├── finetune_ernie_345M_single_card.yaml
│   │   │   │   ├── finetune_ernie_base.yaml
│   │   │   │   ├── inference_ernie_345M_single_card.yaml
│   │   │   │   ├── pretrain_ernie_base.yaml
│   │   │   │   ├── pretrain_ernie_base_175B_mp8_pp16.yaml
│   │   │   │   ├── pretrain_ernie_base_345M_single_card.yaml
│   │   │   │   ├── pretrain_ernie_base_3D.yaml
│   │   │   │   ├── pretrain_ernie_base_6.7B_sharding16.yaml
│   │   │   │   ├── pretrain_ernie_large_single_card.yaml
│   │   │   │   └── qat_ernie_base.yaml
│   │   │   ├── gpt/
│   │   │   │   ├── auto/
│   │   │   │   │   ├── export_gpt_fp16_single_card.yaml
│   │   │   │   │   ├── generation_gpt_175B_mp8.yaml
│   │   │   │   │   ├── generation_gpt_345M_mp2.yaml
│   │   │   │   │   ├── generation_gpt_345M_single_card.yaml
│   │   │   │   │   ├── generation_gpt_6.7B_mp1.yaml
│   │   │   │   │   ├── pretrain_gpt_1.3B_dp8.yaml
│   │   │   │   │   ├── pretrain_gpt_1.3B_dp8_tuning.yaml
│   │   │   │   │   ├── pretrain_gpt_1.3B_single_card.yaml
│   │   │   │   │   ├── pretrain_gpt_345M_single_card.yaml
│   │   │   │   │   ├── pretrain_gpt_6.7B_sharding16.yaml
│   │   │   │   │   ├── pretrain_gpt_base.yaml
│   │   │   │   │   └── qat_generation_gpt_345M_mp2.yaml
│   │   │   │   ├── eval_gpt_345M_single_card.yaml
│   │   │   │   ├── eval_pruned_gpt_345M_single_card.yaml
│   │   │   │   ├── eval_qat_gpt_345M_single_card.yaml
│   │   │   │   ├── export_qat_gpt_345M_single_card.yaml
│   │   │   │   ├── finetune_gpt_345M_single_card_glue.yaml
│   │   │   │   ├── finetune_gpt_base.yaml
│   │   │   │   ├── generation_gpt_345M_dp8.yaml
│   │   │   │   ├── generation_gpt_345M_mp1.yaml
│   │   │   │   ├── generation_gpt_345M_single_card.yaml
│   │   │   │   ├── generation_gpt_6.7B_single_mp1.yaml
│   │   │   │   ├── generation_pruned_gpt_345M_single_card.yaml
│   │   │   │   ├── generation_qat_gpt_345M_single_card.yaml
│   │   │   │   ├── generation_qat_gpt_6.7B_single_card.yaml
│   │   │   │   ├── inference_gpt_345M_dp8.yaml
│   │   │   │   ├── inference_gpt_345M_single_card.yaml
│   │   │   │   ├── pretrain_gpt_1.3B_dp8.yaml
│   │   │   │   ├── pretrain_gpt_1.3B_single_card.yaml
│   │   │   │   ├── pretrain_gpt_13B_dp8.yaml
│   │   │   │   ├── pretrain_gpt_175B_mp8_pp16.yaml
│   │   │   │   ├── pretrain_gpt_345M_single_card.yaml
│   │   │   │   ├── pretrain_gpt_6.7B_sharding16.yaml
│   │   │   │   ├── pretrain_gpt_6.7B_single_card.yaml
│   │   │   │   ├── pretrain_gpt_base.yaml
│   │   │   │   ├── pretrain_gpt_cn_345M_single_card.yaml
│   │   │   │   ├── prune_gpt_345M_single_card.yaml
│   │   │   │   ├── qat_gpt_345M_mp8.yaml
│   │   │   │   ├── qat_gpt_345M_single_card.yaml
│   │   │   │   └── qat_gpt_6.7B_sharding16.yaml
│   │   │   └── moe/
│   │   │       ├── pretrain_moe_1.3B_dp8.yaml
│   │   │       └── pretrain_moe_base.yaml
│   │   └── vis/
│   │       ├── base.yaml
│   │       ├── moco/
│   │       │   ├── moco_lincls_in1k_1n8c.yaml
│   │       │   ├── mocov1_pt_in1k_1n8c.yaml
│   │       │   └── mocov2_pt_in1k_1n8c.yaml
│   │       └── vit/
│   │           ├── ViT_base_patch16_224_inference.yaml
│   │           ├── ViT_base_patch16_224_pt_in1k_2n16c_dp_fp16o2.yaml
│   │           ├── ViT_base_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│   │           ├── ViT_base_patch16_384_ft_qat_cifar10_1n8c_dp_fp16o2.yaml
│   │           ├── ViT_base_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│   │           ├── ViT_large_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│   │           ├── ViT_large_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│   │           ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│   │           └── auto/
│   │               ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│   │               └── base.yaml
│   ├── core/
│   │   ├── __init__.py
│   │   ├── engine/
│   │   │   ├── __init__.py
│   │   │   ├── auto_engine.py
│   │   │   ├── basic_engine.py
│   │   │   ├── eager_engine.py
│   │   │   └── inference_engine.py
│   │   └── module/
│   │       ├── __init__.py
│   │       └── basic_module.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── data_tools/
│   │   │   ├── __init__.py
│   │   │   ├── cpp/
│   │   │   │   ├── Makefile
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compile.py
│   │   │   │   └── fast_index_map_helpers.cpp
│   │   │   ├── ernie/
│   │   │   │   ├── __init__.py
│   │   │   │   └── preprocess/
│   │   │   │       ├── README.md
│   │   │   │       ├── __init__.py
│   │   │   │       ├── create_pretraining_data.py
│   │   │   │       ├── docs/
│   │   │   │       │   ├── CLUECorpus2020.md
│   │   │   │       │   ├── CLUECorpusSmall.md
│   │   │   │       │   ├── OpenWebText2.md
│   │   │   │       │   └── WuDaoCorpusBase.md
│   │   │   │       ├── trans_to_json.py
│   │   │   │       └── words_segmentation.py
│   │   │   └── gpt/
│   │   │       ├── README.md
│   │   │       ├── __init__.py
│   │   │       ├── preprocess_data.py
│   │   │       └── raw_trans_to_json.py
│   │   ├── dataset/
│   │   │   ├── __init__.py
│   │   │   ├── ernie/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset_utils.py
│   │   │   │   └── ernie_dataset.py
│   │   │   ├── glue_dataset.py
│   │   │   ├── gpt_dataset.py
│   │   │   ├── multimodal_dataset.py
│   │   │   └── vision_dataset.py
│   │   ├── sampler/
│   │   │   ├── __init__.py
│   │   │   ├── batch_sampler.py
│   │   │   └── collate.py
│   │   ├── tokenizers/
│   │   │   ├── __init__.py
│   │   │   ├── debertav2_tokenizer.py
│   │   │   ├── ernie_tokenizer.py
│   │   │   ├── gpt_tokenizer.py
│   │   │   ├── t5_tokenization_utils.py
│   │   │   ├── t5_tokenizer.py
│   │   │   └── tokenization_utils_base.py
│   │   ├── transforms/
│   │   │   ├── __init__.py
│   │   │   ├── preprocess.py
│   │   │   └── utils.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       └── batch_collate_fn.py
│   ├── distributed/
│   │   ├── __init__.py
│   │   ├── apis/
│   │   │   ├── __init__.py
│   │   │   ├── amp.py
│   │   │   ├── comm_groups.py
│   │   │   ├── env.py
│   │   │   ├── io.py
│   │   │   └── strategy.py
│   │   └── protein_folding/
│   │       ├── __init__.py
│   │       ├── bp.py
│   │       ├── dap.py
│   │       ├── dp.py
│   │       └── scg.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── language_model/
│   │   │   ├── __init__.py
│   │   │   ├── auto_utils.py
│   │   │   ├── debertav2/
│   │   │   │   ├── __init__.py
│   │   │   │   └── modeling.py
│   │   │   ├── ernie/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── auto/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── auto_model.py
│   │   │   │   │   ├── auto_module.py
│   │   │   │   │   └── auto_transformer.py
│   │   │   │   ├── dygraph/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── hybrid_model.py
│   │   │   │   │   └── single_model.py
│   │   │   │   ├── ernie_module.py
│   │   │   │   ├── finetune_configs.yaml
│   │   │   │   └── layers/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── distributed_transformer.py
│   │   │   │       ├── model_outputs.py
│   │   │   │       ├── transformer.py
│   │   │   │       └── utils.py
│   │   │   ├── gpt/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── auto/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── auto_model.py
│   │   │   │   │   └── auto_module.py
│   │   │   │   └── dygraph/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── hybrid_model.py
│   │   │   │       ├── processor.py
│   │   │   │       ├── sequence_parallel_utils.py
│   │   │   │       └── single_model.py
│   │   │   ├── language_module.py
│   │   │   ├── metrics.py
│   │   │   ├── moe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── comm/
│   │   │   │   │   └── __init__.py
│   │   │   │   ├── comm_ops.py
│   │   │   │   ├── gate/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base_gate.py
│   │   │   │   │   ├── gshard_gate.py
│   │   │   │   │   ├── naive_gate.py
│   │   │   │   │   └── switch_gate.py
│   │   │   │   ├── moe_layer.py
│   │   │   │   └── utils.py
│   │   │   ├── moe_exp/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── experts.py
│   │   │   │   ├── layer.py
│   │   │   │   ├── mappings.py
│   │   │   │   └── sharded_moe.py
│   │   │   ├── t5/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modeling.py
│   │   │   │   └── utils.py
│   │   │   └── utils.py
│   │   ├── multimodal_model/
│   │   │   ├── __init__.py
│   │   │   ├── clip/
│   │   │   │   └── __init__.py
│   │   │   ├── imagen/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── unet.py
│   │   │   │   └── utils.py
│   │   │   ├── multimodal_module.py
│   │   │   └── utils.py
│   │   ├── protein_folding/
│   │   │   ├── __init__.py
│   │   │   ├── all_atom.py
│   │   │   ├── attentions.py
│   │   │   ├── common.py
│   │   │   ├── evoformer.py
│   │   │   ├── outer_product_mean.py
│   │   │   ├── quat_affine.py
│   │   │   ├── r3.py
│   │   │   ├── residue_constants.py
│   │   │   └── template.py
│   │   └── vision_model/
│   │       ├── __init__.py
│   │       ├── factory.py
│   │       ├── general_classification_module.py
│   │       ├── layers/
│   │       │   ├── __init__.py
│   │       │   ├── attention.py
│   │       │   ├── droppath.py
│   │       │   ├── embedding.py
│   │       │   ├── identity.py
│   │       │   ├── initializer.py
│   │       │   └── mlp.py
│   │       ├── loss/
│   │       │   ├── __init__.py
│   │       │   └── cross_entropy.py
│   │       ├── metrics/
│   │       │   ├── __init__.py
│   │       │   └── accuracy.py
│   │       ├── moco/
│   │       │   ├── __init__.py
│   │       │   └── moco.py
│   │       ├── moco_module.py
│   │       ├── resnet/
│   │       │   └── __init__.py
│   │       └── vit/
│   │           ├── __init__.py
│   │           └── vit.py
│   ├── ops/
│   │   ├── setup_cuda.py
│   │   ├── test_topp_sampling.py
│   │   └── topp_sampling.cu
│   ├── optims/
│   │   ├── __init__.py
│   │   ├── grad_clip.py
│   │   ├── lr_scheduler.py
│   │   └── optimizer.py
│   ├── tools/
│   │   ├── __init__.py
│   │   └── multiprocess_tool.py
│   └── utils/
│       ├── __init__.py
│       ├── check.py
│       ├── compression_helper.py
│       ├── config.py
│       ├── device.py
│       ├── download.py
│       ├── export.py
│       ├── file.py
│       ├── log.py
│       ├── tensor_fusion_helper.py
│       └── version.py
├── projects/
│   ├── ernie/
│   │   ├── auto_export_ernie_345M_mp1.sh
│   │   ├── auto_export_ernie_345M_mp2.sh
│   │   ├── auto_export_ernie_345M_mp2_npu.sh
│   │   ├── auto_export_ernie_345M_mp2_xpu.sh
│   │   ├── docs/
│   │   │   ├── README.md
│   │   │   └── inference.md
│   │   ├── export_ernie_345M_single_card.sh
│   │   ├── finetune_ernie_345M_single_card.sh
│   │   ├── finetune_ernie_345M_single_card_npu.sh
│   │   ├── inference.py
│   │   ├── pretrain_ernie_base.sh
│   │   ├── pretrain_ernie_base_175B_mp8_pp16.sh
│   │   ├── pretrain_ernie_base_3D.sh
│   │   ├── pretrain_ernie_base_3D_npu.sh
│   │   ├── pretrain_ernie_base_6.7B_sharding16.sh
│   │   ├── pretrain_ernie_large.sh
│   │   ├── pretrain_ernie_large_mp2_mlu.sh
│   │   ├── pretrain_ernie_large_mp2_npu.sh
│   │   ├── pretrain_ernie_large_mp2_pp2_npu.sh
│   │   ├── pretrain_ernie_large_npu.sh
│   │   ├── run_inference.sh
│   │   ├── run_inference_mp2.sh
│   │   ├── run_inference_mp2_npu.sh
│   │   └── run_inference_mp2_xpu.sh
│   ├── gpt/
│   │   ├── auto_export_gpt_175B_mp8.sh
│   │   ├── auto_export_gpt_345M_mp2.sh
│   │   ├── auto_export_gpt_345M_single_card.sh
│   │   ├── auto_export_gpt_6.7B_mp1.sh
│   │   ├── auto_export_gpt_fp16_single_card.sh
│   │   ├── auto_gpt_1.3B_dp8.sh
│   │   ├── auto_gpt_1.3B_dp8_tuning.sh
│   │   ├── auto_gpt_1.3B_single_card.sh
│   │   ├── auto_gpt_345M_single_card.sh
│   │   ├── auto_gpt_6.7B_sharding16.sh
│   │   ├── auto_qat_export_gpt_345M_mp2.sh
│   │   ├── benchmark.py
│   │   ├── docs/
│   │   │   ├── README.md
│   │   │   ├── auto_parallel.md
│   │   │   ├── hybrid_parallel.md
│   │   │   ├── hybrid_profiler.md
│   │   │   ├── inference.md
│   │   │   ├── quantization_aware_training.md
│   │   │   ├── single_card.md
│   │   │   ├── single_finetune.md
│   │   │   └── structured_pruning.md
│   │   ├── eval_prune_gpt_345M_single_card.sh
│   │   ├── eval_qat_gpt_345M_single_card.sh
│   │   ├── evaluate_gpt_345M_single_card.sh
│   │   ├── export_gpt_345M_single_card.sh
│   │   ├── export_prune_gpt_345M_single_card.sh
│   │   ├── export_qat_gpt_345M_single_card.sh
│   │   ├── finetune_gpt_345M_single_card.sh
│   │   ├── inference.py
│   │   ├── inference_gpt_6.7B_single_card.sh
│   │   ├── inference_gpt_multigpu.sh
│   │   ├── inference_gpt_single_card.sh
│   │   ├── pretrain_gpt_1.3B_dp8.sh
│   │   ├── pretrain_gpt_1.3B_single_card.sh
│   │   ├── pretrain_gpt_175B_mp8_pp16.sh
│   │   ├── pretrain_gpt_345M_single_card.sh
│   │   ├── pretrain_gpt_6.7B_sharding16.sh
│   │   ├── prune_gpt_345M_single_card.sh
│   │   ├── qat_gpt_345M_mp8.sh
│   │   ├── qat_gpt_345M_single_card.sh
│   │   ├── qat_gpt_6.7B_sharding16.sh
│   │   └── run_benchmark.sh
│   ├── imagen/
│   │   ├── README.md
│   │   ├── filelist/
│   │   │   └── laion_400M/
│   │   │       └── train
│   │   ├── run_super_resolution_1024_sharding128.sh
│   │   ├── run_super_resolution_256_dp128.sh
│   │   ├── run_super_resolution_256_single_card.sh
│   │   ├── run_text2im_2B_64x64_T5-11B_sharding8_dp32.sh
│   │   ├── run_text2im_397M_64x64_dp128.sh
│   │   ├── run_text2im_397M_64x64_single_card.sh
│   │   └── run_text2im_64x64_DebertaV2_dp8.sh
│   ├── moco/
│   │   ├── README.md
│   │   ├── run_mocov1_lincls_in1k.sh
│   │   ├── run_mocov1_pretrain_in1k.sh
│   │   ├── run_mocov2_lincls_in1k.sh
│   │   └── run_mocov2_pretrain_in1k.sh
│   ├── protein_folding/
│   │   └── README.md
│   ├── ufo2.0/
│   │   └── README.md
│   └── vit/
│       ├── README.md
│       ├── auto_vit_patch16_224_dp8.sh
│       ├── docs/
│       │   └── inference.md
│       ├── export_qat.sh
│       ├── inference.py
│       ├── run_finetune.sh
│       ├── run_finetune_fused_attention.sh
│       ├── run_inference_base_patch16_224.sh
│       ├── run_pretrain.sh
│       ├── run_pretrained_fused_attention.sh
│       └── run_qat.sh
├── requirements.txt
├── setup.py
├── tasks/
│   └── gpt/
│       ├── generation.py
│       ├── inference.py
│       └── run_generation.sh
└── tools/
    ├── auto.py
    ├── auto_export.py
    ├── eval.py
    ├── export.py
    ├── inference.py
    └── train.py