Showing preview only (2,869K chars total). Download the full file or copy to clipboard to get everything.
Repository: PaddlePaddle/PaddleFleetX
Branch: develop
Commit: 20f33ad21e9d
Files: 507
Total size: 2.6 MB
Directory structure:
gitextract_it7z4sjw/
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── benchmarks/
│ ├── README.md
│ └── test_tipc/
│ ├── ernie/
│ │ └── dygraph/
│ │ └── hybrid_parallel/
│ │ ├── N1C1/
│ │ │ ├── ernie_bs16_fp16_DP1-MP1-PP1.sh
│ │ │ └── ernie_bs16_fp32_DP1-MP1-PP1.sh
│ │ ├── N1C8/
│ │ │ ├── ernie_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ └── ernie_bs16_fp32_DP2-MP2-PP2.sh
│ │ ├── N4C32/
│ │ │ ├── ernie_bs16_fp16_DP1-MP8-PP4.sh
│ │ │ ├── ernie_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ ├── ernie_bs16_fp16_DP4-MP8-PP1.sh
│ │ │ ├── ernie_bs16_fp32_DP1-MP8-PP4.sh
│ │ │ ├── ernie_bs16_fp32_DP2-MP8-PP2.sh
│ │ │ └── ernie_bs16_fp32_DP4-MP8-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ ├── gpt/
│ │ ├── dygraph/
│ │ │ ├── data_parallel/
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_1024_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_1024_flash_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ └── gpt_2048_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── finetune/
│ │ │ │ ├── N1C1/
│ │ │ │ │ ├── CE_gpt_finetune_CoLA_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_MRPC_acc_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_MRPC_f1_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_QNLI_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_RTE_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_SST2_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_STSB_pearson_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_STSB_spearman_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ └── CE_gpt_finetune_WNLI_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── hybrid_parallel/
│ │ │ │ ├── N1C1/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ └── gpt_bs16_fp32_DP1-MP1-PP1.sh
│ │ │ │ ├── N1C4/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP4.sh
│ │ │ │ │ └── gpt_bs16_fp16_DP1-MP4-PP1.sh
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP8.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP2-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP4-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP8-PP1.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP2-MP2-PP2.sh
│ │ │ │ │ ├── gpt_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_bs64_fp32_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_recompute_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ │ │ └── gpt_recompute_bs16_fp32_DP2-MP2-PP2.sh
│ │ │ │ ├── N4C32/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP8-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP4-MP8-PP1.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP1-MP8-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP2-MP8-PP2.sh
│ │ │ │ │ └── gpt_bs16_fp32_DP4-MP8-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── sequence_parallel/
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_sp_False_bs8_fp16_DP1-MP8-PP1.sh
│ │ │ │ │ └── gpt_sp_True_bs8_fp16_DP1-MP8-PP1.sh
│ │ │ │ ├── N4C32/
│ │ │ │ │ ├── gpt_sp_False_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ │ └── gpt_sp_True_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ └── sharding/
│ │ │ ├── N1C2/
│ │ │ │ ├── gpt_stage2_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│ │ │ │ ├── gpt_stage3_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│ │ │ │ └── gpt_stage3_bs16_fp32_DP1-MP1-PP1-Sharding2.sh
│ │ │ ├── N2C16/
│ │ │ │ └── gpt_stage2_bs128_fp16_DP1-MP1-PP1-Sharding16.sh
│ │ │ └── benchmark_common/
│ │ │ ├── prepare.sh
│ │ │ └── run_benchmark.sh
│ │ └── static/
│ │ └── auto_parallel/
│ │ ├── N1C1/
│ │ │ └── gpt_auto_recompute_bs8_fp32_DP1-MP1-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ ├── imagen/
│ │ └── dygraph/
│ │ ├── N1C1/
│ │ │ ├── imagen_397M_text2im_64_bs1_fp32_DP1-MP1-PP1.sh
│ │ │ └── imagen_SR256_bs1_fp32_DP1-MP1-PP1.sh
│ │ ├── N1C8/
│ │ │ ├── imagen_2B_text2im_64_bs8_fp32_DP1-Sharding8.sh
│ │ │ ├── imagen_397M_text2im_64_bs8_fp32_DP8-MP1-PP1.sh
│ │ │ ├── imagen_SR256_bs8_fp32_DP8-MP1-PP1.sh
│ │ │ └── imagen_text2im_64_debertav2_bs8_fp32_DP8-MP1-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ └── vit/
│ └── dygraph/
│ ├── finetune/
│ │ ├── N1C8/
│ │ │ ├── ViT_large_patch16_384_ft_fused_False_bs512_fp16_DP.sh
│ │ │ └── ViT_large_patch16_384_ft_fused_True_bs512_fp16_DP.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ └── pretrained/
│ ├── N2C16/
│ │ ├── ViT_large_patch16_224_pt_fused_False_bs128_fp16_DP.sh
│ │ └── ViT_large_patch16_224_pt_fused_True_bs128_fp16_DP.sh
│ └── benchmark_common/
│ ├── prepare.sh
│ └── run_benchmark.sh
├── codestyle/
│ ├── .gitignore
│ ├── clang_format.hook
│ ├── copyright.hook
│ ├── cpplint_pre_commit.hook
│ ├── docstring_checker.py
│ ├── pylint_pre_commit.hook
│ └── test_docstring_checker.py
├── docs/
│ ├── cluster_deployment.md
│ ├── compression.md
│ ├── deployment_faq.md
│ ├── docker_install.md
│ ├── quick_start.md
│ └── standard.md
├── examples/
│ └── transformer/
│ ├── __init__.py
│ ├── models/
│ │ └── GPT/
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── hybrid_parallel.md
│ │ │ ├── hybrid_profiler.md
│ │ │ ├── inference.md
│ │ │ ├── quantization_aware_training.md
│ │ │ ├── single_card.md
│ │ │ ├── single_finetune.md
│ │ │ └── structured_pruning.md
│ │ ├── finetune/
│ │ │ ├── configs/
│ │ │ │ ├── finetune_gpt_345M_single_card_glue.yaml
│ │ │ │ └── finetune_gpt_base.yaml
│ │ │ ├── impls.py
│ │ │ ├── run.py
│ │ │ └── run_task.sh
│ │ ├── generation/
│ │ │ ├── configs/
│ │ │ │ ├── generation_gpt_345M_dp8.yaml
│ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_gpt_base.yaml
│ │ │ │ ├── generation_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_6.7B_single_card.yaml
│ │ │ │ ├── inference_gpt_345M_dp8.yaml
│ │ │ │ └── inference_gpt_345M_single_card.yaml
│ │ │ ├── export.py
│ │ │ ├── impls.py
│ │ │ ├── inference.py
│ │ │ └── run.py
│ │ ├── offline-eval/
│ │ │ ├── configs/
│ │ │ │ ├── eval_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_gpt_base.yaml
│ │ │ │ ├── eval_pruned_gpt_345M_single_card.yaml
│ │ │ │ └── eval_qat_gpt_345M_single_card.yaml
│ │ │ ├── impls.py
│ │ │ └── run.py
│ │ ├── pretrain/
│ │ │ ├── configs/
│ │ │ │ ├── export_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ ├── pretrain_gpt_cn_345M_single_card.yaml
│ │ │ │ ├── prune_gpt_345M_single_card.yaml
│ │ │ │ ├── qat_gpt_345M_mp8.yaml
│ │ │ │ ├── qat_gpt_345M_single_card.yaml
│ │ │ │ └── qat_gpt_6.7B_sharding16.yaml
│ │ │ ├── export.py
│ │ │ ├── impls.py
│ │ │ └── run.py
│ │ └── pretrain_moe/
│ │ ├── configs/
│ │ │ ├── pretrain_moe_345M_single_card.yaml
│ │ │ └── pretrain_moe_base.yaml
│ │ ├── impls.py
│ │ └── run.py
│ └── utils/
│ ├── __init__.py
│ ├── components.py
│ ├── config.py
│ └── qat.py
├── ppfleetx/
│ ├── __init__.py
│ ├── configs/
│ │ ├── multimodal/
│ │ │ └── imagen/
│ │ │ ├── imagen_397M_text2im_64x64.yaml
│ │ │ ├── imagen_base.yaml
│ │ │ ├── imagen_super_resolution_1024.yaml
│ │ │ ├── imagen_super_resolution_256.yaml
│ │ │ ├── imagen_text2im_64x64_DebertaV2.yaml
│ │ │ └── imagen_text2im_64x64_T5-11B.yaml
│ │ ├── nlp/
│ │ │ ├── ernie/
│ │ │ │ ├── auto/
│ │ │ │ │ ├── finetune_ernie_345M_single_card.yaml
│ │ │ │ │ ├── finetune_ernie_base.yaml
│ │ │ │ │ ├── pretrain_ernie_base.yaml
│ │ │ │ │ └── pretrain_ernie_base_345M_single_card.yaml
│ │ │ │ ├── finetune_ernie_345M_single_card.yaml
│ │ │ │ ├── finetune_ernie_base.yaml
│ │ │ │ ├── inference_ernie_345M_single_card.yaml
│ │ │ │ ├── pretrain_ernie_base.yaml
│ │ │ │ ├── pretrain_ernie_base_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_ernie_base_345M_single_card.yaml
│ │ │ │ ├── pretrain_ernie_base_3D.yaml
│ │ │ │ ├── pretrain_ernie_base_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_ernie_large_single_card.yaml
│ │ │ │ └── qat_ernie_base.yaml
│ │ │ ├── gpt/
│ │ │ │ ├── auto/
│ │ │ │ │ ├── export_gpt_fp16_single_card.yaml
│ │ │ │ │ ├── generation_gpt_175B_mp8.yaml
│ │ │ │ │ ├── generation_gpt_345M_mp2.yaml
│ │ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ │ ├── generation_gpt_6.7B_mp1.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_dp8_tuning.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ │ └── qat_generation_gpt_345M_mp2.yaml
│ │ │ │ ├── eval_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── export_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── finetune_gpt_345M_single_card_glue.yaml
│ │ │ │ ├── finetune_gpt_base.yaml
│ │ │ │ ├── generation_gpt_345M_dp8.yaml
│ │ │ │ ├── generation_gpt_345M_mp1.yaml
│ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_gpt_6.7B_single_mp1.yaml
│ │ │ │ ├── generation_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_6.7B_single_card.yaml
│ │ │ │ ├── inference_gpt_345M_dp8.yaml
│ │ │ │ ├── inference_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_13B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ ├── pretrain_gpt_cn_345M_single_card.yaml
│ │ │ │ ├── prune_gpt_345M_single_card.yaml
│ │ │ │ ├── qat_gpt_345M_mp8.yaml
│ │ │ │ ├── qat_gpt_345M_single_card.yaml
│ │ │ │ └── qat_gpt_6.7B_sharding16.yaml
│ │ │ └── moe/
│ │ │ ├── pretrain_moe_1.3B_dp8.yaml
│ │ │ └── pretrain_moe_base.yaml
│ │ └── vis/
│ │ ├── base.yaml
│ │ ├── moco/
│ │ │ ├── moco_lincls_in1k_1n8c.yaml
│ │ │ ├── mocov1_pt_in1k_1n8c.yaml
│ │ │ └── mocov2_pt_in1k_1n8c.yaml
│ │ └── vit/
│ │ ├── ViT_base_patch16_224_inference.yaml
│ │ ├── ViT_base_patch16_224_pt_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_qat_cifar10_1n8c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_large_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_large_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│ │ └── auto/
│ │ ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│ │ └── base.yaml
│ ├── core/
│ │ ├── __init__.py
│ │ ├── engine/
│ │ │ ├── __init__.py
│ │ │ ├── auto_engine.py
│ │ │ ├── basic_engine.py
│ │ │ ├── eager_engine.py
│ │ │ └── inference_engine.py
│ │ └── module/
│ │ ├── __init__.py
│ │ └── basic_module.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── data_tools/
│ │ │ ├── __init__.py
│ │ │ ├── cpp/
│ │ │ │ ├── Makefile
│ │ │ │ ├── __init__.py
│ │ │ │ ├── compile.py
│ │ │ │ └── fast_index_map_helpers.cpp
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ └── preprocess/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── create_pretraining_data.py
│ │ │ │ ├── docs/
│ │ │ │ │ ├── CLUECorpus2020.md
│ │ │ │ │ ├── CLUECorpusSmall.md
│ │ │ │ │ ├── OpenWebText2.md
│ │ │ │ │ └── WuDaoCorpusBase.md
│ │ │ │ ├── trans_to_json.py
│ │ │ │ └── words_segmentation.py
│ │ │ └── gpt/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── preprocess_data.py
│ │ │ └── raw_trans_to_json.py
│ │ ├── dataset/
│ │ │ ├── __init__.py
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dataset_utils.py
│ │ │ │ └── ernie_dataset.py
│ │ │ ├── glue_dataset.py
│ │ │ ├── gpt_dataset.py
│ │ │ ├── multimodal_dataset.py
│ │ │ └── vision_dataset.py
│ │ ├── sampler/
│ │ │ ├── __init__.py
│ │ │ ├── batch_sampler.py
│ │ │ └── collate.py
│ │ ├── tokenizers/
│ │ │ ├── __init__.py
│ │ │ ├── debertav2_tokenizer.py
│ │ │ ├── ernie_tokenizer.py
│ │ │ ├── gpt_tokenizer.py
│ │ │ ├── t5_tokenization_utils.py
│ │ │ ├── t5_tokenizer.py
│ │ │ └── tokenization_utils_base.py
│ │ ├── transforms/
│ │ │ ├── __init__.py
│ │ │ ├── preprocess.py
│ │ │ └── utils.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ └── batch_collate_fn.py
│ ├── distributed/
│ │ ├── __init__.py
│ │ ├── apis/
│ │ │ ├── __init__.py
│ │ │ ├── amp.py
│ │ │ ├── comm_groups.py
│ │ │ ├── env.py
│ │ │ ├── io.py
│ │ │ └── strategy.py
│ │ └── protein_folding/
│ │ ├── __init__.py
│ │ ├── bp.py
│ │ ├── dap.py
│ │ ├── dp.py
│ │ └── scg.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── language_model/
│ │ │ ├── __init__.py
│ │ │ ├── auto_utils.py
│ │ │ ├── debertav2/
│ │ │ │ ├── __init__.py
│ │ │ │ └── modeling.py
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_model.py
│ │ │ │ │ ├── auto_module.py
│ │ │ │ │ └── auto_transformer.py
│ │ │ │ ├── dygraph/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── hybrid_model.py
│ │ │ │ │ └── single_model.py
│ │ │ │ ├── ernie_module.py
│ │ │ │ ├── finetune_configs.yaml
│ │ │ │ └── layers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── distributed_transformer.py
│ │ │ │ ├── model_outputs.py
│ │ │ │ ├── transformer.py
│ │ │ │ └── utils.py
│ │ │ ├── gpt/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_model.py
│ │ │ │ │ └── auto_module.py
│ │ │ │ └── dygraph/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── hybrid_model.py
│ │ │ │ ├── processor.py
│ │ │ │ ├── sequence_parallel_utils.py
│ │ │ │ └── single_model.py
│ │ │ ├── language_module.py
│ │ │ ├── metrics.py
│ │ │ ├── moe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── comm/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── comm_ops.py
│ │ │ │ ├── gate/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_gate.py
│ │ │ │ │ ├── gshard_gate.py
│ │ │ │ │ ├── naive_gate.py
│ │ │ │ │ └── switch_gate.py
│ │ │ │ ├── moe_layer.py
│ │ │ │ └── utils.py
│ │ │ ├── moe_exp/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experts.py
│ │ │ │ ├── layer.py
│ │ │ │ ├── mappings.py
│ │ │ │ └── sharded_moe.py
│ │ │ ├── t5/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── modeling.py
│ │ │ │ └── utils.py
│ │ │ └── utils.py
│ │ ├── multimodal_model/
│ │ │ ├── __init__.py
│ │ │ ├── clip/
│ │ │ │ └── __init__.py
│ │ │ ├── imagen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── modeling.py
│ │ │ │ ├── unet.py
│ │ │ │ └── utils.py
│ │ │ ├── multimodal_module.py
│ │ │ └── utils.py
│ │ ├── protein_folding/
│ │ │ ├── __init__.py
│ │ │ ├── all_atom.py
│ │ │ ├── attentions.py
│ │ │ ├── common.py
│ │ │ ├── evoformer.py
│ │ │ ├── outer_product_mean.py
│ │ │ ├── quat_affine.py
│ │ │ ├── r3.py
│ │ │ ├── residue_constants.py
│ │ │ └── template.py
│ │ └── vision_model/
│ │ ├── __init__.py
│ │ ├── factory.py
│ │ ├── general_classification_module.py
│ │ ├── layers/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── droppath.py
│ │ │ ├── embedding.py
│ │ │ ├── identity.py
│ │ │ ├── initializer.py
│ │ │ └── mlp.py
│ │ ├── loss/
│ │ │ ├── __init__.py
│ │ │ └── cross_entropy.py
│ │ ├── metrics/
│ │ │ ├── __init__.py
│ │ │ └── accuracy.py
│ │ ├── moco/
│ │ │ ├── __init__.py
│ │ │ └── moco.py
│ │ ├── moco_module.py
│ │ ├── resnet/
│ │ │ └── __init__.py
│ │ └── vit/
│ │ ├── __init__.py
│ │ └── vit.py
│ ├── ops/
│ │ ├── setup_cuda.py
│ │ ├── test_topp_sampling.py
│ │ └── topp_sampling.cu
│ ├── optims/
│ │ ├── __init__.py
│ │ ├── grad_clip.py
│ │ ├── lr_scheduler.py
│ │ └── optimizer.py
│ ├── tools/
│ │ ├── __init__.py
│ │ └── multiprocess_tool.py
│ └── utils/
│ ├── __init__.py
│ ├── check.py
│ ├── compression_helper.py
│ ├── config.py
│ ├── device.py
│ ├── download.py
│ ├── export.py
│ ├── file.py
│ ├── log.py
│ ├── tensor_fusion_helper.py
│ └── version.py
├── projects/
│ ├── ernie/
│ │ ├── auto_export_ernie_345M_mp1.sh
│ │ ├── auto_export_ernie_345M_mp2.sh
│ │ ├── auto_export_ernie_345M_mp2_npu.sh
│ │ ├── auto_export_ernie_345M_mp2_xpu.sh
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ └── inference.md
│ │ ├── export_ernie_345M_single_card.sh
│ │ ├── finetune_ernie_345M_single_card.sh
│ │ ├── finetune_ernie_345M_single_card_npu.sh
│ │ ├── inference.py
│ │ ├── pretrain_ernie_base.sh
│ │ ├── pretrain_ernie_base_175B_mp8_pp16.sh
│ │ ├── pretrain_ernie_base_3D.sh
│ │ ├── pretrain_ernie_base_3D_npu.sh
│ │ ├── pretrain_ernie_base_6.7B_sharding16.sh
│ │ ├── pretrain_ernie_large.sh
│ │ ├── pretrain_ernie_large_mp2_mlu.sh
│ │ ├── pretrain_ernie_large_mp2_npu.sh
│ │ ├── pretrain_ernie_large_mp2_pp2_npu.sh
│ │ ├── pretrain_ernie_large_npu.sh
│ │ ├── run_inference.sh
│ │ ├── run_inference_mp2.sh
│ │ ├── run_inference_mp2_npu.sh
│ │ └── run_inference_mp2_xpu.sh
│ ├── gpt/
│ │ ├── auto_export_gpt_175B_mp8.sh
│ │ ├── auto_export_gpt_345M_mp2.sh
│ │ ├── auto_export_gpt_345M_single_card.sh
│ │ ├── auto_export_gpt_6.7B_mp1.sh
│ │ ├── auto_export_gpt_fp16_single_card.sh
│ │ ├── auto_gpt_1.3B_dp8.sh
│ │ ├── auto_gpt_1.3B_dp8_tuning.sh
│ │ ├── auto_gpt_1.3B_single_card.sh
│ │ ├── auto_gpt_345M_single_card.sh
│ │ ├── auto_gpt_6.7B_sharding16.sh
│ │ ├── auto_qat_export_gpt_345M_mp2.sh
│ │ ├── benchmark.py
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── auto_parallel.md
│ │ │ ├── hybrid_parallel.md
│ │ │ ├── hybrid_profiler.md
│ │ │ ├── inference.md
│ │ │ ├── quantization_aware_training.md
│ │ │ ├── single_card.md
│ │ │ ├── single_finetune.md
│ │ │ └── structured_pruning.md
│ │ ├── eval_prune_gpt_345M_single_card.sh
│ │ ├── eval_qat_gpt_345M_single_card.sh
│ │ ├── evaluate_gpt_345M_single_card.sh
│ │ ├── export_gpt_345M_single_card.sh
│ │ ├── export_prune_gpt_345M_single_card.sh
│ │ ├── export_qat_gpt_345M_single_card.sh
│ │ ├── finetune_gpt_345M_single_card.sh
│ │ ├── inference.py
│ │ ├── inference_gpt_6.7B_single_card.sh
│ │ ├── inference_gpt_multigpu.sh
│ │ ├── inference_gpt_single_card.sh
│ │ ├── pretrain_gpt_1.3B_dp8.sh
│ │ ├── pretrain_gpt_1.3B_single_card.sh
│ │ ├── pretrain_gpt_175B_mp8_pp16.sh
│ │ ├── pretrain_gpt_345M_single_card.sh
│ │ ├── pretrain_gpt_6.7B_sharding16.sh
│ │ ├── prune_gpt_345M_single_card.sh
│ │ ├── qat_gpt_345M_mp8.sh
│ │ ├── qat_gpt_345M_single_card.sh
│ │ ├── qat_gpt_6.7B_sharding16.sh
│ │ └── run_benchmark.sh
│ ├── imagen/
│ │ ├── README.md
│ │ ├── filelist/
│ │ │ └── laion_400M/
│ │ │ └── train
│ │ ├── run_super_resolution_1024_sharding128.sh
│ │ ├── run_super_resolution_256_dp128.sh
│ │ ├── run_super_resolution_256_single_card.sh
│ │ ├── run_text2im_2B_64x64_T5-11B_sharding8_dp32.sh
│ │ ├── run_text2im_397M_64x64_dp128.sh
│ │ ├── run_text2im_397M_64x64_single_card.sh
│ │ └── run_text2im_64x64_DebertaV2_dp8.sh
│ ├── moco/
│ │ ├── README.md
│ │ ├── run_mocov1_lincls_in1k.sh
│ │ ├── run_mocov1_pretrain_in1k.sh
│ │ ├── run_mocov2_lincls_in1k.sh
│ │ └── run_mocov2_pretrain_in1k.sh
│ ├── protein_folding/
│ │ └── README.md
│ ├── ufo2.0/
│ │ └── README.md
│ └── vit/
│ ├── README.md
│ ├── auto_vit_patch16_224_dp8.sh
│ ├── docs/
│ │ └── inference.md
│ ├── export_qat.sh
│ ├── inference.py
│ ├── run_finetune.sh
│ ├── run_finetune_fused_attention.sh
│ ├── run_inference_base_patch16_224.sh
│ ├── run_pretrain.sh
│ ├── run_pretrained_fused_attention.sh
│ └── run_qat.sh
├── requirements.txt
├── setup.py
├── tasks/
│ └── gpt/
│ ├── generation.py
│ ├── inference.py
│ └── run_generation.sh
└── tools/
├── auto.py
├── auto_export.py
├── eval.py
├── export.py
├── inference.py
└── train.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.DS_Store
.idea
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: v1.0.1
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- repo: local
hooks:
- id: clang-format-with-version-check
name: clang-format
description: Format files with ClangFormat.
entry: bash ./codestyle/clang_format.hook -i
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
hooks:
- id: cpplint-cpp-source
name: cpplint
description: Check C++ code style using cpplint.py.
entry: bash ./codestyle/cpplint_pre_commit.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
- repo: local
hooks:
- id: pylint-doc-string
name: pylint
description: Check python docstring style using docstring_checker.
entry: bash ./codestyle/pylint_pre_commit.hook
language: system
files: \.(py)$
- repo: local
hooks:
- id: copyright_checker
name: copyright_checker
entry: python ./codestyle/copyright.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
================================================
FILE: Dockerfile
================================================
# Base image: PaddlePaddle 2.4.1 GPU build (CUDA 11.2 / cuDNN 8.2 / TensorRT 8.0).
# Override with: docker build --build-arg BASE_IMAGE=<image> .
ARG BASE_IMAGE=registry.baidubce.com/paddlepaddle/paddle:2.4.1-gpu-cuda11.2-cudnn8.2-trt8.0
FROM $BASE_IMAGE
WORKDIR /paddle
# Replace the release paddlepaddle-gpu from the base image with the nightly
# develop wheel for CUDA 11.2 (the "post112" build) from the official wheel index.
RUN python -m pip install paddlepaddle-gpu==0.0.0.post112 -f https://www.paddlepaddle.org.cn/whl/linux/gpu/develop.html
# Alternative kept for reference: download requirements.txt from the PaddleFleetX
# develop branch instead of copying it from the build context.
# RUN wget https://raw.githubusercontent.com/PaddlePaddle/PaddleFleetX/develop/requirements.txt && python -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
COPY requirements.txt /paddle
# NOTE: the trailing "#-i ..." is commented out at the shell level; remove the
# leading "#" to install from the Baidu PyPI mirror instead of the default index.
RUN python -m pip install -r requirements.txt #-i https://mirror.baidu.com/pypi/simple
# Ensure system libraries in /usr/lib64 are found at runtime (prepended to any existing path).
ENV LD_LIBRARY_PATH=/usr/lib64/:${LD_LIBRARY_PATH}
================================================
FILE: LICENSE
================================================
Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
<p align="center">
<img src="./paddlefleetx-logo.png" align="middle" width="350" />
</p>
------------------------------------------------------------------------------------------
<p align="center">
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-dfd.svg"></a>
<a href="https://github.com/PaddlePaddle/PaddleFleetX/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/PaddleFleetX?color=ffa"></a>
<a href=""><img src="https://img.shields.io/badge/python-3.7+-aff.svg"></a>
<a href="https://github.com/PaddlePaddle/PaddleFleetX/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/PaddleFleetX?color=9ea"></a>
<a href="https://github.com/PaddlePaddle/PaddleFleetX/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/PaddleFleetX?color=9cc"></a>
<a href="https://github.com/PaddlePaddle/PaddleFleetX/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/PaddleFleetX?color=ccf"></a>
</p>
## 简介
PaddleFleetX是基于飞桨深度学习框架开发的大模型套件,旨在提供高性能、灵活易用的大模型全流程应用能力,在**开发**、**训练**、**精调**、**压缩**、**推理**、**部署**六大环节提供端到端全流程优化。
<p align="center">
<img width="1000" alt="飞桨大模型套件" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/ab5e87cc-df52-48cb-9968-8951d3b164ba">
</p>
## 特色介绍
### 大模型开发:动静统一开发模式,4D混合并行策略灵活配置
<p align="center">
<img width="771" alt="大模型开发" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/95d1c0e8-df92-489b-8472-0a8b438cbfcf">
</p>
基于飞桨动静统一的开发模式,大模型套件全面使用动态图开发,在Generate API中可自动完成算子融合具备静态图的调试性能。全场景统一训练器Trainer可以轻松完成4D混合并行的配置,在预训练与精调环节皆可使用。
### 大模型训练:发挥基础计算潜能、全面提升分布式效率
飞桨针对大模型训练,对数据读取、混合精度计算策略、高性能算子库、并行策略自动寻优、流水线调度的整个全流程实现优化,助力文心大模型训练速度提升3倍。
<p align="center">
<img width="1000" alt="飞桨支持大模型训练" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/3874440d-0b0c-4730-bbcb-f9b87900d75f">
</p>
### 大模型精调:主流精调算法实现性能全面领先
提供了SFT、Prefix-Tuning、LoRA三种主流的精调算法,有效降低了大模型训练的资源门槛。统一的训练器Trainer实现了预训练加速技术在精调场景的复用,并通过变长数据流优化大幅提升精调性能。
<p align="center">
<img width="800" alt="大模型精调" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/0dad24ae-0549-4166-8426-b0a471a82450">
</p>
### 大模型压缩:自研量化压缩算法实现无损量化
飞桨自研的Shift-SmoothQuant算法相比SmoothQuant算法可以实现更平滑的激活分布,有效提升量化后模型的精度和生成结果的稳定性。通过PaddleSlim的大模型压缩工具,我们在 C-Eval 和 NL2SQL 两个数据集上对主流开源大模型可以实现无损量化。更多技术介绍与使用说明可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
<p align="center">
<img width="350" alt="模型压缩" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/8b8334d6-dc1a-4ab8-a2f6-dbbece6f0e1e">
</p>
<p align="center">
<img width="798" alt="模型压缩" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/badb3f10-314a-4259-8179-08f940197352">
</p>
### 大模型推理:针对大模型场景特性匹配最优量化推理方案
Paddle Inference针对大模型Prompt阶段与Token Generation阶段的计算特性的不同,在通用场景提供静态量化,在访存受限场景提供混合量化与低比特的推理方案。
<p align="center">
<img width="1000" alt="飞桨支撑大模型推理" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/6bf2a373-a550-4359-9285-6fa4337e550d">
</p>
<p align="center">
<img width="400" alt="推理引擎" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/8d9ab6f9-fc63-4485-bcf2-f9791b1de273">
</p>
### 大模型部署:实时感知负载动态插入请求,最大化硬件利用率
由于大模型生成场景解码阶段耗时较长,且不同Query下生成长度不一,为了最大化服务吞吐,我们在FastDeploy服务框架结合推理引擎实现了动态插入技术,可实时感知服务负载,动态插入用户请求最大化推理硬件利用率。
<p align="center">
<img width="350" alt="大模型服务部署" src="https://github.com/PaddlePaddle/PaddleFleetX/assets/1371212/d2e38f78-9088-4b1a-a9bd-1018385b5b86">
</p>
## PaddleFleetX 应用案例
### 大语言模型
基于PaddleFleetX的核心能力,我们在PaddleNLP中提供了丰富的大语言模型全流程开发与应用示例,更多详细使用说明可以参考[PaddleNLP大语言模型](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/llm)。
### 跨模态大模型
除了大语言模型外,PaddleFleetX还提供跨模态大模型的开发与训练,如多模态预训练、文生图扩散模型等,覆盖图片、文本、视频和音频等模态,更多详细使用说明可以参考[PaddleMIX](https://github.com/PaddlePaddle/PaddleMIX)。
### 生物计算大模型
在生物计算领域,基于飞桨4D并行策略与高性能优化,我们在PaddleHelix中提供众多业界领先的生物计算预训练模型,更多详细使用说明可以参考[PaddleHelix](https://github.com/PaddlePaddle/PaddleHelix)。
## Citation
```
@misc{paddlefleetx,
title={PaddleFleetX: An Easy-to-use and High-Performance One-stop Tool for Deep Learning},
author={PaddleFleetX Contributors},
howpublished = {\url{https://github.com/PaddlePaddle/PaddleFleetX}},
year={2022}
}
```
## License
PaddleFleetX 基于 [Apache 2.0 license](./LICENSE) 许可发布。
================================================
FILE: benchmarks/README.md
================================================
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C1/ernie_bs16_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 1 node x 1 GPU (N1C1),
# fp16, global batch size 16, no parallelism (DP1-MP1-PP1).
model_item=ernie
model=ernie
fp_item=fp16
run_mode=DP1-MP1-PP1
device_num=N1C1
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
micro_bs=${bs_item}
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C1/ernie_bs16_fp32_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 1 node x 1 GPU (N1C1),
# fp32, global batch size 16, no parallelism (DP1-MP1-PP1).
model_item=ernie
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item=fp32
run_mode=DP1-MP1-PP1
device_num=N1C1
model=ernie
micro_bs=${bs_item}
cd ./benchmarks
# BUGFIX: this config previously invoked the GPT benchmark_common scripts.
# Every sibling ernie_* config uses the ERNIE prepare/run scripts; the GPT
# run_benchmark.sh interprets extra positional args differently (it expects
# a yaml_path at $10), so the GPT scripts would prepare the wrong dataset
# and launch the wrong training entry for this ERNIE benchmark.
bash ./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C8/ernie_bs16_fp16_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 1 node x 8 GPUs (N1C8),
# fp16, global batch size 16, hybrid parallel DP2-MP2-PP2.
model_item=ernie
model=ernie
fp_item=fp16
run_mode=DP2-MP2-PP2
device_num=N1C8
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C8/ernie_bs16_fp32_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 1 node x 8 GPUs (N1C8),
# fp32, global batch size 16, hybrid parallel DP2-MP2-PP2.
model_item=ernie
model=ernie
fp_item=fp32
run_mode=DP2-MP2-PP2
device_num=N1C8
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP1-MP8-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp16, global batch size 16, hybrid parallel DP1-MP8-PP4.
model_item=ernie
model=ernie
fp_item=fp16
run_mode=DP1-MP8-PP4
device_num=N4C32
dp_degree=1
mp_degree=8
pp_degree=4
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp16, global batch size 16, hybrid parallel DP2-MP8-PP2.
model_item=ernie
model=ernie
fp_item=fp16
run_mode=DP2-MP8-PP2
device_num=N4C32
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP4-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp16, global batch size 16, hybrid parallel DP4-MP8-PP1.
model_item=ernie
model=ernie
fp_item=fp16
run_mode=DP4-MP8-PP1
device_num=N4C32
dp_degree=4
mp_degree=8
pp_degree=1
bs_item=16
micro_bs=4
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP1-MP8-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp32, global batch size 16, hybrid parallel DP1-MP8-PP4.
model_item=ernie
model=ernie
fp_item=fp32
run_mode=DP1-MP8-PP4
device_num=N4C32
dp_degree=1
mp_degree=8
pp_degree=4
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp32, global batch size 16, hybrid parallel DP2-MP8-PP2.
model_item=ernie
model=ernie
fp_item=fp32
run_mode=DP2-MP8-PP2
device_num=N4C32
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
micro_bs=2
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP4-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ERNIE dygraph hybrid-parallel benchmark entry: 4 nodes x 8 GPUs (N4C32),
# fp32, global batch size 16, hybrid parallel DP4-MP8-PP1.
model_item=ernie
model=ernie
fp_item=fp32
run_mode=DP4-MP8-PP1
device_num=N4C32
dp_degree=4
mp_degree=8
pp_degree=1
bs_item=16
micro_bs=4
# Common prepare/run scripts shared by all ERNIE hybrid-parallel configs.
common_dir=./test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install dependencies and download the ERNIE pretraining dataset
# (two .npy halves concatenated into one file, plus the index .npz).
python -m pip install -r ../requirements.txt
# get data
cd ../
rm -rf dataset/ernie
mkdir -p dataset/ernie
wget -O dataset/ernie/cluecorpussmall_14g_1207_ids_part0 https://paddlefleetx.bj.bcebos.com/model/nlp/ernie/cluecorpussmall_14g_1207_ids_part0
wget -O dataset/ernie/cluecorpussmall_14g_1207_ids_part1 https://paddlefleetx.bj.bcebos.com/model/nlp/ernie/cluecorpussmall_14g_1207_ids_part1
# BUGFIX: use ">" instead of "&>". "&>" redirects BOTH stdout and stderr into
# the target, so any error text from cat would be written into the .npy data
# file and silently corrupt the dataset.
cat dataset/ernie/cluecorpussmall_14g_1207_ids_part* > dataset/ernie/cluecorpussmall_14g_1207_ids.npy
wget -O dataset/ernie/cluecorpussmall_14g_1207_idx.npz https://paddlefleetx.bj.bcebos.com/model/nlp/ernie/cluecorpussmall_14g_1207_idx.npz
================================================
FILE: benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
# Parse the positional benchmark arguments into global variables used by _train.
# Argument order is fixed by the benchmark framework; do not reorder.
function _set_params(){
model_item=${1:-"model_item"} # (required) model item name
fp_item=${2:-"fp32"} # (required) precision: fp32|fp16
dp_degree=${3:-"1"} # (required) data-parallel degree
mp_degree=${4:-"1"} # (required) model(tensor)-parallel degree
pp_degree=${5:-"1"} # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"} # (required) micro_batch_size
global_batch_size=${7:-"16"} # (required) global_batch_size
run_mode=${8:-"DP"} # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid: DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"} # (required) device layout: N1C1|N1C8|N4C32 (4 nodes, 32 cards)
profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed in via a global variable
model_repo="PaddleFleetX" # (required) name of the model suite
speed_unit="tokens/s" # (required) unit of the reported speed metric
skip_steps=0 # (required) log parsing: number of initial, unstable steps to skip
keyword="ips:" # (required) log parsing: keyword selecting the lines that carry the speed metric
convergence_key="loss:" # (optional) log parsing: keyword selecting the lines that carry the convergence metric
max_iter=${10:-500} # (optional) keep total run time under ~5 minutes; submit a PR to the suite if early stopping requires code changes, or use a max_epoch parameter
use_sharding=${11:-"false"} # (optional) whether to use sharding
num_workers=0 # (optional) dataloader workers
base_batch_size=$global_batch_size
use_recompute=${12:-"False"} # (optional) whether to enable recompute
sharding_stage=${13:-"1"} # (optional) sharding stage
sharding_offload=${14:-"False"} # (optional) sharding offload switch
eval_freq=${15:-"1000000"} # (optional) evaluation interval
sharding_degree=${16:-"1"} # (optional) sharding degree
# The settings below are generic; normally no changes are needed.
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) keep this exact format, aligned with competitor naming
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
# Derived log file locations.
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the training command from the globals set by _set_params, launch the
# ERNIE pretraining run under a timeout, and normalize the log file location.
function _train(){
# BUGFIX: the original assigned batch_size=${local_batch_size} at the top of
# the function, before local_batch_size was computed further down, so
# batch_size was always empty. Compute it first.
local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
batch_size=${local_batch_size} # per data-parallel-group batch size
if [ -d $OUTPUT_PATH ]; then
rm -rf $OUTPUT_PATH
fi
mkdir $OUTPUT_PATH
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
# Select profiling vs. plain-training options and log destination.
if [ ${profiling} = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
# Shrink the model (4 heads / 4 layers) unless MP or PP degree is 8.
# NOTE(review): the size comments refer to gpt2 configs although this is the
# ERNIE benchmark — confirm the intended sizing.
num_attention_heads=16 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_attention_heads=4; fi #"gpt2-small-en"
num_layers=24 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_layers=4; fi #"gpt2-small-en"
use_pure_fp16=False # fp32
if [ "fp16" = ${fp_item} ]; then use_pure_fp16=True; fi
# Config overrides handed to tools/train.py as "-o key=value" pairs.
train_cmd="-o Global.seed=1234 \
-o Global.local_batch_size=${local_batch_size} \
-o Global.micro_batch_size=${micro_batch_size} \
-o Engine.max_steps=${max_iter} \
-o Engine.eval_freq=${eval_freq} \
-o Engine.mix_precision.enable=${use_pure_fp16} \
-o Engine.save_load.save_steps=100000 \
-o Model.hidden_size=1024 \
-o Model.num_hidden_layers=${num_layers} \
-o Model.num_attention_heads=${num_attention_heads} \
-o Model.use_recompute=${use_recompute} \
-o Data.Train.dataset.input_dir=./dataset/ernie \
-o Data.Eval.dataset.input_dir=./dataset/ernie \
-o Distributed.dp_degree=${dp_degree} \
-o Distributed.mp_degree=${mp_degree} \
-o Distributed.pp_degree=${pp_degree} \
-o Distributed.sharding.sharding_degree=${sharding_degree} \
-o Distributed.sharding.sharding_stage=${sharding_stage} \
-o Distributed.sharding.sharding_offload=${sharding_offload} \
-o Optimizer.lr.max_lr=1e-4 \
-o Optimizer.lr.min_lr=1e-5 "
# Pass the trainer rank when the benchmark framework provides one (multi-node).
if [ ${PADDLE_TRAINER_ID} ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# Generic launch commands: pick the device list from the run mode.
# BUGFIX: the DP2-MP1-PP1 and 8-card branches had no space between
# ${PADDLE_RANK_OPTION} and the line continuation, so a non-empty rank option
# fused with "tools/train.py" (e.g. "--rank 1tools/train.py").
case ${run_mode} in
DP1-MP1-PP1) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0 ${PADDLE_RANK_OPTION} \
tools/train.py -c ppfleetx/configs/nlp/ernie/pretrain_ernie_base_3D.yaml \
${train_cmd}"
workerlog_id=0
;;
DP2-MP1-PP1) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1 ${PADDLE_RANK_OPTION} \
tools/train.py -c ppfleetx/configs/nlp/ernie/pretrain_ernie_base_3D.yaml \
${train_cmd}"
workerlog_id=0
;;
DP2-MP2-PP2|DP2-MP8-PP2|DP4-MP8-PP1|DP1-MP8-PP4) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION} \
tools/train.py -c ppfleetx/configs/nlp/ernie/pretrain_ernie_base_3D.yaml \
${train_cmd}"
workerlog_id=0
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
# CE (convergence) runs get a generous budget; perf runs are capped at 15 min.
if [[ ${model_item} =~ "CE" ]];then
timeout 240m ${train_cmd} > ${log_file} 2>&1
else
timeout 15m ${train_cmd} > ${log_file} 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# On multi-card runs, replace the launcher log with the worker log the
# analysis step actually parses.
if [ ${device_num} != "N1C1" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.${workerlog_id} ${log_file}
fi
}
# Make the repository root importable by the training entry point.
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # parses benchmark-conformant logs with analysis.py; comment this out (and uncomment _train below) to only produce raw training logs without log analysis
_set_params $@
#_train # uncomment to only produce the training log without parsing it
_run # defined in run_model.sh; it invokes _train internally. Comment out (and use _train directly) when running without the benchmark framework
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_1024_bs64_fp16_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# GPT dygraph data-parallel benchmark entry: 1 node x 8 GPUs (N1C8),
# fp16, sequence length 1024, global batch size 64, DP8-MP1-PP1.
model_item=gpt_1024
model=gpt
fp_item=fp16
run_mode=DP8-MP1-PP1
device_num=N1C8
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=64
micro_bs=8
yaml_path=./ppfleetx/configs/nlp/gpt/pretrain_gpt_345M_single_card.yaml
# Common prepare/run scripts shared by the GPT data-parallel configs.
common_dir=./test_tipc/gpt/dygraph/data_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} ${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_1024_flash_bs64_fp16_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# GPT dygraph data-parallel benchmark entry (flash-attention variant):
# 1 node x 8 GPUs (N1C8), fp16, sequence length 1024, global batch size 64,
# DP8-MP1-PP1.
model_item=gpt_1024_flash
model=gpt
fp_item=fp16
run_mode=DP8-MP1-PP1
device_num=N1C8
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=64
micro_bs=8
yaml_path=./ppfleetx/configs/nlp/gpt/pretrain_gpt_345M_single_card.yaml
# Common prepare/run scripts shared by the GPT data-parallel configs.
common_dir=./test_tipc/gpt/dygraph/data_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} ${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_2048_bs64_fp16_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# GPT dygraph data-parallel benchmark entry: 1 node x 8 GPUs (N1C8),
# fp16, sequence length 2048, global batch size 64, DP8-MP1-PP1,
# using the 1.3B dp8 pretraining config.
model_item=gpt_2048
model=gpt
fp_item=fp16
run_mode=DP8-MP1-PP1
device_num=N1C8
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=64
micro_bs=8
yaml_path=./ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml
# Common prepare/run scripts shared by the GPT data-parallel configs.
common_dir=./test_tipc/gpt/dygraph/data_parallel/benchmark_common
cd ./benchmarks
bash ${common_dir}/prepare.sh
# run
bash ${common_dir}/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} ${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/data_parallel/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install dependencies and download the GPT pretraining dataset
# (token ids .npy and index .npz) into ./data at the repository root.
python -m pip install -r ../requirements.txt
# get data
cd ../
rm -rf data
mkdir data
for data_file in gpt_en_dataset_300m_ids.npy gpt_en_dataset_300m_idx.npz; do
wget -O data/${data_file} https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/${data_file}
done
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/data_parallel/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
# Parse the positional benchmark parameters and derive the log-file paths.
# The variable names below are a contract with the external benchmark
# framework (run_model.sh / analysis.py) — do not rename them.
function _set_params(){
model_item=${1:-"model_item"} # (required) model item name
fp_item=${2:-"fp32"} # (required) fp32|fp16
dp_degree=${3:-"1"} # (required) data-parallel degree
mp_degree=${4:-"1"} # (required) tensor(model)-parallel degree
pp_degree=${5:-"1"} # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"} # (required) micro_batch_size
global_batch_size=${7:-"16"} # (required) global_batch_size
run_mode=${8:-"DP"} # (required) MP|DP|PP or hybrid, e.g. DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"} # (required) device layout, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed via env var
model_repo="PaddleFleetX" # (required) model-suite name
speed_unit="tokens/s" # (required) unit of the throughput metric
skip_steps=0 # (required) number of unstable warm-up steps skipped when parsing the log
keyword="ips:" # (required) keyword that marks throughput lines in the log
convergence_key="loss:" # (optional) keyword that marks convergence lines, e.g. convergence_key="loss:"
yaml_path=${10:-"./pretrain/configs/pretrain_gpt_345M_single_card.yaml"}
max_iter=${11:-500} # (optional) keep the run under ~5 minutes; early-exit code changes go upstream, or use a max_epoch parameter
num_workers=0 # (optional)
base_batch_size=$global_batch_size
eval_freq=${12:-"1000"} # (optional) model evaluation interval
use_recompute=${13:-"False"} # (optional) whether to enable recompute
# Generic bookkeeping below; usually no need to modify.
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) keep this exact format — it is aligned with competing products
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device}) # intentional word-splitting: one array entry per GPU id
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (required) TRAIN_LOG_DIR is exported by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (required) PROFILING_LOG_DIR is exported by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the training command for the selected run_mode, execute it (with a
# 15-minute cap for non-CE runs), and normalize the worker log location.
function _train(){
batch_size=${local_batch_size} # for multi-GPU single-process runs, compute the effective bs here
if [ -d "$OUTPUT_PATH" ]; then
rm -rf "$OUTPUT_PATH"
fi
mkdir "$OUTPUT_PATH"
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
# Quote the operand: the unquoted form is a syntax error if ${profiling} is empty.
if [ "${profiling}" = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
if [ "${model_item}" = "gpt_1024_flash" ];then
args="-o Model.use_flash_attn=True"
else
args=""
fi
train_cmd="-c ${yaml_path} ${args} \
-o Engine.max_steps=${max_iter} \
-o Engine.eval_freq=${eval_freq} \
-o Engine.save_load.save_steps=100000 \
-o Distributed.dp_degree=${dp_degree} \
"
# -n with a default avoids relying on the fragile bare `[ ${var} ]` form.
if [ -n "${PADDLE_TRAINER_ID:-}" ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# Generic launch logic below; usually no need to modify.
case ${run_mode} in
DP8-MP1-PP1) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
tools/train.py \
${train_cmd}"
workerlog_id=0
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
# ${train_cmd} is intentionally unquoted: it must word-split into argv.
if [[ ${model_item} =~ "CE" ]];then # CE (accuracy) runs are not time-limited
${train_cmd} > "${log_file}" 2>&1
else
timeout 15m ${train_cmd} > "${log_file}" 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# POSIX marks `[ ... -a ... ]` obsolescent/ambiguous; use two tests joined by &&.
if [ "${device_num}" != "N1C1" ] && [ -d mylog ]; then
rm "${log_file}"
cp mylog/workerlog.${workerlog_id} "${log_file}"
fi
}
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # Parses benchmark-format logs with analysis.py; may be commented out for local log-only runs, but must be enabled when submitting.
_set_params $@
#_train # Uncomment to produce only the training log, without parsing.
_run # Defined in run_model.sh; it invokes _train. Keep enabled for submission.
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_CoLA_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on CoLA, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via Matthews correlation ("mcc:").
model_item="CE_gpt_finetune_CoLA"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="mcc:"
dataset="CoLA"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_MRPC_acc_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on MRPC, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via accuracy ("acc:").
model_item="CE_gpt_finetune_MRPC_acc"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="acc:"
dataset="MRPC"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_MRPC_f1_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on MRPC, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via F1 score ("f1:").
model_item="CE_gpt_finetune_MRPC_f1"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="f1:"
dataset="MRPC"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_QNLI_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on QNLI, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via accuracy ("acc:").
model_item="CE_gpt_finetune_QNLI"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="acc:"
dataset="QNLI"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_RTE_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on RTE, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via accuracy ("acc:").
model_item="CE_gpt_finetune_RTE"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="acc:"
dataset="RTE"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_SST2_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on SST-2, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via accuracy ("acc:").
model_item="CE_gpt_finetune_SST2"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="acc:"
dataset="SST2"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_STSB_pearson_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on STS-B, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via Pearson correlation ("pearson:").
model_item="CE_gpt_finetune_STSB_pearson"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="pearson:"
dataset="STSB"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_STSB_spearman_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on STS-B, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via Spearman correlation ("spearman:").
model_item="CE_gpt_finetune_STSB_spearman"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="spearman:"
dataset="STSB"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint, then launch.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_WNLI_bs32_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT-345M GLUE fine-tune on WNLI, single GPU (N1C1),
# fp16, batch size 32; convergence tracked via accuracy ("acc:").
model_item="CE_gpt_finetune_WNLI"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=32
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
convergence_key="acc:"
dataset="WNLI"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and fetch the pretrained checkpoint.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
# WNLI needs more epochs than the default to converge.
sed -i "s/num_train_epochs=5/num_train_epochs=20/g" ../projects/gpt/finetune_gpt_345M_single_card.sh
# Launch the benchmark driver.
bash ./test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
    "${convergence_key}" "${dataset}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fail fast: a silently missing or truncated checkpoint would otherwise only
# surface later as a confusing training-time error.
set -e

python -m pip install -r ../requirements.txt

# Download and unpack the pretrained GPT-345M checkpoint into <repo_root>/ckpt.
cd ../
rm -rf ckpt
mkdir -p ckpt
wget -O ckpt/GPT_345M.tar.gz https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GPT_345M.tar.gz
tar -xzf ckpt/GPT_345M.tar.gz -C ckpt/
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
# Parse the positional benchmark parameters and derive the log-file paths.
# The variable names below are a contract with the external benchmark
# framework (run_model.sh / analysis.py) — do not rename them.
function _set_params(){
model_item=${1:-"model_item"} # (required) model item name
fp_item=${2:-"fp32"} # (required) fp32|fp16
dp_degree=${3:-"1"} # (required) data-parallel degree
mp_degree=${4:-"1"} # (required) tensor(model)-parallel degree
pp_degree=${5:-"1"} # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"} # (required) micro_batch_size
global_batch_size=${7:-"16"} # (required) global_batch_size
run_mode=${8:-"DP"} # (required) MP|DP|PP or hybrid, e.g. DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"} # (required) device layout, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed via env var
model_repo="PaddleFleetX" # (required) model-suite name
speed_unit="steps/s" # (required) unit of the throughput metric
skip_steps=0 # (required) number of unstable warm-up steps skipped when parsing the log
keyword="ips:" # (required) keyword that marks throughput lines in the log
convergence_key=${10:-"loss:"} # (optional) keyword that marks convergence lines, e.g. convergence_key="loss:"
dataset=${11:-"CoLA"} # GLUE dataset name
max_iter=${12:-500} # (optional) keep the run under ~5 minutes; early-exit code changes go upstream, or use a max_epoch parameter
base_batch_size=$global_batch_size
# BUGFIX: was ${13-"1"}; with plain `-` an explicitly empty 13th argument stays
# empty and breaks the local_batch_size division in _train. `:-` also defaults
# the empty-string case.
sharding_degree=${13:-"1"} # (optional)
sharding_stage=${14:-"1"} # (optional) sharding case
# Generic bookkeeping below; usually no need to modify.
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) keep this exact format — it is aligned with competing products
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device}) # intentional word-splitting: one array entry per GPU id
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (required) TRAIN_LOG_DIR is exported by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (required) PROFILING_LOG_DIR is exported by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the fine-tune command for the selected run_mode, execute it with a
# 40-minute cap, and normalize the worker log location.
function _train(){
batch_size=${local_batch_size} # for multi-GPU single-process runs, compute the effective bs here
if [ -d "$OUTPUT_PATH" ]; then
rm -rf "$OUTPUT_PATH"
fi
mkdir "$OUTPUT_PATH"
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
# Quote the operand: the unquoted form is a syntax error if ${profiling} is empty.
if [ "${profiling}" = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
# Arithmetic expansion instead of the legacy `expr` subshell.
local_batch_size=$((global_batch_size / dp_degree / sharding_degree))
train_cmd="${dataset}"
# Generic launch logic below; usually no need to modify.
case ${run_mode} in
DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1"
train_cmd="bash projects/gpt/finetune_gpt_345M_single_card.sh \
${train_cmd}"
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
workerlog_id=0
# ${train_cmd} is intentionally unquoted: it must word-split into argv.
timeout 40m ${train_cmd} > "${log_file}" 2>&1
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# POSIX marks `[ ... -a ... ]` obsolescent/ambiguous; use two tests joined by &&.
if [ "${device_num}" != "N1C1" ] && [ -d mylog ]; then
rm "${log_file}"
cp mylog/workerlog.${workerlog_id} "${log_file}"
fi
}
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # Parses benchmark-format logs with analysis.py; may be commented out for local log-only runs, but must be enabled when submitting.
_set_params $@
#_train # Uncomment to produce only the training log, without parsing.
_run # Defined in run_model.sh; it invokes _train. Keep enabled for submission.
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C1/gpt_bs16_fp16_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP1-PP1 on a single GPU (N1C1).
model_item="gpt"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item="fp16"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C1/gpt_bs16_fp32_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp32, global batch 16,
# DP1-MP1-PP1 on a single GPU (N1C1).
model_item="gpt"
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item="fp32"
run_mode="DP1-MP1-PP1"
device_num="N1C1"
model="gpt"
micro_bs="${bs_item}"

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C4/gpt_bs16_fp16_DP1-MP1-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP1-PP4 (pipeline over 4 GPUs, N1C4), micro batch 2.
model_item="gpt"
dp_degree=1
mp_degree=1
pp_degree=4
bs_item=16
fp_item="fp16"
run_mode="DP1-MP1-PP4"
device_num="N1C4"
model="gpt"
micro_bs=2

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C4/gpt_bs16_fp16_DP1-MP4-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP4-PP1 (tensor parallel over 4 GPUs, N1C4), micro batch 8.
model_item="gpt"
dp_degree=1
mp_degree=4
pp_degree=1
bs_item=16
fp_item="fp16"
run_mode="DP1-MP4-PP1"
device_num="N1C4"
model="gpt"
micro_bs=8

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP1-PP8.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP1-PP8 (pipeline over 8 GPUs, N1C8), micro batch 2.
model_item="gpt"
dp_degree=1
mp_degree=1
pp_degree=8
bs_item=16
fp_item="fp16"
run_mode="DP1-MP1-PP8"
device_num="N1C8"
model="gpt"
micro_bs=2

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP2-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP2-PP4 (2-way tensor x 4-stage pipeline, N1C8), micro batch 2.
model_item="gpt"
dp_degree=1
mp_degree=2
pp_degree=4
bs_item=16
fp_item="fp16"
run_mode="DP1-MP2-PP4"
device_num="N1C8"
model="gpt"
micro_bs=2

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP4-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP4-PP2 (4-way tensor x 2-stage pipeline, N1C8), micro batch 2.
model_item="gpt"
dp_degree=1
mp_degree=4
pp_degree=2
bs_item=16
fp_item="fp16"
run_mode="DP1-MP4-PP2"
device_num="N1C8"
model="gpt"
micro_bs=2

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP1-MP8-PP1 (8-way tensor parallel, N1C8), micro batch 16.
model_item="gpt"
dp_degree=1
mp_degree=8
pp_degree=1
bs_item=16
fp_item="fp16"
run_mode="DP1-MP8-PP1"
device_num="N1C8"
model="gpt"
micro_bs=16

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark case: GPT hybrid-parallel pretrain, fp16, global batch 16,
# DP2-MP2-PP2 (2x2x2 hybrid over 8 GPUs, N1C8), micro batch 8.
model_item="gpt"
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
fp_item="fp16"
run_mode="DP2-MP2-PP2"
device_num="N1C8"
model="gpt"
micro_bs=8

cd ./benchmarks
# Install dependencies and download the training data, then launch.
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh \
    "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" \
    "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" 2>&1
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp32_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 1 node x 8 GPUs (N1C8),
# fp32, DP2-MP2-PP2, global batch size 16, micro batch size 8.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
fp_item=fp32
run_mode=DP2-MP2-PP2
device_num=N1C8
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs64_fp16_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 1 node x 8 GPUs (N1C8),
# fp16, DP8-MP1-PP1, global batch size 64, micro batch size 8,
# capped at 500 iterations with recompute enabled.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=64
fp_item=fp16
run_mode=DP8-MP1-PP1
device_num=N1C8
max_iter=500
use_recompute=True
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${max_iter} ${use_recompute} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs64_fp32_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 1 node x 8 GPUs (N1C8),
# fp32, DP8-MP1-PP1, global batch size 64, micro batch size 8,
# capped at 500 iterations with recompute enabled.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=64
fp_item=fp32
run_mode=DP8-MP1-PP1
device_num=N1C8
max_iter=500
use_recompute=True
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${max_iter} ${use_recompute} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_recompute_bs16_fp16_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT with recompute (dygraph, hybrid parallel),
# 1 node x 8 GPUs (N1C8), fp16, DP2-MP2-PP2, global batch size 16,
# micro batch size 2, capped at 500 iterations.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_recompute
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
fp_item=fp16
run_mode=DP2-MP2-PP2
device_num=N1C8
max_iter=500
use_recompute=True
model=gpt
micro_bs=2
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${max_iter} ${use_recompute} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_recompute_bs16_fp32_DP2-MP2-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT with recompute (dygraph, hybrid parallel),
# 1 node x 8 GPUs (N1C8), fp32, DP2-MP2-PP2, global batch size 16,
# micro batch size 2, capped at 500 iterations.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_recompute
dp_degree=2
mp_degree=2
pp_degree=2
bs_item=16
fp_item=fp32
run_mode=DP2-MP2-PP2
device_num=N1C8
max_iter=500
use_recompute=True
model=gpt
micro_bs=2
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${max_iter} ${use_recompute} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP1-MP8-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp16, DP1-MP8-PP4, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=1
mp_degree=8
pp_degree=4
bs_item=16
fp_item=fp16
run_mode=DP1-MP8-PP4
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp16, DP2-MP8-PP2, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
fp_item=fp16
run_mode=DP2-MP8-PP2
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP4-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp16, DP4-MP8-PP1, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=4
mp_degree=8
pp_degree=1
bs_item=16
fp_item=fp16
run_mode=DP4-MP8-PP1
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP1-MP8-PP4.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp32, DP1-MP8-PP4, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=1
mp_degree=8
pp_degree=4
bs_item=16
fp_item=fp32
run_mode=DP1-MP8-PP4
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp32, DP2-MP8-PP2, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
fp_item=fp32
run_mode=DP2-MP8-PP2
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP4-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT (dygraph, hybrid parallel), 4 nodes x 8 GPUs (N4C32),
# fp32, DP4-MP8-PP1, global batch size 16, micro batch size 4.
# Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt
dp_degree=4
mp_degree=8
pp_degree=1
bs_item=16
fp_item=fp32
run_mode=DP4-MP8-PP1
device_num=N4C32
model=gpt
micro_bs=4
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install Python dependencies and download the GPT pretraining dataset
# (300M-token English corpus) into ../data for the benchmark runs.
python -m pip install -r ../requirements.txt
# get data
# Abort if the cd fails: otherwise the `rm -rf data` below would run in an
# unintended directory (ShellCheck SC2164).
cd ../ || exit 1
rm -rf data
mkdir -p data
wget -O data/gpt_en_dataset_300m_ids.npy https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
wget -O data/gpt_en_dataset_300m_idx.npz https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
# Parse the positional benchmark parameters and derive log/output paths.
# Positional args 1-15 are documented inline; the TRAIN_LOG_DIR,
# PROFILING_LOG_DIR, LOG_PATH_INDEX_DIR and PROFILING environment variables
# are set globally by the benchmark framework.
function _set_params(){
model_item=${1:-"model_item"} # (required) model item name
fp_item=${2:-"fp32"} # (required) fp32|fp16
dp_degree=${3:-"1"} # (required) data-parallel degree
mp_degree=${4:-"1"} # (required) model(tensor)-parallel degree
pp_degree=${5:-"1"} # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"} # (required) micro_batch_size
global_batch_size=${7:-"16"} # (required) global_batch_size
run_mode=${8:-"DP"} # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"} # (required) number of devices used, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed via global variable
model_repo="PaddleFleetX" # (required) name of the model suite
speed_unit="tokens/s" # (required) unit of the speed metric
skip_steps=0 # (required) log parsing: skip the first unstable steps
keyword="ips:" # (required) log parsing: keyword of the line holding the speed metric
convergence_key="loss:" # (optional) log parsing: keyword of the line holding the convergence metric, e.g. convergence_key="loss:"
max_iter=${10:-500} # (optional) keep total run time under 5 minutes; submit a PR to the suite if early stopping needs code changes, or use a max_epoch parameter
num_workers=0 # (optional)
base_batch_size=$global_batch_size
use_recompute=${11:-"False"} # (optional) whether to enable recompute
eval_freq=${12:-"1000"} # (optional) evaluation interval
sharding_degree=${13:-"1"} # (optional) sharding-parallel degree
sharding_stage=${14:-"1"} # (optional) sharding strategy; 1 shards only optimizer state, 2 also shards gradients, 3 also shards forward parameters
sharding_offload=${15:-"False"} # (optional) CPU offload strategy
# The following are common commands; no change needed unless special-casing
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) do not change the format; aligned with competing product names
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the training command for the requested hybrid-parallel run_mode,
# launch it via paddle.distributed.launch (with a 15-minute timeout unless
# the model item is a CE accuracy run), and leave its output in ${log_file}.
# Relies on the globals set by _set_params.
function _train(){
# NOTE(review): local_batch_size is only assigned further below, so this
# reads an empty value here — looks like an ordering bug; confirm whether
# batch_size is consumed by the sourced run_model.sh before _train runs.
batch_size=${local_batch_size} # if the model runs multi-GPU in a single process, compute the multi-GPU bs inside _train
if [ -d $OUTPUT_PATH ]; then
rm -rf $OUTPUT_PATH
fi
mkdir $OUTPUT_PATH
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
# Profiling runs log to the profiling file with profiler options enabled.
if [ ${profiling} = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
# Per-replica batch: global batch split across data-parallel and sharding groups.
local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
# Shrink the model to "gpt2-small-en" dimensions unless an 8-way MP or PP
# degree requires the "gpt2-medium-en" configuration.
num_attention_heads=16 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_attention_heads=4; fi #"gpt2-small-en"
num_layers=24 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_layers=4; fi #"gpt2-small-en"
use_pure_fp16=False
if [ "fp16" = ${fp_item} ]; then use_pure_fp16=True; fi
# Config overrides passed to tools/train.py via repeated -o options.
train_cmd="-o Global.seed=1234 \
-o Global.local_batch_size=${local_batch_size} \
-o Global.micro_batch_size=${micro_batch_size} \
-o Engine.max_steps=${max_iter} \
-o Engine.eval_freq=${eval_freq} \
-o Engine.mix_precision.enable=${use_pure_fp16} \
-o Engine.save_load.save_steps=100000 \
-o Model.hidden_size=1024 \
-o Model.num_layers=${num_layers} \
-o Model.num_attention_heads=${num_attention_heads} \
-o Model.type_vocab_size=1 \
-o Model.use_recompute=${use_recompute} \
-o Distributed.dp_degree=${dp_degree} \
-o Distributed.mp_degree=${mp_degree} \
-o Distributed.pp_degree=${pp_degree} \
-o Distributed.sharding.sharding_degree=${sharding_degree} \
-o Distributed.sharding.sharding_stage=${sharding_stage} \
-o Distributed.sharding.sharding_offload=${sharding_offload} \
-o Optimizer.lr.max_lr=1e-4 \
-o Optimizer.lr.min_lr=1e-5 "
# On multi-node runs the framework exports PADDLE_TRAINER_ID; forward it
# as the launcher's --rank.
if [ ${PADDLE_TRAINER_ID} ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# The following are common commands; no change needed unless special-casing
# Select the GPU device list for the launcher based on run_mode.
case ${run_mode} in
DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id=0
;;
DP1-MP1-PP4|DP1-MP4-PP1) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id=0
;;
DP8-MP1-PP1|DP1-MP8-PP1|DP1-MP1-PP8|DP1-MP2-PP4|DP1-MP4-PP2|DP2-MP2-PP2| \
DP2-MP8-PP2|DP4-MP8-PP1|DP1-MP8-PP4) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id=0
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
if [[ ${model_item} =~ "CE" ]];then # CE accuracy run - no execution time limit
${train_cmd} > ${log_file} 2>&1
else
timeout 15m ${train_cmd} > ${log_file} 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# On multi-GPU runs, replace the launcher log with the selected worker's
# log so the parser sees per-worker ips/loss lines.
if [ ${device_num} != "N1C1" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.${workerlog_id} ${log_file}
fi
}
# Entry point: expose the repo root on PYTHONPATH, load the benchmark
# framework, parse arguments, then run training through the framework.
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # this script parses benchmark-conformant logs with analysis.py; comment this line out to only produce a training log locally, but re-enable it before submitting
_set_params $@
#_train # uncomment to only produce the training log without parsing
_run # defined in run_model.sh; it calls _train internally. Comment out to only produce a training log locally, but re-enable it before submitting
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N1C8/gpt_sp_False_bs8_fp16_DP1-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT without sequence parallelism (dygraph),
# 1 node x 8 GPUs (N1C8), fp16, DP1-MP8-PP1, global batch size 8,
# micro batch size 8. Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_sp_False
dp_degree=1
mp_degree=8
pp_degree=1
bs_item=8
fp_item=fp16
run_mode=DP1-MP8-PP1
device_num=N1C8
sequence_parallel=False
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sequence_parallel} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N1C8/gpt_sp_True_bs8_fp16_DP1-MP8-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT with sequence parallelism (dygraph),
# 1 node x 8 GPUs (N1C8), fp16, DP1-MP8-PP1, global batch size 8,
# micro batch size 8. Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_sp_True
dp_degree=1
mp_degree=8
pp_degree=1
bs_item=8
fp_item=fp16
run_mode=DP1-MP8-PP1
device_num=N1C8
sequence_parallel=True
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sequence_parallel} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N4C32/gpt_sp_False_bs16_fp16_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT without sequence parallelism (dygraph),
# 4 nodes x 8 GPUs (N4C32), fp16, DP2-MP8-PP2, global batch size 16,
# micro batch size 8. Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_sp_False
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
fp_item=fp16
run_mode=DP2-MP8-PP2
device_num=N4C32
sequence_parallel=False
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sequence_parallel} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N4C32/gpt_sp_True_bs16_fp16_DP2-MP8-PP2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Benchmark config: GPT with sequence parallelism (dygraph),
# 4 nodes x 8 GPUs (N4C32), fp16, DP2-MP8-PP2, global batch size 16,
# micro batch size 8. Variables are forwarded positionally to run_benchmark.sh.
model_item=gpt_sp_True
dp_degree=2
mp_degree=8
pp_degree=2
bs_item=16
fp_item=fp16
run_mode=DP2-MP8-PP2
device_num=N4C32
sequence_parallel=True
model=gpt
micro_bs=8
# Fail fast on a bad working directory (ShellCheck SC2164).
cd ./benchmarks || exit 1
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sequence_parallel} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install Python dependencies and download the GPT pretraining dataset
# (300M-token English corpus) into ../data for the benchmark runs.
python -m pip install -r ../requirements.txt
# get data
# Abort if the cd fails: otherwise the `rm -rf data` below would run in an
# unintended directory (ShellCheck SC2164).
cd ../ || exit 1
rm -rf data
mkdir -p data
wget -O data/gpt_en_dataset_300m_ids.npy https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
wget -O data/gpt_en_dataset_300m_idx.npz https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
# Parse the positional benchmark parameters (sequence-parallel variant) and
# derive log/output paths. Positional args 1-16 are documented inline; the
# TRAIN_LOG_DIR, PROFILING_LOG_DIR, LOG_PATH_INDEX_DIR and PROFILING
# environment variables are set globally by the benchmark framework.
function _set_params(){
model_item=${1:-"model_item"} # (required) model item name
fp_item=${2:-"fp32"} # (required) fp32|fp16
dp_degree=${3:-"1"} # (required) data-parallel degree
mp_degree=${4:-"1"} # (required) model(tensor)-parallel degree
pp_degree=${5:-"1"} # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"} # (required) micro_batch_size
global_batch_size=${7:-"16"} # (required) global_batch_size
run_mode=${8:-"DP"} # (required) MP model parallel | DP data parallel | PP pipeline parallel | hybrid DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"} # (required) number of devices used, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
profiling=${PROFILING:-"false"} # (required) profiling switch, off by default, passed via global variable
model_repo="PaddleFleetX" # (required) name of the model suite
speed_unit="tokens/s" # (required) unit of the speed metric
skip_steps=0 # (required) log parsing: skip the first unstable steps
keyword="ips:" # (required) log parsing: keyword of the line holding the speed metric
convergence_key="loss:" # (optional) log parsing: keyword of the line holding the convergence metric, e.g. convergence_key="loss:"
sequence_parallel=${10:-"False"} # (optional) whether to enable sequence_parallel
max_iter=${11:-1000} # (optional) keep total run time under 5 minutes; submit a PR to the suite if early stopping needs code changes, or use a max_epoch parameter
eval_freq=${12:-"1000"} # (optional) evaluation interval
num_workers=0 # (optional)
base_batch_size=$global_batch_size
use_recompute=${13:-"True"} # (optional) whether to enable recompute
sharding_degree=${14:-"1"} # (optional) sharding-parallel degree
sharding_stage=${15:-"1"} # (optional) sharding strategy; 1 shards only optimizer state, 2 also shards gradients, 3 also shards forward parameters
sharding_offload=${16:-"False"} # (optional) CPU offload strategy
# The following are common commands; no change needed unless special-casing
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) do not change the format; aligned with competing product names
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the training command for the selected run_mode, launch it with a
# timeout, and normalize the worker log for the benchmark parser.
function _train(){
# Per-device batch size: the global batch is split across the data-parallel
# and sharding groups.  This must be computed BEFORE batch_size is derived
# from it (the original assigned batch_size first, so it was always empty).
local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
batch_size=${local_batch_size} # reported to the benchmark framework
if [ -d $OUTPUT_PATH ]; then
rm -rf $OUTPUT_PATH
fi
mkdir -p $OUTPUT_PATH
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
if [ "${profiling}" = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
# Model scale: gpt2-medium-en by default, downscaled to gpt2-small-en for
# small mp/pp degrees.
# NOTE(review): these two values are computed but never passed to train_cmd
# in this script — confirm whether they were meant to be forwarded.
num_attention_heads=16 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_attention_heads=4; fi #"gpt2-small-en"
num_layers=24 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_layers=4; fi #"gpt2-small-en"
use_pure_fp16=False
if [ "fp16" = "${fp_item}" ]; then use_pure_fp16=True; fi
# Config overrides shared by every run_mode below.
train_cmd="-o Engine.max_steps=${max_iter} \
            -o Engine.eval_iters=${eval_freq} \
            -o Distributed.dp_degree=${dp_degree} \
            -o Distributed.mp_degree=${mp_degree} \
            -o Distributed.pp_degree=${pp_degree} \
            -o Distributed.sharding.sharding_degree=${sharding_degree} \
            -o Distributed.sharding.sharding_stage=${sharding_stage} \
            -o Distributed.sharding.sharding_offload=${sharding_offload} \
            -o Model.sequence_parallel=${sequence_parallel} \
            -o Distributed.sharding.reduce_overlap=False \
            -o Distributed.sharding.broadcast_overlap=False \
            -o Optimizer.tensor_fusion=False "
# On multi-node runs the benchmark framework exports PADDLE_TRAINER_ID.
if [ -n "${PADDLE_TRAINER_ID}" ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# Common launch logic; normally no changes are needed below.
case ${run_mode} in
DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id=0
;;
DP1-MP8-PP1) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id=0
;;
DP2-MP8-PP2) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_6.7B_sharding16.yaml \
${train_cmd}"
workerlog_id=0
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
# CE (convergence) jobs run unbounded; performance jobs are killed after 60m.
if [[ ${model_item} =~ "CE" ]];then
${train_cmd} > ${log_file} 2>&1
else
timeout 60m ${train_cmd} > ${log_file} 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# For multi-card runs keep only one worker's log so the parser sees one rank.
if [ ${device_num} != "N1C1" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.${workerlog_id} ${log_file}
fi
}
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # provides _run, which parses benchmark-format logs with analysis.py; may be commented out for local log-only debugging, but must be enabled when committing
_set_params $@
#_train # uncomment to produce the training log only, without parsing
_run # defined in run_model.sh; it calls _train internally. May be commented out for local log-only debugging, but must be enabled when committing
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage2_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: GPT dygraph sharding benchmark, stage-2 sharding over 2 GPUs on
# one node (N1C2), fp16, global batch size 16.  Values are passed positionally
# to run_benchmark.sh.
model_item=gpt_stage2
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item=fp16
run_mode=DP1-MP1-PP1-Sharding2
device_num=N1C2
sharding_degree=2
sharding_stage=2      # stage 2: shard optimizer states and gradients
sharding_offload=True # offload sharded states to CPU
model=gpt
micro_bs=8
cd ./benchmarks
# Install requirements and download the training data.
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sharding_degree} ${sharding_stage} ${sharding_offload} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage3_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: GPT dygraph sharding benchmark, stage-3 sharding over 2 GPUs on
# one node (N1C2), fp16, global batch size 16.  Values are passed positionally
# to run_benchmark.sh.
model_item=gpt_stage3
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item=fp16
run_mode=DP1-MP1-PP1-Sharding2
device_num=N1C2
sharding_degree=2
sharding_stage=3      # stage 3: shard optimizer states, gradients, and parameters
sharding_offload=True # offload sharded states to CPU
model=gpt
micro_bs=8
cd ./benchmarks
# Install requirements and download the training data.
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sharding_degree} ${sharding_stage} ${sharding_offload} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage3_bs16_fp32_DP1-MP1-PP1-Sharding2.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: GPT dygraph sharding benchmark, stage-3 sharding over 2 GPUs on
# one node (N1C2), fp32, global batch size 16.  Values are passed positionally
# to run_benchmark.sh.
model_item=gpt_stage3
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=16
fp_item=fp32
run_mode=DP1-MP1-PP1-Sharding2
device_num=N1C2
sharding_degree=2
sharding_stage=3      # stage 3: shard optimizer states, gradients, and parameters
sharding_offload=True # offload sharded states to CPU
model=gpt
micro_bs=8
cd ./benchmarks
# Install requirements and download the training data.
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sharding_degree} ${sharding_stage} ${sharding_offload} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/N2C16/gpt_stage2_bs128_fp16_DP1-MP1-PP1-Sharding16.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: GPT dygraph sharding benchmark, stage-2 sharding over 16 GPUs on
# two nodes (N2C16), fp16, global batch size 128, capped at 30 iterations.
# Values are passed positionally to run_benchmark.sh.
model_item=gpt_stage2
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=128
fp_item=fp16
run_mode=DP1-MP1-PP1-Sharding16
device_num=N2C16
sharding_degree=16
sharding_stage=2      # stage 2: shard optimizer states and gradients
sharding_offload=True # offload sharded states to CPU
max_iter=30
model=gpt
micro_bs=8
cd ./benchmarks
# Install requirements and download the training data.
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${sharding_degree} ${sharding_stage} ${sharding_offload} ${max_iter} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install Python dependencies and download the 300M-token GPT English dataset.
# Runs from benchmarks/; the data lands in <repo root>/data.
python -m pip install -r ../requirements.txt
# get data
cd ../
rm -rf data
mkdir data
wget -O data/gpt_en_dataset_300m_ids.npy https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
wget -O data/gpt_en_dataset_300m_idx.npz https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz
================================================
FILE: benchmarks/test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage: bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${sharding_degree} ${sharding_stage} ${sharding_offload} ${max_iter} ${eval_freq} ${use_recompute}
# Parse positional CLI arguments and benchmark-framework environment variables
# into the global variables consumed by _train and by run_model.sh.
function _set_params(){
model_item=${1:-"model_item"}            # (required) model item name
fp_item=${2:-"fp32"}                     # (required) fp32|fp16
dp_degree=${3:-"1"}                      # (required) data-parallel degree
mp_degree=${4:-"1"}                      # (required) model(tensor)-parallel degree
pp_degree=${5:-"1"}                      # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"}               # (required) micro batch size
global_batch_size=${7:-"16"}             # (required) global batch size
run_mode=${8:-"DP"}                      # (required) MP|DP|PP or hybrid modes such as DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"}                  # (required) device topology: N1C1|N1C8|N4C32 (4 nodes x 8 cards)
profiling=${PROFILING:-"false"}          # (required) profiling switch, off by default, passed via global env var
model_repo="PaddleFleetX"                # (required) name of the model suite
speed_unit="tokens/s"                    # (required) unit of the reported speed metric
skip_steps=0                             # (required) number of unstable warm-up steps skipped during log parsing
keyword="ips:"                           # (required) keyword identifying the speed lines in the log
convergence_key="loss:"                  # (optional) keyword identifying the convergence lines in the log
sharding_degree=${10:-"1"}               # (optional) sharding-parallel degree
sharding_stage=${11:-"1"}                # (optional) sharding stage: 1 shards optimizer states, 2 also gradients, 3 also forward parameters
sharding_offload=${12:-"False"}          # (optional) CPU offload strategy
max_iter=${13:-500}                      # (optional) max steps; keep total run time under ~5 minutes (early-exit changes go through a suite PR, or use max_epoch)
eval_freq=${14:-"1000"}                  # (optional) evaluation interval
num_workers=0                            # (optional)
base_batch_size=$global_batch_size
use_recompute=${15:-"True"}              # (optional) whether recompute is enabled
# Common bookkeeping below; normally no changes are needed.
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) keep this exact format for cross-suite comparison
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)}             # (required) TRAIN_LOG_DIR is set globally by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}   # (required) PROFILING_LOG_DIR is set globally by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the sharding-benchmark training command for the selected run_mode,
# launch it with a timeout, and normalize the worker log for the parser.
function _train(){
# Per-device batch size: the global batch is split across the data-parallel
# and sharding groups.  This must be computed BEFORE batch_size is derived
# from it (the original assigned batch_size first, so it was always empty).
local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
batch_size=${local_batch_size} # reported to the benchmark framework
if [ -d $OUTPUT_PATH ]; then
rm -rf $OUTPUT_PATH
fi
mkdir -p $OUTPUT_PATH
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
if [ "${profiling}" = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
use_pure_fp16=False
if [ "fp16" = "${fp_item}" ]; then use_pure_fp16=True; fi
# Config overrides shared by every run_mode below.
train_cmd="-o Global.local_batch_size=${local_batch_size} \
           -o Global.micro_batch_size=${micro_batch_size} \
           -o Engine.max_steps=${max_iter} \
           -o Engine.eval_freq=${eval_freq} \
           -o Engine.mix_precision.enable=${use_pure_fp16} \
           -o Engine.save_load.save_steps=100000 \
           -o Model.use_recompute=${use_recompute} \
           -o Distributed.dp_degree=${dp_degree} \
           -o Distributed.mp_degree=${mp_degree} \
           -o Distributed.pp_degree=${pp_degree} \
           -o Distributed.sharding.sharding_degree=${sharding_degree} \
           -o Distributed.sharding.sharding_stage=${sharding_stage} \
           -o Distributed.sharding.sharding_offload=${sharding_offload} \
           "
# On multi-node runs the benchmark framework exports PADDLE_TRAINER_ID.
if [ -n "${PADDLE_TRAINER_ID}" ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# Common launch logic; normally no changes are needed below.
case ${run_mode} in
DP1-MP1-PP1-Sharding2) echo "run run_mode: DP1-MP1-PP1-Sharding2"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1 ${PADDLE_RANK_OPTION}\
./tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_1.3B_dp8.yaml \
-o Global.seed=1234 \
-o Model.hidden_size=1024 \
-o Model.num_layers=4 \
-o Model.num_attention_heads=4 \
-o Model.type_vocab_size=1 \
-o Optimizer.lr.max_lr=1e-4 \
-o Optimizer.lr.min_lr=1e-5 \
${train_cmd}"
workerlog_id=0
;;
DP1-MP1-PP1-Sharding16) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
./tools/train.py -c ppfleetx/configs/nlp/gpt/pretrain_gpt_6.7B_sharding16.yaml \
-o Engine.logging_freq=1 \
${train_cmd}"
workerlog_id=0
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
# CE (convergence) jobs run unbounded; performance jobs are killed after 70m.
if [[ ${model_item} =~ "CE" ]];then
${train_cmd} > ${log_file} 2>&1
else
timeout 70m ${train_cmd} > ${log_file} 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# For multi-card runs keep only one worker's log so the parser sees one rank.
if [ ${device_num} != "N1C1" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.${workerlog_id} ${log_file}
fi
}
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # provides _run, which parses benchmark-format logs with analysis.py; may be commented out for local log-only debugging, but must be enabled when committing
_set_params $@
#_train # uncomment to produce the training log only, without parsing
_run # defined in run_model.sh; it calls _train internally. May be commented out for local log-only debugging, but must be enabled when committing
================================================
FILE: benchmarks/test_tipc/gpt/static/auto_parallel/N1C1/gpt_auto_recompute_bs8_fp32_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: GPT static-graph auto-parallel benchmark with recompute enabled,
# single GPU (N1C1), fp32, global batch size 8, 500 iterations.  Values are
# passed positionally to run_benchmark.sh.
model_item=gpt_auto_recompute
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=8
fp_item=fp32
run_mode=DP1-MP1-PP1
device_num=N1C1
max_iter=500
use_recompute=True
model=gpt
micro_bs=8
cd ./benchmarks
# Install requirements and download the training data.
bash ./test_tipc/gpt/static/auto_parallel/benchmark_common/prepare.sh
# run
bash ./test_tipc/gpt/static/auto_parallel/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${max_iter} ${use_recompute} 2>&1;
================================================
FILE: benchmarks/test_tipc/gpt/static/auto_parallel/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install Python dependencies and download the 300M-token GPT English dataset.
# Runs from benchmarks/; the data lands in <repo root>/data.
python -m pip install -r ../requirements.txt
# get data
cd ../
rm -rf data
mkdir data
wget -O data/gpt_en_dataset_300m_ids.npy https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
wget -O data/gpt_en_dataset_300m_idx.npz https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz
================================================
FILE: benchmarks/test_tipc/gpt/static/auto_parallel/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage: bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${max_iter} ${use_recompute} ${verbose} ${logging_freq} ${sharding_degree} ${sharding_stage}
# Parse positional CLI arguments and benchmark-framework environment variables
# into the global variables consumed by _train and by run_model.sh.
function _set_params(){
model_item=${1:-"model_item"}            # (required) model item name
fp_item=${2:-"fp32"}                     # (required) fp32|fp16
dp_degree=${3:-"1"}                      # (required) data-parallel degree
mp_degree=${4:-"1"}                      # (required) model(tensor)-parallel degree
pp_degree=${5:-"1"}                      # (required) pipeline-parallel degree
micro_batch_size=${6:-"2"}               # (required) micro batch size
global_batch_size=${7:-"16"}             # (required) global batch size
run_mode=${8:-"DP"}                      # (required) MP|DP|PP or hybrid modes such as DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
device_num=${9:-"N1C1"}                  # (required) device topology: N1C1|N1C8|N4C32 (4 nodes x 8 cards)
profiling=${PROFILING:-"false"}          # (required) profiling switch, off by default, passed via global env var
model_repo="PaddleFleetX"                # (required) name of the model suite
speed_unit="samples/s"                   # (required) unit of the reported speed metric
skip_steps=0                             # (required) number of unstable warm-up steps skipped during log parsing
keyword="ips:"                           # (required) keyword identifying the speed lines in the log
convergence_key="loss:"                  # (optional) keyword identifying the convergence lines in the log
max_iter=${10:-500}                      # (optional) max steps; keep total run time under ~5 minutes (early-exit changes go through a suite PR, or use max_epoch)
num_workers=0                            # (optional)
base_batch_size=$global_batch_size
use_recompute=${11:-"False"}             # (optional) whether recompute is enabled
verbose=${12:-"3"}                       # (optional) whether performance data is printed
logging_freq=${13:-"100000"}             # (optional) loss logging frequency
sharding_degree=${14:-"1"}               # (optional)
sharding_stage=${15:-"1"}                # (optional) sharding case
# Common bookkeeping below; normally no changes are needed.
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode} # (required) keep this exact format for cross-suite comparison
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)}             # (required) TRAIN_LOG_DIR is set globally by the benchmark framework
profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}   # (required) PROFILING_LOG_DIR is set globally by the benchmark framework
speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
#
train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
OUTPUT_PATH=${run_log_path}/output
}
# Build the auto-parallel training command for the selected run_mode, launch
# it with a timeout, and normalize the worker logs for the parser.
function _train(){
# Per-device batch size: the global batch is split across the data-parallel
# and sharding groups.  This must be computed BEFORE batch_size is derived
# from it (the original assigned batch_size first, so it was always empty).
local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
batch_size=${local_batch_size} # reported to the benchmark framework
if [ -d $OUTPUT_PATH ]; then
rm -rf $OUTPUT_PATH
fi
mkdir -p $OUTPUT_PATH
echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
if [ "${profiling}" = "true" ];then
add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
log_file=${profiling_log_file}
else
add_options=""
log_file=${train_log_file}
fi
# Model scale: gpt2-medium-en by default, gpt2-small-en for small mp/pp degrees.
num_attention_heads=16 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_attention_heads=4; fi #"gpt2-small-en"
num_layers=24 #"gpt2-medium-en"
if [ ${mp_degree} -lt 8 -a ${pp_degree} -lt 8 ]; then num_layers=4; fi #"gpt2-small-en"
use_pure_fp16=False # fp32
if [ "fp16" = "${fp_item}" ]; then use_pure_fp16=True; fi
# Config overrides shared by every run_mode below.
train_cmd="-o Global.seed=1234 \
           -o Global.local_batch_size=${local_batch_size} \
           -o Global.micro_batch_size=${micro_batch_size} \
           -o Engine.max_steps=${max_iter} \
           -o Engine.eval_freq=100000 \
           -o Engine.mix_precision.enable=${use_pure_fp16} \
           -o Engine.save_load.save_steps=100000 \
           -o Model.hidden_size=1024 \
           -o Model.num_layers=${num_layers} \
           -o Model.num_attention_heads=${num_attention_heads} \
           -o Model.type_vocab_size=1 \
           -o Model.use_recompute=${use_recompute} \
           -o Distributed.dp_degree=${dp_degree} \
           -o Distributed.mp_degree=${mp_degree} \
           -o Distributed.pp_degree=${pp_degree} \
           -o Distributed.sharding.sharding_degree=${sharding_degree} \
           -o Distributed.sharding.sharding_stage=${sharding_stage} \
           -o Optimizer.lr.max_lr=1e-4 \
           -o Optimizer.lr.min_lr=1e-5 \
           -o Engine.verbose=${verbose} \
           -o Engine.logging_freq=${logging_freq} "
# On multi-node runs the benchmark framework exports PADDLE_TRAINER_ID.
if [ -n "${PADDLE_TRAINER_ID}" ]
then
PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
else
PADDLE_RANK_OPTION=""
fi
# Common launch logic; normally no changes are needed below.
# Each arm must set workerlog_id_1 and workerlog_id_2 — both are read by the
# multi-card log-copy step at the end (the original DP1-MP1-PP1 arm set an
# unrelated workerlog_id, leaving them unset).
case ${run_mode} in
DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0 ${PADDLE_RANK_OPTION}\
tools/auto.py -c ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id_1=0
workerlog_id_2=0
;;
DP2-MP2-PP2) echo "run run_mode: ${run_mode}"
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
tools/auto.py -c ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_1.3B_dp8.yaml \
${train_cmd}"
workerlog_id_1=4
workerlog_id_2=6
;;
*) echo "choose run_mode "; exit 1;
esac
cd ../
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
# CE (convergence) jobs run unbounded; performance jobs are killed after 20m.
if [[ ${model_item} =~ "CE" ]];then
${train_cmd} > ${log_file} 2>&1
else
timeout 20m ${train_cmd} > ${log_file} 2>&1
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
else
echo -e "${model_name}, SUCCESS"
fi
#kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
# For multi-card runs keep the logs of two selected workers for the parser.
if [ ${device_num} != "N1C1" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.${workerlog_id_1} ${log_file}
cp mylog/workerlog.${workerlog_id_2} ${log_file}_2
fi
}
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh # provides _run, which parses benchmark-format logs with analysis.py; may be commented out for local log-only debugging, but must be enabled when committing
_set_params $@
#_train # uncomment to produce the training log only, without parsing
_run # defined in run_model.sh; it calls _train internally. May be commented out for local log-only debugging, but must be enabled when committing
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C1/imagen_397M_text2im_64_bs1_fp32_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen 397M text-to-image 64x64 benchmark, single GPU (N1C1),
# fp32, batch size 1.  Values are passed positionally to run_benchmark.sh.
model_item=imagen_397M_text2im_64
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=1
fp_item=fp32
run_mode=DP1-MP1-PP1
device_num=N1C1
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_397M_text2im_64x64.yaml
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C1/imagen_SR256_bs1_fp32_DP1-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen 256x256 super-resolution benchmark, single GPU (N1C1),
# fp32, batch size 1.  Values are passed positionally to run_benchmark.sh.
model_item=imagen_SR256
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=1
fp_item=fp32
run_mode=DP1-MP1-PP1
device_num=N1C1
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_super_resolution_256.yaml
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_2B_text2im_64_bs8_fp32_DP1-Sharding8.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen 2B text-to-image 64x64 (T5-11B encoder) benchmark with
# stage-2 sharding over 8 GPUs on one node (N1C8), fp32, batch size 8,
# capped at 1000 iterations.  Values are passed positionally to run_benchmark.sh.
model_item=imagen_2B_text2im_64
dp_degree=1
mp_degree=1
pp_degree=1
bs_item=8
fp_item=fp32
run_mode=DP1-Sharding8
device_num=N1C8
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_text2im_64x64_T5-11B.yaml
max_iter=1000
sharding_degree=8
sharding_stage=2      # stage 2: shard optimizer states and gradients
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} ${max_iter} ${sharding_degree} ${sharding_stage} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_397M_text2im_64_bs8_fp32_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen 397M text-to-image 64x64 benchmark, 8-way data parallel on
# one node (N1C8), fp32, batch size 8.  Values are passed positionally to
# run_benchmark.sh.
model_item=imagen_397M_text2im_64
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=8
fp_item=fp32
run_mode=DP8-MP1-PP1
device_num=N1C8
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_397M_text2im_64x64.yaml
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_SR256_bs8_fp32_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen 256x256 super-resolution benchmark, 8-way data parallel on
# one node (N1C8), fp32, batch size 8.  Values are passed positionally to
# run_benchmark.sh.
model_item=imagen_SR256
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=8
fp_item=fp32
run_mode=DP8-MP1-PP1
device_num=N1C8
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_super_resolution_256.yaml
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_text2im_64_debertav2_bs8_fp32_DP8-MP1-PP1.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launcher: Imagen text-to-image 64x64 (DeBERTa-V2 encoder) benchmark, 8-way
# data parallel on one node (N1C8), fp32, batch size 8.  Values are passed
# positionally to run_benchmark.sh.
model_item=imagen_text2im_64_debertav2
dp_degree=8
mp_degree=1
pp_degree=1
bs_item=8
fp_item=fp32
run_mode=DP8-MP1-PP1
device_num=N1C8
yaml_path=ppfleetx/configs/multimodal/imagen/imagen_text2im_64x64_DebertaV2.yaml
model=imagen
micro_bs=1
cd ./benchmarks
# Install requirements and download data plus text-encoder checkpoints.
bash ./test_tipc/imagen/dygraph/benchmark_common/prepare.sh
# run
bash ./test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_bs} ${bs_item} ${run_mode} ${device_num} \
${yaml_path} 2>&1;
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/benchmark_common/prepare.sh
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install Python dependencies and download the Imagen benchmark assets:
# a LAION-400M data shard plus the T5-11B and DeBERTa-V2 1.5B text-encoder
# tokenizers and split checkpoint archives.  Runs from benchmarks/.
python -m pip install -r ../requirements.txt
# get data
cd ../
wget -O projects/imagen/part-00079 https://paddlefleetx.bj.bcebos.com/data/laion400m/part-00079
# T5-11B tokenizer files and checkpoint (split into 5 parts, reassembled below)
mkdir -p projects/imagen/t5/t5-11b/ && cd projects/imagen/t5/t5-11b/
wget https://paddlefleetx.bj.bcebos.com/tokenizers/t5/t5-11b/config.json
wget https://paddlefleetx.bj.bcebos.com/tokenizers/t5/t5-11b/spiece.model
wget https://paddlefleetx.bj.bcebos.com/tokenizers/t5/t5-11b/tokenizer.json
wget https://fleetx.bj.bcebos.com/T5/t5-11b/t5.pd.tar.gz.0
wget https://fleetx.bj.bcebos.com/T5/t5-11b/t5.pd.tar.gz.1
wget https://fleetx.bj.bcebos.com/T5/t5-11b/t5.pd.tar.gz.2
wget https://fleetx.bj.bcebos.com/T5/t5-11b/t5.pd.tar.gz.3
wget https://fleetx.bj.bcebos.com/T5/t5-11b/t5.pd.tar.gz.4
# Concatenate the split archive parts and extract in place.
cat t5.pd.tar.gz.* |tar -xf -
cd -
# DeBERTa V2 1.5B tokenizer files and checkpoint (split into 2 parts)
mkdir -p projects/imagen/cache/deberta-v-xxlarge && cd projects/imagen/cache/deberta-v-xxlarge
wget https://paddlefleetx.bj.bcebos.com/tokenizers/debertav2/config.json
wget https://paddlefleetx.bj.bcebos.com/tokenizers/debertav2/spm.model
wget https://paddlefleetx.bj.bcebos.com/tokenizers/debertav2/tokenizer_config.json
wget https://fleetx.bj.bcebos.com/DebertaV2/debertav2.pd.tar.gz.0
wget https://fleetx.bj.bcebos.com/DebertaV2/debertav2.pd.tar.gz.1
cat debertav2.pd.tar.gz.* | tar -xf -
cd -
================================================
FILE: benchmarks/test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test training benchmark for a model.
# Usage: bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${yaml_path} ${max_iter} ${sharding_degree} ${sharding_stage} ${sharding_offload}
function _set_params(){
    # Parse benchmark CLI arguments and derive log-file locations.
    # The *_LOG_DIR variables are injected by the benchmark framework.
    model_item=${1:-"model_item"}            # (required) model item name
    fp_item=${2:-"fp32"}                     # (required) fp32|fp16
    dp_degree=${3:-"1"}                      # (required) data-parallel degree
    mp_degree=${4:-"1"}                      # (required) model(tensor)-parallel degree
    pp_degree=${5:-"1"}                      # (required) pipeline-parallel degree
    micro_batch_size=${6:-"2"}               # (required) micro batch size
    global_batch_size=${7:-"16"}             # (required) global batch size
    run_mode=${8:-"DP"}                      # (required) MP|DP|PP or hybrid modes such as DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
    device_num=${9:-"N1C1"}                  # (required) machines/cards used, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
    yaml_path=${10:-"ppfleetx/configs/multimodal/imagen/imagen_397M_text2im_64x64.yaml"}
    profiling=${PROFILING:-"false"}          # (required) profiling switch, off by default, passed in as a global variable
    model_repo="PaddleFleetX"                # (required) name of the model suite
    speed_unit="step/s"                      # (required) unit of the speed metric
    skip_steps=0                             # (required) number of unstable warm-up steps skipped during log parsing
    keyword="speed:"                         # (required) keyword that marks performance lines in the log
    convergence_key="loss:"                  # (optional) keyword that marks convergence lines in the log
    max_iter=${11:-1000}                     # (optional) keep the total run under ~5 minutes; submit a PR to the suite if early termination needs code changes, or use a max_epoch parameter
    num_workers=0                            # (optional)
    base_batch_size=$global_batch_size
    sharding_degree=${12:-"1"}               # (optional)
    sharding_stage=${13:-"1"}                # (optional) sharding stage
    sharding_offload=${14:-"False"}          # (optional)
    # Generic bookkeeping below; usually no need to modify.
    model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode}  # (required) keep this format unchanged, aligned with competing products
    device=${CUDA_VISIBLE_DEVICES//,/ }
    arr=(${device})
    num_gpu_devices=${#arr[*]}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}            # (required) TRAIN_LOG_DIR is set by the benchmark framework
    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set by the benchmark framework
    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
    #
    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
    OUTPUT_PATH=${run_log_path}/output
}
function _train(){
    # Launch one imagen benchmark training run and collect its log.
    # Bug fix: local_batch_size used to be computed AFTER it was read into
    # batch_size, so batch_size was always empty; compute it first.
    local_batch_size=`expr ${global_batch_size} / ${dp_degree} / ${sharding_degree}`
    batch_size=${local_batch_size}
    if [ -d $OUTPUT_PATH ]; then
        rm -rf $OUTPUT_PATH
    fi
    mkdir -p $OUTPUT_PATH
    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
    if [ "${profiling}" = "true" ];then
        add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
        log_file=${profiling_log_file}
    else
        add_options=""
        log_file=${train_log_file}
    fi
    train_cmd="-o Engine.max_steps=${max_iter} \
               -o Global.local_batch_size=${local_batch_size} \
               -o Global.micro_batch_size=${micro_batch_size} \
               -o Distributed.dp_degree=${dp_degree} \
               -o Distributed.mp_degree=${mp_degree} \
               -o Distributed.pp_degree=${pp_degree} \
               -o Distributed.sharding.sharding_degree=${sharding_degree} \
               -o Distributed.sharding.sharding_stage=${sharding_stage} \
               -o Distributed.sharding.sharding_offload=${sharding_offload} \
               "
    if [ ${PADDLE_TRAINER_ID} ]
    then
        PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
    else
        PADDLE_RANK_OPTION=""
    fi
    # Generic launch commands below; usually no need to modify.
    case ${run_mode} in
    DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1"
        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0 \
            ${PADDLE_RANK_OPTION} tools/train.py -c ${yaml_path} \
            ${train_cmd}"
        workerlog_id=0
        ;;
    DP8-MP1-PP1|DP1-Sharding8) echo "run run_mode: ${run_mode}"
        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 \
            ${PADDLE_RANK_OPTION} tools/train.py -c ${yaml_path} \
            ${train_cmd}"
        workerlog_id=0
        ;;
    *) echo "choose run_mode "; exit 1;
    esac
    cd ../
    echo "train_cmd: ${train_cmd} log_file: ${log_file}"
    if [[ ${model_item} =~ "CE" ]];then # CE accuracy runs: no time limit
        ${train_cmd} > ${log_file} 2>&1
    else
        timeout 30m ${train_cmd} > ${log_file} 2>&1
    fi
    if [ $? -ne 0 ];then
        echo -e "${model_name}, FAIL"
    else
        echo -e "${model_name}, SUCCESS"
    fi
    #kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
    # On multi-card runs the per-worker log is the one the parser expects.
    if [ ${device_num} != "N1C1" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.${workerlog_id} ${log_file}
    fi
}
# Make the repository importable, then hand control to the benchmark driver.
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh  # parses benchmark-style logs with analysis.py; comment this out to only produce training logs during local debugging, re-enable before submitting
_set_params $@
#_train  # uncomment to produce the training log without parsing it
_run  # defined in run_model.sh; it calls _train internally. Comment out to only produce training logs during local debugging, re-enable before submitting
================================================
FILE: benchmarks/test_tipc/vit/dygraph/finetune/N1C8/ViT_large_patch16_384_ft_fused_False_bs512_fp16_DP.sh
================================================
# Benchmark launcher: ViT-L/16 384 finetune, fused attention disabled,
# batch size 512, fp16, data parallel on 1 node x 8 GPUs.
model_item=ViT_large_patch16_384_ft_fused_False
fp_item=fp16
bs_item=512
run_mode=DP
device_num=N1C8
use_fused_attn=False
max_iter=1
cd ./benchmarks
# prepare environment, data and pretrained weights
bash ./test_tipc/vit/dygraph/finetune/benchmark_common/prepare.sh
# run
bash ./test_tipc/vit/dygraph/finetune/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${bs_item} ${run_mode} ${device_num} \
${use_fused_attn} ${max_iter} 2>&1;
================================================
FILE: benchmarks/test_tipc/vit/dygraph/finetune/N1C8/ViT_large_patch16_384_ft_fused_True_bs512_fp16_DP.sh
================================================
# Benchmark launcher: ViT-L/16 384 finetune, fused attention enabled,
# batch size 512, fp16, data parallel on 1 node x 8 GPUs.
model_item=ViT_large_patch16_384_ft_fused_True
fp_item=fp16
bs_item=512
run_mode=DP
device_num=N1C8
use_fused_attn=True
max_iter=1
cd ./benchmarks
# prepare environment, data and pretrained weights
bash ./test_tipc/vit/dygraph/finetune/benchmark_common/prepare.sh
# run
bash ./test_tipc/vit/dygraph/finetune/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${bs_item} ${run_mode} ${device_num} \
${use_fused_attn} ${max_iter} 2>&1;
================================================
FILE: benchmarks/test_tipc/vit/dygraph/finetune/benchmark_common/prepare.sh
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Install python dependencies.
python -m pip install -r ../requirements.txt
# get data
cd ../
# Fix: `mkdir -p` keeps re-runs idempotent; a plain `mkdir` fails when the
# directory already exists and the copy below then runs in the wrong place.
mkdir -p dataset && cd dataset
cp -r ${BENCHMARK_ROOT}/models_data_cfs/Paddle_distributed/ILSVRC2012.tgz ./
tar -zxf ILSVRC2012.tgz
cd -
# Download the ImageNet-21k pretrained ViT-L/16 weights used for finetuning.
mkdir -p pretrained/vit/
wget -O ./pretrained/vit/imagenet21k-ViT-L_16.pdparams \
    https://paddle-wheel.bj.bcebos.com/benchmark/imagenet21k-ViT-L_16.pdparams
================================================
FILE: benchmarks/test_tipc/vit/dygraph/finetune/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
function _set_params(){
    # Parse benchmark CLI arguments and derive log-file locations.
    # The *_LOG_DIR variables are injected by the benchmark framework.
    model_item=${1:-"model_item"}      # (required) model item name
    fp_item=${2:-"fp32"}               # (required) fp32|fp16
    global_batch_size=${3:-"128"}      # (required) global batch size
    run_mode=${4:-"DP"}                # (required) MP|DP|PP or hybrid modes such as DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
    device_num=${5:-"N1C1"}            # (required) machines/cards used, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
    profiling=${PROFILING:-"false"}    # (required) profiling switch, off by default, passed in as a global variable
    model_repo="PaddleFleetX"          # (required) name of the model suite
    speed_unit="images/sec"            # (required) unit of the speed metric
    skip_steps=0                       # (required) number of unstable warm-up steps skipped during log parsing
    keyword="ips:"                     # (required) keyword that marks performance lines in the log
    convergence_key="loss:"            # (optional) keyword that marks convergence lines in the log
    use_fused_attn=${6:-"False"}
    max_iter=${7:-1}                   # (optional) keep the total run under ~5 minutes; submit a PR to the suite if early termination needs code changes, or use a max_epoch parameter
    num_workers=0                      # (optional)
    base_batch_size=$global_batch_size
    # Generic bookkeeping below; usually no need to modify.
    model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode}  # (required) keep this format unchanged, aligned with competing products
    device=${CUDA_VISIBLE_DEVICES//,/ }
    arr=(${device})
    num_gpu_devices=${#arr[*]}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}            # (required) TRAIN_LOG_DIR is set by the benchmark framework
    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set by the benchmark framework
    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
    #
    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
    OUTPUT_PATH=${run_log_path}/output
}
function _train(){
    # Launch the ViT finetune benchmark and collect its log.
    # Bug fix: `local_batch_size` is never defined anywhere in this script,
    # so batch_size ended up empty; use the global batch size of this DP run.
    batch_size=${global_batch_size}
    if [ -d $OUTPUT_PATH ]; then
        rm -rf $OUTPUT_PATH
    fi
    mkdir -p $OUTPUT_PATH
    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
    if [ "${profiling}" = "true" ];then
        add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
        log_file=${profiling_log_file}
    else
        add_options=""
        log_file=${train_log_file}
    fi
    train_cmd="-o Engine.num_train_epochs=${max_iter} \
               -o Model.model.use_fused_attn=${use_fused_attn} \
               "
    if [ ${PADDLE_TRAINER_ID} ]
    then
        PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
    else
        PADDLE_RANK_OPTION=""
    fi
    # Generic launch command below; usually no need to modify.
    train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION} \
               tools/train.py -c ppfleetx/configs/vis/vit/ViT_large_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml \
               ${train_cmd}"
    workerlog_id=0
    cd ../
    echo "train_cmd: ${train_cmd} log_file: ${log_file}"
    if [[ ${model_item} =~ "CE" ]];then # CE accuracy runs: no time limit
        ${train_cmd} > ${log_file} 2>&1
    else
        timeout 15m ${train_cmd} > ${log_file} 2>&1
    fi
    if [ $? -ne 0 ];then
        echo -e "${model_name}, FAIL"
    else
        echo -e "${model_name}, SUCCESS"
    fi
    #kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
    # On multi-card runs the per-worker log is the one the parser expects.
    if [ ${device_num} != "N1C1" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.${workerlog_id} ${log_file}
    fi
}
# Make the repository importable, then hand control to the benchmark driver.
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh  # parses benchmark-style logs with analysis.py; comment this out to only produce training logs during local debugging, re-enable before submitting
_set_params $@
#_train  # uncomment to produce the training log without parsing it
_run  # defined in run_model.sh; it calls _train internally. Comment out to only produce training logs during local debugging, re-enable before submitting
================================================
FILE: benchmarks/test_tipc/vit/dygraph/pretrained/N2C16/ViT_large_patch16_224_pt_fused_False_bs128_fp16_DP.sh
================================================
# Benchmark launcher: ViT-L/16 224 pretrain, fused attention disabled,
# batch size 128, fp16, data parallel on 2 nodes x 8 GPUs.
model_item=ViT_large_patch16_224_pt_fused_False
fp_item=fp16
bs_item=128
run_mode=DP
device_num=N2C16
use_fused_attn=False
max_iter=1
cd ./benchmarks
# prepare environment and data
bash ./test_tipc/vit/dygraph/pretrained/benchmark_common/prepare.sh
# run
bash ./test_tipc/vit/dygraph/pretrained/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${bs_item} ${run_mode} ${device_num} \
${use_fused_attn} ${max_iter} 2>&1;
================================================
FILE: benchmarks/test_tipc/vit/dygraph/pretrained/N2C16/ViT_large_patch16_224_pt_fused_True_bs128_fp16_DP.sh
================================================
# Benchmark launcher: ViT-L/16 224 pretrain, fused attention enabled,
# batch size 128, fp16, data parallel on 2 nodes x 8 GPUs.
model_item=ViT_large_patch16_224_pt_fused_True
fp_item=fp16
bs_item=128
run_mode=DP
device_num=N2C16
use_fused_attn=True
max_iter=1
cd ./benchmarks
# prepare environment and data
bash ./test_tipc/vit/dygraph/pretrained/benchmark_common/prepare.sh
# run
bash ./test_tipc/vit/dygraph/pretrained/benchmark_common/run_benchmark.sh ${model_item} ${fp_item} ${bs_item} ${run_mode} ${device_num} \
${use_fused_attn} ${max_iter} 2>&1;
================================================
FILE: benchmarks/test_tipc/vit/dygraph/pretrained/benchmark_common/prepare.sh
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Install python dependencies.
python -m pip install -r ../requirements.txt
# get data
cd ../
# Fix: `mkdir -p` keeps re-runs idempotent; a plain `mkdir` fails when the
# directory already exists and the copy below then runs in the wrong place.
mkdir -p dataset && cd dataset
cp -r ${BENCHMARK_ROOT}/models_data_cfs/Paddle_distributed/ILSVRC2012.tgz ./
tar -zxf ILSVRC2012.tgz
cd -
================================================
FILE: benchmarks/test_tipc/vit/dygraph/pretrained/benchmark_common/run_benchmark.sh
================================================
#!/usr/bin/env bash
# Test training benchmark for a model.
# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_item} ${dp_degree} ${mp_degree} ${pp_degree} ${micro_batch_size} ${global_batch_size} ${run_mode} ${device_num} ${use_sharding}
function _set_params(){
    # Parse benchmark CLI arguments and derive log-file locations.
    # The *_LOG_DIR variables are injected by the benchmark framework.
    model_item=${1:-"model_item"}      # (required) model item name
    fp_item=${2:-"fp32"}               # (required) fp32|fp16
    global_batch_size=${3:-"128"}      # (required) global batch size
    run_mode=${4:-"DP"}                # (required) MP|DP|PP or hybrid modes such as DP1-MP1-PP1|DP2-MP8-PP2|DP1-MP8-PP4|DP4-MP8-PP1
    device_num=${5:-"N1C1"}            # (required) machines/cards used, N1C1|N1C8|N4C32 (4 nodes, 32 GPUs)
    yaml_path=${6:-"./task/classification/vit/configs/ViT_base_patch16_224_in1k_1n8c_dp_fp16o2.yaml"}
    profiling=${PROFILING:-"false"}    # (required) profiling switch, off by default, passed in as a global variable
    model_repo="PaddleFleetX"          # (required) name of the model suite
    speed_unit="images/sec"            # (required) unit of the speed metric
    skip_steps=0                       # (required) number of unstable warm-up steps skipped during log parsing
    keyword="ips:"                     # (required) keyword that marks performance lines in the log
    convergence_key="loss:"            # (optional) keyword that marks convergence lines in the log
    use_fused_attn=${7:-"False"}
    max_iter=${8:-1}                   # (optional) keep the total run under ~5 minutes; submit a PR to the suite if early termination needs code changes, or use a max_epoch parameter
    num_workers=0                      # (optional)
    base_batch_size=$global_batch_size
    pretrained_model=${9:-"null"}
    # Generic bookkeeping below; usually no need to modify.
    model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode}  # (required) keep this format unchanged, aligned with competing products
    device=${CUDA_VISIBLE_DEVICES//,/ }
    arr=(${device})
    num_gpu_devices=${#arr[*]}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}            # (required) TRAIN_LOG_DIR is set by the benchmark framework
    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set by the benchmark framework
    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
    #
    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
    OUTPUT_PATH=${run_log_path}/output
}
function _train(){
    # Launch the ViT pretrain benchmark and collect its log.
    # Bug fix: `local_batch_size` is never defined anywhere in this script,
    # so batch_size ended up empty; use the global batch size of this DP run.
    batch_size=${global_batch_size}
    if [ -d $OUTPUT_PATH ]; then
        rm -rf $OUTPUT_PATH
    fi
    mkdir -p $OUTPUT_PATH
    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
    if [ "${profiling}" = "true" ];then
        add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
        log_file=${profiling_log_file}
    else
        add_options=""
        log_file=${train_log_file}
    fi
    train_cmd="-o Engine.num_train_epochs=${max_iter} \
               -o Data.Train.sampler.batch_size=${global_batch_size} \
               -o Model.model.name=ViT_large_patch16_224 \
               -o Model.model.use_fused_attn=${use_fused_attn}
               "
    if [ ${PADDLE_TRAINER_ID} ]
    then
        PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
    else
        PADDLE_RANK_OPTION=""
    fi
    # Generic launch command below; usually no need to modify.
    train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --devices=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION} \
               tools/train.py -c ppfleetx/configs/vis/vit/ViT_base_patch16_224_pt_in1k_2n16c_dp_fp16o2.yaml \
               ${train_cmd}"
    workerlog_id=0
    cd ../
    echo "train_cmd: ${train_cmd} log_file: ${log_file}"
    if [[ ${model_item} =~ "CE" ]];then # CE accuracy runs: no time limit
        ${train_cmd} > ${log_file} 2>&1
    else
        timeout 15m ${train_cmd} > ${log_file} 2>&1
    fi
    if [ $? -ne 0 ];then
        echo -e "${model_name}, FAIL"
    else
        echo -e "${model_name}, SUCCESS"
    fi
    #kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
    # On multi-card runs the per-worker log is the one the parser expects.
    if [ ${device_num} != "N1C1" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.${workerlog_id} ${log_file}
    fi
}
# Make the repository importable, then hand control to the benchmark driver.
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
source ${BENCHMARK_ROOT}/scripts/run_model.sh  # parses benchmark-style logs with analysis.py; comment this out to only produce training logs during local debugging, re-enable before submitting
_set_params $@
#_train  # uncomment to produce the training log without parsing it
_run  # defined in run_model.sh; it calls _train internally. Comment out to only produce training logs during local debugging, re-enable before submitting
================================================
FILE: codestyle/.gitignore
================================================
*.pyc
================================================
FILE: codestyle/clang_format.hook
================================================
#!/bin/bash
set -e
# Pre-commit hook: ensure clang-format ${VERSION} is available, then run it
# on the files passed in by pre-commit.
readonly VERSION="13.0.0"
# Bug fix: under `set -e`, a missing clang-format binary aborted the script
# right here, so the install branch below was unreachable. Tolerate failure
# (and a missing binary) when probing the installed version.
version=$(clang-format --version 2>/dev/null || true)
if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36 ]]; then
    echo "clang-format installation by pip need python version great equal 3.6,
please change the default python to higher version."
    exit 1
fi
if ! [[ $version == *"$VERSION"* ]]; then
    # low version of pip may not have the source of clang-format whl
    pip install --upgrade pip
    pip install clang-format==13.0.0
fi
# Quote "$@" so filenames containing spaces survive word splitting.
clang-format "$@"
================================================
FILE: codestyle/copyright.hook
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import io
import re
import sys
import os
import datetime
COPYRIGHT = '''Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.'''
def _generate_copyright(comment_mark):
copyright=COPYRIGHT.split(os.linesep)
header = copyright[0].rstrip()
p = re.search('(\d{4})', header).group(0)
now = datetime.datetime.now()
header = header.replace(p,str(now.year))
ans=[comment_mark + " " + header + os.linesep]
for idx, line in enumerate(copyright[1:]):
ans.append(comment_mark + " " + line.rstrip() + os.linesep)
return ans
def _get_comment_mark(path):
lang_type=re.compile(r"\.(py|sh)$")
if lang_type.search(path) is not None:
return "#"
lang_type=re.compile(r"\.(h|c|hpp|cc|cpp|cu|go|cuh|proto)$")
if lang_type.search(path) is not None:
return "//"
return None
RE_ENCODE = re.compile(r"^[ \t\v]*#.*?coding[:=]", re.IGNORECASE)
RE_COPYRIGHT = re.compile(r".*Copyright \(c\) \d{4}", re.IGNORECASE)
RE_SHEBANG = re.compile(r"^[ \t\v]*#[ \t]?\!")
def _check_copyright(path):
head=[]
try:
with open(path) as f:
head = [next(f) for x in range(4)]
except StopIteration:
pass
for idx, line in enumerate(head):
if RE_COPYRIGHT.search(line) is not None:
return True
return False
def generate_copyright(path, comment_mark):
    """Insert the copyright header into the file at ``path``.

    The header is placed after a shebang and/or coding line when one is
    found in the first four lines, otherwise at the very top of the file.
    The file is rewritten in place.
    """
    original_contents = io.open(path, encoding="utf-8").readlines()
    # Only the first four lines are scanned for shebang/coding markers.
    head = original_contents[0:4]
    insert_line_no = 0  # index of the line the header will be inserted at
    for i, line in enumerate(head):
        if RE_ENCODE.search(line) or RE_SHEBANG.search(line):
            insert_line_no = i + 1
    copyright = _generate_copyright(comment_mark)
    if insert_line_no == 0:
        # No shebang/coding line: header first, blank separator, then file.
        new_contents = copyright
        if len(original_contents) > 0 and len(original_contents[0].strip()) != 0:
            new_contents.append(os.linesep)
        new_contents.extend(original_contents)
    else:
        # Keep shebang/coding lines, then a blank line, then the header.
        new_contents = original_contents[0:insert_line_no]
        new_contents.append(os.linesep)
        new_contents.extend(copyright)
        if len(original_contents) > insert_line_no and len(original_contents[insert_line_no].strip()) != 0:
            new_contents.append(os.linesep)
        new_contents.extend(original_contents[insert_line_no:])
    new_contents = "".join(new_contents)
    # NOTE(review): writing os.linesep through a text-mode handle yields
    # '\r\r\n' on Windows; harmless on POSIX -- confirm before relying on
    # this hook on Windows.
    with io.open(path, 'w') as output_file:
        output_file.write(new_contents)
def main(argv=None):
    """Entry point: add the copyright header to each listed file lacking one.

    Files whose type is unsupported only produce a warning on stderr.
    Always returns 0 (retv is currently never changed).
    """
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)
    retv = 0
    for path in args.filenames:
        comment_mark = _get_comment_mark(path)
        if comment_mark is None:
            print("warning:Unsupported file", path, file=sys.stderr)
            continue
        if _check_copyright(path):
            continue  # header already present, nothing to do
        generate_copyright(path, comment_mark)


if __name__ == '__main__':
    exit(main())
================================================
FILE: codestyle/cpplint_pre_commit.hook
================================================
#!/bin/bash
# Pre-commit hook: run cpplint on changed C/C++ files and exit with the
# accumulated error count.
TOTAL_ERRORS=0
readonly VERSION="1.6.0"
version=$(cpplint --version)
if [[ ! $TRAVIS_BRANCH ]]; then
    # install cpplint on local machine.
    if ! [[ $version == *"$VERSION"* ]]; then
        pip install cpplint==1.6.0
    fi
    # diff files on local machine.
    files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
else
    # diff files between PR and latest commit on Travis CI.
    branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
    head_ref=$(git rev-parse HEAD)
    files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
fi
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $files; do
    if [[ $file =~ ^(patches/.*) ]]; then
        # Vendored patch files are exempt from style checks.
        continue;
    else
        cpplint --filter=-readability/fn_size,-build/include_what_you_use,-build/c++11,-whitespace/parens $file;
        # Accumulate the exit status of each cpplint invocation.
        TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
    fi
done
exit $TOTAL_ERRORS
================================================
FILE: codestyle/docstring_checker.py
================================================
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DocstringChecker is used to check python doc string's style."""
import astroid
from pylint.checkers import BaseChecker, utils
from pylint.interfaces import IAstroidChecker
from collections import defaultdict
import re
def register(linter):
    """Register checkers.

    Called by pylint when this module is loaded via --load-plugins.
    """
    linter.register_checker(DocstringChecker(linter))
class Docstring(object):
    """Docstring class holds the parsed doc string elements.
    """

    def __init__(self):
        self.d = defaultdict(list)  # section name -> list of stripped lines
        self.clear()

    def clear(self):
        # Reset the recognized sections and the parsed argument table.
        self.d['Args'] = []
        self.d['Examples'] = []
        self.d['Returns'] = []
        self.d['Raises'] = []
        self.args = {}  # arg_name -> arg_type text (parentheses included)

    def get_level(self, string, indent=' '):
        # Count how many leading `indent` units the line starts with.
        # NOTE(review): the default unit shows as a single space here; verify
        # against the repository whether a 4-space unit was intended.
        level = 0
        unit_size = len(indent)
        while string[:unit_size] == indent:
            string = string[unit_size:]
            level += 1
        return level

    def parse(self, doc):
        """parse gets sections from doc
        Such as Args, Returns, Raises, Examples.
        Args:
            doc (string): is the astroid node doc string.
        Returns:
            True if doc is parsed successfully.
        """
        self.clear()
        lines = doc.splitlines()
        # state = (name of the current section, indent level of its header);
        # ("others", -1) means "not inside a recognized section".
        state = ("others", -1)
        for l in lines:
            c = l.strip()
            if len(c) <= 0:
                continue  # blank lines neither end nor extend a section
            level = self.get_level(l)
            if c.startswith("Args:"):
                state = ("Args", level)
            elif c.startswith("Returns:"):
                state = ("Returns", level)
            elif c.startswith("Raises:"):
                state = ("Raises", level)
            elif c.startswith("Examples:"):
                state = ("Examples", level)
            else:
                if level > state[1]:
                    # Deeper-indented line: body of the current section.
                    self.d[state[0]].append(c)
                    continue
                # Dedented line ends the section; fall back to "others".
                state = ("others", -1)
                self.d[state[0]].append(c)
        self._arg_with_type()
        return True

    def get_returns(self):
        # Lines collected under a "Returns:" header.
        return self.d['Returns']

    def get_raises(self):
        # Lines collected under a "Raises:" header.
        return self.d['Raises']

    def get_examples(self):
        # Lines collected under an "Examples:" header.
        return self.d['Examples']

    def _arg_with_type(self):
        # Extract "name (type):" pairs from the Args section into self.args.
        for t in self.d['Args']:
            m = re.search(r'([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t)
            if m:
                self.args[m.group(1)] = m.group(2)
        return self.args
class DocstringChecker(BaseChecker):
    """DocstringChecker is a pylint checker that enforces docstring style.

    It emits W9001-W9008 for short docstrings spread over several lines,
    missing trailing periods, undocumented arguments, missing/too-short
    docstrings, bad indentation, and missing Returns/Raises sections.
    """
    __implements__ = (IAstroidChecker, )

    POSITIONAL_MESSAGE_ID = 'str-used-on-positional-format-argument'
    KEYWORD_MESSAGE_ID = 'str-used-on-keyword-format-argument'

    name = 'doc-string-checker'
    symbol = "doc-string"
    priority = -1
    msgs = {
        'W9001': ('One line doc string on > 1 lines', symbol + "-one-line",
                  'Used when a short doc string is on multiple lines'),
        'W9002':
        ('Doc string does not end with "." period', symbol + "-end-with",
         'Used when a doc string does not end with a period'),
        'W9003':
        ('All args with their types must be mentioned in doc string %s',
         symbol + "-with-all-args",
         'Used when not all arguments are in the doc string '),
        'W9005': ('Missing docstring or docstring is too short',
                  symbol + "-missing", 'Add docstring longer >=10'),
        'W9006': ('Docstring indent error, use 4 space for indent',
                  symbol + "-indent-error", 'Use 4 space for indent'),
        'W9007': ('You should add `Returns` in comments',
                  symbol + "-with-returns",
                  'There should be a `Returns` section in comments'),
        'W9008': ('You should add `Raises` section in comments',
                  symbol + "-with-raises",
                  'There should be a `Raises` section in comments'),
    }
    options = ()

    def visit_functiondef(self, node):
        """visit_functiondef checks Function node docstring style.
        Args:
            node (astroid.node): The visiting node.
        Returns:
            True if successful other wise False.
        """
        self.check_doc_string(node)
        # Short functions (<= 10 lines) are exempt from the section checks.
        if node.tolineno - node.fromlineno <= 10:
            return True
        if not node.doc:
            return True
        doc = Docstring()
        doc.parse(node.doc)
        self.all_args_in_doc(node, doc)
        self.with_returns(node, doc)
        self.with_raises(node, doc)

    def visit_module(self, node):
        """Run the basic docstring checks on a module node."""
        self.check_doc_string(node)

    def visit_classdef(self, node):
        """Run the basic docstring checks on a class node."""
        self.check_doc_string(node)

    def check_doc_string(self, node):
        """Run the docstring checks shared by all node kinds."""
        self.missing_doc_string(node)
        self.one_line(node)
        self.has_period(node)
        self.indent_style(node)

    def missing_doc_string(self, node):
        """W9005: docstring missing or shorter than 10 characters."""
        # Private and dunder names are exempt (startswith("_") covers "__").
        if node.name.startswith("_"):
            return True
        if node.tolineno - node.fromlineno <= 10:
            return True
        if node.doc is None or len(node.doc) < 10:
            self.add_message('W9005', node=node, line=node.fromlineno)
            return False

    # FIXME(gongwb): give the docstring line-no
    def indent_style(self, node, indent=4):
        """indent_style checks docstring's indent style
        Args:
            node (astroid.node): The visiting node.
            indent (int): The default indent of style
        Returns:
            True if successful other wise False.
        """
        if node.doc is None:
            return True
        # Bug fix: the old loop `continue`d before incrementing its line
        # counter, so it skipped every line and W9006 could never fire
        # (the unit test for W9006 expects it to). enumerate() skips only
        # the first line, which carries no indentation of its own.
        for line_no, line in enumerate(node.doc.splitlines()):
            if line_no == 0:
                continue
            cur_indent = len(line) - len(line.lstrip())
            if cur_indent % indent != 0:
                self.add_message('W9006', node=node, line=node.fromlineno)
                return False
        return True

    def one_line(self, node):
        """one_line checks if docstring (len < 40) is on one line.
        Args:
            node (astroid.node): The node visiting.
        Returns:
            True if successful otherwise False.
        """
        doc = node.doc
        if doc is None:
            return True
        if len(doc) > 40:
            return True
        # Equivalent to the old find()-sum trick: a one-line docstring
        # contains no newline characters at all.
        if '\n' not in doc and '\r' not in doc:
            return True
        self.add_message('W9001', node=node, line=node.fromlineno)
        return False

    def has_period(self, node):
        """has_period checks if one line doc end-with '.' .
        Args:
            node (astroid.node): the node is visiting.
        Returns:
            True if successful otherwise False.
        """
        if node.doc is None:
            return True
        if len(node.doc.splitlines()) > 1:
            return True
        if not node.doc.strip().endswith('.'):
            self.add_message('W9002', node=node, line=node.fromlineno)
            return False
        return True

    def with_raises(self, node, doc):
        """with_raises checks that a raising function documents `Raises`.
        Args:
            node (astroid.node): the node is visiting.
            doc (Docstring): Docstring object.
        Returns:
            True if successful otherwise False.
        """
        # Only top-level raise statements in the body are considered.
        if not any(isinstance(t, astroid.Raise) for t in node.body):
            return True
        if len(doc.get_raises()) == 0:
            self.add_message('W9008', node=node, line=node.fromlineno)
            return False
        return True

    def with_returns(self, node, doc):
        """with_returns checks that a returning function documents `Returns`.
        Args:
            node (astroid.node): the node is visiting.
            doc (Docstring): Docstring object.
        Returns:
            True if successful otherwise False.
        """
        if node.name.startswith("_"):
            return True
        # Only top-level return statements in the body are considered.
        if not any(isinstance(t, astroid.Return) for t in node.body):
            return True
        if len(doc.get_returns()) == 0:
            self.add_message('W9007', node=node, line=node.fromlineno)
            return False
        return True

    def all_args_in_doc(self, node, doc):
        """all_args_in_doc checks if arguments are mentioned in doc
        Args:
            node (astroid.node): the node is visiting.
            doc (Docstring): Docstring object
        Returns:
            True if successful otherwise False.
        """
        if node.name.startswith("_"):
            return True
        args = []
        for arg in node.args.get_children():
            # Only named parameters count; `self` is implicit.
            if (not isinstance(arg, astroid.AssignName)) \
                    or arg.name == "self":
                continue
            args.append(arg.name)
        if len(args) <= 0:
            return True
        parsed_args = doc.args
        args_not_documented = set(args) - set(parsed_args)
        if len(args) > 0 and len(parsed_args) <= 0:
            # No Args section at all: report every undocumented argument.
            self.add_message(
                'W9003',
                node=node,
                line=node.fromlineno,
                args=list(args_not_documented))
            return False
        for t in args:
            if t not in parsed_args:
                self.add_message(
                    'W9003', node=node, line=node.fromlineno, args=[t, ])
                return False
        return True
================================================
FILE: codestyle/pylint_pre_commit.hook
================================================
#!/bin/bash
# Pre-commit hook: run the custom docstring checker (a pylint plugin) on
# changed files and exit with the accumulated error count.
TOTAL_ERRORS=0
# Make docstring_checker.py importable as a pylint plugin.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export PYTHONPATH=$DIR:$PYTHONPATH
readonly VERSION="2.12.0"
version=$(pylint --version | grep 'pylint')
if ! [[ $version == *"$VERSION"* ]]; then
    pip install pylint==2.12.0
fi
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do
    pylint --disable=all --load-plugins=docstring_checker \
        --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file;
    # Accumulate pylint's exit status across files.
    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
exit $TOTAL_ERRORS
#For now, just warning:
#exit 0
# NOTE: stray "Footer" text (web-page extraction artifact) commented out so it is never executed.
================================================
FILE: codestyle/test_docstring_checker.py
================================================
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import docstring_checker
import pylint.testutils
import astroid
import pytest
import sys
class TestDocstring(pylint.testutils.CheckerTestCase):
CHECKER_CLASS = docstring_checker.DocstringChecker
    def test_one_line(self):
        """A short (<40 char) docstring spanning several lines raises W9001."""
        func_node = astroid.extract_node('''
        def test():
            """get
            news.
            """
            if True:
                return 5
            return 5
        ''')
        self.checker.visit_functiondef(func_node)
        got = self.linter.release_messages()
        assert len(got) == 1
        assert 'W9001' == got[0][0]
def test_one_line_1(self):
func_node = astroid.extract_node('''
def test():
"""get news"""
if True:
return 5
return 5
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9002' == got[0][0]
def test_args(self):
func_node = astroid.extract_node('''
def test(scale, mean):
"""get news.
Args:
scale (int): scale is the number.
"""
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9003' == got[0][0]
def test_missing(self):
func_node = astroid.extract_node('''
def test():
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9005' == got[0][0]
def test_indent(self):
func_node = astroid.extract_node('''
def test():
""" get get get get get get get get
get get get get get get get get.
"""
pass
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9006' == got[0][0]
def test_with_resturns(self):
func_node = astroid.extract_node('''
def test():
"""get news.
Args:
scale (int): scale is the number.
"""
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
return mean
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9007' == got[0][0]
def test_with_raises(self):
func_node = astroid.extract_node('''
def test():
"""get news.
Args:
scale (int): scale is the number.
"""
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
mean=scale
raise ValueError('A very specific bad thing happened.')
''')
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 1
assert 'W9008' == got[0][0]
def test_no_message(self):
p = '''
def fc(input,
size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
act=None,
name=None):
"""
**Fully Connected Layer**
The fully connected layer can take multiple tensors as its inputs. It
creates a variable called weights for each input tensor, which represents
a fully connected weight matrix from each input unit to each output unit.
The fully connected layer multiplies each input tensor with its coresponding
weight to produce an output Tensor. If multiple input tensors are given,
the results of multiple multiplications will be sumed up. If bias_attr is
not None, a bias variable will be created and added to the output. Finally,
if activation is not None, it will be applied to the output as well.
This process can be formulated as follows:
Args:
input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
the input tensor(s) is at least 2.
size(int): The number of output units in this layer.
num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multidimensional tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flatten to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 6-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
parameters/weights of this layer.
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to None, no bias will be added to the output units.
act (str, default None): Activation to be applied to the output of this layer.
name (str, default None): The name of this layer.
Returns:
A tensor variable storing the transformation result.
Raises:
ValueError: If rank of the input tensor is less than 2.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.layers.fc(input=data, size=1000, act="tanh")
"""
raise ValueError('A very specific bad thing happened.')
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
size = 1
return size
'''
func_node = astroid.extract_node(p)
self.checker.visit_functiondef(func_node)
got = self.linter.release_messages()
assert len(got) == 0
================================================
FILE: docs/cluster_deployment.md
================================================
## 集群部署
本文档介绍在集群上使用分布式进行大模型训练的方法,包括在 Kubernetes 上使用 PaddlePaddle 分布式和在云上使用的方法。
### 1. Kubernetes部署
在 Kubernetes 上部署分布式任务需要安装 [paddle-operator](https://github.com/PaddleFlow/paddle-operator) 。
paddle-operator 通过添加自定义资源类型 (paddlejob) 以及部署 controller 和一系列 Kubernetes 原生组件的方式实现简单定义即可运行 PaddlePaddle 任务的需求。
目前支持运行 ParameterServer (PS) 和 Collective 两种分布式任务,当然也支持运行单节点任务。
**paddle-operator 安装**
安装 paddle-operator 需要有已经安装的 Kubernetes (v1.16+) 集群和 [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) (v1.16+) 工具。
本节所需配置文件和示例可以在 [这里](https://github.com/PaddleFlow/paddle-operator/tree/main/deploy) 找到,
可以通过 *git clone* 或者复制文件内容保存。
```yaml
deploy
|-- examples
| |-- resnet.yaml
| |-- wide_and_deep.yaml
| |-- wide_and_deep_podip.yaml
| |-- wide_and_deep_service.yaml
| `-- wide_and_deep_volcano.yaml
|-- v1
| |-- crd.yaml
| `-- operator.yaml
```
执行以下命令,
```shell
kubectl create -f https://raw.githubusercontent.com/PaddleFlow/paddle-operator/dev/deploy/v1/crd.yaml
```
或者
```shell
kubectl create -f deploy/v1/crd.yaml
```
通过以下命令查看是否成功,
```shell
kubectl get crd
NAME CREATED AT
paddlejobs.batch.paddlepaddle.org 2021-02-08T07:43:24Z
```
执行以下部署命令,
```shell
kubectl create -f https://raw.githubusercontent.com/PaddleFlow/paddle-operator/dev/deploy/v1/operator.yaml
```
或者
```shell
kubectl create -f deploy/v1/operator.yaml
```
通过以下命令查看部署结果和运行状态,
```shell
kubectl -n paddle-system get pods
NAME READY STATUS RESTARTS AGE
paddle-controller-manager-698dd7b855-n65jr 1/1 Running 0 1m
```
通过查看 controller 日志以确保运行正常,
```shell
kubectl -n paddle-system logs paddle-controller-manager-698dd7b855-n65jr
```
提交 demo 任务查看效果,
```shell
kubectl -n paddle-system create -f deploy/examples/wide_and_deep.yaml
```
查看 paddlejob 任务状态, pdj 为 paddlejob 的缩写,
```shell
kubectl -n paddle-system get pdj
NAME STATUS MODE AGE
wide-ande-deep-service Completed PS 4m4s
```
以上信息可以看出:训练任务已经正确完成,该任务为 ps 模式。
可通过 cleanPodPolicy 配置任务完成/失败后的 pod 删除策略,详见任务配置。
训练期间可以通过如下命令查看 pod 状态,
```shell
kubectl -n paddle-system get pods
```
**paddlejob 任务提交**
本resnet示例为 Collective 模式,使用 GPU 进行训练,只需要配置 worker,worker 配置中需要声明使用的 GPU 信息。
准备配置文件,
```yaml
apiVersion: batch.paddlepaddle.org/v1
kind: PaddleJob
metadata:
name: resnet
spec:
cleanPodPolicy: Never
worker:
replicas: 2
template:
spec:
containers:
- name: paddle
image: registry.baidubce.com/paddle-operator/demo-resnet:v1
command:
- python
args:
- "-m"
- "paddle.distributed.launch"
- "train_fleet.py"
volumeMounts:
- mountPath: /dev/shm
name: dshm
resources:
limits:
nvidia.com/gpu: 1
volumes:
- name: dshm
emptyDir:
medium: Memory
```
注意:
* 这里需要添加 shared memory 挂载以防止缓存出错。
* 本示例采用内置 flower 数据集,程序启动后会进行下载,根据网络环境可能等待较长时间。
提交任务: 使用 kubectl 提交 yaml 配置文件以创建任务,
```shell
kubectl -n paddle-system create -f resnet.yaml
```
**卸载**
通过以下命令卸载部署的组件,
```shell
kubectl delete -f deploy/v1/crd.yaml -f deploy/v1/operator.yaml
```
*注意:重新安装时,建议先卸载再安装*
### 2. 公有云和私有云部署
在公有云上运行 PaddlePaddle 分布式建议通过选购容器引擎服务的方式,各大云厂商都推出了基于标准 Kubernetes 的云产品,然后根据上节中的教程安装使用即可。
| 云厂商 | 容器引擎 | 链接 |
| --- | ---- | -------------------------------------------- |
| 百度云 | CCE | https://cloud.baidu.com/product/cce.html |
| 阿里云 | ACK | https://help.aliyun.com/product/85222.html |
| 华为云 | CCE | https://www.huaweicloud.com/product/cce.html |
更为方便的是使用百度提供的全功能AI开发平台 [BML](https://cloud.baidu.com/product/bml) 来使用,详细的使用方式请参考 [BML文档](https://ai.baidu.com/ai-doc/BML/pkhxhgo5v)。
================================================
FILE: docs/compression.md
================================================
# 模型压缩
------------------------------------------------------------------------------------------
## **简介**
PaddleFleetX 集成了 PaddleSlim 中的常见的压缩方法:量化训练(Qutization Aware Training,QAT)、结构化稀疏(Structured Pruning,SP)和知识蒸馏(Knowledge Distillation,KD)。本文会介绍如何在 PaddleFleetX 中使用这些功能,来压缩并且导出压缩后的模型。
## **特性**
- <a href=https://github.com/PaddlePaddle/PaddleSlim/tree/release/2.4/demo/dygraph/quant>量化训练</a>:通过将全连接层的矩阵乘计算由 Float 浮点型优化为 INT8 整型来优化推理性能;
- <a href=https:
gitextract_it7z4sjw/
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── benchmarks/
│ ├── README.md
│ └── test_tipc/
│ ├── ernie/
│ │ └── dygraph/
│ │ └── hybrid_parallel/
│ │ ├── N1C1/
│ │ │ ├── ernie_bs16_fp16_DP1-MP1-PP1.sh
│ │ │ └── ernie_bs16_fp32_DP1-MP1-PP1.sh
│ │ ├── N1C8/
│ │ │ ├── ernie_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ └── ernie_bs16_fp32_DP2-MP2-PP2.sh
│ │ ├── N4C32/
│ │ │ ├── ernie_bs16_fp16_DP1-MP8-PP4.sh
│ │ │ ├── ernie_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ ├── ernie_bs16_fp16_DP4-MP8-PP1.sh
│ │ │ ├── ernie_bs16_fp32_DP1-MP8-PP4.sh
│ │ │ ├── ernie_bs16_fp32_DP2-MP8-PP2.sh
│ │ │ └── ernie_bs16_fp32_DP4-MP8-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ ├── gpt/
│ │ ├── dygraph/
│ │ │ ├── data_parallel/
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_1024_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_1024_flash_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ └── gpt_2048_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── finetune/
│ │ │ │ ├── N1C1/
│ │ │ │ │ ├── CE_gpt_finetune_CoLA_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_MRPC_acc_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_MRPC_f1_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_QNLI_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_RTE_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_SST2_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_STSB_pearson_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ ├── CE_gpt_finetune_STSB_spearman_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ └── CE_gpt_finetune_WNLI_bs32_fp16_DP1-MP1-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── hybrid_parallel/
│ │ │ │ ├── N1C1/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP1.sh
│ │ │ │ │ └── gpt_bs16_fp32_DP1-MP1-PP1.sh
│ │ │ │ ├── N1C4/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP4.sh
│ │ │ │ │ └── gpt_bs16_fp16_DP1-MP4-PP1.sh
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP1-PP8.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP2-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP4-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP8-PP1.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP2-MP2-PP2.sh
│ │ │ │ │ ├── gpt_bs64_fp16_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_bs64_fp32_DP8-MP1-PP1.sh
│ │ │ │ │ ├── gpt_recompute_bs16_fp16_DP2-MP2-PP2.sh
│ │ │ │ │ └── gpt_recompute_bs16_fp32_DP2-MP2-PP2.sh
│ │ │ │ ├── N4C32/
│ │ │ │ │ ├── gpt_bs16_fp16_DP1-MP8-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ │ ├── gpt_bs16_fp16_DP4-MP8-PP1.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP1-MP8-PP4.sh
│ │ │ │ │ ├── gpt_bs16_fp32_DP2-MP8-PP2.sh
│ │ │ │ │ └── gpt_bs16_fp32_DP4-MP8-PP1.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ ├── sequence_parallel/
│ │ │ │ ├── N1C8/
│ │ │ │ │ ├── gpt_sp_False_bs8_fp16_DP1-MP8-PP1.sh
│ │ │ │ │ └── gpt_sp_True_bs8_fp16_DP1-MP8-PP1.sh
│ │ │ │ ├── N4C32/
│ │ │ │ │ ├── gpt_sp_False_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ │ └── gpt_sp_True_bs16_fp16_DP2-MP8-PP2.sh
│ │ │ │ └── benchmark_common/
│ │ │ │ ├── prepare.sh
│ │ │ │ └── run_benchmark.sh
│ │ │ └── sharding/
│ │ │ ├── N1C2/
│ │ │ │ ├── gpt_stage2_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│ │ │ │ ├── gpt_stage3_bs16_fp16_DP1-MP1-PP1-Sharding2.sh
│ │ │ │ └── gpt_stage3_bs16_fp32_DP1-MP1-PP1-Sharding2.sh
│ │ │ ├── N2C16/
│ │ │ │ └── gpt_stage2_bs128_fp16_DP1-MP1-PP1-Sharding16.sh
│ │ │ └── benchmark_common/
│ │ │ ├── prepare.sh
│ │ │ └── run_benchmark.sh
│ │ └── static/
│ │ └── auto_parallel/
│ │ ├── N1C1/
│ │ │ └── gpt_auto_recompute_bs8_fp32_DP1-MP1-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ ├── imagen/
│ │ └── dygraph/
│ │ ├── N1C1/
│ │ │ ├── imagen_397M_text2im_64_bs1_fp32_DP1-MP1-PP1.sh
│ │ │ └── imagen_SR256_bs1_fp32_DP1-MP1-PP1.sh
│ │ ├── N1C8/
│ │ │ ├── imagen_2B_text2im_64_bs8_fp32_DP1-Sharding8.sh
│ │ │ ├── imagen_397M_text2im_64_bs8_fp32_DP8-MP1-PP1.sh
│ │ │ ├── imagen_SR256_bs8_fp32_DP8-MP1-PP1.sh
│ │ │ └── imagen_text2im_64_debertav2_bs8_fp32_DP8-MP1-PP1.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ └── vit/
│ └── dygraph/
│ ├── finetune/
│ │ ├── N1C8/
│ │ │ ├── ViT_large_patch16_384_ft_fused_False_bs512_fp16_DP.sh
│ │ │ └── ViT_large_patch16_384_ft_fused_True_bs512_fp16_DP.sh
│ │ └── benchmark_common/
│ │ ├── prepare.sh
│ │ └── run_benchmark.sh
│ └── pretrained/
│ ├── N2C16/
│ │ ├── ViT_large_patch16_224_pt_fused_False_bs128_fp16_DP.sh
│ │ └── ViT_large_patch16_224_pt_fused_True_bs128_fp16_DP.sh
│ └── benchmark_common/
│ ├── prepare.sh
│ └── run_benchmark.sh
├── codestyle/
│ ├── .gitignore
│ ├── clang_format.hook
│ ├── copyright.hook
│ ├── cpplint_pre_commit.hook
│ ├── docstring_checker.py
│ ├── pylint_pre_commit.hook
│ └── test_docstring_checker.py
├── docs/
│ ├── cluster_deployment.md
│ ├── compression.md
│ ├── deployment_faq.md
│ ├── docker_install.md
│ ├── quick_start.md
│ └── standard.md
├── examples/
│ └── transformer/
│ ├── __init__.py
│ ├── models/
│ │ └── GPT/
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── hybrid_parallel.md
│ │ │ ├── hybrid_profiler.md
│ │ │ ├── inference.md
│ │ │ ├── quantization_aware_training.md
│ │ │ ├── single_card.md
│ │ │ ├── single_finetune.md
│ │ │ └── structured_pruning.md
│ │ ├── finetune/
│ │ │ ├── configs/
│ │ │ │ ├── finetune_gpt_345M_single_card_glue.yaml
│ │ │ │ └── finetune_gpt_base.yaml
│ │ │ ├── impls.py
│ │ │ ├── run.py
│ │ │ └── run_task.sh
│ │ ├── generation/
│ │ │ ├── configs/
│ │ │ │ ├── generation_gpt_345M_dp8.yaml
│ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_gpt_base.yaml
│ │ │ │ ├── generation_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_6.7B_single_card.yaml
│ │ │ │ ├── inference_gpt_345M_dp8.yaml
│ │ │ │ └── inference_gpt_345M_single_card.yaml
│ │ │ ├── export.py
│ │ │ ├── impls.py
│ │ │ ├── inference.py
│ │ │ └── run.py
│ │ ├── offline-eval/
│ │ │ ├── configs/
│ │ │ │ ├── eval_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_gpt_base.yaml
│ │ │ │ ├── eval_pruned_gpt_345M_single_card.yaml
│ │ │ │ └── eval_qat_gpt_345M_single_card.yaml
│ │ │ ├── impls.py
│ │ │ └── run.py
│ │ ├── pretrain/
│ │ │ ├── configs/
│ │ │ │ ├── export_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ ├── pretrain_gpt_cn_345M_single_card.yaml
│ │ │ │ ├── prune_gpt_345M_single_card.yaml
│ │ │ │ ├── qat_gpt_345M_mp8.yaml
│ │ │ │ ├── qat_gpt_345M_single_card.yaml
│ │ │ │ └── qat_gpt_6.7B_sharding16.yaml
│ │ │ ├── export.py
│ │ │ ├── impls.py
│ │ │ └── run.py
│ │ └── pretrain_moe/
│ │ ├── configs/
│ │ │ ├── pretrain_moe_345M_single_card.yaml
│ │ │ └── pretrain_moe_base.yaml
│ │ ├── impls.py
│ │ └── run.py
│ └── utils/
│ ├── __init__.py
│ ├── components.py
│ ├── config.py
│ └── qat.py
├── ppfleetx/
│ ├── __init__.py
│ ├── configs/
│ │ ├── multimodal/
│ │ │ └── imagen/
│ │ │ ├── imagen_397M_text2im_64x64.yaml
│ │ │ ├── imagen_base.yaml
│ │ │ ├── imagen_super_resolution_1024.yaml
│ │ │ ├── imagen_super_resolution_256.yaml
│ │ │ ├── imagen_text2im_64x64_DebertaV2.yaml
│ │ │ └── imagen_text2im_64x64_T5-11B.yaml
│ │ ├── nlp/
│ │ │ ├── ernie/
│ │ │ │ ├── auto/
│ │ │ │ │ ├── finetune_ernie_345M_single_card.yaml
│ │ │ │ │ ├── finetune_ernie_base.yaml
│ │ │ │ │ ├── pretrain_ernie_base.yaml
│ │ │ │ │ └── pretrain_ernie_base_345M_single_card.yaml
│ │ │ │ ├── finetune_ernie_345M_single_card.yaml
│ │ │ │ ├── finetune_ernie_base.yaml
│ │ │ │ ├── inference_ernie_345M_single_card.yaml
│ │ │ │ ├── pretrain_ernie_base.yaml
│ │ │ │ ├── pretrain_ernie_base_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_ernie_base_345M_single_card.yaml
│ │ │ │ ├── pretrain_ernie_base_3D.yaml
│ │ │ │ ├── pretrain_ernie_base_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_ernie_large_single_card.yaml
│ │ │ │ └── qat_ernie_base.yaml
│ │ │ ├── gpt/
│ │ │ │ ├── auto/
│ │ │ │ │ ├── export_gpt_fp16_single_card.yaml
│ │ │ │ │ ├── generation_gpt_175B_mp8.yaml
│ │ │ │ │ ├── generation_gpt_345M_mp2.yaml
│ │ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ │ ├── generation_gpt_6.7B_mp1.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_dp8_tuning.yaml
│ │ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ │ └── qat_generation_gpt_345M_mp2.yaml
│ │ │ │ ├── eval_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── eval_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── export_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── finetune_gpt_345M_single_card_glue.yaml
│ │ │ │ ├── finetune_gpt_base.yaml
│ │ │ │ ├── generation_gpt_345M_dp8.yaml
│ │ │ │ ├── generation_gpt_345M_mp1.yaml
│ │ │ │ ├── generation_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_gpt_6.7B_single_mp1.yaml
│ │ │ │ ├── generation_pruned_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_345M_single_card.yaml
│ │ │ │ ├── generation_qat_gpt_6.7B_single_card.yaml
│ │ │ │ ├── inference_gpt_345M_dp8.yaml
│ │ │ │ ├── inference_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_1.3B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_13B_dp8.yaml
│ │ │ │ ├── pretrain_gpt_175B_mp8_pp16.yaml
│ │ │ │ ├── pretrain_gpt_345M_single_card.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_sharding16.yaml
│ │ │ │ ├── pretrain_gpt_6.7B_single_card.yaml
│ │ │ │ ├── pretrain_gpt_base.yaml
│ │ │ │ ├── pretrain_gpt_cn_345M_single_card.yaml
│ │ │ │ ├── prune_gpt_345M_single_card.yaml
│ │ │ │ ├── qat_gpt_345M_mp8.yaml
│ │ │ │ ├── qat_gpt_345M_single_card.yaml
│ │ │ │ └── qat_gpt_6.7B_sharding16.yaml
│ │ │ └── moe/
│ │ │ ├── pretrain_moe_1.3B_dp8.yaml
│ │ │ └── pretrain_moe_base.yaml
│ │ └── vis/
│ │ ├── base.yaml
│ │ ├── moco/
│ │ │ ├── moco_lincls_in1k_1n8c.yaml
│ │ │ ├── mocov1_pt_in1k_1n8c.yaml
│ │ │ └── mocov2_pt_in1k_1n8c.yaml
│ │ └── vit/
│ │ ├── ViT_base_patch16_224_inference.yaml
│ │ ├── ViT_base_patch16_224_pt_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_qat_cifar10_1n8c_dp_fp16o2.yaml
│ │ ├── ViT_base_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_large_patch16_384_ft_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_large_patch16_384_ft_qat_in1k_2n16c_dp_fp16o2.yaml
│ │ ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│ │ └── auto/
│ │ ├── ViT_tiny_patch16_224_ci_cifar10_1n8c_dp_fp16o2.yaml
│ │ └── base.yaml
│ ├── core/
│ │ ├── __init__.py
│ │ ├── engine/
│ │ │ ├── __init__.py
│ │ │ ├── auto_engine.py
│ │ │ ├── basic_engine.py
│ │ │ ├── eager_engine.py
│ │ │ └── inference_engine.py
│ │ └── module/
│ │ ├── __init__.py
│ │ └── basic_module.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── data_tools/
│ │ │ ├── __init__.py
│ │ │ ├── cpp/
│ │ │ │ ├── Makefile
│ │ │ │ ├── __init__.py
│ │ │ │ ├── compile.py
│ │ │ │ └── fast_index_map_helpers.cpp
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ └── preprocess/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── create_pretraining_data.py
│ │ │ │ ├── docs/
│ │ │ │ │ ├── CLUECorpus2020.md
│ │ │ │ │ ├── CLUECorpusSmall.md
│ │ │ │ │ ├── OpenWebText2.md
│ │ │ │ │ └── WuDaoCorpusBase.md
│ │ │ │ ├── trans_to_json.py
│ │ │ │ └── words_segmentation.py
│ │ │ └── gpt/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── preprocess_data.py
│ │ │ └── raw_trans_to_json.py
│ │ ├── dataset/
│ │ │ ├── __init__.py
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dataset_utils.py
│ │ │ │ └── ernie_dataset.py
│ │ │ ├── glue_dataset.py
│ │ │ ├── gpt_dataset.py
│ │ │ ├── multimodal_dataset.py
│ │ │ └── vision_dataset.py
│ │ ├── sampler/
│ │ │ ├── __init__.py
│ │ │ ├── batch_sampler.py
│ │ │ └── collate.py
│ │ ├── tokenizers/
│ │ │ ├── __init__.py
│ │ │ ├── debertav2_tokenizer.py
│ │ │ ├── ernie_tokenizer.py
│ │ │ ├── gpt_tokenizer.py
│ │ │ ├── t5_tokenization_utils.py
│ │ │ ├── t5_tokenizer.py
│ │ │ └── tokenization_utils_base.py
│ │ ├── transforms/
│ │ │ ├── __init__.py
│ │ │ ├── preprocess.py
│ │ │ └── utils.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ └── batch_collate_fn.py
│ ├── distributed/
│ │ ├── __init__.py
│ │ ├── apis/
│ │ │ ├── __init__.py
│ │ │ ├── amp.py
│ │ │ ├── comm_groups.py
│ │ │ ├── env.py
│ │ │ ├── io.py
│ │ │ └── strategy.py
│ │ └── protein_folding/
│ │ ├── __init__.py
│ │ ├── bp.py
│ │ ├── dap.py
│ │ ├── dp.py
│ │ └── scg.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── language_model/
│ │ │ ├── __init__.py
│ │ │ ├── auto_utils.py
│ │ │ ├── debertav2/
│ │ │ │ ├── __init__.py
│ │ │ │ └── modeling.py
│ │ │ ├── ernie/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_model.py
│ │ │ │ │ ├── auto_module.py
│ │ │ │ │ └── auto_transformer.py
│ │ │ │ ├── dygraph/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── hybrid_model.py
│ │ │ │ │ └── single_model.py
│ │ │ │ ├── ernie_module.py
│ │ │ │ ├── finetune_configs.yaml
│ │ │ │ └── layers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── distributed_transformer.py
│ │ │ │ ├── model_outputs.py
│ │ │ │ ├── transformer.py
│ │ │ │ └── utils.py
│ │ │ ├── gpt/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── auto/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auto_model.py
│ │ │ │ │ └── auto_module.py
│ │ │ │ └── dygraph/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── hybrid_model.py
│ │ │ │ ├── processor.py
│ │ │ │ ├── sequence_parallel_utils.py
│ │ │ │ └── single_model.py
│ │ │ ├── language_module.py
│ │ │ ├── metrics.py
│ │ │ ├── moe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── comm/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── comm_ops.py
│ │ │ │ ├── gate/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_gate.py
│ │ │ │ │ ├── gshard_gate.py
│ │ │ │ │ ├── naive_gate.py
│ │ │ │ │ └── switch_gate.py
│ │ │ │ ├── moe_layer.py
│ │ │ │ └── utils.py
│ │ │ ├── moe_exp/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experts.py
│ │ │ │ ├── layer.py
│ │ │ │ ├── mappings.py
│ │ │ │ └── sharded_moe.py
│ │ │ ├── t5/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── modeling.py
│ │ │ │ └── utils.py
│ │ │ └── utils.py
│ │ ├── multimodal_model/
│ │ │ ├── __init__.py
│ │ │ ├── clip/
│ │ │ │ └── __init__.py
│ │ │ ├── imagen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── modeling.py
│ │ │ │ ├── unet.py
│ │ │ │ └── utils.py
│ │ │ ├── multimodal_module.py
│ │ │ └── utils.py
│ │ ├── protein_folding/
│ │ │ ├── __init__.py
│ │ │ ├── all_atom.py
│ │ │ ├── attentions.py
│ │ │ ├── common.py
│ │ │ ├── evoformer.py
│ │ │ ├── outer_product_mean.py
│ │ │ ├── quat_affine.py
│ │ │ ├── r3.py
│ │ │ ├── residue_constants.py
│ │ │ └── template.py
│ │ └── vision_model/
│ │ ├── __init__.py
│ │ ├── factory.py
│ │ ├── general_classification_module.py
│ │ ├── layers/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── droppath.py
│ │ │ ├── embedding.py
│ │ │ ├── identity.py
│ │ │ ├── initializer.py
│ │ │ └── mlp.py
│ │ ├── loss/
│ │ │ ├── __init__.py
│ │ │ └── cross_entropy.py
│ │ ├── metrics/
│ │ │ ├── __init__.py
│ │ │ └── accuracy.py
│ │ ├── moco/
│ │ │ ├── __init__.py
│ │ │ └── moco.py
│ │ ├── moco_module.py
│ │ ├── resnet/
│ │ │ └── __init__.py
│ │ └── vit/
│ │ ├── __init__.py
│ │ └── vit.py
│ ├── ops/
│ │ ├── setup_cuda.py
│ │ ├── test_topp_sampling.py
│ │ └── topp_sampling.cu
│ ├── optims/
│ │ ├── __init__.py
│ │ ├── grad_clip.py
│ │ ├── lr_scheduler.py
│ │ └── optimizer.py
│ ├── tools/
│ │ ├── __init__.py
│ │ └── multiprocess_tool.py
│ └── utils/
│ ├── __init__.py
│ ├── check.py
│ ├── compression_helper.py
│ ├── config.py
│ ├── device.py
│ ├── download.py
│ ├── export.py
│ ├── file.py
│ ├── log.py
│ ├── tensor_fusion_helper.py
│ └── version.py
├── projects/
│ ├── ernie/
│ │ ├── auto_export_ernie_345M_mp1.sh
│ │ ├── auto_export_ernie_345M_mp2.sh
│ │ ├── auto_export_ernie_345M_mp2_npu.sh
│ │ ├── auto_export_ernie_345M_mp2_xpu.sh
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ └── inference.md
│ │ ├── export_ernie_345M_single_card.sh
│ │ ├── finetune_ernie_345M_single_card.sh
│ │ ├── finetune_ernie_345M_single_card_npu.sh
│ │ ├── inference.py
│ │ ├── pretrain_ernie_base.sh
│ │ ├── pretrain_ernie_base_175B_mp8_pp16.sh
│ │ ├── pretrain_ernie_base_3D.sh
│ │ ├── pretrain_ernie_base_3D_npu.sh
│ │ ├── pretrain_ernie_base_6.7B_sharding16.sh
│ │ ├── pretrain_ernie_large.sh
│ │ ├── pretrain_ernie_large_mp2_mlu.sh
│ │ ├── pretrain_ernie_large_mp2_npu.sh
│ │ ├── pretrain_ernie_large_mp2_pp2_npu.sh
│ │ ├── pretrain_ernie_large_npu.sh
│ │ ├── run_inference.sh
│ │ ├── run_inference_mp2.sh
│ │ ├── run_inference_mp2_npu.sh
│ │ └── run_inference_mp2_xpu.sh
│ ├── gpt/
│ │ ├── auto_export_gpt_175B_mp8.sh
│ │ ├── auto_export_gpt_345M_mp2.sh
│ │ ├── auto_export_gpt_345M_single_card.sh
│ │ ├── auto_export_gpt_6.7B_mp1.sh
│ │ ├── auto_export_gpt_fp16_single_card.sh
│ │ ├── auto_gpt_1.3B_dp8.sh
│ │ ├── auto_gpt_1.3B_dp8_tuning.sh
│ │ ├── auto_gpt_1.3B_single_card.sh
│ │ ├── auto_gpt_345M_single_card.sh
│ │ ├── auto_gpt_6.7B_sharding16.sh
│ │ ├── auto_qat_export_gpt_345M_mp2.sh
│ │ ├── benchmark.py
│ │ ├── docs/
│ │ │ ├── README.md
│ │ │ ├── auto_parallel.md
│ │ │ ├── hybrid_parallel.md
│ │ │ ├── hybrid_profiler.md
│ │ │ ├── inference.md
│ │ │ ├── quantization_aware_training.md
│ │ │ ├── single_card.md
│ │ │ ├── single_finetune.md
│ │ │ └── structured_pruning.md
│ │ ├── eval_prune_gpt_345M_single_card.sh
│ │ ├── eval_qat_gpt_345M_single_card.sh
│ │ ├── evaluate_gpt_345M_single_card.sh
│ │ ├── export_gpt_345M_single_card.sh
│ │ ├── export_prune_gpt_345M_single_card.sh
│ │ ├── export_qat_gpt_345M_single_card.sh
│ │ ├── finetune_gpt_345M_single_card.sh
│ │ ├── inference.py
│ │ ├── inference_gpt_6.7B_single_card.sh
│ │ ├── inference_gpt_multigpu.sh
│ │ ├── inference_gpt_single_card.sh
│ │ ├── pretrain_gpt_1.3B_dp8.sh
│ │ ├── pretrain_gpt_1.3B_single_card.sh
│ │ ├── pretrain_gpt_175B_mp8_pp16.sh
│ │ ├── pretrain_gpt_345M_single_card.sh
│ │ ├── pretrain_gpt_6.7B_sharding16.sh
│ │ ├── prune_gpt_345M_single_card.sh
│ │ ├── qat_gpt_345M_mp8.sh
│ │ ├── qat_gpt_345M_single_card.sh
│ │ ├── qat_gpt_6.7B_sharding16.sh
│ │ └── run_benchmark.sh
│ ├── imagen/
│ │ ├── README.md
│ │ ├── filelist/
│ │ │ └── laion_400M/
│ │ │ └── train
│ │ ├── run_super_resolution_1024_sharding128.sh
│ │ ├── run_super_resolution_256_dp128.sh
│ │ ├── run_super_resolution_256_single_card.sh
│ │ ├── run_text2im_2B_64x64_T5-11B_sharding8_dp32.sh
│ │ ├── run_text2im_397M_64x64_dp128.sh
│ │ ├── run_text2im_397M_64x64_single_card.sh
│ │ └── run_text2im_64x64_DebertaV2_dp8.sh
│ ├── moco/
│ │ ├── README.md
│ │ ├── run_mocov1_lincls_in1k.sh
│ │ ├── run_mocov1_pretrain_in1k.sh
│ │ ├── run_mocov2_lincls_in1k.sh
│ │ └── run_mocov2_pretrain_in1k.sh
│ ├── protein_folding/
│ │ └── README.md
│ ├── ufo2.0/
│ │ └── README.md
│ └── vit/
│ ├── README.md
│ ├── auto_vit_patch16_224_dp8.sh
│ ├── docs/
│ │ └── inference.md
│ ├── export_qat.sh
│ ├── inference.py
│ ├── run_finetune.sh
│ ├── run_finetune_fused_attention.sh
│ ├── run_inference_base_patch16_224.sh
│ ├── run_pretrain.sh
│ ├── run_pretrained_fused_attention.sh
│ └── run_qat.sh
├── requirements.txt
├── setup.py
├── tasks/
│ └── gpt/
│ ├── generation.py
│ ├── inference.py
│ └── run_generation.sh
└── tools/
├── auto.py
├── auto_export.py
├── eval.py
├── export.py
├── inference.py
└── train.py
SYMBOL INDEX (2117 symbols across 135 files)
FILE: codestyle/docstring_checker.py
function register (line 25) | def register(linter):
class Docstring (line 30) | class Docstring(object):
method __init__ (line 34) | def __init__(self):
method clear (line 38) | def clear(self):
method get_level (line 45) | def get_level(self, string, indent=' '):
method parse (line 54) | def parse(self, doc):
method get_returns (line 91) | def get_returns(self):
method get_raises (line 94) | def get_raises(self):
method get_examples (line 97) | def get_examples(self):
method _arg_with_type (line 100) | def _arg_with_type(self):
class DocstringChecker (line 110) | class DocstringChecker(BaseChecker):
method visit_functiondef (line 145) | def visit_functiondef(self, node):
method visit_module (line 168) | def visit_module(self, node):
method visit_classdef (line 171) | def visit_classdef(self, node):
method check_doc_string (line 174) | def check_doc_string(self, node):
method missing_doc_string (line 180) | def missing_doc_string(self, node):
method indent_style (line 191) | def indent_style(self, node, indent=4):
method one_line (line 217) | def one_line(self, node):
method has_period (line 239) | def has_period(self, node):
method with_raises (line 258) | def with_raises(self, node, doc):
method with_returns (line 284) | def with_returns(self, node, doc):
method all_args_in_doc (line 312) | def all_args_in_doc(self, node, doc):
FILE: codestyle/test_docstring_checker.py
class TestDocstring (line 22) | class TestDocstring(pylint.testutils.CheckerTestCase):
method test_one_line (line 25) | def test_one_line(self):
method test_one_line_1 (line 41) | def test_one_line_1(self):
method test_args (line 55) | def test_args(self):
method test_missing (line 76) | def test_missing(self):
method test_indent (line 97) | def test_indent(self):
method test_with_resturns (line 111) | def test_with_resturns(self):
method test_with_raises (line 137) | def test_with_raises(self):
method test_no_message (line 163) | def test_no_message(self):
FILE: examples/transformer/models/GPT/finetune/impls.py
function _get_model_size (line 35) | def _get_model_size(l, h, v, s):
function build_model (line 40) | def build_model(config):
function fit_impl (line 182) | def fit_impl(config, batch, forward_func, **kwargs):
function eval_impl (line 190) | def eval_impl(config, batch, model, loss_fn, eval_metric):
FILE: examples/transformer/models/GPT/finetune/run.py
function forward_func (line 162) | def forward_func(batch, model, loss_fn):
FILE: examples/transformer/models/GPT/generation/impls.py
function adjust_length_to_model (line 35) | def adjust_length_to_model(length, max_sequence_length):
function build_model (line 41) | def build_model(config):
function left_padding (line 76) | def left_padding(inputs, pad_id, padding="longest"):
FILE: examples/transformer/models/GPT/offline-eval/impls.py
function build_model (line 36) | def build_model(config):
function eval_impl (line 61) | def eval_impl(config, batch, model):
class LM_Eval_Dataset (line 93) | class LM_Eval_Dataset(paddle.io.Dataset):
method __init__ (line 94) | def __init__(self,
method __len__ (line 114) | def __len__(self):
method _construct_sample (line 117) | def _construct_sample(self, tokens):
method __getitem__ (line 137) | def __getitem__(self, idx):
class Lambada_Eval_Dataset (line 153) | class Lambada_Eval_Dataset(paddle.io.Dataset):
method __init__ (line 154) | def __init__(self, tokens, labels, max_seq_len, eos_token_id, **kwargs):
method __len__ (line 160) | def __len__(self):
method _construct_sample (line 163) | def _construct_sample(self, tokens):
method __getitem__ (line 182) | def __getitem__(self, idx):
function wikitext_detokenizer (line 197) | def wikitext_detokenizer(string):
function get_tokens (line 235) | def get_tokens(tokenizer, text, strict=True):
FILE: examples/transformer/models/GPT/pretrain/impls.py
function _get_model_size (line 40) | def _get_model_size(l, h, v, s):
function _vocab_size_with_padding (line 55) | def _vocab_size_with_padding(vocab_size, div_unit, mp_degree):
function build_model (line 66) | def build_model(config):
function model_forward_backward (line 118) | def model_forward_backward(config, batch, forward_func, **kwargs):
function optim_update_params (line 187) | def optim_update_params(config, **kwargs):
function fit_impl (line 225) | def fit_impl(config, batch, forward_func, **kwargs):
function eval_impl (line 246) | def eval_impl(config, batch, model, loss_fn):
FILE: examples/transformer/models/GPT/pretrain/run.py
function forward_func (line 183) | def forward_func(batch, model, loss_fn):
FILE: examples/transformer/models/GPT/pretrain_moe/impls.py
function _get_model_size (line 39) | def _get_model_size(l, h, v, s, ne, ei):
function build_model (line 71) | def build_model(config):
function model_forward_backward (line 121) | def model_forward_backward(config, batch, forward_func, **kwargs):
function optim_update_params (line 200) | def optim_update_params(config, **kwargs):
function fit_impl (line 234) | def fit_impl(config, batch, forward_func, **kwargs):
function eval_impl (line 255) | def eval_impl(config, batch, model, loss_fn):
FILE: examples/transformer/models/GPT/pretrain_moe/run.py
function forward_func (line 159) | def forward_func(batch, model, loss_fn):
FILE: examples/transformer/utils/components.py
function build_dataset (line 32) | def build_dataset(config_dataset, **config_kwargs):
function build_batch_sampler (line 47) | def build_batch_sampler(config_sampler, dataset, **config_kwargs):
function build_dataloader (line 64) | def build_dataloader(config_loader,
function build_lr_scheduler (line 101) | def build_lr_scheduler(lr_config):
function build_grad_clip (line 116) | def build_grad_clip(grad_clip_config):
function build_optimizer (line 126) | def build_optimizer(config, model, lr_scheduler=None, multi_precision=Fa...
function build_profiler (line 146) | def build_profiler(profiler_config):
function profiler_done (line 169) | def profiler_done(profiler, profiler_config):
function _print_summary (line 191) | def _print_summary(profiler, profiler_config):
FILE: examples/transformer/utils/config.py
class AttrDict (line 36) | class AttrDict(dict):
method __getattr__ (line 37) | def __getattr__(self, key):
method __setattr__ (line 40) | def __setattr__(self, key, value):
method __copy__ (line 46) | def __copy__(self):
method __deepcopy__ (line 52) | def __deepcopy__(self, memo):
method setdefault (line 62) | def setdefault(self, k, default=None):
function create_attr_dict (line 70) | def create_attr_dict(yaml_config):
function parse_config (line 86) | def parse_config(cfg_file):
function print_dict (line 128) | def print_dict(d, delimiter=0):
function print_config (line 148) | def print_config(config):
function check_config (line 158) | def check_config(config):
function override (line 177) | def override(dl, ks, v):
function override_config (line 214) | def override_config(config, options=None):
function get_config (line 242) | def get_config(fname, overrides=None, show=False):
function parse_args (line 261) | def parse_args():
function is_fused_matmul_bias_supported (line 279) | def is_fused_matmul_bias_supported():
function process_dist_config (line 286) | def process_dist_config(configs):
function process_global_configs (line 351) | def process_global_configs(config):
function process_model_configs (line 432) | def process_model_configs(config):
function process_optim_configs (line 502) | def process_optim_configs(config):
function process_data_configs (line 523) | def process_data_configs(config):
function process_inference_configs (line 549) | def process_inference_configs(config):
function process_configs (line 565) | def process_configs(config):
FILE: examples/transformer/utils/qat.py
function compress_model (line 21) | def compress_model(config, model, input_spec):
FILE: ppfleetx/core/engine/auto_engine.py
class AutoEngine (line 39) | class AutoEngine(BasicEngine):
method __init__ (line 40) | def __init__(self, configs, module=None, mode='train'):
method fit (line 104) | def fit(self, epoch=1, train_dataset=None, valid_dataset=None):
method evaluate (line 124) | def evaluate(self, valid_dataset=None):
method predict (line 133) | def predict(self, test_dataset=None):
method export (line 142) | def export(self):
method tune (line 146) | def tune(self, tune_dataset=None):
method save (line 152) | def save(self, training=True):
method load (line 159) | def load(self):
method export_from_prog (line 165) | def export_from_prog(self):
FILE: ppfleetx/core/engine/basic_engine.py
class BasicEngine (line 16) | class BasicEngine:
method __init__ (line 20) | def __init__(self, *args, **kwargs):
method fit (line 23) | def fit(self, *args, **kwargs):
method evaluate (line 26) | def evaluate(self, *args, **kwargs):
method predict (line 29) | def predict(self, *args, **kwargs):
method save (line 32) | def save(self, *args, **kwargs):
method load (line 35) | def load(self, *args, **kwargs):
method inference (line 38) | def inference(self, *args, **kwargs):
FILE: ppfleetx/core/engine/eager_engine.py
class EagerEngine (line 47) | class EagerEngine(BasicEngine):
method __init__ (line 53) | def __init__(self, configs, module, optimizer=None, lr=None, mode='tra...
method _wrap_with_fleet (line 274) | def _wrap_with_fleet(self):
method _wrap_sharding_2_3 (line 281) | def _wrap_sharding_2_3(self):
method _wrap_3D_parallel (line 309) | def _wrap_3D_parallel(self):
method _train_one_epoch (line 325) | def _train_one_epoch(self,
method fit (line 422) | def fit(self, epoch=1, train_data_loader=None, valid_data_loader=None):
method _fit_impl (line 479) | def _fit_impl(self, batch):
method _model_forward_backward (line 522) | def _model_forward_backward(self, batch):
method _optim_update_params (line 563) | def _optim_update_params(self):
method evaluate (line 581) | def evaluate(self, epoch=1, valid_data_loader=None):
method _evaluate_one_epoch (line 610) | def _evaluate_one_epoch(self, epoch=1, valid_data_loader=None):
method _evaluate_impl (line 642) | def _evaluate_impl(self, batch):
method predict (line 660) | def predict(self, epoch=1, test_data_loader=None):
method _predict_impl (line 700) | def _predict_impl(self, batch):
method save (line 717) | def save(self, epoch=0, step=0):
method compress_model (line 757) | def compress_model(self):
method load (line 776) | def load(self):
method export (line 832) | def export(self):
method inference (line 852) | def inference(self, data):
method _print_summary (line 866) | def _print_summary(self):
method _profiler_done (line 905) | def _profiler_done(self):
FILE: ppfleetx/core/engine/inference_engine.py
class _StaticGuard (line 30) | class _StaticGuard(object):
method __init__ (line 31) | def __init__(self):
method __enter__ (line 34) | def __enter__(self):
method __exit__ (line 37) | def __exit__(self, exc_type, exc_val, exc_tb):
class TensorRTConfig (line 41) | class TensorRTConfig(object):
method __init__ (line 56) | def __init__(self,
method precision (line 75) | def precision(self):
method precision (line 79) | def precision(self, value):
method collect_shape (line 87) | def collect_shape(self):
method collect_shape (line 91) | def collect_shape(self, value):
class InferenceEngine (line 104) | class InferenceEngine(object):
method __init__ (line 114) | def __init__(self,
method _check_model (line 144) | def _check_model(self):
method _generate_comm_init_config (line 173) | def _generate_comm_init_config(self, rank, nranks):
method _init_predictor (line 187) | def _init_predictor(self):
method input_names (line 246) | def input_names(self):
method output_names (line 249) | def output_names(self):
method predict (line 252) | def predict(self, data):
FILE: ppfleetx/core/module/basic_module.py
class BasicModule (line 29) | class BasicModule(nn.Layer):
method __init__ (line 34) | def __init__(self, configs, *args, **kwargs):
method process_configs (line 39) | def process_configs(self, configs):
method get_model (line 42) | def get_model(self):
method get_loss_fn (line 45) | def get_loss_fn(self):
method pretreating_batch (line 48) | def pretreating_batch(self, batch):
method forward (line 51) | def forward(self, *args, **kwargs):
method training_step (line 54) | def training_step(self, *args, **kwargs):
method training_step_end (line 57) | def training_step_end(self, *args, **kwargs):
method validation_step (line 60) | def validation_step(self, *args, **kwargs):
method validation_step_end (line 63) | def validation_step_end(self, *args, **kwargs):
method test_step (line 66) | def test_step(self, *args, **kwargs):
method test_step_end (line 69) | def test_step_end(self, *args, **kwargs):
method backward (line 72) | def backward(self, loss):
method input_spec (line 75) | def input_spec(self):
method inference_end (line 79) | def inference_end(self, outputs):
method training_epoch_end (line 82) | def training_epoch_end(self, *args, **kwargs):
method validation_epoch_end (line 85) | def validation_epoch_end(self, *args, **kwargs):
FILE: ppfleetx/data/__init__.py
function build_auto_dataset (line 28) | def build_auto_dataset(config, mode):
function build_dataset (line 57) | def build_dataset(config, mode):
function build_dataloader (line 69) | def build_dataloader(config, mode):
FILE: ppfleetx/data/data_tools/cpp/compile.py
function compile_helper (line 20) | def compile_helper():
FILE: ppfleetx/data/data_tools/cpp/fast_index_map_helpers.cpp
function build_blending_indices (line 32) | void build_blending_indices(
function build_sample_idx (line 92) | py::array build_sample_idx(const py::array_t<int64_t> &sizes_,
function get_target_sample_len (line 179) | inline int32_t get_target_sample_len(const int32_t short_seq_ratio,
function build_mapping_impl (line 194) | py::array
function build_mapping (line 431) | py::array build_mapping(const py::array_t<int64_t> &docs_,
function build_blocks_mapping_impl (line 455) | py::array build_blocks_mapping_impl(
function build_blocks_mapping (line 671) | py::array build_blocks_mapping(
function PYBIND11_MODULE (line 693) | PYBIND11_MODULE(fast_index_map_helpers, m) {
FILE: ppfleetx/data/data_tools/ernie/preprocess/create_pretraining_data.py
function get_args (line 36) | def get_args():
function lexical_analysis_fn (line 122) | def lexical_analysis_fn():
function chinese_segmentation_fn (line 133) | def chinese_segmentation_fn():
function jieba_segmentation_fn (line 144) | def jieba_segmentation_fn():
function get_whole_word_mask_tokens (line 161) | def get_whole_word_mask_tokens(tokens, words, max_word_length=6):
class IdentitySplitter (line 226) | class IdentitySplitter(object):
method tokenize (line 227) | def tokenize(self, *text):
class NewlineSplitter (line 231) | class NewlineSplitter():
method tokenize (line 232) | def tokenize(self, text):
class Converter (line 236) | class Converter(object):
method __init__ (line 237) | def __init__(self, args):
method initializer (line 240) | def initializer(self):
method encode (line 294) | def encode(self, json_line):
function main (line 308) | def main():
FILE: ppfleetx/data/data_tools/ernie/preprocess/trans_to_json.py
function get_args (line 29) | def get_args():
function raw_text_to_json (line 75) | def raw_text_to_json(path, doc_spliter="", json_key="text", min_doc_leng...
function merge_file (line 108) | def merge_file(file_paths, output_path):
function shuffle_file (line 122) | def shuffle_file(output_path):
function main (line 131) | def main():
FILE: ppfleetx/data/data_tools/ernie/preprocess/words_segmentation.py
function get_args (line 26) | def get_args():
function lexical_analysis_fn (line 64) | def lexical_analysis_fn():
function chinese_segmentation_fn (line 75) | def chinese_segmentation_fn():
function jieba_segmentation_fn (line 86) | def jieba_segmentation_fn():
function read_wudao (line 103) | def read_wudao(path):
function read_jsonl (line 115) | def read_jsonl(path):
function text_to_text (line 134) | def text_to_text(path, output_path, read_func, seg_func):
function main (line 175) | def main():
FILE: ppfleetx/data/data_tools/gpt/preprocess_data.py
function get_args (line 44) | def get_args():
function lexical_analysis_fn (line 136) | def lexical_analysis_fn():
function chinese_segmentation_fn (line 147) | def chinese_segmentation_fn():
function jieba_segmentation_fn (line 158) | def jieba_segmentation_fn():
function get_whole_word_mask_tokens (line 168) | def get_whole_word_mask_tokens(tokens, words, max_word_length=4):
class IdentitySplitter (line 230) | class IdentitySplitter(object):
method tokenize (line 231) | def tokenize(self, *text):
class NewlineSplitter (line 235) | class NewlineSplitter():
method tokenize (line 236) | def tokenize(self, text):
class Converter (line 240) | class Converter(object):
method __init__ (line 241) | def __init__(self, args):
method initializer (line 244) | def initializer(self):
method encode (line 283) | def encode(self, json_line):
function main (line 297) | def main():
FILE: ppfleetx/data/data_tools/gpt/raw_trans_to_json.py
function get_args (line 29) | def get_args():
function raw_text_to_json (line 75) | def raw_text_to_json(path, doc_spliter="", json_key="text", min_doc_leng...
function merge_file (line 108) | def merge_file(file_paths, output_path):
function shuffle_file (line 122) | def shuffle_file(output_path):
function main (line 131) | def main():
FILE: ppfleetx/data/dataset/ernie/dataset_utils.py
function get_local_rank (line 32) | def get_local_rank():
function get_datasets_weights_and_num_samples (line 46) | def get_datasets_weights_and_num_samples(data_prefix,
class MMapIndexedDataset (line 78) | class MMapIndexedDataset(paddle.io.Dataset):
method __init__ (line 79) | def __init__(self, path, skip_warmup=False):
method __getstate__ (line 100) | def __getstate__(self):
method __len__ (line 103) | def __len__(self):
method __getitem__ (line 107) | def __getitem__(self, idx):
method get (line 127) | def get(self, idx, offset=0, length=None):
method sizes (line 143) | def sizes(self):
method doc_idx (line 147) | def doc_idx(self):
method get_doc_idx (line 150) | def get_doc_idx(self):
method set_doc_idx (line 153) | def set_doc_idx(self, doc_idx_):
function make_indexed_dataset (line 157) | def make_indexed_dataset(data_prefix, data_impl=None, skip_warmup=False):
function get_a_and_b_segments (line 161) | def get_a_and_b_segments(sample, np_rng):
function truncate_segments (line 193) | def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens,
function create_tokens_and_tokentypes (line 214) | def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id):
function is_start_piece (line 245) | def is_start_piece(piece):
function create_masked_lm_predictions (line 254) | def create_masked_lm_predictions(tokens,
function pad_and_convert_to_numpy (line 496) | def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions,
function get_indexed_dataset_ (line 529) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup):
function get_train_valid_test_split_ (line 548) | def get_train_valid_test_split_(splits_string, size):
function get_samples_mapping (line 576) | def get_samples_mapping(indexed_dataset, data_prefix, num_epochs,
FILE: ppfleetx/data/dataset/ernie/ernie_dataset.py
function get_local_rank (line 36) | def get_local_rank():
class ErnieDataset (line 46) | class ErnieDataset(paddle.io.Dataset):
method __init__ (line 47) | def __init__(self, input_dir, tokenizer_type, split, num_samples, mode,
method __len__ (line 126) | def __len__(self):
method __getitem__ (line 129) | def __getitem__(self, idx):
function build_training_sample (line 156) | def build_training_sample(sample,
function pad_and_convert_to_numpy (line 245) | def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions,
function get_train_data_file (line 280) | def get_train_data_file(input_dir):
function get_train_valid_test_split_ (line 303) | def get_train_valid_test_split_(splits, size):
class ErnieSeqClsDataset (line 327) | class ErnieSeqClsDataset(paddle.io.Dataset):
method __init__ (line 328) | def __init__(self, dataset_type, tokenizer_type, max_seq_len, mode):
method __getitem__ (line 350) | def __getitem__(self, idx):
method __len__ (line 353) | def __len__(self):
method _seq_trans_fn (line 356) | def _seq_trans_fn(self, example):
method _clue_trans_fn (line 362) | def _clue_trans_fn(self, example):
method _convert_example (line 369) | def _convert_example(self,
method _convert_clue (line 405) | def _convert_clue(self,
FILE: ppfleetx/data/dataset/glue_dataset.py
class CoLA (line 48) | class CoLA(paddle.io.Dataset):
method __init__ (line 75) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 111) | def __getitem__(self, idx):
method __len__ (line 127) | def __len__(self):
method class_num (line 131) | def class_num(self):
class SST2 (line 135) | class SST2(paddle.io.Dataset):
method __init__ (line 162) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 204) | def __getitem__(self, idx):
method __len__ (line 220) | def __len__(self):
method class_num (line 224) | def class_num(self):
class MNLI (line 228) | class MNLI(paddle.io.Dataset):
method __init__ (line 261) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 297) | def __getitem__(self, idx):
method __len__ (line 311) | def __len__(self):
method class_num (line 315) | def class_num(self):
class QNLI (line 319) | class QNLI(paddle.io.Dataset):
method __init__ (line 353) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 386) | def __getitem__(self, idx):
method __len__ (line 403) | def __len__(self):
method class_num (line 407) | def class_num(self):
class RTE (line 411) | class RTE(paddle.io.Dataset):
method __init__ (line 441) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 474) | def __getitem__(self, idx):
method __len__ (line 491) | def __len__(self):
method class_num (line 495) | def class_num(self):
class WNLI (line 499) | class WNLI(paddle.io.Dataset):
method __init__ (line 537) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 570) | def __getitem__(self, idx):
method __len__ (line 587) | def __len__(self):
method class_num (line 591) | def class_num(self):
class MRPC (line 595) | class MRPC(paddle.io.Dataset):
method __init__ (line 626) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 648) | def __getitem__(self, idx):
method __len__ (line 662) | def __len__(self):
method class_num (line 666) | def class_num(self):
class QQP (line 670) | class QQP(paddle.io.Dataset):
method __init__ (line 698) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 731) | def __getitem__(self, idx):
method __len__ (line 748) | def __len__(self):
method class_num (line 752) | def class_num(self):
class STSB (line 756) | class STSB(paddle.io.Dataset):
method __init__ (line 783) | def __init__(self, root, split, max_length=128):
method __getitem__ (line 816) | def __getitem__(self, idx):
method __len__ (line 836) | def __len__(self):
method class_num (line 840) | def class_num(self):
FILE: ppfleetx/data/dataset/gpt_dataset.py
class GPTDataset (line 42) | class GPTDataset(paddle.io.Dataset):
method __init__ (line 43) | def __init__(self,
method _construct_sample (line 153) | def _construct_sample(self, tokens):
method _get_single_sample_from_idx (line 173) | def _get_single_sample_from_idx(self, doc_index_f, doc_index_l, offset_f,
method __getitem__ (line 205) | def __getitem__(self, index):
method __len__ (line 216) | def __len__(self):
function get_train_data_file (line 220) | def get_train_data_file(input_dir):
function get_train_valid_test_split_ (line 250) | def get_train_valid_test_split_(splits, size):
function construct_samples_and_shuffle_data (line 274) | def construct_samples_and_shuffle_data(name, data_prefix, documents, sizes,
function _num_tokens (line 396) | def _num_tokens(documents, lens):
function _num_epochs (line 401) | def _num_epochs(tokens_per_epoch, seq_length, num_samples):
function _build_doc_idx (line 413) | def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch):
function _build_sample_idx (line 432) | def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs,
function _build_shuffle_idx (line 466) | def _build_shuffle_idx(num_samples, total_size, np_rng):
class LM_Eval_Dataset (line 484) | class LM_Eval_Dataset(paddle.io.Dataset):
method __init__ (line 485) | def __init__(self,
method __len__ (line 517) | def __len__(self):
method _construct_sample (line 520) | def _construct_sample(self, tokens):
method __getitem__ (line 540) | def __getitem__(self, idx):
method _wikitext_detokenizer (line 556) | def _wikitext_detokenizer(self, string):
class Lambada_Eval_Dataset (line 589) | class Lambada_Eval_Dataset(paddle.io.Dataset):
method __init__ (line 590) | def __init__(self, input_dir, max_seq_len, model_type="GPT", **kwargs):
method __len__ (line 608) | def __len__(self):
method _construct_sample (line 611) | def _construct_sample(self, tokens):
method __getitem__ (line 630) | def __getitem__(self, idx):
method _get_tokens (line 647) | def _get_tokens(self, tokenizer, text, strict=True):
FILE: ppfleetx/data/dataset/multimodal_dataset.py
function get_keys (line 40) | def get_keys(data_path, gpu_num):
class ImagenDataset (line 62) | class ImagenDataset(Dataset):
method __init__ (line 63) | def __init__(self,
method load_path (line 101) | def load_path(self, data_path, f_index=None):
method base64_to_image (line 120) | def base64_to_image(base64_str):
method get_line_for_line (line 128) | def get_line_for_line(self, filename):
method __getitem__ (line 151) | def __getitem__(self, index):
method __len__ (line 198) | def __len__(self):
FILE: ppfleetx/data/dataset/vision_dataset.py
class GeneralClsDataset (line 33) | class GeneralClsDataset(paddle.io.Dataset):
method __init__ (line 34) | def __init__(self,
method _load_anno (line 57) | def _load_anno(self):
method __getitem__ (line 77) | def __getitem__(self, idx):
method __len__ (line 98) | def __len__(self):
method class_num (line 102) | def class_num(self):
class ImageFolder (line 112) | class ImageFolder(paddle.io.Dataset):
method __init__ (line 144) | def __init__(self, root, extensions=IMG_EXTENSIONS, transform_ops=None):
method make_dataset (line 164) | def make_dataset(
method find_classes (line 241) | def find_classes(self, directory):
method __getitem__ (line 278) | def __getitem__(self, idx):
method __len__ (line 294) | def __len__(self) -> int:
method class_num (line 298) | def class_num(self):
class CIFAR10 (line 302) | class CIFAR10(paddle.io.Dataset):
method __init__ (line 303) | def __init__(
method _load_anno (line 337) | def _load_anno(self):
method __getitem__ (line 364) | def __getitem__(self, idx):
method __len__ (line 371) | def __len__(self):
method class_num (line 375) | def class_num(self):
class ContrativeLearningDataset (line 379) | class ContrativeLearningDataset(ImageFolder):
method __init__ (line 393) | def __init__(self, root, extensions=IMG_EXTENSIONS, transform_ops=None):
method __getitem__ (line 404) | def __getitem__(self, idx):
method __len__ (line 421) | def __len__(self) -> int:
method class_num (line 425) | def class_num(self):
FILE: ppfleetx/data/sampler/batch_sampler.py
class GPTBatchSampler (line 31) | class GPTBatchSampler(paddle.io.BatchSampler):
method __init__ (line 80) | def __init__(self,
method get_start_end_idx (line 123) | def get_start_end_idx(self):
method __iter__ (line 128) | def __iter__(self):
method __len__ (line 152) | def __len__(self):
method set_epoch (line 157) | def set_epoch(self, epoch=0, consumed_samples=0):
FILE: ppfleetx/data/sampler/collate.py
class Stack (line 27) | class Stack(object):
method __init__ (line 38) | def __init__(self, axis=0, dtype=None):
method __call__ (line 42) | def __call__(self, data):
class Pad (line 70) | class Pad(object):
method __init__ (line 92) | def __init__(self,
method __call__ (line 104) | def __call__(self, data):
class Tuple (line 173) | class Tuple(object):
method __init__ (line 190) | def __init__(self, fn, *args):
method __call__ (line 204) | def __call__(self, data):
class Dict (line 248) | class Dict(object):
method __init__ (line 266) | def __init__(self, fn):
method __call__ (line 278) | def __call__(self, data):
FILE: ppfleetx/data/tokenizers/debertav2_tokenizer.py
function get_debertav2_tokenizer (line 55) | def get_debertav2_tokenizer(name):
function debertav2_tokenize (line 60) | def debertav2_tokenize(texts, tokenizer):
class DebertaV2Tokenizer (line 113) | class DebertaV2Tokenizer(SpecialTokensMixin):
method __init__ (line 171) | def __init__(self,
method __len__ (line 214) | def __len__(self):
method from_pretrained (line 221) | def from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,
method _from_pretrained (line 375) | def _from_pretrained(cls,
method vocab_size (line 613) | def vocab_size(self):
method vocab (line 617) | def vocab(self):
method get_vocab (line 620) | def get_vocab(self):
method _dict_from_json_file (line 626) | def _dict_from_json_file(cls, json_file):
method _tokenize (line 631) | def _tokenize(self, text: str) -> List[str]:
method _convert_token_to_id (line 637) | def _convert_token_to_id(self, token):
method _convert_id_to_token (line 641) | def _convert_id_to_token(self, index):
method convert_tokens_to_string (line 646) | def convert_tokens_to_string(self, tokens):
method build_inputs_with_special_tokens (line 650) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
method get_special_tokens_mask (line 674) | def get_special_tokens_mask(self,
method create_token_type_ids_from_sequences (line 705) | def create_token_type_ids_from_sequences(self,
method prepare_for_tokenization (line 735) | def prepare_for_tokenization(self,
method save_vocabulary (line 744) | def save_vocabulary(self,
method _eventual_warn_about_too_long_sequence (line 750) | def _eventual_warn_about_too_long_sequence(self,
method _get_padding_truncation_strategies (line 775) | def _get_padding_truncation_strategies(self,
method _pad (line 920) | def _pad(self,
method pad (line 1008) | def pad(
method create_token_type_ids_from_sequences (line 1163) | def create_token_type_ids_from_sequences(self,
method _add_eos_if_not_present (line 1185) | def _add_eos_if_not_present(self, token_ids):
method truncate_sequences (line 1195) | def truncate_sequences(self,
method num_special_tokens_to_add (line 1315) | def num_special_tokens_to_add(self, pair: bool=False) -> int:
method prepare_for_model (line 1340) | def prepare_for_model(self,
method _batch_prepare_for_model (line 1472) | def _batch_prepare_for_model(
method _get_padding_truncation_strategies (line 1535) | def _get_padding_truncation_strategies(self,
method batch_encode_plus (line 1680) | def batch_encode_plus(self,
method _batch_encode_plus (line 1742) | def _batch_encode_plus(
method tokenize (line 1824) | def tokenize(self, text, **kwargs):
class SPMTokenizer (line 1899) | class SPMTokenizer:
method __init__ (line 1924) | def __init__(self,
method __getstate__ (line 1949) | def __getstate__(self):
method __setstate__ (line 1954) | def __setstate__(self, d):
method tokenize (line 1964) | def tokenize(self, text):
method convert_ids_to_tokens (line 1967) | def convert_ids_to_tokens(self, ids):
method decode (line 1973) | def decode(self, tokens, start=-1, end=-1, raw_text=None):
method add_special_token (line 1990) | def add_special_token(self, token):
method part_of_whole_word (line 1998) | def part_of_whole_word(self, token, is_bos=False):
method pad (line 2009) | def pad(self):
method bos (line 2012) | def bos(self):
method eos (line 2015) | def eos(self):
method unk (line 2018) | def unk(self):
method mask (line 2021) | def mask(self):
method sym (line 2024) | def sym(self, id):
method id (line 2027) | def id(self, sym):
method _encode_as_pieces (line 2030) | def _encode_as_pieces(self, text):
method split_to_words (line 2039) | def split_to_words(self, text):
method _run_strip_accents (line 2075) | def _run_strip_accents(self, text):
method _run_split_on_punc (line 2086) | def _run_split_on_punc(self, text):
method save_pretrained (line 2106) | def save_pretrained(self, path: str, filename_prefix: str=None):
function _is_whitespace (line 2116) | def _is_whitespace(char):
function _is_control (line 2128) | def _is_control(char):
function _is_punctuation (line 2140) | def _is_punctuation(char):
function convert_to_unicode (line 2156) | def convert_to_unicode(text):
FILE: ppfleetx/data/tokenizers/ernie_tokenizer.py
function get_ernie_tokenizer (line 20) | def get_ernie_tokenizer(tokenizer_type):
FILE: ppfleetx/data/tokenizers/gpt_tokenizer.py
function lru_cache (line 36) | def lru_cache():
function bytes_to_unicode (line 61) | def bytes_to_unicode():
function get_pairs (line 85) | def get_pairs(word):
class GPTTokenizer (line 97) | class GPTTokenizer(object):
method from_pretrained (line 110) | def from_pretrained(cls,
method __init__ (line 179) | def __init__(self,
method __call__ (line 222) | def __call__(self,
method encode_plus (line 279) | def encode_plus(self,
method num_special_tokens_to_add (line 381) | def num_special_tokens_to_add(self, pair: bool=False) -> int:
method build_inputs_with_special_tokens (line 388) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
method create_token_type_ids_from_sequences (line 393) | def create_token_type_ids_from_sequences(self,
method truncate_sequences (line 400) | def truncate_sequences(
method pad (line 481) | def pad(
method _pad (line 542) | def _pad(
method __len__ (line 621) | def __len__(self):
method set_special_tokens (line 624) | def set_special_tokens(self, special_tokens):
method bpe (line 641) | def bpe(self, token):
method tokenize (line 684) | def tokenize(self, text):
method convert_tokens_to_ids (line 697) | def convert_tokens_to_ids(self, tokens):
method convert_ids_to_string (line 719) | def convert_ids_to_string(self, ids):
method convert_ids_to_tokens (line 740) | def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
method encode (line 751) | def encode(self, text):
method decode (line 754) | def decode(self, tokens):
method save_vocabulary (line 763) | def save_vocabulary(self, vocab_path):
method vocab_size (line 806) | def vocab_size(self):
method vocab (line 810) | def vocab(self):
method inv_vocab (line 814) | def inv_vocab(self):
method eos_token_id (line 818) | def eos_token_id(self):
FILE: ppfleetx/data/tokenizers/t5_tokenization_utils.py
class Trie (line 50) | class Trie:
method __init__ (line 56) | def __init__(self):
method add (line 59) | def add(self, word):
method split (line 88) | def split(self, text):
method cut_text (line 243) | def cut_text(self, text, offsets):
function _is_whitespace (line 267) | def _is_whitespace(char):
function _is_control (line 279) | def _is_control(char):
function _is_punctuation (line 291) | def _is_punctuation(char):
function _is_end_of_word (line 307) | def _is_end_of_word(text):
function _is_start_of_word (line 315) | def _is_start_of_word(text):
function _insert_one_token_to_ordered_list (line 323) | def _insert_one_token_to_ordered_list(token_list, new_token):
class PreTrainedTokenizer (line 338) | class PreTrainedTokenizer(PreTrainedTokenizerBase):
method __init__ (line 351) | def __init__(self, **kwargs):
method is_fast (line 364) | def is_fast(self):
method vocab_size (line 368) | def vocab_size(self):
method get_added_vocab (line 374) | def get_added_vocab(self):
method __len__ (line 383) | def __len__(self):
method _add_tokens (line 389) | def _add_tokens(self, new_tokens, special_tokens=False):
method _create_trie (line 460) | def _create_trie(self, unique_no_split_tokens):
method num_special_tokens_to_add (line 471) | def num_special_tokens_to_add(self, pair):
method tokenize (line 496) | def tokenize(self, text, **kwargs):
method _tokenize (line 570) | def _tokenize(self, text, **kwargs):
method convert_tokens_to_ids (line 579) | def convert_tokens_to_ids(self, tokens):
method _convert_token_to_id_with_added_voc (line 601) | def _convert_token_to_id_with_added_voc(self, token):
method _convert_token_to_id (line 609) | def _convert_token_to_id(self, token):
method _encode_plus (line 612) | def _encode_plus(self,
method _batch_encode_plus (line 690) | def _batch_encode_plus(
method _batch_prepare_for_model (line 774) | def _batch_prepare_for_model(
method prepare_for_tokenization (line 837) | def prepare_for_tokenization(self,
method get_special_tokens_mask (line 862) | def get_special_tokens_mask(self,
method convert_ids_to_tokens (line 896) | def convert_ids_to_tokens(self, ids: int,
method convert_ids_to_tokens (line 901) | def convert_ids_to_tokens(self,
method convert_ids_to_tokens (line 906) | def convert_ids_to_tokens(
method _convert_id_to_token (line 938) | def _convert_id_to_token(self, index: int) -> str:
method convert_tokens_to_string (line 941) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
method _decode (line 944) | def _decode(self,
FILE: ppfleetx/data/tokenizers/t5_tokenizer.py
function lru_cache (line 44) | def lru_cache():
function get_t5_tokenizer (line 86) | def get_t5_tokenizer(name=DEFAULT_T5_NAME):
function t5_tokenize (line 91) | def t5_tokenize(texts, tokenizer):
class T5Tokenizer (line 104) | class T5Tokenizer(SpecialTokensMixin):
method __init__ (line 117) | def __init__(self,
method from_pretrained (line 160) | def from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,
method _from_pretrained (line 282) | def _from_pretrained(cls,
method _eventual_warn_about_too_long_sequence (line 446) | def _eventual_warn_about_too_long_sequence(self,
method _get_padding_truncation_strategies (line 471) | def _get_padding_truncation_strategies(self,
method _pad (line 616) | def _pad(self,
method pad (line 704) | def pad(
method create_token_type_ids_from_sequences (line 859) | def create_token_type_ids_from_sequences(self,
method _add_eos_if_not_present (line 881) | def _add_eos_if_not_present(self, token_ids):
method build_inputs_with_special_tokens (line 891) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
method truncate_sequences (line 915) | def truncate_sequences(self,
method prepare_for_model (line 1035) | def prepare_for_model(self,
method _batch_prepare_for_model (line 1167) | def _batch_prepare_for_model(
method _get_padding_truncation_strategies (line 1230) | def _get_padding_truncation_strategies(self,
method batch_encode_plus (line 1375) | def batch_encode_plus(self,
method _batch_encode_plus (line 1437) | def _batch_encode_plus(
method tokenize (line 1519) | def tokenize(self, text, **kwargs):
method _tokenize (line 1593) | def _tokenize(self, text):
method prepare_for_tokenization (line 1597) | def prepare_for_tokenization(self,
method convert_tokens_to_ids (line 1622) | def convert_tokens_to_ids(self, tokens):
method _convert_token_to_id_with_added_voc (line 1644) | def _convert_token_to_id_with_added_voc(self, token):
method _convert_token_to_id (line 1652) | def _convert_token_to_id(self, token):
method num_special_tokens_to_add (line 1660) | def num_special_tokens_to_add(self, pair=False):
method build_inputs_with_special_tokens (line 1685) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
method _eventually_correct_t5_max_length (line 1710) | def _eventually_correct_t5_max_length(pretrained_model_name_or_path,
method vocab_size (line 1734) | def vocab_size(self):
method get_vocab (line 1737) | def get_vocab(self):
method get_special_tokens_mask (line 1745) | def get_special_tokens_mask(self,
method _add_eos_if_not_present (line 1775) | def _add_eos_if_not_present(self, token_ids):
method create_token_type_ids_from_sequences (line 1785) | def create_token_type_ids_from_sequences(self,
method build_inputs_with_special_tokens (line 1807) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
method __getstate__ (line 1831) | def __getstate__(self):
method __setstate__ (line 1836) | def __setstate__(self, d):
method _tokenize (line 1846) | def _tokenize(self, text: str):
method _convert_token_to_id (line 1850) | def _convert_token_to_id(self, token):
method _convert_id_to_token (line 1858) | def _convert_id_to_token(self, index):
method convert_tokens_to_string (line 1866) | def convert_tokens_to_string(self, tokens):
method save_vocabulary (line 1881) | def save_vocabulary(self, save_directory, filename_prefix=None):
method _dict_from_json_file (line 1902) | def _dict_from_json_file(cls, json_file):
FILE: ppfleetx/data/tokenizers/tokenization_utils_base.py
function is_sentencepiece_available (line 36) | def is_sentencepiece_available():
function is_tokenizers_available (line 40) | def is_tokenizers_available():
class AddedToken (line 49) | class AddedToken:
method __getstate__ (line 61) | def __getstate__(self):
function model_type_to_module_name (line 310) | def model_type_to_module_name(key):
class _LazyConfigMapping (line 319) | class _LazyConfigMapping(OrderedDict):
method __init__ (line 324) | def __init__(self, mapping):
method __getitem__ (line 329) | def __getitem__(self, key):
method keys (line 348) | def keys(self):
method values (line 351) | def values(self):
method items (line 355) | def items(self):
method __iter__ (line 359) | def __iter__(self):
method __contains__ (line 363) | def __contains__(self, item):
method register (line 366) | def register(self, key, value):
class Trie (line 377) | class Trie:
method __init__ (line 383) | def __init__(self):
method add (line 386) | def add(self, word: str):
method split (line 415) | def split(self, text: str) -> List[str]:
method cut_text (line 570) | def cut_text(self, text, offsets):
class ExplicitEnum (line 597) | class ExplicitEnum(Enum):
method _missing_ (line 603) | def _missing_(cls, value):
class TensorType (line 609) | class TensorType(ExplicitEnum):
class BatchEncoding (line 622) | class BatchEncoding(UserDict):
method __init__ (line 649) | def __init__(
method n_sequences (line 672) | def n_sequences(self) -> Optional[int]:
method is_fast (line 681) | def is_fast(self) -> bool:
method __getitem__ (line 690) | def __getitem__(self, item):
method __getattr__ (line 706) | def __getattr__(self, item: str):
method __getstate__ (line 712) | def __getstate__(self):
method __setstate__ (line 715) | def __setstate__(self, state):
method keys (line 722) | def keys(self):
method values (line 725) | def values(self):
method items (line 728) | def items(self):
method encodings (line 736) | def encodings(self):
method tokens (line 743) | def tokens(self, batch_index=0):
method sequence_ids (line 759) | def sequence_ids(self, batch_index=0):
method words (line 782) | def words(self, batch_index=0):
method word_ids (line 803) | def word_ids(self, batch_index: int=0) -> List[Optional[int]]:
method token_to_sequence (line 821) | def token_to_sequence(self, batch_or_token_index, token_index):
method token_to_word (line 862) | def token_to_word(self, batch_or_token_index, token_index=None):
method word_to_tokens (line 902) | def word_to_tokens(self,
method token_to_chars (line 957) | def token_to_chars(self, batch_or_token_index: int, token_index=None):
method char_to_token (line 998) | def char_to_token(self,
method word_to_chars (line 1043) | def word_to_chars(self,
method char_to_word (line 1092) | def char_to_word(self,
method convert_to_tensors (line 1137) | def convert_to_tensors(self,
class TruncationStrategy (line 1199) | class TruncationStrategy(ExplicitEnum):
class PaddingStrategy (line 1211) | class PaddingStrategy(ExplicitEnum):
class SpecialTokensMixin (line 1222) | class SpecialTokensMixin:
method __init__ (line 1260) | def __init__(self, verbose=True, **kwargs):
method convert_tokens_to_ids (line 1299) | def convert_tokens_to_ids(
method _convert_token_to_id_with_added_voc (line 1322) | def _convert_token_to_id_with_added_voc(self, token):
method _convert_token_to_id (line 1330) | def _convert_token_to_id(self, token):
method sanitize_special_tokens (line 1338) | def sanitize_special_tokens(self) -> int:
method add_special_tokens (line 1351) | def add_special_tokens(
method add_tokens (line 1426) | def add_tokens(
method _add_tokens (line 1474) | def _add_tokens(self,
method _create_trie (line 1522) | def _create_trie(self, unique_no_split_tokens):
method bos_token (line 1534) | def bos_token(self) -> str:
method eos_token (line 1545) | def eos_token(self) -> str:
method unk_token (line 1556) | def unk_token(self) -> str:
method sep_token (line 1567) | def sep_token(self) -> str:
method pad_token (line 1579) | def pad_token(self) -> str:
method cls_token (line 1590) | def cls_token(self) -> str:
method mask_token (line 1602) | def mask_token(self) -> str:
method additional_special_tokens (line 1614) | def additional_special_tokens(self) -> List[str]:
method bos_token (line 1626) | def bos_token(self, value):
method eos_token (line 1630) | def eos_token(self, value):
method unk_token (line 1634) | def unk_token(self, value):
method sep_token (line 1638) | def sep_token(self, value):
method pad_token (line 1642) | def pad_token(self, value):
method cls_token (line 1646) | def cls_token(self, value):
method mask_token (line 1650) | def mask_token(self, value):
method additional_special_tokens (line 1654) | def additional_special_tokens(self, value):
method bos_token_id (line 1658) | def bos_token_id(self) -> Optional[int]:
method eos_token_id (line 1668) | def eos_token_id(self) -> Optional[int]:
method unk_token_id (line 1678) | def unk_token_id(self) -> Optional[int]:
method sep_token_id (line 1687) | def sep_token_id(self) -> Optional[int]:
method pad_token_id (line 1697) | def pad_token_id(self) -> Optional[int]:
method pad_token_type_id (line 1706) | def pad_token_type_id(self) -> int:
method cls_token_id (line 1713) | def cls_token_id(self) -> Optional[int]:
method mask_token_id (line 1725) | def mask_token_id(self) -> Optional[int]:
method additional_special_tokens_ids (line 1735) | def additional_special_tokens_ids(self) -> List[int]:
method bos_token_id (line 1743) | def bos_token_id(self, value):
method eos_token_id (line 1748) | def eos_token_id(self, value):
method unk_token_id (line 1753) | def unk_token_id(self, value):
method sep_token_id (line 1758) | def sep_token_id(self, value):
method pad_token_id (line 1763) | def pad_token_id(self, value):
method cls_token_id (line 1768) | def cls_token_id(self, value):
method mask_token_id (line 1773) | def mask_token_id(self, value):
method additional_special_tokens_ids (line 1778) | def additional_special_tokens_ids(self, values):
method special_tokens_map (line 1784) | def special_tokens_map(self) -> Dict[str, Union[str, List[str]]]:
method special_tokens_map_extended (line 1802) | def special_tokens_map_extended(self) -> Dict[str, Union[
method all_special_tokens (line 1819) | def all_special_tokens(self) -> List[str]:
method all_special_tokens_extended (line 1829) | def all_special_tokens_extended(self) -> List[Union[str, AddedToken]]:
method all_special_ids (line 1846) | def all_special_ids(self) -> List[int]:
FILE: ppfleetx/data/transforms/preprocess.py
class OperatorParamError (line 34) | class OperatorParamError(ValueError):
class DecodeImage (line 40) | class DecodeImage(object):
method __init__ (line 43) | def __init__(self, to_rgb=True, channel_first=False):
method __call__ (line 47) | def __call__(self, img):
class UnifiedResize (line 63) | class UnifiedResize(object):
method __init__ (line 64) | def __init__(self, interpolation=None, backend="cv2"):
method __call__ (line 103) | def __call__(self, src, size):
class ResizeImage (line 107) | class ResizeImage(object):
method __init__ (line 110) | def __init__(self,
method __call__ (line 130) | def __call__(self, img):
class CenterCropImage (line 142) | class CenterCropImage(object):
method __init__ (line 145) | def __init__(self, size):
method __call__ (line 151) | def __call__(self, img):
class RandCropImage (line 162) | class RandCropImage(object):
method __init__ (line 165) | def __init__(self,
method __call__ (line 182) | def __call__(self, img):
class RandFlipImage (line 211) | class RandFlipImage(object):
method __init__ (line 219) | def __init__(self, flip_code=1):
method __call__ (line 224) | def __call__(self, img):
class NormalizeImage (line 231) | class NormalizeImage(object):
method __init__ (line 235) | def __init__(self,
method __call__ (line 258) | def __call__(self, img):
class ToCHWImage (line 280) | class ToCHWImage(object):
method __init__ (line 284) | def __init__(self):
method __call__ (line 287) | def __call__(self, img):
class ColorJitter (line 294) | class ColorJitter(PPColorJitter):
method __init__ (line 298) | def __init__(self, *args, **kwargs):
method __call__ (line 302) | def __call__(self, img):
class GaussianBlur (line 313) | class GaussianBlur(object):
method __init__ (line 316) | def __init__(self, sigma=[.1, 2.], p=1.0):
method __call__ (line 320) | def __call__(self, img):
class Pixels (line 332) | class Pixels(object):
method __init__ (line 333) | def __init__(self, mode="const", mean=[0., 0., 0.]):
method __call__ (line 337) | def __call__(self, h=224, w=224, c=3):
class RandomErasing (line 350) | class RandomErasing(object):
method __init__ (line 355) | def __init__(self,
method __call__ (line 374) | def __call__(self, img):
class RandomGrayscale (line 401) | class RandomGrayscale(object):
method __init__ (line 412) | def __init__(self, p=0.1):
method __call__ (line 415) | def __call__(self, img):
FILE: ppfleetx/data/transforms/utils.py
function transform (line 18) | def transform(data, ops=[]):
function create_preprocess_operators (line 25) | def create_preprocess_operators(params):
FILE: ppfleetx/data/utils/batch_collate_fn.py
function collate_fn (line 31) | def collate_fn(batch):
function default_collate_fn (line 80) | def default_collate_fn(batch_transform=None):
function gpt_collate_fn (line 95) | def gpt_collate_fn(batch):
class ErnieCollateData (line 99) | class ErnieCollateData():
method __init__ (line 100) | def __init__(self, micro_batch_size=1):
method generate_data (line 103) | def generate_data(self, data, stack_fn=Stack()):
method __call__ (line 133) | def __call__(self, data):
class DataCollatorWithPadding (line 150) | class DataCollatorWithPadding:
method __init__ (line 158) | def __init__(self,
method __call__ (line 173) | def __call__(self, features):
function imagen_collate_fn (line 190) | def imagen_collate_fn(samples):
FILE: ppfleetx/distributed/apis/amp.py
class MixPrecisionLayer (line 30) | class MixPrecisionLayer(nn.Layer):
method __init__ (line 31) | def __init__(self, layers, dtype="float16"):
method _update_main_grad_hook (line 44) | def _update_main_grad_hook(self, param):
method forward (line 65) | def forward(self, *inputs, **kwargs):
method state_dict (line 70) | def state_dict(
method set_state_dict (line 82) | def set_state_dict(self, state_dict, use_structured_name=True):
class MixPrecisionOptimizer (line 88) | class MixPrecisionOptimizer:
method __init__ (line 89) | def __init__(self, optimizer):
method _obtain_optimizer_parameters_list (line 93) | def _obtain_optimizer_parameters_list(self):
method step (line 109) | def step(self):
method clear_grad (line 165) | def clear_grad(self, set_to_zero=True):
method __getattr__ (line 189) | def __getattr__(self, item):
function unscale_method (line 193) | def unscale_method(self, optimizer):
class MixPrecisionScaler (line 228) | class MixPrecisionScaler:
method __init__ (line 229) | def __init__(self, scaler):
method __getattr__ (line 233) | def __getattr__(self, item):
FILE: ppfleetx/distributed/apis/comm_groups.py
function create_hcg (line 27) | def create_hcg(strategy, hcg_name):
class MoEGroup (line 38) | class MoEGroup(StrategyGroupBase):
method __init__ (line 48) | def __init__(self, list_of_ranks):
class Hybrid4DCommGroup (line 55) | class Hybrid4DCommGroup(OrthogonalStrategy):
method __init__ (line 56) | def __init__(self, list_of_strategy=None, fused_strategy_dict={}):
method get_data_parallel_rank (line 69) | def get_data_parallel_rank(self):
method get_data_parallel_world_size (line 72) | def get_data_parallel_world_size(self):
method get_data_parallel_group (line 75) | def get_data_parallel_group(self):
method get_data_parallel_group_src_rank (line 78) | def get_data_parallel_group_src_rank(self):
method get_model_parallel_rank (line 82) | def get_model_parallel_rank(self):
method get_model_parallel_world_size (line 85) | def get_model_parallel_world_size(self):
method get_model_parallel_group (line 88) | def get_model_parallel_group(self):
method get_model_parallel_group_src_rank (line 91) | def get_model_parallel_group_src_rank(self):
method get_stage_id (line 95) | def get_stage_id(self):
method get_pipe_parallel_world_size (line 98) | def get_pipe_parallel_world_size(self):
method get_pipe_parallel_group (line 101) | def get_pipe_parallel_group(self):
method get_p2p_groups (line 104) | def get_p2p_groups(self):
method get_sharding_parallel_rank (line 108) | def get_sharding_parallel_rank(self):
method get_sharding_parallel_world_size (line 111) | def get_sharding_parallel_world_size(self):
method get_sharding_parallel_group (line 114) | def get_sharding_parallel_group(self):
method get_sharding_parallel_group_src_rank (line 117) | def get_sharding_parallel_group_src_rank(self):
method get_check_parallel_group (line 121) | def get_check_parallel_group(self):
class HybridCommGroupForMoE (line 125) | class HybridCommGroupForMoE(Hybrid4DCommGroup):
method __init__ (line 126) | def __init__(self, strategy):
method get_expert_parallel_world_size (line 149) | def get_expert_parallel_world_size(self):
method get_expert_parallel_group (line 152) | def get_expert_parallel_group(self):
FILE: ppfleetx/distributed/apis/env.py
function set_seed (line 34) | def set_seed(seed):
function set_hcg (line 101) | def set_hcg(hcg):
function get_hcg (line 106) | def get_hcg():
function get_seed (line 111) | def get_seed():
function get_dp_seed (line 116) | def get_dp_seed():
function init_dist_env (line 121) | def init_dist_env(config):
function get_local_rank (line 154) | def get_local_rank():
function get_data_world_size (line 158) | def get_data_world_size():
function get_data_world_rank (line 169) | def get_data_world_rank():
function work_at_local_rank0 (line 181) | def work_at_local_rank0(func):
FILE: ppfleetx/distributed/apis/io.py
function save (line 28) | def save(output_dir, model, optimizer=None, step=0, epoch=0, sharding_st...
function load (line 84) | def load(ckpt_dir, model, optimizer=None, mode='train', load_recovery=No...
FILE: ppfleetx/distributed/apis/strategy.py
function wrap_with_fleet (line 28) | def wrap_with_fleet(dist_config, model, optimizer=None, scaler=None):
function wrap_sharding_2_3 (line 37) | def wrap_sharding_2_3(dist_config, model, optimizer=None, scaler=None):
function wrap_3D_parallel (line 73) | def wrap_3D_parallel(dist_config, model, optimizer=None, scaler=None):
FILE: ppfleetx/distributed/protein_folding/bp.py
function get_world_size (line 25) | def get_world_size():
function get_rank_in_group (line 32) | def get_rank_in_group():
function broadcast (line 39) | def broadcast(tensor, src):
class BroadcastGrad (line 51) | class BroadcastGrad(PyLayer):
method forward (line 54) | def forward(ctx, input, src):
method backward (line 60) | def backward(ctx, grad_output):
function broadcast_grad_for_backward (line 65) | def broadcast_grad_for_backward(input, src):
function all_reduce (line 77) | def all_reduce(tensor):
class SyncEvoformerResults (line 90) | class SyncEvoformerResults(PyLayer):
method forward (line 93) | def forward(ctx, outer, msa, pair):
method backward (line 102) | def backward(ctx, *grad_output):
function sync_evoformer_results (line 114) | def sync_evoformer_results(outer, msa, pair):
function grad_sync (line 127) | def grad_sync(param_groups):
FILE: ppfleetx/distributed/protein_folding/dap.py
function set_dap_sync_op (line 37) | def set_dap_sync_op(sync_op):
function get_dap_sync_op (line 44) | def get_dap_sync_op():
function get_world_size (line 49) | def get_world_size():
function get_rank_in_group (line 56) | def get_rank_in_group():
function ensure_divisibility (line 63) | def ensure_divisibility(numerator, denominator):
function divide (line 69) | def divide(numerator, denominator):
function _all_gather (line 75) | def _all_gather(tensor, axis=-1, sync_op=True):
function _gather (line 87) | def _gather(tensor, axis=-1):
function _split (line 97) | def _split(tensor, axis=-1):
class Scatter (line 106) | class Scatter(PyLayer):
method forward (line 110) | def forward(ctx, input, axis: -1):
method backward (line 115) | def backward(ctx, grad_output):
function scatter (line 119) | def scatter(input, axis=-1):
class Gather (line 131) | class Gather(PyLayer):
method forward (line 135) | def forward(ctx, input, axis=-1):
method backward (line 140) | def backward(ctx, grad_output):
function gather (line 144) | def gather(input, axis=-1):
function _reduce_scatter (line 157) | def _reduce_scatter(tensor, sync_op=True):
class AllGather (line 168) | class AllGather(PyLayer):
method forward (line 172) | def forward(ctx, input, axis=-1, sync_op=True):
method backward (line 179) | def backward(ctx, grad_output):
class AllGather_Opp (line 186) | class AllGather_Opp(PyLayer):
method forward (line 190) | def forward(ctx, input, axis=-1, sync_op=True):
method backward (line 196) | def backward(ctx, grad_output):
function all_gather (line 201) | def all_gather(input, axis=-1):
function all_gather_opp (line 220) | def all_gather_opp(output, axis=-1):
function _all_to_all (line 244) | def _all_to_all(tensor, in_axis=-1, out_axis=-1, sync_op=True):
class All_to_All (line 256) | class All_to_All(PyLayer):
method forward (line 260) | def forward(ctx, input, in_axis=-1, out_axis=-1, sync_op=True):
method backward (line 268) | def backward(ctx, grad_output):
class All_to_All_Opp (line 275) | class All_to_All_Opp(PyLayer):
method forward (line 279) | def forward(ctx, output, in_axis=-1, out_axis=-1, sync_op=True):
method backward (line 286) | def backward(ctx, grad_output):
function all_to_all (line 294) | def all_to_all(input, in_axis, out_axis):
function all_to_all_opp (line 319) | def all_to_all_opp(output, in_axis, out_axis):
class All2All (line 345) | class All2All(PyLayer):
method forward (line 347) | def forward(ctx, input, in_axis=-1, out_axis=-1):
method backward (line 353) | def backward(ctx, grad_output):
function row_to_col (line 358) | def row_to_col(input):
function col_to_row (line 379) | def col_to_row(input):
function grad_sync (line 401) | def grad_sync(param_groups):
FILE: ppfleetx/distributed/protein_folding/dp.py
function get_world_size (line 28) | def get_world_size():
function get_rank_in_group (line 35) | def get_rank_in_group():
function grad_sync (line 42) | def grad_sync(param_groups, grad_avg=True):
function param_sync (line 72) | def param_sync(model, src_rank=0, comm_group=None):
function all_reduce (line 98) | def all_reduce(tensor, op=paddle.distributed.ReduceOp.SUM):
FILE: ppfleetx/distributed/protein_folding/scg.py
function ensure_divisibility (line 22) | def ensure_divisibility(numerator, denominator):
class SingletonCommunicationGroup (line 28) | class SingletonCommunicationGroup(object):
method __init__ (line 31) | def __init__(self):
method init_process_group (line 34) | def init_process_group(self,
FILE: ppfleetx/models/__init__.py
function build_module (line 30) | def build_module(config):
FILE: ppfleetx/models/language_model/auto_utils.py
function process_mesh_config (line 24) | def process_mesh_config(config):
function process_model_configs (line 111) | def process_model_configs(config):
function process_data_configs (line 126) | def process_data_configs(config):
function process_configs (line 151) | def process_configs(config):
FILE: ppfleetx/models/language_model/debertav2/modeling.py
class BaseModelOutput (line 31) | class BaseModelOutput(ModelOutput):
class XSoftmax (line 57) | class XSoftmax(paddle.autograd.PyLayer):
method forward (line 86) | def forward(self, input, mask, dim):
class DropoutContext (line 100) | class DropoutContext(object):
method __init__ (line 101) | def __init__(self):
function get_mask (line 109) | def get_mask(input, local_context):
class XDropout (line 131) | class XDropout(paddle.autograd.PyLayer):
method forward (line 135) | def forward(ctx, input, local_ctx):
class StableDropout (line 146) | class StableDropout(nn.Layer):
method __init__ (line 154) | def __init__(self, drop_prob):
method forward (line 160) | def forward(self, x):
method clear_context (line 171) | def clear_context(self):
method init_context (line 175) | def init_context(self, reuse_mask=True, scale=1):
method get_context (line 183) | def get_context(self):
class DebertaV2SelfOutput (line 196) | class DebertaV2SelfOutput(nn.Layer):
method __init__ (line 197) | def __init__(self,
method forward (line 206) | def forward(self, hidden_states, input_tensor):
class DebertaV2Attention (line 214) | class DebertaV2Attention(nn.Layer):
method __init__ (line 215) | def __init__(
method forward (line 247) | def forward(
class DebertaV2Intermediate (line 275) | class DebertaV2Intermediate(nn.Layer):
method __init__ (line 276) | def __init__(
method forward (line 288) | def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
class DebertaV2Output (line 295) | class DebertaV2Output(nn.Layer):
method __init__ (line 296) | def __init__(
method forward (line 307) | def forward(self, hidden_states, input_tensor):
class DebertaV2Layer (line 315) | class DebertaV2Layer(nn.Layer):
method __init__ (line 316) | def __init__(
method forward (line 356) | def forward(
class ConvLayer (line 381) | class ConvLayer(nn.Layer):
method __init__ (line 382) | def __init__(
method forward (line 403) | def forward(self, hidden_states, residual_states, input_mask):
class DebertaV2Encoder (line 428) | class DebertaV2Encoder(nn.Layer):
method __init__ (line 431) | def __init__(
method get_rel_embedding (line 502) | def get_rel_embedding(self):
method get_attention_mask (line 508) | def get_attention_mask(self, attention_mask):
method get_rel_pos (line 519) | def get_rel_pos(self, hidden_states, query_states=None, relative_pos=N...
method forward (line 530) | def forward(
function make_log_bucket_position (line 616) | def make_log_bucket_position(relative_pos, bucket_size, max_position):
function build_relative_position (line 632) | def build_relative_position(query_size,
function c2p_dynamic_expand (line 666) | def c2p_dynamic_expand(c2p_pos, query_layer, relative_pos):
function p2c_dynamic_expand (line 674) | def p2c_dynamic_expand(c2p_pos, query_layer, key_layer):
function pos_dynamic_expand (line 682) | def pos_dynamic_expand(pos_index, p2c_att, key_layer):
class DisentangledSelfAttention (line 688) | class DisentangledSelfAttention(nn.Layer):
method __init__ (line 696) | def __init__(
method transpose_for_scores (line 747) | def transpose_for_scores(self, x, attention_heads):
method forward (line 752) | def forward(
method disentangled_attention_bias (line 843) | def disentangled_attention_bias(self, query_layer, key_layer, relative...
class DebertaV2Embeddings (line 946) | class DebertaV2Embeddings(nn.Layer):
method __init__ (line 949) | def __init__(
method forward (line 988) | def forward(self,
class DebertaV2PreTrainedModel (line 1042) | class DebertaV2PreTrainedModel(nn.Layer):
method _init_weights (line 1053) | def _init_weights(self, module):
method _set_gradient_checkpointing (line 1066) | def _set_gradient_checkpointing(self, module, value=False):
class DebertaV2Model (line 1087) | class DebertaV2Model(DebertaV2PreTrainedModel):
method __init__ (line 1088) | def __init__(self,
method get_input_embeddings (line 1155) | def get_input_embeddings(self):
method set_input_embeddings (line 1158) | def set_input_embeddings(self, new_embeddings):
method _prune_heads (line 1161) | def _prune_heads(self, heads_to_prune):
method forward (line 1169) | def forward(
function get_debertav2_model (line 1248) | def get_debertav2_model(name, pretrained=True):
function dict_from_json_file (line 1293) | def dict_from_json_file(name):
function debertav2_encode_text (line 1300) | def debertav2_encode_text(debertav2, texts, tokenizer, return_attn_mask=...
function get_debertav2_encoded_dim (line 1316) | def get_debertav2_encoded_dim(name):
FILE: ppfleetx/models/language_model/ernie/auto/auto_model.py
class Embedding (line 37) | class Embedding(nn.Layer):
method __init__ (line 38) | def __init__(
method forward (line 83) | def forward(self, x):
method extra_repr (line 91) | def extra_repr(self):
class ErnieEmbeddings (line 101) | class ErnieEmbeddings(nn.Layer):
method __init__ (line 106) | def __init__(self,
method forward (line 140) | def forward(self,
class ErniePooler (line 188) | class ErniePooler(nn.Layer):
method __init__ (line 189) | def __init__(self, hidden_size, weight_attr=None):
method forward (line 195) | def forward(self, hidden_states):
class ErnieModelAuto (line 204) | class ErnieModelAuto(nn.Layer):
method __init__ (line 258) | def __init__(self,
method get_input_embeddings (line 314) | def get_input_embeddings(self):
method set_input_embeddings (line 317) | def set_input_embeddings(self, value):
method forward (line 320) | def forward(self,
method init_weights (line 464) | def init_weights(self, layer):
class ErnieLMPredictionHead (line 481) | class ErnieLMPredictionHead(nn.Layer):
method __init__ (line 486) | def __init__(
method forward (line 510) | def forward(self, hidden_states, masked_positions=None):
class ErniePretrainingHeads (line 529) | class ErniePretrainingHeads(nn.Layer):
method __init__ (line 530) | def __init__(
method forward (line 544) | def forward(self, sequence_output, pooled_output, masked_positions=None):
class ErnieForPretrainingAuto (line 550) | class ErnieForPretrainingAuto(nn.Layer):
method __init__ (line 557) | def __init__(self, ernie):
method forward (line 572) | def forward(self,
method init_weights (line 660) | def init_weights(self, layer):
class ErniePretrainingCriterionAuto (line 677) | class ErniePretrainingCriterionAuto(paddle.nn.Layer):
method __init__ (line 684) | def __init__(self, with_nsp_loss=True):
method forward (line 688) | def forward(self,
class ErnieForSequenceClassificationAuto (line 733) | class ErnieForSequenceClassificationAuto(nn.Layer):
method __init__ (line 749) | def __init__(self, ernie, num_classes=2, dropout=None):
method forward (line 758) | def forward(self,
method init_weights (line 839) | def init_weights(self, layer):
FILE: ppfleetx/models/language_model/ernie/auto/auto_module.py
function process_data_configs (line 34) | def process_data_configs(config):
function process_model_configs (line 63) | def process_model_configs(config):
class ErnieModuleAuto (line 71) | class ErnieModuleAuto(BasicModule):
method __init__ (line 72) | def __init__(self, configs):
method process_configs (line 80) | def process_configs(self, configs):
method get_model (line 85) | def get_model(self):
method input_spec (line 95) | def input_spec(self):
class ErnieSeqClsModuleAuto (line 108) | class ErnieSeqClsModuleAuto(BasicModule):
method __init__ (line 109) | def __init__(self, configs):
method process_configs (line 113) | def process_configs(self, configs):
method get_model (line 128) | def get_model(self):
method input_spec (line 139) | def input_spec(self):
FILE: ppfleetx/models/language_model/ernie/auto/auto_transformer.py
function _convert_param_attr_to_list (line 36) | def _convert_param_attr_to_list(param_attr, n):
function _convert_attention_mask (line 82) | def _convert_attention_mask(attn_mask, dtype):
class MultiHeadAttention (line 111) | class MultiHeadAttention(Layer):
method __init__ (line 156) | def __init__(self,
method _prepare_qkv (line 195) | def _prepare_qkv(self, query, key, value, cache=None):
method compute_kv (line 253) | def compute_kv(self, key, value):
method gen_cache (line 290) | def gen_cache(self, key, value=None, type=Cache):
method forward (line 358) | def forward(self, query, key=None, value=None, attn_mask=None, cache=N...
class TransformerEncoderLayer (line 456) | class TransformerEncoderLayer(Layer):
method __init__ (line 513) | def __init__(self,
method forward (line 568) | def forward(self, src, src_mask=None, cache=None, output_attentions=Fa...
method gen_cache (line 635) | def gen_cache(self, src):
class TransformerEncoder (line 657) | class TransformerEncoder(Layer):
method __init__ (line 685) | def __init__(self,
method forward (line 709) | def forward(self,
method gen_cache (line 813) | def gen_cache(self, src):
FILE: ppfleetx/models/language_model/ernie/dygraph/hybrid_model.py
function parallel_matmul (line 40) | def parallel_matmul(lm_output, logit_weights, parallel_output):
class ErnieEmbeddings (line 64) | class ErnieEmbeddings(nn.Layer):
method __init__ (line 69) | def __init__(self,
method forward (line 105) | def forward(self,
class ErniePooler (line 151) | class ErniePooler(nn.Layer):
method __init__ (line 152) | def __init__(self, hidden_size, weight_attr=None):
method forward (line 158) | def forward(self, hidden_states):
class ErnieModelHybrid (line 167) | class ErnieModelHybrid(nn.Layer):
method __init__ (line 221) | def __init__(self,
method get_input_embeddings (line 273) | def get_input_embeddings(self):
method set_input_embeddings (line 276) | def set_input_embeddings(self, value):
method forward (line 279) | def forward(self,
method init_weights (line 423) | def init_weights(self, layer):
class ErnieLMPredictionHead (line 440) | class ErnieLMPredictionHead(nn.Layer):
method __init__ (line 445) | def __init__(
method forward (line 471) | def forward(self, hidden_states, masked_positions=None):
class ErniePretrainingHeads (line 490) | class ErniePretrainingHeads(nn.Layer):
method __init__ (line 491) | def __init__(
method forward (line 505) | def forward(self, sequence_output, pooled_output, masked_positions=None):
class ErnieForPretrainingHybrid (line 511) | class ErnieForPretrainingHybrid(nn.Layer):
method __init__ (line 518) | def __init__(self, ernie):
method forward (line 533) | def forward(self,
method init_weights (line 625) | def init_weights(self, layer):
class ErniePretrainingCriterionHybrid (line 642) | class ErniePretrainingCriterionHybrid(paddle.nn.Layer):
method __init__ (line 649) | def __init__(self, with_nsp_loss=True):
method forward (line 653) | def forward(self,
class EmbeddingsPipe (line 713) | class EmbeddingsPipe(ErnieEmbeddings):
method embedding_weight (line 715) | def embedding_weight(self):
method forward (line 718) | def forward(self, tensors):
class TransformerEncoderLayerPipe (line 754) | class TransformerEncoderLayerPipe(TransformerEncoderLayer):
method forward (line 755) | def forward(self, tensors):
class LayerNormPipe (line 761) | class LayerNormPipe(nn.LayerNorm):
method forward (line 762) | def forward(self, tensors):
class ErniePoolerPipe (line 768) | class ErniePoolerPipe(ErniePooler):
method forward (line 769) | def forward(self, args):
class ErniePretrainingCriterionPipe (line 775) | class ErniePretrainingCriterionPipe(ErniePretrainingCriterionHybrid):
method __init__ (line 776) | def __init__(self, *heads_args, **heads_kargs):
method forward (line 780) | def forward(self, outputs, data):
class ErnieForPretrainingPipe (line 796) | class ErnieForPretrainingPipe(PipelineLayer):
method __init__ (line 797) | def __init__(self,
class ErnieForSequenceClassificationHybrid (line 874) | class ErnieForSequenceClassificationHybrid(nn.Layer):
method __init__ (line 890) | def __init__(self, ernie, num_classes=2, dropout=None):
method forward (line 899) | def forward(self,
method init_weights (line 980) | def init_weights(self, layer):
FILE: ppfleetx/models/language_model/ernie/dygraph/single_model.py
class ErnieEmbeddings (line 34) | class ErnieEmbeddings(nn.Layer):
method __init__ (line 39) | def __init__(self,
method forward (line 71) | def forward(self,
class ErniePooler (line 115) | class ErniePooler(nn.Layer):
method __init__ (line 116) | def __init__(self, hidden_size, weight_attr=None):
method forward (line 122) | def forward(self, hidden_states):
class ErnieModel (line 131) | class ErnieModel(nn.Layer):
method __init__ (line 185) | def __init__(self,
method get_input_embeddings (line 235) | def get_input_embeddings(self):
method set_input_embeddings (line 238) | def set_input_embeddings(self, value):
method forward (line 241) | def forward(self,
method init_weights (line 384) | def init_weights(self, layer):
class ErnieLMPredictionHead (line 401) | class ErnieLMPredictionHead(nn.Layer):
method __init__ (line 406) | def __init__(
method forward (line 427) | def forward(self, hidden_states, masked_positions=None):
class ErniePretrainingHeads (line 443) | class ErniePretrainingHeads(nn.Layer):
method __init__ (line 444) | def __init__(
method forward (line 458) | def forward(self, sequence_output, pooled_output, masked_positions=None):
class ErnieForPretraining (line 464) | class ErnieForPretraining(nn.Layer):
method __init__ (line 471) | def __init__(self, ernie):
method forward (line 486) | def forward(self,
method init_weights (line 574) | def init_weights(self, layer):
class ErniePretrainingCriterion (line 591) | class ErniePretrainingCriterion(paddle.nn.Layer):
method __init__ (line 598) | def __init__(self, with_nsp_loss=True):
method forward (line 603) | def forward(self,
class ErnieForSequenceClassification (line 647) | class ErnieForSequenceClassification(nn.Layer):
method __init__ (line 663) | def __init__(self, ernie, num_classes=2, dropout=None):
method forward (line 672) | def forward(self,
method init_weights (line 753) | def init_weights(self, layer):
FILE: ppfleetx/models/language_model/ernie/ernie_module.py
function process_data_configs (line 44) | def process_data_configs(config):
function process_model_configs (line 75) | def process_model_configs(config):
function process_finetune_configs (line 81) | def process_finetune_configs(task, config):
class ErnieModule (line 120) | class ErnieModule(BasicModule):
method __init__ (line 121) | def __init__(self, configs):
method get_model_size (line 132) | def get_model_size(self, l, h, v, s):
method process_configs (line 137) | def process_configs(self, configs):
method get_model (line 142) | def get_model(self):
method forward (line 168) | def forward(self, tokens):
method pretreating_batch (line 171) | def pretreating_batch(self, batch):
method training_step (line 189) | def training_step(self, batch):
method training_step_end (line 217) | def training_step_end(self, log_dict):
method input_spec (line 228) | def input_spec(self):
class ErnieSeqClsModule (line 237) | class ErnieSeqClsModule(BasicModule):
method __init__ (line 238) | def __init__(self, configs):
method process_configs (line 250) | def process_configs(self, configs):
method get_model (line 268) | def get_model(self):
method prepare_input (line 289) | def prepare_input(self, data):
method pretreating_batch (line 306) | def pretreating_batch(self, batch):
method forward (line 316) | def forward(self, inputs):
method compute_loss (line 319) | def compute_loss(self, inputs, return_outputs=False):
method training_step (line 344) | def training_step(self, batch):
method training_step_end (line 347) | def training_step_end(self, log_dict):
method input_spec (line 358) | def input_spec(self):
method validation_step (line 367) | def validation_step(self, inputs):
method validation_step_end (line 377) | def validation_step_end(self, log_dict):
FILE: ppfleetx/models/language_model/ernie/layers/distributed_transformer.py
function _convert_param_attr_to_list (line 40) | def _convert_param_attr_to_list(param_attr, n):
function _convert_attention_mask (line 86) | def _convert_attention_mask(attn_mask, dtype):
class MultiHeadAttention (line 115) | class MultiHeadAttention(Layer):
method __init__ (line 160) | def __init__(self,
method _prepare_qkv (line 227) | def _prepare_qkv(self, query, key, value, cache=None):
method compute_kv (line 282) | def compute_kv(self, key, value):
method gen_cache (line 314) | def gen_cache(self, key, value=None, type=Cache):
method forward (line 382) | def forward(self, query, key=None, value=None, attn_mask=None, cache=N...
class TransformerEncoderLayer (line 478) | class TransformerEncoderLayer(Layer):
method __init__ (line 535) | def __init__(self,
method forward (line 600) | def forward(self, src, src_mask=None, cache=None, output_attentions=Fa...
method gen_cache (line 670) | def gen_cache(self, src):
class TransformerEncoder (line 692) | class TransformerEncoder(Layer):
method __init__ (line 720) | def __init__(self,
method forward (line 733) | def forward(self,
method gen_cache (line 845) | def gen_cache(self, src):
FILE: ppfleetx/models/language_model/ernie/layers/model_outputs.py
function is_tensor (line 28) | def is_tensor(x):
class ModelOutput (line 35) | class ModelOutput(OrderedDict):
method __post_init__ (line 49) | def __post_init__(self):
method __delitem__ (line 102) | def __delitem__(self, *args, **kwargs):
method setdefault (line 107) | def setdefault(self, *args, **kwargs):
method pop (line 112) | def pop(self, *args, **kwargs):
method update (line 116) | def update(self, *args, **kwargs):
method __getitem__ (line 121) | def __getitem__(self, k):
method __setattr__ (line 128) | def __setattr__(self, name, value):
method __setitem__ (line 134) | def __setitem__(self, key, value):
method to_tuple (line 140) | def to_tuple(self) -> Tuple[Any]:
class ErnieForPreTrainingOutput (line 148) | class ErnieForPreTrainingOutput(ModelOutput):
class BaseModelOutputWithPastAndCrossAttentions (line 179) | class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
class BaseModelOutputWithPoolingAndCrossAttentions (line 225) | class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
class SequenceClassifierOutput (line 274) | class SequenceClassifierOutput(ModelOutput):
class TokenClassifierOutput (line 303) | class TokenClassifierOutput(ModelOutput):
class QuestionAnsweringModelOutput (line 332) | class QuestionAnsweringModelOutput(ModelOutput):
class MultipleChoiceModelOutput (line 364) | class MultipleChoiceModelOutput(ModelOutput):
class MaskedLMOutput (line 395) | class MaskedLMOutput(ModelOutput):
class CausalLMOutputWithCrossAttentions (line 424) | class CausalLMOutputWithCrossAttentions(ModelOutput):
FILE: ppfleetx/models/language_model/ernie/layers/transformer.py
function _convert_param_attr_to_list (line 36) | def _convert_param_attr_to_list(param_attr, n):
function _convert_attention_mask (line 82) | def _convert_attention_mask(attn_mask, dtype):
class MultiHeadAttention (line 111) | class MultiHeadAttention(Layer):
method __init__ (line 156) | def __init__(self,
method _prepare_qkv (line 191) | def _prepare_qkv(self, query, key, value, cache=None):
method compute_kv (line 246) | def compute_kv(self, key, value):
method gen_cache (line 278) | def gen_cache(self, key, value=None, type=Cache):
method forward (line 346) | def forward(self, query, key=None, value=None, attn_mask=None, cache=N...
class TransformerEncoderLayer (line 438) | class TransformerEncoderLayer(Layer):
method __init__ (line 495) | def __init__(self,
method forward (line 544) | def forward(self, src, src_mask=None, cache=None, output_attentions=Fa...
method gen_cache (line 606) | def gen_cache(self, src):
class TransformerEncoder (line 628) | class TransformerEncoder(Layer):
method __init__ (line 656) | def __init__(self,
method forward (line 669) | def forward(self,
method gen_cache (line 770) | def gen_cache(self, src):
FILE: ppfleetx/models/language_model/ernie/layers/utils.py
function fn_args_to_dict (line 23) | def fn_args_to_dict(func, *args, **kwargs):
function adapt_stale_fwd_patch (line 47) | def adapt_stale_fwd_patch(self, name, value):
class InitTrackerMeta (line 110) | class InitTrackerMeta(type(Layer)):
method __init__ (line 123) | def __init__(cls, name, bases, attrs):
method init_and_track_conf (line 137) | def init_and_track_conf(init_func, pre_init_func=None,
method __setattr__ (line 172) | def __setattr__(self, name, value):
FILE: ppfleetx/models/language_model/gpt/auto/auto_model.py
class MultiHeadAttention (line 35) | class MultiHeadAttention(nn.Layer):
method __init__ (line 46) | def __init__(self,
method _fuse_prepare_qkv (line 91) | def _fuse_prepare_qkv(self, query, use_cache=False, cache=None):
method _prepare_qkv (line 114) | def _prepare_qkv(self, query, key, value, use_cache=False, cache=None):
method compute_kv (line 143) | def compute_kv(self, key, value):
method gen_cache (line 168) | def gen_cache(self, key, value=None, type=Cache):
method core_attn (line 193) | def core_attn(self, q, k, v, attn_mask=None):
method forward (line 220) | def forward(self,
class TransformerDecoder (line 270) | class TransformerDecoder(nn.Layer):
method __init__ (line 275) | def __init__(self,
method forward (line 294) | def forward(self,
method gen_cache (line 341) | def gen_cache(self, memory, do_zip=False):
class TransformerDecoderLayer (line 355) | class TransformerDecoderLayer(nn.Layer):
method __init__ (line 362) | def __init__(self,
method forward (line 418) | def forward(self, tgt, memory, tgt_mask=None, use_cache=False, cache=N...
method gen_cache (line 462) | def gen_cache(self, memory):
class GPTEmbeddings (line 468) | class GPTEmbeddings(nn.Layer):
method __init__ (line 473) | def __init__(self,
method forward (line 498) | def forward(self, input_ids, position_ids=None):
class GPTModelAuto (line 514) | class GPTModelAuto(nn.Layer):
method __init__ (line 515) | def __init__(self,
method forward (line 580) | def forward(self,
class GPTForPretrainingAuto (line 633) | class GPTForPretrainingAuto(nn.Layer):
method __init__ (line 643) | def __init__(self, gpt):
method forward (line 647) | def forward(self,
class GPTPretrainingCriterionAuto (line 682) | class GPTPretrainingCriterionAuto(nn.Layer):
method __init__ (line 687) | def __init__(self, mesh):
method forward (line 692) | def forward(self, prediction_scores, masked_lm_labels, loss_mask):
class GPTForGenerationAuto (line 726) | class GPTForGenerationAuto(nn.Layer):
method __init__ (line 736) | def __init__(self, gpt, configs):
method prepare_input_ids_for_generation (line 768) | def prepare_input_ids_for_generation(self,
method prepare_attention_mask_for_generation (line 779) | def prepare_attention_mask_for_generation(self, input_ids, pad_token_id,
method update_scores_for_generation (line 793) | def update_scores_for_generation(self, scores, next_scores, length,
method get_logits_processor (line 801) | def get_logits_processor(self,
method expand_inputs_for_generation (line 836) | def expand_inputs_for_generation(self,
method prepare_inputs_for_generation (line 879) | def prepare_inputs_for_generation(self,
method update_model_kwargs_for_generation (line 900) | def update_model_kwargs_for_generation(self,
method sample (line 968) | def sample(self,
method forward (line 1167) | def forward(self, input_ids=None, **model_kwargs):
FILE: ppfleetx/models/language_model/gpt/auto/auto_module.py
class LanguageModuleAuto (line 39) | class LanguageModuleAuto(BasicModule):
method __init__ (line 40) | def __init__(self, configs):
method process_configs (line 46) | def process_configs(self, configs):
method get_model_size (line 50) | def get_model_size(self, l, h, v, s):
class GPTModuleAuto (line 56) | class GPTModuleAuto(LanguageModuleAuto):
method __init__ (line 57) | def __init__(self, configs):
method get_model (line 60) | def get_model(self):
method get_loss_fn (line 78) | def get_loss_fn(self):
class GPTGenerationModuleAuto (line 83) | class GPTGenerationModuleAuto(BasicModule):
method __init__ (line 84) | def __init__(self, configs):
method process_configs (line 91) | def process_configs(self, configs):
method get_model (line 95) | def get_model(self):
method adjust_length_to_model (line 115) | def adjust_length_to_model(self, length, max_sequence_length):
method left_padding (line 120) | def left_padding(self, inputs, pad_id, padding="longest"):
method input_spec (line 144) | def input_spec(self):
FILE: ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
function get_attr (line 59) | def get_attr(layer, name):
function parallel_matmul (line 66) | def parallel_matmul(lm_output, logit_weights, parallel_output):
class MultiHeadAttention (line 90) | class MultiHeadAttention(nn.Layer):
method __init__ (line 101) | def __init__(self,
method _fuse_prepare_qkv (line 198) | def _fuse_prepare_qkv(self, query, use_cache=False, cache=None):
method _prepare_qkv (line 216) | def _prepare_qkv(self, query, key, value, use_cache=False, cache=None):
method compute_kv (line 241) | def compute_kv(self, key, value):
method gen_cache (line 259) | def gen_cache(self, key, value=None, type=Cache):
method _flash_attention (line 284) | def _flash_attention(self, q, k, v, attn_mask=None):
method core_attn (line 303) | def core_attn(self, q, k, v, attn_mask=None):
method forward (line 348) | def forward(self,
class TransformerDecoder (line 395) | class TransformerDecoder(nn.Layer):
method __init__ (line 400) | def __init__(self,
method forward (line 431) | def forward(self,
method gen_cache (line 475) | def gen_cache(self, memory, do_zip=False):
class TransformerDecoderLayer (line 489) | class TransformerDecoderLayer(nn.Layer):
method __init__ (line 496) | def __init__(self,
method forward (line 625) | def forward(self,
method gen_cache (line 676) | def gen_cache(self, memory):
class GPTEmbeddings (line 682) | class GPTEmbeddings(nn.Layer):
method __init__ (line 687) | def __init__(self,
method forward (line 718) | def forward(self, input_ids, position_ids=None):
class GPTModelHybrid (line 739) | class GPTModelHybrid(nn.Layer):
method __init__ (line 740) | def __init__(self,
method forward (line 840) | def forward(self,
class GPTForPretrainingHybrid (line 897) | class GPTForPretrainingHybrid(nn.Layer):
method __init__ (line 907) | def __init__(self, gpt):
method forward (line 915) | def forward(self,
class GPTPretrainingCriterionHybird (line 943) | class GPTPretrainingCriterionHybird(nn.Layer):
method __init__ (line 948) | def __init__(self, topo=None, sequence_parallel=False):
method forward (line 955) | def forward(self, prediction_scores, masked_lm_labels, loss_mask):
class GPTPretrainingCriterionPipe (line 1002) | class GPTPretrainingCriterionPipe(GPTPretrainingCriterionHybird):
method forward (line 1005) | def forward(self, prediction_scores, args):
class EmbeddingPipe (line 1012) | class EmbeddingPipe(GPTEmbeddings):
method embedding_weight (line 1016) | def embedding_weight(self):
method forward (line 1019) | def forward(self, tensors):
class LayerNormPipe (line 1026) | class LayerNormPipe(nn.Layer):
method __init__ (line 1027) | def __init__(self,
method forward (line 1048) | def forward(self, input):
class GPTForPretrainingPipe (line 1055) | class GPTForPretrainingPipe(PipelineLayer):
method __init__ (line 1062) | def __init__(self,
class GPTForGenerationHybrid (line 1209) | class GPTForGenerationHybrid(nn.Layer):
method __init__ (line 1219) | def __init__(self, gpt, configs):
method prepare_input_ids_for_generation (line 1252) | def prepare_input_ids_for_generation(self,
method prepare_attention_mask_for_generation (line 1263) | def prepare_attention_mask_for_generation(self, input_ids, pad_token_id,
method update_scores_for_generation (line 1277) | def update_scores_for_generation(self, scores, next_scores, length,
method get_logits_processor (line 1285) | def get_logits_processor(self,
method expand_inputs_for_generation (line 1320) | def expand_inputs_for_generation(self,
method prepare_inputs_for_generation (line 1363) | def prepare_inputs_for_generation(self,
method update_model_kwargs_for_generation (line 1388) | def update_model_kwargs_for_generation(self,
method sample (line 1453) | def sample(self,
method forward (line 1596) | def forward(self, input_ids=None, **model_kwargs):
function get_triangle_upper_mask (line 1681) | def get_triangle_upper_mask(x, mask):
class ConcatSoftmaxInput (line 1691) | class ConcatSoftmaxInput(PyLayer):
method forward (line 1693) | def forward(ctx, inp, group=None):
method backward (line 1702) | def backward(ctx, grad):
FILE: ppfleetx/models/language_model/gpt/dygraph/processor.py
class LogitsProcessorList (line 22) | class LogitsProcessorList(List):
method __call__ (line 23) | def __call__(self, input_ids, logits, **kwargs):
class LogitsProcessor (line 36) | class LogitsProcessor(ABC):
method __call__ (line 42) | def __call__(self, input_ids, logits):
class MinLengthLogitsProcessor (line 48) | class MinLengthLogitsProcessor(LogitsProcessor):
method __init__ (line 56) | def __init__(self, min_length, eos_token_id):
method __call__ (line 70) | def __call__(self, input_ids, logits):
class RepetitionPenaltyLogitsProcessor (line 77) | class RepetitionPenaltyLogitsProcessor(LogitsProcessor):
method __init__ (line 86) | def __init__(self, penalty: float):
method __call__ (line 94) | def __call__(self, input_ids, logits):
class HammingDiversityLogitsProcessor (line 106) | class HammingDiversityLogitsProcessor(LogitsProcessor):
method __init__ (line 120) | def __init__(self, diversity_rate, num_beams, num_beam_groups):
method __call__ (line 135) | def __call__(self, input_ids, scores, current_tokens, beam_group_idx):
class ForcedBOSTokenLogitsProcessor (line 158) | class ForcedBOSTokenLogitsProcessor(LogitsProcessor):
method __init__ (line 166) | def __init__(self, forced_bos_token_id):
method __call__ (line 169) | def __call__(self, input_ids, scores):
class ForcedEOSTokenLogitsProcessor (line 180) | class ForcedEOSTokenLogitsProcessor(LogitsProcessor):
method __init__ (line 188) | def __init__(self, max_length, forced_eos_token_id):
method __call__ (line 192) | def __call__(self, input_ids, scores):
FILE: ppfleetx/models/language_model/gpt/dygraph/sequence_parallel_utils.py
function scatter (line 41) | def scatter(input):
function all_gather (line 58) | def all_gather(input):
function reduce_scatter (line 69) | def reduce_scatter(input):
class ScatterOp (line 84) | class ScatterOp(PyLayer):
method forward (line 88) | def forward(ctx, input):
method backward (line 92) | def backward(ctx, grad):
class GatherOp (line 96) | class GatherOp(PyLayer):
method forward (line 100) | def forward(ctx, input):
method backward (line 104) | def backward(ctx, grad):
class AllGatherOp (line 110) | class AllGatherOp(PyLayer):
method forward (line 114) | def forward(ctx, input):
method backward (line 120) | def backward(ctx, grad):
class ReduceScatterOp (line 126) | class ReduceScatterOp(PyLayer):
method forward (line 130) | def forward(ctx, input):
method backward (line 136) | def backward(ctx, grad):
function mark_as_sequence_parallel_parameter (line 147) | def mark_as_sequence_parallel_parameter(parameter):
function is_sequence_parallel_parameter (line 151) | def is_sequence_parallel_parameter(parameter):
function create_fused_allreduce_gradient_hook (line 155) | def create_fused_allreduce_gradient_hook(parameter_list, accumulation_st...
function create_non_fused_allreduce_gradient_hook (line 173) | def create_non_fused_allreduce_gradient_hook(param, accumulation_steps):
function register_sequence_parallel_allreduce_hooks (line 190) | def register_sequence_parallel_allreduce_hooks(
function is_fused_matmul_bias_supported (line 215) | def is_fused_matmul_bias_supported():
class ColumnSequenceParallelLinear (line 222) | class ColumnSequenceParallelLinear(Layer):
method __init__ (line 223) | def __init__(self,
method forward (line 294) | def forward(self, x):
class RowSequenceParallelLinear (line 307) | class RowSequenceParallelLinear(Layer):
method __init__ (line 308) | def __init__(self,
method forward (line 386) | def forward(self, x):
FILE: ppfleetx/models/language_model/gpt/dygraph/single_model.py
function get_attr (line 49) | def get_attr(layer, name):
class ExpertLayer (line 56) | class ExpertLayer(nn.Layer):
method __init__ (line 57) | def __init__(self, d_model, d_hidden, name=None):
method forward (line 76) | def forward(self, x):
class MultiHeadAttention (line 83) | class MultiHeadAttention(nn.Layer):
method __init__ (line 94) | def __init__(self,
method _fuse_prepare_qkv (line 150) | def _fuse_prepare_qkv(self, query, use_cache=False, cache=None):
method _prepare_qkv (line 168) | def _prepare_qkv(self, query, key, value, use_cache=False, cache=None):
method compute_kv (line 193) | def compute_kv(self, key, value):
method gen_cache (line 211) | def gen_cache(self, key, value=None, type=Cache):
method _flash_attention (line 236) | def _flash_attention(self, q, k, v, attn_mask=None):
method core_attn (line 247) | def core_attn(self, q, k, v, attn_mask=None):
method forward (line 282) | def forward(self,
class TransformerDecoder (line 320) | class TransformerDecoder(nn.Layer):
method __init__ (line 325) | def __init__(self,
method forward (line 349) | def forward(self,
method gen_cache (line 392) | def gen_cache(self, memory, do_zip=False):
class TransformerDecoderLayer (line 406) | class TransformerDecoderLayer(nn.Layer):
method __init__ (line 413) | def __init__(self,
method forward (line 520) | def forward(self, tgt, memory, tgt_mask=None, use_cache=False, cache=N...
method gen_cache (line 557) | def gen_cache(self, memory):
class GPTEmbeddings (line 563) | class GPTEmbeddings(nn.Layer):
method __init__ (line 568) | def __init__(self,
method forward (line 595) | def forward(self, input_ids, position_ids=None):
class GPTModel (line 608) | class GPTModel(nn.Layer):
method __init__ (line 609) | def __init__(self,
method forward (line 724) | def forward(self,
class GPTForPretraining (line 777) | class GPTForPretraining(nn.Layer):
method __init__ (line 787) | def __init__(self, gpt):
method forward (line 791) | def forward(self,
class GPTPretrainingCriterion (line 819) | class GPTPretrainingCriterion(nn.Layer):
method __init__ (line 824) | def __init__(self, topo=None):
method forward (line 828) | def forward(self, prediction_scores, masked_lm_labels, loss_mask):
class GPTForSequenceClassification (line 856) | class GPTForSequenceClassification(nn.Layer):
method __init__ (line 868) | def __init__(self, gpt, num_classes=2):
method forward (line 878) | def forward(self, input_ids, position_ids=None, attention_mask=None):
class GPTForGeneration (line 898) | class GPTForGeneration(nn.Layer):
method __init__ (line 908) | def __init__(self, gpt, configs):
method prepare_input_ids_for_generation (line 939) | def prepare_input_ids_for_generation(self,
method prepare_attention_mask_for_generation (line 950) | def prepare_attention_mask_for_generation(self, input_ids, pad_token_id,
method update_scores_for_generation (line 964) | def update_scores_for_generation(self, scores, next_scores, length,
method get_logits_processor (line 972) | def get_logits_processor(self,
method expand_inputs_for_generation (line 1007) | def expand_inputs_for_generation(self,
method prepare_inputs_for_generation (line 1050) | def prepare_inputs_for_generation(self,
method update_model_kwargs_for_generation (line 1071) | def update_model_kwargs_for_generation(self,
method sample (line 1139) | def sample(self,
method forward (line 1322) | def forward(self, input_ids=None, **model_kwargs):
FILE: ppfleetx/models/language_model/language_module.py
function get_model_size (line 47) | def get_model_size(l, h, v, s):
function vocab_size_with_padding (line 62) | def vocab_size_with_padding(vocab_size, div_unit, mp_degree):
class LanguageModule (line 73) | class LanguageModule(BasicModule):
method __init__ (line 74) | def __init__(self, configs):
method process_configs (line 81) | def process_configs(self, configs):
method forward (line 85) | def forward(self, tokens, ids):
method training_step (line 88) | def training_step(self, batch):
method training_step_end (line 100) | def training_step_end(self, log_dict):
method validation_step (line 115) | def validation_step(self, batch):
method validation_step_end (line 122) | def validation_step_end(self, log_dict):
method test_step (line 129) | def test_step(self, batch):
method test_step_end (line 136) | def test_step_end(self, log_dict):
method training_epoch_end (line 143) | def training_epoch_end(self, log_dict):
class GPTModule (line 148) | class GPTModule(LanguageModule):
method __init__ (line 149) | def __init__(self, configs):
method get_model (line 156) | def get_model(self):
method get_loss_fn (line 196) | def get_loss_fn(self):
method pretreating_batch (line 204) | def pretreating_batch(self, batch):
method input_spec (line 212) | def input_spec(self):
method inference_end (line 219) | def inference_end(self, outputs):
class GPTFinetuneModule (line 228) | class GPTFinetuneModule(BasicModule):
method __init__ (line 229) | def __init__(self, configs):
method process_configs (line 263) | def process_configs(self, configs):
method get_model (line 266) | def get_model(self):
method forward (line 402) | def forward(self, tokens):
method training_step (line 405) | def training_step(self, batch):
method training_step_end (line 416) | def training_step_end(self, log_dict):
method validation_step (line 427) | def validation_step(self, batch):
method validation_step_end (line 439) | def validation_step_end(self, log_dict):
method test_step (line 446) | def test_step(self, batch):
method test_step_end (line 453) | def test_step_end(self, log_dict):
method training_epoch_end (line 460) | def training_epoch_end(self, log_dict):
method validation_epoch_end (line 464) | def validation_epoch_end(self, log_dict):
class GPTGenerationModule (line 490) | class GPTGenerationModule(BasicModule):
method __init__ (line 491) | def __init__(self, configs):
method process_configs (line 498) | def process_configs(self, configs):
method get_model (line 502) | def get_model(self):
method adjust_length_to_model (line 539) | def adjust_length_to_model(self, length, max_sequence_length):
method left_padding (line 544) | def left_padding(self, inputs, pad_id, padding="longest"):
method generate (line 568) | def generate(self, input_text):
method forward (line 571) | def forward(self, input_text):
method input_spec (line 596) | def input_spec(self):
class GPTEvalModule (line 600) | class GPTEvalModule(LanguageModule):
method __init__ (line 601) | def __init__(self, configs):
method post_process_configs (line 612) | def post_process_configs(self):
method get_model (line 632) | def get_model(self):
method forward (line 659) | def forward(self, tokens, ids, mask):
method validation_step (line 662) | def validation_step(self, batch):
method validation_step_end (line 689) | def validation_step_end(self, log_dict):
method validation_epoch_end (line 704) | def validation_epoch_end(self, log_dict):
method input_spec (line 728) | def input_spec(self):
class MoEModule (line 736) | class MoEModule(LanguageModule):
method __init__ (line 737) | def __init__(self, configs):
method get_model (line 743) | def get_model(self):
method get_loss_fn (line 769) | def get_loss_fn(self):
method training_step (line 776) | def training_step(self, batch):
method initialize_mp_dp_parameters (line 806) | def initialize_mp_dp_parameters(self):
FILE: ppfleetx/models/language_model/metrics.py
class AccuracyAndF1 (line 31) | class AccuracyAndF1(Metric):
method __init__ (line 67) | def __init__(self,
method compute (line 82) | def compute(self, pred, label, *args):
method update (line 106) | def update(self, correct, *args):
method accumulate (line 122) | def accumulate(self):
method reset (line 159) | def reset(self):
method name (line 169) | def name(self):
class Mcc (line 180) | class Mcc(Metric):
method __init__ (line 206) | def __init__(self, name='mcc', *args, **kwargs):
method compute (line 214) | def compute(self, pred, label, *args):
method update (line 235) | def update(self, preds_and_labels):
method accumulate (line 267) | def accumulate(self):
method reset (line 285) | def reset(self):
method name (line 294) | def name(self):
class PearsonAndSpearman (line 305) | class PearsonAndSpearman(Metric):
method __init__ (line 332) | def __init__(self, name='pearson_and_spearman', *args, **kwargs):
method update (line 338) | def update(self, preds_and_labels):
method accumulate (line 360) | def accumulate(self):
method pearson (line 390) | def pearson(self, preds, labels):
method spearman (line 408) | def spearman(self, preds, labels):
method get_rank (line 419) | def get_rank(self, raw_list):
method reset (line 427) | def reset(self):
method name (line 434) | def name(self):
class MultiLabelsMetric (line 445) | class MultiLabelsMetric(Metric):
method __init__ (line 492) | def __init__(self, num_labels, name='multi_labels_metric'):
method update (line 502) | def update(self, args):
method accumulate (line 516) | def accumulate(self, average=None, pos_label=1):
method compute (line 625) | def compute(self, pred, label):
method _multi_labels_confusion_matrix (line 668) | def _multi_labels_confusion_matrix(self, pred, label):
method reset (line 681) | def reset(self):
method name (line 684) | def name(self):
FILE: ppfleetx/models/language_model/moe/comm_ops.py
class MoEScatter (line 28) | class MoEScatter(PyLayer):
method forward (line 36) | def forward(ctx,
method backward (line 61) | def backward(ctx, grad):
class MoEGather (line 74) | class MoEGather(PyLayer):
method forward (line 81) | def forward(ctx,
method backward (line 106) | def backward(ctx, grad_out):
class AllGather (line 121) | class AllGather(PyLayer):
method forward (line 127) | def forward(ctx, inp, rank, world_size, group):
method backward (line 135) | def backward(ctx, grad_out):
class Slice (line 141) | class Slice(PyLayer):
method forward (line 147) | def forward(ctx, inp, rank, world_size, group):
method backward (line 158) | def backward(ctx, grad_out):
FILE: ppfleetx/models/language_model/moe/gate/base_gate.py
class BaseGate (line 25) | class BaseGate(nn.Layer):
method __init__ (line 26) | def __init__(self, num_expert, group=None):
method forward (line 33) | def forward(self, x):
method set_loss (line 36) | def set_loss(self, loss):
method get_loss (line 39) | def get_loss(self, clear=True):
FILE: ppfleetx/models/language_model/moe/gate/gshard_gate.py
class GShardGate (line 29) | class GShardGate(NaiveGate):
method __init__ (line 30) | def __init__(self,
method forward (line 43) | def forward(self, x):
FILE: ppfleetx/models/language_model/moe/gate/naive_gate.py
class NaiveGate (line 28) | class NaiveGate(BaseGate):
method __init__ (line 29) | def __init__(self, d_model, num_expert, group=None, topk=2):
method forward (line 36) | def forward(self, inp, return_all_scores=False):
FILE: ppfleetx/models/language_model/moe/gate/switch_gate.py
class SwitchGate (line 29) | class SwitchGate(NaiveGate):
method __init__ (line 30) | def __init__(self,
method forward (line 43) | def forward(self, inp):
FILE: ppfleetx/models/language_model/moe/moe_layer.py
class MoELayer (line 33) | class MoELayer(nn.Layer):
method __init__ (line 90) | def __init__(self,
method forward (line 158) | def forward(self, inp):
FILE: ppfleetx/models/language_model/moe/utils.py
function prepare_forward (line 26) | def prepare_forward(gate, num_expert, world_size, moe_group):
function _alltoall (line 41) | def _alltoall(in_tensor_list, group=None, use_calc_stream=True):
function _local_scatter (line 53) | def _local_scatter(inp, pos):
function _local_gather (line 61) | def _local_gather(inp, pos, out_batch_size, maybe_overlap=True):
function _all_gather (line 78) | def _all_gather(tensor, group=None, use_calc_stream=True):
function count_by_gate (line 93) | def count_by_gate(gate, num_expert, world_size, require_pos=True, group=...
function limit_by_capacity (line 110) | def limit_by_capacity(topk_idx, num_expert, world_size, capacity, group=...
FILE: ppfleetx/models/language_model/moe_exp/experts.py
class Experts (line 26) | class Experts(nn.Layer):
method __init__ (line 27) | def __init__(self, expert, num_local_experts=1, expert_group_name=None):
method forward (line 41) | def forward(self, inputs):
FILE: ppfleetx/models/language_model/moe_exp/layer.py
class MoE (line 30) | class MoE(nn.Layer):
method __init__ (line 31) | def __init__(self,
method forward (line 74) | def forward(self, hidden_states, used_token=None):
FILE: ppfleetx/models/language_model/moe_exp/mappings.py
function _gather_tokens (line 27) | def _gather_tokens(input_, group, axis=0):
function _drop_tokens (line 37) | def _drop_tokens(input_, group, axis=0):
class _GatherTokens (line 52) | class _GatherTokens(PyLayer):
method forward (line 56) | def forward(ctx, input_, group, axis):
method backward (line 62) | def backward(ctx, grad_output):
class _DropTokens (line 66) | class _DropTokens(PyLayer):
method forward (line 70) | def forward(ctx, input_, group, axis):
method backward (line 76) | def backward(ctx, grad_output):
function gather_tokens (line 80) | def gather_tokens(input_, group=None, axis=0):
function drop_tokens (line 87) | def drop_tokens(input_, group=None, axis=0):
FILE: ppfleetx/models/language_model/moe_exp/sharded_moe.py
function multiplicative_jitter (line 39) | def multiplicative_jitter(x, epsilon=1e-2):
function gumbel_rsample (line 52) | def gumbel_rsample(shape):
class _AllToAll (line 66) | class _AllToAll(PyLayer):
method forward (line 68) | def forward(ctx: Any, group: dist.collective.Group,
method backward (line 76) | def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor]:
function einsum (line 87) | def einsum(rule, a, b):
function _capacity (line 119) | def _capacity(gates, capacity_factor, min_capacity):
function _top_idx (line 130) | def _top_idx(source, k):
function top1gating (line 134) | def top1gating(logits,
function top2gating (line 226) | def top2gating(logits: Tensor, capacity_factor: float,
class TopKGate (line 300) | class TopKGate(nn.Layer):
method __init__ (line 318) | def __init__(self,
method forward (line 345) | def forward(self, input: paddle.Tensor, used_token: paddle.Tensor=None
class MOELayer (line 379) | class MOELayer(nn.Layer):
method __init__ (line 381) | def __init__(self,
method _set_ep_group (line 403) | def _set_ep_group(self, ep_group):
method get_loss (line 406) | def get_loss(self):
method forward (line 409) | def forward(self, *input: Tensor, **kwargs: Any) -> Tensor:
FILE: ppfleetx/models/language_model/t5/modeling.py
function finfo (line 30) | def finfo(dtype):
function fields (line 39) | def fields(class_or_instance):
function is_tensor (line 57) | def is_tensor(x):
class ModelOutput (line 61) | class ModelOutput(OrderedDict):
method __post_init__ (line 75) | def __post_init__(self):
method __delitem__ (line 120) | def __delitem__(self, *args, **kwargs):
method setdefault (line 125) | def setdefault(self, *args, **kwargs):
method pop (line 130) | def pop(self, *args, **kwargs):
method update (line 134) | def update(self, *args, **kwargs):
method __getitem__ (line 139) | def __getitem__(self, k):
method __setattr__ (line 146) | def __setattr__(self, name, value):
method __setitem__ (line 152) | def __setitem__(self, key, value):
method to_tuple (line 158) | def to_tuple(self) -> Tuple[Any]:
class NewGELUActivation (line 165) | class NewGELUActivation(nn.Layer):
method forward (line 171) | def forward(self, input):
class GELUActivation (line 177) | class GELUActivation(nn.Layer):
method __init__ (line 185) | def __init__(self, use_gelu_python: bool=False):
method _gelu_python (line 189) | def _gelu_python(self, input):
method forward (line 192) | def forward(self, input):
class FastGELUActivation (line 196) | class FastGELUActivation(nn.Layer):
method forward (line 201) | def forward(self, input):
class QuickGELUActivation (line 207) | class QuickGELUActivation(nn.Layer):
method forward (line 212) | def forward(self, input):
class ClippedGELUActivation (line 216) | class ClippedGELUActivation(nn.Layer):
method __init__ (line 229) | def __init__(self, min: float, max: float):
method forward (line 238) | def forward(self, x):
class SiLUActivation (line 242) | class SiLUActivation(nn.Layer):
method __init__ (line 251) | def __init__(self):
method _silu_python (line 255) | def _silu_python(self, input):
method forward (line 258) | def forward(self, input):
class MishActivation (line 262) | class MishActivation(nn.Layer):
method __init__ (line 268) | def __init__(self):
method _mish_python (line 272) | def _mish_python(self, input):
method forward (line 275) | def forward(self, input):
class LinearActivation (line 279) | class LinearActivation(nn.Layer):
method forward (line 284) | def forward(self, input):
function get_activation (line 305) | def get_activation(activation_string):
function prune_linear_layer (line 325) | def prune_linear_layer(layer: nn.Linear, index: paddle.int64,
function find_pruneable_heads_and_indices (line 360) | def find_pruneable_heads_and_indices(heads,
class BaseModelOutputWithPastAndCrossAttentions (line 389) | class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
class T5Config (line 434) | class T5Config(object):
method __init__ (line 435) | def __init__(self, **kwargs):
class T5LayerNorm (line 473) | class T5LayerNorm(nn.Layer):
method __init__ (line 474) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 485) | def forward(self, hidden_states):
class T5DenseActDense (line 504) | class T5DenseActDense(nn.Layer):
method __init__ (line 505) | def __init__(self, d_model, d_ff, dropout_rate, dense_act_fn):
method forward (line 512) | def forward(self, hidden_states):
class T5DenseGatedActDense (line 520) | class T5DenseGatedActDense(nn.Layer):
method __init__ (line 521) | def __init__(self, d_model, d_ff, dropout_rate, dense_act_fn):
method forward (line 529) | def forward(self, hidden_states):
class T5LayerFF (line 538) | class T5LayerFF(nn.Layer):
method __init__ (line 539) | def __init__(self, d_model, d_ff, dropout_rate, layer_norm_epsilon,
method forward (line 552) | def forward(self, hidden_states):
class T5Attention (line 559) | class T5Attention(nn.Layer):
method __init__ (line 560) | def __init__(self,
method prune_heads (line 590) | def prune_heads(self, heads):
method _relative_position_bucket (line 606) | def _relative_position_bucket(relative_position,
method compute_bias (line 658) | def compute_bias(self, query_length, key_length, device=None):
method forward (line 676) | def forward(
class T5LayerSelfAttention (line 802) | class T5LayerSelfAttention(nn.Layer):
method __init__ (line 803) | def __init__(self,
method forward (line 824) | def forward(
class T5LayerCrossAttention (line 848) | class T5LayerCrossAttention(nn.Layer):
method __init__ (line 849) | def __init__(self, is_decoder, relative_attention_num_buckets, d_model,
method forward (line 862) | def forward(
class T5Block (line 890) | class T5Block(nn.Layer):
method __init__ (line 891) | def __init__(self,
method forward (line 925) | def forward(
class T5Stack (line 1033) | class T5Stack(nn.Layer):
method __init__ (line 1034) | def __init__(self,
method get_input_embeddings (line 1068) | def get_input_embeddings(self):
method set_input_embeddings (line 1071) | def set_input_embeddings(self, new_embeddings):
method get_extended_attention_mask (line 1074) | def get_extended_attention_mask(self, attention_mask, input_shape):
method get_head_mask (line 1110) | def get_head_mask(self,
method _convert_head_mask_to_5d (line 1139) | def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
method forward (line 1153) | def forward(
class T5EncoderModel (line 1318) | class T5EncoderModel(nn.Layer):
method __init__ (line 1321) | def __init__(self,
method get_input_embeddings (line 1352) | def get_input_embeddings(self):
method set_input_embeddings (line 1355) | def set_input_embeddings(self, new_embeddings):
method get_encoder (line 1359) | def get_encoder(self):
method _prune_heads (line 1362) | def _prune_heads(self, heads_to_prune):
method forward (line 1370) | def forward(
function T5Model (line 1411) | def T5Model(config):
function get_t5_model (line 1417) | def get_t5_model(name, pretrained=True):
function t5_11b (line 1442) | def t5_11b():
function dict_from_json_file (line 1457) | def dict_from_json_file(name):
function t5_encode_text (line 1464) | def t5_encode_text(t5, texts, tokenizer, return_attn_mask=False):
function get_encoded_dim (line 1478) | def get_encoded_dim(name):
FILE: ppfleetx/models/language_model/t5/utils.py
function constant_ (line 24) | def constant_(x, value):
function normal_ (line 31) | def normal_(x, mean=0., std=1.):
function normal_init (line 37) | def normal_init(layer, mean=0, std=1, bias=0):
function constant_init (line 46) | def constant_init(layer, val, bias=0):
FILE: ppfleetx/models/language_model/utils.py
function is_fused_matmul_bias_supported (line 32) | def is_fused_matmul_bias_supported():
function process_inference_configs (line 39) | def process_inference_configs(config):
function process_model_configs (line 55) | def process_model_configs(config):
function process_optim_configs (line 125) | def process_optim_configs(config):
function process_data_configs (line 146) | def process_data_configs(config):
function process_configs (line 174) | def process_configs(config):
FILE: ppfleetx/models/multimodal_model/imagen/modeling.py
class Unet64_397M (line 36) | class Unet64_397M(Unet):
method __init__ (line 37) | def __init__(self, *args, **kwargs):
class BaseUnet64 (line 50) | class BaseUnet64(Unet):
method __init__ (line 51) | def __init__(self, *args, **kwargs):
class SRUnet256 (line 65) | class SRUnet256(Unet):
method __init__ (line 66) | def __init__(self, *args, **kwargs):
class SRUnet1024 (line 79) | class SRUnet1024(Unet):
method __init__ (line 80) | def __init__(self, *args, **kwargs):
class ImagenCriterion (line 94) | class ImagenCriterion(nn.Layer):
method __init__ (line 99) | def __init__(self, name='mse_loss', p2_loss_weight_k=1):
method forward (line 112) | def forward(self, pred, target, log_snr, p2_loss_weight_gamma):
class ImagenModel (line 138) | class ImagenModel(nn.Layer):
method __init__ (line 139) | def __init__(
method get_unet (line 304) | def get_unet(self, unet_number):
method reset_unets (line 315) | def reset_unets(self, ):
method one_unet_in_gpu (line 320) | def one_unet_in_gpu(self, unet_number=None, unet=None):
method reset_unets_all (line 328) | def reset_unets_all(self, ):
method state_dict (line 334) | def state_dict(self, *args, **kwargs):
method load_state_dict (line 338) | def load_state_dict(self, *args, **kwargs):
method p_mean_variance (line 345) | def p_mean_variance(self,
method p_sample (line 397) | def p_sample(self,
method p_sample_loop (line 440) | def p_sample_loop(self,
method sample (line 544) | def sample(
method p_losses (line 715) | def p_losses(self,
method forward (line 835) | def forward(self,
function imagen_397M_text2im_64 (line 952) | def imagen_397M_text2im_64(**kwargs):
function imagen_text2im_64 (line 962) | def imagen_text2im_64(**kwargs):
function imagen_text2im_64_debertav2 (line 977) | def imagen_text2im_64_debertav2(**kwargs):
function imagen_text2im_64_SR256 (line 988) | def imagen_text2im_64_SR256(**kwargs):
function imagen_SR256 (line 999) | def imagen_SR256(**kwargs):
function imagen_SR1024 (line 1014) | def imagen_SR1024(**kwargs):
FILE: ppfleetx/models/multimodal_model/imagen/unet.py
class LayerNorm (line 33) | class LayerNorm(nn.Layer):
method __init__ (line 34) | def __init__(self, feats, stable=False, dim=-1):
method forward (line 43) | def forward(self, x):
class Residual (line 60) | class Residual(nn.Layer):
method __init__ (line 61) | def __init__(self, fn):
method forward (line 65) | def forward(self, x, **kwargs):
class PerceiverAttention (line 72) | class PerceiverAttention(nn.Layer):
method __init__ (line 73) | def __init__(self, *, dim, dim_head=64, heads=8, cosine_sim_attn=False):
method forward (line 92) | def forward(self, x, latents, mask=None):
class PerceiverResampler (line 135) | class PerceiverResampler(nn.Layer):
method __init__ (line 136) | def __init__(
method forward (line 175) | def forward(self, x, mask=None):
class Attention (line 201) | class Attention(nn.Layer):
method __init__ (line 202) | def __init__(
method forward (line 235) | def forward(self, x, context=None, mask=None, attn_bias=None):
method _forward (line 241) | def _forward(self, x, context=None, mask=None, attn_bias=None):
function Upsample (line 304) | def Upsample(dim, dim_out=None):
class PixelShuffleUpsample (line 314) | class PixelShuffleUpsample(nn.Layer):
method __init__ (line 320) | def __init__(self, dim, dim_out=None):
method init_conv_ (line 329) | def init_conv_(self, conv):
method forward (line 338) | def forward(self, x):
function Downsample (line 342) | def Downsample(dim, dim_out=None):
class SinusoidalPosEmb (line 350) | class SinusoidalPosEmb(nn.Layer):
method __init__ (line 351) | def __init__(self, dim):
method forward (line 355) | def forward(self, x):
class LearnedSinusoidalPosEmb (line 363) | class LearnedSinusoidalPosEmb(nn.Layer):
method __init__ (line 367) | def __init__(self, dim):
method forward (line 374) | def forward(self, x):
class Block (line 382) | class Block(nn.Layer):
method __init__ (line 383) | def __init__(self, dim, dim_out, groups=8, norm=True):
method forward (line 389) | def forward(self, x, scale_shift=None):
class ResnetBlock (line 400) | class ResnetBlock(nn.Layer):
method __init__ (line 401) | def __init__(self,
method forward (line 439) | def forward(self, x, time_emb=None, cond=None):
class CrossAttention (line 464) | class CrossAttention(nn.Layer):
method __init__ (line 465) | def __init__(self,
method forward (line 496) | def forward(self, x, context, mask=None):
class LinearCrossAttention (line 544) | class LinearCrossAttention(CrossAttention):
method forward (line 545) | def forward(self, x, context, mask=None):
class LinearAttention (line 587) | class LinearAttention(nn.Layer):
method __init__ (line 588) | def __init__(self,
method forward (line 649) | def forward(self, fmap, context=None):
class GlobalContext (line 677) | class GlobalContext(nn.Layer):
method __init__ (line 680) | def __init__(self, *, dim_in, dim_out):
method forward (line 689) | def forward(self, x):
function FeedForward (line 697) | def FeedForward(dim, mult=2):
function ChanFeedForward (line 709) | def ChanFeedForward(
class TransformerBlock (line 723) | class TransformerBlock(nn.Layer):
method __init__ (line 724) | def __init__(
method forward (line 751) | def forward(self, x, context=None):
class LinearAttentionTransformerBlock (line 763) | class LinearAttentionTransformerBlock(nn.Layer):
method __init__ (line 764) | def __init__(self,
method forward (line 787) | def forward(self, x, context=None):
class CrossEmbedLayer (line 794) | class CrossEmbedLayer(nn.Layer):
method __init__ (line 795) | def __init__(self, dim_in, kernel_sizes, dim_out=None, stride=2):
method forward (line 817) | def forward(self, x):
class UpsampleCombiner (line 822) | class UpsampleCombiner(nn.Layer):
method __init__ (line 823) | def __init__(self,
method forward (line 845) | def forward(self, x, fmaps=None):
class Unet (line 858) | class Unet(nn.Layer):
method __init__ (line 859) | def __init__(self,
method cast_model_parameters (line 1295) | def cast_model_parameters(self, *, text_embed_dim, channels, channels_...
method to_config_and_state_dict (line 1313) | def to_config_and_state_dict(self):
method from_config_and_state_dict (line 1319) | def from_config_and_state_dict(klass, config, state_dict):
method persist_to_file (line 1326) | def persist_to_file(self, path):
method hydrate_from_file (line 1337) | def hydrate_from_file(klass, path):
method forward_with_cond_scale (line 1349) | def forward_with_cond_scale(self, *args, cond_scale=1., **kwargs):
method forward (line 1359) | def forward(self,
FILE: ppfleetx/models/multimodal_model/imagen/utils.py
function exists (line 26) | def exists(val):
function identity (line 30) | def identity(t, *args, **kwargs):
function first (line 34) | def first(arr, d=None):
function maybe (line 40) | def maybe(fn):
function once (line 50) | def once(fn):
function default (line 67) | def default(val, d):
function cast_tuple (line 73) | def cast_tuple(val, length=None):
function is_float_dtype (line 85) | def is_float_dtype(dtype):
function cast_uint8_images_to_float (line 93) | def cast_uint8_images_to_float(images):
function zero_init_ (line 102) | def zero_init_(m):
function eval_decorator (line 108) | def eval_decorator(fn):
function pad_tuple_to_length (line 120) | def pad_tuple_to_length(t, length, fillvalue=None):
class Identity (line 130) | class Identity(nn.Layer):
method __init__ (line 131) | def __init__(self, *args, **kwargs):
method forward (line 134) | def forward(self, x, *args, **kwargs):
function log (line 141) | def log(t, eps: float=1e-12):
class Parallel (line 145) | class Parallel(nn.Layer):
method __init__ (line 146) | def __init__(self, *fns):
method forward (line 150) | def forward(self, x):
function l2norm (line 155) | def l2norm(t):
function right_pad_dims_to (line 159) | def right_pad_dims_to(x, t):
function masked_mean (line 166) | def masked_mean(t, *, axis, mask=None):
function resize_image_to (line 177) | def resize_image_to(image, target_image_size, clamp_range=None):
function normalize_neg_one_to_one (line 196) | def normalize_neg_one_to_one(img):
function unnormalize_zero_to_one (line 200) | def unnormalize_zero_to_one(normed_img):
function prob_mask_like (line 207) | def prob_mask_like(shape, prob):
function rearrange (line 216) | def rearrange(tensor,
function rearrange_many (line 281) | def rearrange_many(tensors, pattern: str, h: int=-1, x: int=-1, y: int=-1):
function repeat (line 293) | def repeat(tensor, pattern: str, h: int=-1, b: int=-1):
function repeat_many (line 316) | def repeat_many(tensors, pattern: str, h: int=-1, b: int=-1):
function reduce (line 327) | def reduce(losses, pattern: str, reduction: str='mean'):
class EinopsToAndFrom (line 333) | class EinopsToAndFrom(nn.Layer):
method __init__ (line 334) | def __init__(self, from_einops, to_einops, fn):
method forward (line 340) | def forward(self, x, **kwargs):
class Rearrange (line 351) | class Rearrange(nn.Layer):
method __init__ (line 352) | def __init__(self, pattern, n=None, s1=None, s2=None):
method forward (line 359) | def forward(self, x, **kwargs):
function beta_linear_log_snr (line 370) | def beta_linear_log_snr(t):
function alpha_cosine_log_snr (line 374) | def alpha_cosine_log_snr(t, s: float=0.008):
function log_snr_to_alpha_sigma (line 380) | def log_snr_to_alpha_sigma(log_snr):
class GaussianDiffusionContinuousTimes (line 384) | class GaussianDiffusionContinuousTimes(nn.Layer):
method __init__ (line 385) | def __init__(self, *, noise_schedule, timesteps=1000):
method get_times (line 397) | def get_times(self, batch_size, noise_level):
method sample_random_times (line 400) | def sample_random_times(self, batch_size):
method get_condition (line 403) | def get_condition(self, times):
method get_sampling_timesteps (line 406) | def get_sampling_timesteps(self, batch):
method q_posterior (line 413) | def q_posterior(self, x_start, x_t, t, *, t_next=None):
method q_sample (line 434) | def q_sample(self, x_start, t, noise=None):
method q_sample_from_to (line 448) | def q_sample_from_to(self, x_from, from_t, to_t, noise=None):
method predict_start_from_v (line 471) | def predict_start_from_v(self, x_t, t, v):
method predict_start_from_noise (line 477) | def predict_start_from_noise(self, x_t, t, noise):
class Always (line 484) | class Always():
method __init__ (line 485) | def __init__(self, val):
method __call__ (line 488) | def __call__(self, *args, **kwargs):
FILE: ppfleetx/models/multimodal_model/multimodal_module.py
class MultiModalModule (line 27) | class MultiModalModule(BasicModule):
method __init__ (line 28) | def __init__(self, configs):
method process_configs (line 34) | def process_configs(self, configs):
method forward (line 38) | def forward(self, batch):
method training_step (line 41) | def training_step(self, batch):
method training_step_end (line 46) | def training_step_end(self, log_dict):
method validation_step (line 54) | def validation_step(self, batch):
method validation_step_end (line 61) | def validation_step_end(self, log_dict):
method test_step (line 68) | def test_step(self, batch):
method test_step_end (line 75) | def test_step_end(self, log_dict):
method input_spec (line 82) | def input_spec(self):
method training_epoch_end (line 89) | def training_epoch_end(self, log_dict):
class ImagenModule (line 94) | class ImagenModule(MultiModalModule):
method __init__ (line 95) | def __init__(self, configs):
method get_model (line 98) | def get_model(self):
method get_loss_fn (line 105) | def get_loss_fn(self):
method pretreating_batch (line 110) | def pretreating_batch(self, batch):
FILE: ppfleetx/models/multimodal_model/utils.py
function process_global_configs (line 31) | def process_global_configs(config):
function is_fused_matmul_bias_supported (line 61) | def is_fused_matmul_bias_supported():
function process_fused_configs (line 68) | def process_fused_configs(config):
function process_inference_configs (line 80) | def process_inference_configs(config):
function process_model_configs (line 93) | def process_model_configs(config):
function process_optim_configs (line 110) | def process_optim_configs(config):
function process_engine_configs (line 118) | def process_engine_configs(config):
function process_configs (line 130) | def process_configs(config):
FILE: ppfleetx/models/protein_folding/all_atom.py
function get_chi_atom_indices (line 25) | def get_chi_atom_indices():
function atom37_to_torsion_angles (line 52) | def atom37_to_torsion_angles(
FILE: ppfleetx/models/protein_folding/attentions.py
class Attention (line 35) | class Attention(nn.Layer):
method __init__ (line 38) | def __init__(self, config, global_config, q_dim, kv_dim, output_dim):
method forward (line 109) | def forward(self, q_data, m_data, bias, nonbatched_bias=None):
class GlobalAttention (line 167) | class GlobalAttention(nn.Layer):
method __init__ (line 173) | def __init__(self, config, global_config, q_dim, kv_dim, output_dim):
method forward (line 227) | def forward(self, q_data, m_data, q_mask):
class MSARowAttentionWithPairBias (line 272) | class MSARowAttentionWithPairBias(nn.Layer):
method __init__ (line 278) | def __init__(self, channel_num, config, global_config, is_extra_msa):
method forward (line 308) | def forward(self, msa_act, msa_mask, pair_act):
class MSAColumnGlobalAttention (line 360) | class MSAColumnGlobalAttention(nn.Layer):
method __init__ (line 366) | def __init__(self, channel_num, config, global_config):
method forward (line 379) | def forward(self, msa_act, msa_mask):
class MSAColumnAttention (line 418) | class MSAColumnAttention(nn.Layer):
method __init__ (line 424) | def __init__(self, channel_num, config, global_config):
method forward (line 436) | def forward(self, msa_act, msa_mask):
class TriangleAttention (line 473) | class TriangleAttention(nn.Layer):
method __init__ (line 480) | def __init__(self,
method forward (line 504) | def forward(self, pair_act, pair_mask):
class TriangleMultiplication (line 555) | class TriangleMultiplication(nn.Layer):
method __init__ (line 562) | def __init__(self,
method forward (line 610) | def forward(self, act, mask):
FILE: ppfleetx/models/protein_folding/common.py
function set_tensor_constant (line 29) | def set_tensor_constant(tensor, constant):
function init_gate_linear (line 33) | def init_gate_linear(linear):
function init_final_linear (line 38) | def init_final_linear(linear):
function recompute_wrapper (line 42) | def recompute_wrapper(func, *args, is_recompute=True):
function subbatch (line 50) | def subbatch(f, arg_idx, dim, bs, out_idx, same_arg_idx={}):
function batched_gather (line 103) | def batched_gather(params, indices, axis=0, batch_dims=0):
function mask_mean (line 160) | def mask_mean(mask, value, axis=None, drop_mask_channel=False, eps=1e-10):
class Transition (line 189) | class Transition(nn.Layer):
method __init__ (line 196) | def __init__(self, channel_num, config, global_config, is_extra_msa,
method forward (line 232) | def forward(self, act, mask):
class Dropout (line 252) | class Dropout(nn.Layer):
method __init__ (line 253) | def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None):
method forward (line 275) | def forward(self, input):
method extra_repr (line 302) | def extra_repr(self):
function dgram_from_positions (line 308) | def dgram_from_positions(positions, num_bins, min_bin, max_bin):
FILE: ppfleetx/models/protein_folding/evoformer.py
class EvoformerIteration (line 43) | class EvoformerIteration(nn.Layer):
method __init__ (line 49) | def __init__(self, channel_num, config, global_config, is_extra_msa=Fa...
method _parse_dropout_params (line 166) | def _parse_dropout_params(self, module):
method outer_product_mean_origin (line 180) | def outer_product_mean_origin(self, msa_act, pair_act, masks):
method outer_product_mean_first (line 307) | def outer_product_mean_first(self, msa_act, pair_act, masks):
method outer_product_mean_end (line 313) | def outer_product_mean_end(self, msa_act, pair_act, masks):
method forward (line 507) | def forward(self, msa_act, pair_act, masks):
class DistEmbeddingsAndEvoformer (line 532) | class DistEmbeddingsAndEvoformer(nn.Layer):
method __init__ (line 539) | def __init__(self, channel_num, config, global_config):
method _pseudo_beta_fn (line 633) | def _pseudo_beta_fn(self, aatype, all_atom_positions, all_atom_masks):
method _create_extra_msa_feature (line 670) | def _create_extra_msa_feature(self, batch):
method forward (line 682) | def forward(self, batch):
FILE: ppfleetx/models/protein_folding/outer_product_mean.py
class OuterProductMean (line 23) | class OuterProductMean(nn.Layer):
method __init__ (line 29) | def __init__(self,
method forward (line 70) | def forward(self, act, mask):
FILE: ppfleetx/models/protein_folding/quat_affine.py
function rot_to_quat (line 69) | def rot_to_quat(rot):
function quat_to_rot (line 116) | def quat_to_rot(normalized_quat):
function quat_multiply_by_vec (line 131) | def quat_multiply_by_vec(quat, vec):
function quat_multiply (line 139) | def quat_multiply(quat1, quat2):
function apply_rot_to_vec (line 147) | def apply_rot_to_vec(rot, vec, unstack=False):
function apply_rot_to_vec_np (line 162) | def apply_rot_to_vec_np(rot, vec, unstack=False):
function apply_inverse_rot_to_vec (line 177) | def apply_inverse_rot_to_vec(rot, vec):
class QuatAffine (line 190) | class QuatAffine(object):
method __init__ (line 193) | def __init__(self,
method to_tensor (line 227) | def to_tensor(self):
method stop_rot_gradient (line 230) | def stop_rot_gradient(self):
method scale_translation (line 243) | def scale_translation(self, position_scale):
method from_tensor (line 253) | def from_tensor(cls, tensor, normalize=False):
method pre_compose (line 259) | def pre_compose(self, update):
method apply_to_point (line 282) | def apply_to_point(self, point, extra_dims=0):
method invert_point (line 309) | def invert_point(self, transformed_point, extra_dims=0):
method invert (line 336) | def invert(self):
function _multiply (line 342) | def _multiply(a, b):
function make_canonical_transform (line 386) | def make_canonical_transform(
function make_transform_from_reference (line 464) | def make_transform_from_reference(
function _multiply_np (line 495) | def _multiply_np(a, b):
function make_canonical_transform_np (line 513) | def make_canonical_transform_np(
function make_transform_from_reference_np (line 586) | def make_transform_from_reference_np(
FILE: ppfleetx/models/protein_folding/r3.py
class Vecs (line 44) | class Vecs:
method __init__ (line 45) | def __init__(self, *args):
method map (line 61) | def map(self, map_fn, *args):
method shape (line 73) | def shape(self):
method x (line 77) | def x(self):
method y (line 81) | def y(self):
method z (line 85) | def z(self):
method __getitem__ (line 88) | def __getitem__(self, index):
method __str__ (line 91) | def __str__(self):
method __repr__ (line 94) | def __repr__(self):
method reshape (line 97) | def reshape(self, *argv):
class Rots (line 101) | class Rots:
method __init__ (line 102) | def __init__(self, *args):
method map (line 121) | def map(self, map_fn, *args):
method shape (line 137) | def shape(self):
method xx (line 141) | def xx(self):
method xy (line 145) | def xy(self):
method xz (line 149) | def xz(self):
method yx (line 153) | def yx(self):
method yy (line 157) | def yy(self):
method yz (line 161) | def yz(self):
method zx (line 165) | def zx(self):
method zy (line 169) | def zy(self):
method zz (line 173) | def zz(self):
method __getitem__ (line 176) | def __getitem__(self, index):
method __str__ (line 179) | def __str__(self):
method __repr__ (line 182) | def __repr__(self):
method reshape (line 185) | def reshape(self, *argv):
function squared_difference (line 189) | def squared_difference(x, y):
function invert_rigids (line 193) | def invert_rigids(r: Rigids) -> Rigids:
function invert_rots (line 201) | def invert_rots(m: Rots) -> Rots:
function rigids_from_3_points_vecs (line 206) | def rigids_from_3_points_vecs(
function rigids_from_3_points (line 231) | def rigids_from_3_points(point_on_neg_x_axis: paddle.Tensor,
function rigids_from_list (line 278) | def rigids_from_list(l: List[paddle.Tensor]) -> Rigids:
function rigids_from_quataffine (line 284) | def rigids_from_quataffine(a: quat_affine.QuatAffine) -> Rigids:
function rigids_from_tensor4x4 (line 289) | def rigids_from_tensor4x4(m: paddle.Tensor) -> Rigids:
function rigids_from_tensor_flat9 (line 306) | def rigids_from_tensor_flat9(m: paddle.Tensor) -> Rigids:
function rigids_from_tensor_flat12 (line 315) | def rigids_from_tensor_flat12(m: paddle.Tensor # shape (..., 12)
function rigids_mul_rigids (line 322) | def rigids_mul_rigids(a: Rigids, b: Rigids) -> Rigids:
function rigids_mul_rots (line 329) | def rigids_mul_rots(r: Rigids, m: Rots) -> Rigids:
function rigids_mul_vecs (line 334) | def rigids_mul_vecs(r: Rigids, v: Vecs) -> Vecs:
function rigids_to_list (line 339) | def rigids_to_list(r: Rigids) -> List[paddle.Tensor]:
function rigids_to_quataffine (line 344) | def rigids_to_quataffine(r: Rigids) -> quat_affine.QuatAffine:
function rigids_to_tensor_flat9 (line 352) | def rigids_to_tensor_flat9(r: Rigids) -> paddle.Tensor: # shape (..., 9)
function rigids_to_tensor_flat12 (line 360) | def rigids_to_tensor_flat12(r: Rigids # shape (...)
function rots_from_tensor3x3 (line 371) | def rots_from_tensor3x3(
function rots_from_two_vecs (line 380) | def rots_from_two_vecs(e0_unnormalized: Vecs, e1_unnormalized: Vecs) -> ...
function broadcast_shape (line 409) | def broadcast_shape(x_shape, y_shape):
function broadcast_to (line 427) | def broadcast_to(x, broadcast_shape):
function rots_mul_rots (line 434) | def rots_mul_rots(a: Rots, b: Rots) -> Rots:
function rots_mul_vecs (line 442) | def rots_mul_vecs(m: Rots, v: Vecs) -> Vecs:
function vecs_add (line 456) | def vecs_add(v1: Vecs, v2: Vecs) -> Vecs:
function vecs_dot_vecs (line 461) | def vecs_dot_vecs(v1: Vecs, v2: Vecs) -> paddle.Tensor:
function vecs_cross_vecs (line 466) | def vecs_cross_vecs(v1: Vecs, v2: Vecs) -> Vecs:
function vecs_from_tensor (line 471) | def vecs_from_tensor(x: paddle.Tensor # shape (..., 3)
function vecs_robust_normalize (line 478) | def vecs_robust_normalize(v: Vecs, epsilon: float=1e-8) -> Vecs:
function vecs_robust_norm (line 491) | def vecs_robust_norm(v: Vecs, epsilon: float=1e-8) -> paddle.Tensor:
function vecs_sub (line 504) | def vecs_sub(v1: Vecs, v2: Vecs) -> Vecs:
function vecs_squared_distance (line 509) | def vecs_squared_distance(v1: Vecs, v2: Vecs) -> paddle.Tensor:
function vecs_to_tensor (line 515) | def vecs_to_tensor(v: Vecs # shape (...)
FILE: ppfleetx/models/protein_folding/residue_constants.py
function load_stereo_chemical_props (line 403) | def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]], Mapp...
function sequence_to_onehot (line 591) | def sequence_to_onehot(sequence: str,
function _make_standard_atom_mask (line 739) | def _make_standard_atom_mask() -> np.ndarray:
function chi_angle_atom (line 757) | def chi_angle_atom(atom_index: int) -> np.ndarray:
function _make_rigid_transformation_4x4 (line 801) | def _make_rigid_transformation_4x4(ex, ey, translation):
function _make_rigid_group_constants (line 831) | def _make_rigid_group_constants():
function make_atom14_dists_bounds (line 908) | def make_atom14_dists_bounds(overlap_tolerance=1.5,
FILE: ppfleetx/models/protein_folding/template.py
class TemplatePair (line 36) | class TemplatePair(nn.Layer):
method __init__ (line 42) | def __init__(self, channel_num, config, global_config):
method _parse_dropout_params (line 107) | def _parse_dropout_params(self, module):
method forward (line 119) | def forward(self, pair_act, pair_mask):
class SingleTemplateEmbedding (line 164) | class SingleTemplateEmbedding(nn.Layer):
method __init__ (line 170) | def __init__(self, channel_num, config, global_config):
method forward (line 190) | def forward(self, query_embedding, batch, mask_2d):
class TemplateEmbedding (line 290) | class TemplateEmbedding(nn.Layer):
method __init__ (line 297) | def __init__(self, channel_num, config, global_config):
method forward (line 308) | def forward(self, query_embedding, template_batch, mask_2d):
FILE: ppfleetx/models/vision_model/factory.py
function build (line 28) | def build(config):
FILE: ppfleetx/models/vision_model/general_classification_module.py
class GeneralClsModule (line 31) | class GeneralClsModule(BasicModule):
method __init__ (line 32) | def __init__(self, configs):
method get_model (line 56) | def get_model(self):
method qat_model (line 62) | def qat_model(self):
method forward (line 66) | def forward(self, inputs):
method training_step (line 69) | def training_step(self, batch):
method training_step_end (line 84) | def training_step_end(self, log_dict):
method validation_step (line 91) | def validation_step(self, batch):
method validation_step_end (line 118) | def validation_step_end(self, log_dict):
method input_spec (line 126) | def input_spec(self):
method training_epoch_end (line 132) | def training_epoch_end(self, log_dict):
method validation_epoch_end (line 136) | def validation_epoch_end(self, log_dict):
class GeneralClsModuleAuto (line 162) | class GeneralClsModuleAuto(BasicModule):
method __init__ (line 163) | def __init__(self, configs):
method get_model (line 177) | def get_model(self):
method input_spec (line 183) | def input_spec(self):
FILE: ppfleetx/models/vision_model/layers/attention.py
class ViTAttention (line 21) | class ViTAttention(nn.Layer):
method __init__ (line 22) | def __init__(self,
method _init_weights (line 41) | def _init_weights(self, m):
method forward (line 46) | def forward(self, x):
FILE: ppfleetx/models/vision_model/layers/droppath.py
function drop_path (line 19) | def drop_path(x, drop_prob=0., training=False):
class DropPath (line 38) | class DropPath(nn.Layer):
method __init__ (line 42) | def __init__(self, drop_prob=None):
method forward (line 46) | def forward(self, x):
FILE: ppfleetx/models/vision_model/layers/embedding.py
class ViTPatchEmbed (line 19) | class ViTPatchEmbed(nn.Layer):
method __init__ (line 23) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 38) | def forward(self, x):
FILE: ppfleetx/models/vision_model/layers/identity.py
class Identity (line 21) | class Identity(nn.Layer):
method __init__ (line 22) | def __init__(self):
method forward (line 25) | def forward(self, input):
FILE: ppfleetx/models/vision_model/layers/initializer.py
function xavier_uniform_2d_ (line 27) | def xavier_uniform_2d_(param, axis=-1):
FILE: ppfleetx/models/vision_model/layers/mlp.py
class ViTMLP (line 22) | class ViTMLP(nn.Layer):
method __init__ (line 23) | def __init__(self,
method _init_weights (line 39) | def _init_weights(self, m):
method forward (line 44) | def forward(self, x):
FILE: ppfleetx/models/vision_model/loss/cross_entropy.py
class CELoss (line 25) | class CELoss(nn.Layer):
method __init__ (line 30) | def __init__(self, epsilon=None):
method _labelsmoothing (line 36) | def _labelsmoothing(self, target, class_num):
method forward (line 45) | def forward(self, x, label):
class ViTCELoss (line 64) | class ViTCELoss(nn.Layer):
method __init__ (line 69) | def __init__(self, epsilon=None):
method forward (line 75) | def forward(self, x, label):
FILE: ppfleetx/models/vision_model/metrics/accuracy.py
class TopkAcc (line 19) | class TopkAcc(nn.Layer):
method __init__ (line 20) | def __init__(self, topk=(1, 5)):
method forward (line 27) | def forward(self, x, label):
FILE: ppfleetx/models/vision_model/moco/moco.py
function concat_all_gather (line 36) | def concat_all_gather(tensor):
class MoCoV2Projector (line 50) | class MoCoV2Projector(nn.Layer):
method __init__ (line 51) | def __init__(self, with_pool, in_dim, out_dim):
method forward (line 61) | def forward(self, x):
class MoCoClassifier (line 70) | class MoCoClassifier(nn.Layer):
method __init__ (line 71) | def __init__(self, with_pool, num_features, num_classes):
method forward (line 86) | def forward(self, x):
class MoCo (line 94) | class MoCo(nn.Layer):
method __init__ (line 101) | def __init__(self,
method _update_momentum_encoder (line 136) | def _update_momentum_encoder(self):
method _dequeue_and_enqueue (line 147) | def _dequeue_and_enqueue(self, keys):
method _batch_shuffle_ddp (line 162) | def _batch_shuffle_ddp(self, x):
method _batch_unshuffle_ddp (line 190) | def _batch_unshuffle_ddp(self, x, idx_unshuffle):
method forward (line 208) | def forward(self, x1, x2):
FILE: ppfleetx/models/vision_model/moco_module.py
class MOCOModule (line 32) | class MOCOModule(BasicModule):
method __init__ (line 33) | def __init__(self, configs):
method get_model (line 47) | def get_model(self):
method forward (line 72) | def forward(self, img_q, img_k):
method training_step (line 75) | def training_step(self, batch):
method training_step_end (line 91) | def training_step_end(self, log_dict):
method input_spec (line 108) | def input_spec(self):
method training_epoch_end (line 114) | def training_epoch_end(self, log_dict):
class MOCOClsModule (line 119) | class MOCOClsModule(BasicModule):
method __init__ (line 120) | def __init__(self, configs):
method _freeze_backbone (line 144) | def _freeze_backbone(self, layer):
method get_model (line 154) | def get_model(self):
method forward (line 189) | def forward(self, inputs):
method training_step (line 192) | def training_step(self, batch):
method training_step_end (line 207) | def training_step_end(self, log_dict):
method validation_step (line 224) | def validation_step(self, batch):
method validation_step_end (line 251) | def validation_step_end(self, log_dict):
method input_spec (line 259) | def input_spec(self):
method training_epoch_end (line 265) | def training_epoch_end(self, log_dict):
method validation_epoch_end (line 269) | def validation_epoch_end(self, log_dict):
FILE: ppfleetx/models/vision_model/vit/vit.py
class FusedBlock (line 54) | class FusedBlock(nn.Layer):
method __init__ (line 55) | def __init__(self,
method forward (line 112) | def forward(self, x):
class Block (line 116) | class Block(nn.Layer):
method __init__ (line 117) | def __init__(self,
method forward (line 160) | def forward(self, x):
class ViT (line 166) | class ViT(nn.Layer):
method __init__ (line 170) | def __init__(self,
method _init_weights (line 257) | def _init_weights(self, m):
method forward_features (line 262) | def forward_features(self, x):
method forward (line 275) | def forward(self, x):
method state_dict (line 301) | def state_dict(self,
method set_state_dict (line 339) | def set_state_dict(self, state_dict, use_structured_name=True):
method load_pretrained (line 368) | def load_pretrained(self, prefix_path, finetune=False):
function ViT_tiny_patch16_224 (line 422) | def ViT_tiny_patch16_224(**kwargs):
function ViT_base_patch16_224 (line 435) | def ViT_base_patch16_224(**kwargs):
function ViT_base_patch16_384 (line 448) | def ViT_base_patch16_384(**kwargs):
function ViT_base_patch32_224 (line 462) | def ViT_base_patch32_224(**kwargs):
function ViT_base_patch32_384 (line 475) | def ViT_base_patch32_384(**kwargs):
function ViT_large_patch16_224 (line 489) | def ViT_large_patch16_224(**kwargs):
function ViT_large_patch16_384 (line 502) | def ViT_large_patch16_384(**kwargs):
function ViT_large_patch32_224 (line 516) | def ViT_large_patch32_224(**kwargs):
function ViT_large_patch32_384 (line 529) | def ViT_large_patch32_384(**kwargs):
function ViT_huge_patch14_224 (line 543) | def ViT_huge_patch14_224(**kwargs):
function ViT_huge_patch14_384 (line 556) | def ViT_huge_patch14_384(**kwargs):
function ViT_g_patch14_224 (line 570) | def ViT_g_patch14_224(**kwargs):
function ViT_G_patch14_224 (line 584) | def ViT_G_patch14_224(**kwargs):
function ViT_6B_patch14_224 (line 598) | def ViT_6B_patch14_224(**kwargs):
FILE: ppfleetx/optims/__init__.py
function build_lr_scheduler (line 29) | def build_lr_scheduler(lr_config):
function build_grad_clip (line 44) | def build_grad_clip(grad_clip_config):
function build_optimizer (line 55) | def build_optimizer(config, model, lr_scheduler=None):
FILE: ppfleetx/optims/grad_clip.py
class ClipGradForMOEByGlobalNorm (line 27) | class ClipGradForMOEByGlobalNorm(ClipGradBase):
method __init__ (line 28) | def __init__(self, clip_norm):
method __str__ (line 38) | def __str__(self):
method get_l2_norm_pow (line 42) | def get_l2_norm_pow(params_grads, sum_dtype=None):
method _dygraph_clip (line 93) | def _dygraph_clip(self, params_grads):
FILE: ppfleetx/optims/lr_scheduler.py
class CosineAnnealingWithWarmupDecay (line 31) | class CosineAnnealingWithWarmupDecay(LRScheduler):
method __init__ (line 32) | def __init__(self,
method get_lr (line 48) | def get_lr(self):
method step (line 61) | def step(self, epoch=None):
class LinearDecayWithWarmup (line 77) | class LinearDecayWithWarmup(LRScheduler):
method __init__ (line 78) | def __init__(self,
method get_lr (line 96) | def get_lr(self):
class ViTLRScheduler (line 103) | class ViTLRScheduler(LRScheduler):
method __init__ (line 104) | def __init__(self,
method get_lr (line 127) | def get_lr(self):
class MultiStepDecay (line 144) | class MultiStepDecay(lr.MultiStepDecay):
method __init__ (line 145) | def __init__(self,
class CosineDecay (line 162) | class CosineDecay(lr.LRScheduler):
method __init__ (line 163) | def __init__(self,
method get_lr (line 181) | def get_lr(self):
FILE: ppfleetx/optims/optimizer.py
class FusedAdamW (line 31) | class FusedAdamW(paddle.optimizer.AdamW):
method __init__ (line 32) | def __init__(self, learning_rate, parameters, grad_clip, **config):
FILE: ppfleetx/tools/multiprocess_tool.py
function process_fn (line 49) | def process_fn(cmd_list):
function read_command (line 59) | def read_command(shell_cmd_list_filename):
function parallel_process (line 68) | def parallel_process(cmd_list, nproc=20):
function main (line 87) | def main(args):
FILE: ppfleetx/utils/check.py
function check_version (line 27) | def check_version():
function check_device (line 43) | def check_device(device):
FILE: ppfleetx/utils/compression_helper.py
function get_pruned_params (line 19) | def get_pruned_params(model):
function prune_model (line 42) | def prune_model(model, configs, inputs_desc=[]):
function quant_model (line 77) | def quant_model(model, configs):
FILE: ppfleetx/utils/config.py
function process_dist_config (line 33) | def process_dist_config(configs):
function process_global_configs (line 104) | def process_global_configs(config):
function process_engine_config (line 151) | def process_engine_config(config):
class AttrDict (line 192) | class AttrDict(dict):
method __getattr__ (line 193) | def __getattr__(self, key):
method __setattr__ (line 196) | def __setattr__(self, key, value):
method __copy__ (line 202) | def __copy__(self):
method __deepcopy__ (line 208) | def __deepcopy__(self, memo):
method setdefault (line 218) | def setdefault(self, k, default=None):
function create_attr_dict (line 226) | def create_attr_dict(yaml_config):
function parse_config (line 242) | def parse_config(cfg_file):
function print_dict (line 284) | def print_dict(d, delimiter=0):
function print_config (line 304) | def print_config(config):
function check_config (line 314) | def check_config(config):
function override (line 333) | def override(dl, ks, v):
function override_config (line 370) | def override_config(config, options=None):
function get_config (line 398) | def get_config(fname, overrides=None, show=False):
function process_auto_dist_configs (line 418) | def process_auto_dist_configs(config):
function process_auto_global_configs (line 441) | def process_auto_global_configs(config):
function process_auto_engine_configs (line 481) | def process_auto_engine_configs(config):
function process_auto_strategy (line 515) | def process_auto_strategy(config):
function process_auto_ckpt_dir (line 593) | def process_auto_ckpt_dir(config):
function get_auto_config (line 616) | def get_auto_config(fname, overrides=None, show=False):
function parse_args (line 637) | def parse_args():
FILE: ppfleetx/utils/device.py
function get_device_and_mapping (line 19) | def get_device_and_mapping():
function get_device (line 36) | def get_device():
function synchronize (line 44) | def synchronize():
FILE: ppfleetx/utils/download.py
function is_url (line 27) | def is_url(path):
function _map_path (line 36) | def _map_path(url, root_dir):
function cached_path (line 43) | def cached_path(url_or_path, cache_dir=None):
function _download (line 68) | def _download(url, fullname):
function download (line 117) | def download(url, path):
FILE: ppfleetx/utils/export.py
function _prune_input_spec (line 24) | def _prune_input_spec(input_spec, program, targets):
function export_inference_model (line 44) | def export_inference_model(
FILE: ppfleetx/utils/file.py
function unzip (line 26) | def unzip(zip_path, mode="r", out_dir=None, delete=False):
function untar (line 35) | def untar(tar_path, mode="r:gz", out_dir=None, delete=False):
function parse_csv (line 44) | def parse_csv(path,
FILE: ppfleetx/utils/log.py
class Logger (line 65) | class Logger(object):
method __init__ (line 73) | def __init__(self, name: str=None):
method disable (line 100) | def disable(self):
method enable (line 103) | def enable(self):
method is_enable (line 107) | def is_enable(self) -> bool:
method __call__ (line 110) | def __call__(self, log_level: str, msg: str):
method use_terminator (line 117) | def use_terminator(self, terminator: str):
method processing (line 124) | def processing(self, msg: str, interval: float=0.1):
function advertise (line 153) | def advertise():
function get_timestamp (line 181) | def get_timestamp():
function convert_timestamp_to_data (line 188) | def convert_timestamp_to_data(timeStamp):
FILE: ppfleetx/utils/tensor_fusion_helper.py
function assign_group_by_size (line 30) | def assign_group_by_size(parameters, group_size=256 * 1024 * 1024):
function flatten_dense_tensors (line 43) | def flatten_dense_tensors(parameters):
function obtain_storage (line 78) | def obtain_storage(parameters):
function fused_parameters (line 90) | def fused_parameters(parameters, use_sharding=False):
function all_reduce_parameters (line 109) | def all_reduce_parameters(params, group):
FILE: ppfleetx/utils/version.py
function version_check (line 18) | def version_check():
FILE: projects/ernie/inference.py
function parse_args (line 31) | def parse_args():
function main (line 43) | def main(args):
FILE: projects/gpt/benchmark.py
function parse_args (line 26) | def parse_args():
function predict (line 44) | def predict(engine, data, args):
function main (line 67) | def main():
FILE: projects/gpt/inference.py
function parse_args (line 31) | def parse_args():
function main (line 41) | def main():
FILE: projects/vit/inference.py
function softmax (line 38) | def softmax(x):
function preprocess (line 42) | def preprocess(img_path):
FILE: setup.py
function fetch_requirements (line 21) | def fetch_requirements(path):
FILE: tools/train.py
function set_default_flags (line 38) | def set_default_flags(flags):
Condensed preview — 507 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,885K chars).
[
{
"path": ".gitignore",
"chars": 1174,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".pre-commit-config.yaml",
"chars": 1773,
"preview": "repos:\n- repo: https://github.com/Lucas-C/pre-commit-hooks.git\n sha: v1.0.1\n hooks:\n - id: remove-crlf\n "
},
{
"path": "Dockerfile",
"chars": 601,
"preview": "ARG BASE_IMAGE=registry.baidubce.com/paddlepaddle/paddle:2.4.1-gpu-cuda11.2-cudnn8.2-trt8.0\n\nFROM $BASE_IMAGE\n\nWORKDIR /"
},
{
"path": "LICENSE",
"chars": 11437,
"preview": "Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved\n\n Apache License\n "
},
{
"path": "README.md",
"chars": 4326,
"preview": "<p align=\"center\">\n <img src=\"./paddlefleetx-logo.png\" align=\"middle\" width=\"350\" />\n</p>\n\n---------------------------"
},
{
"path": "benchmarks/README.md",
"chars": 0,
"preview": ""
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C1/ernie_bs16_fp16_DP1-MP1-PP1.sh",
"chars": 1059,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C1/ernie_bs16_fp32_DP1-MP1-PP1.sh",
"chars": 1055,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C8/ernie_bs16_fp16_DP2-MP2-PP2.sh",
"chars": 1050,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N1C8/ernie_bs16_fp32_DP2-MP2-PP2.sh",
"chars": 1050,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP1-MP8-PP4.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP2-MP8-PP2.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp16_DP4-MP8-PP1.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP1-MP8-PP4.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP2-MP8-PP2.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/N4C32/ernie_bs16_fp32_DP4-MP8-PP1.sh",
"chars": 1051,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/prepare.sh",
"chars": 1249,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/ernie/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh",
"chars": 7475,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache Lice"
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_1024_bs64_fp16_DP8-MP1-PP1.sh",
"chars": 1127,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_1024_flash_bs64_fp16_DP8-MP1-PP1.sh",
"chars": 1133,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/data_parallel/N1C8/gpt_2048_bs64_fp16_DP8-MP1-PP1.sh",
"chars": 1119,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/data_parallel/benchmark_common/prepare.sh",
"chars": 957,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/data_parallel/benchmark_common/run_benchmark.sh",
"chars": 5314,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_CoLA_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1117,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_MRPC_acc_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1121,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_MRPC_f1_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1119,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_QNLI_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1117,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_RTE_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1115,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_SST2_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1117,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_STSB_pearson_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1129,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_STSB_spearman_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1131,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/N1C1/CE_gpt_finetune_WNLI_bs32_fp16_DP1-MP1-PP1.sh",
"chars": 1218,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/benchmark_common/prepare.sh",
"chars": 833,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/finetune/benchmark_common/run_benchmark.sh",
"chars": 4883,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C1/gpt_bs16_fp16_DP1-MP1-PP1.sh",
"chars": 1048,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C1/gpt_bs16_fp32_DP1-MP1-PP1.sh",
"chars": 1048,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C4/gpt_bs16_fp16_DP1-MP1-PP4.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C4/gpt_bs16_fp16_DP1-MP4-PP1.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP1-PP8.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP2-PP4.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP4-PP2.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP1-MP8-PP1.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp16_DP2-MP2-PP2.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs16_fp32_DP2-MP2-PP2.sh",
"chars": 1039,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs64_fp16_DP8-MP1-PP1.sh",
"chars": 1102,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_bs64_fp32_DP8-MP1-PP1.sh",
"chars": 1102,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_recompute_bs16_fp16_DP2-MP2-PP2.sh",
"chars": 1112,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N1C8/gpt_recompute_bs16_fp32_DP2-MP2-PP2.sh",
"chars": 1112,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP1-MP8-PP4.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP2-MP8-PP2.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp16_DP4-MP8-PP1.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP1-MP8-PP4.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP2-MP8-PP2.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/N4C32/gpt_bs16_fp32_DP4-MP8-PP1.sh",
"chars": 1040,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/prepare.sh",
"chars": 957,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/hybrid_parallel/benchmark_common/run_benchmark.sh",
"chars": 7389,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N1C8/gpt_sp_False_bs8_fp16_DP1-MP8-PP1.sh",
"chars": 1098,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N1C8/gpt_sp_True_bs8_fp16_DP1-MP8-PP1.sh",
"chars": 1096,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N4C32/gpt_sp_False_bs16_fp16_DP2-MP8-PP2.sh",
"chars": 1100,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/N4C32/gpt_sp_True_bs16_fp16_DP2-MP8-PP2.sh",
"chars": 1098,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/prepare.sh",
"chars": 957,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sequence_parallel/benchmark_common/run_benchmark.sh",
"chars": 6957,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage2_bs16_fp16_DP1-MP1-PP1-Sharding2.sh",
"chars": 1158,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage3_bs16_fp16_DP1-MP1-PP1-Sharding2.sh",
"chars": 1158,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/N1C2/gpt_stage3_bs16_fp32_DP1-MP1-PP1-Sharding2.sh",
"chars": 1158,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/N2C16/gpt_stage2_bs128_fp16_DP1-MP1-PP1-Sharding16.sh",
"chars": 1186,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/benchmark_common/prepare.sh",
"chars": 957,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/gpt/dygraph/sharding/benchmark_common/run_benchmark.sh",
"chars": 6733,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/gpt/static/auto_parallel/N1C1/gpt_auto_recompute_bs8_fp32_DP1-MP1-PP1.sh",
"chars": 1113,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/gpt/static/auto_parallel/benchmark_common/prepare.sh",
"chars": 960,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/gpt/static/auto_parallel/benchmark_common/run_benchmark.sh",
"chars": 7050,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache Lice"
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C1/imagen_397M_text2im_64_bs1_fp32_DP1-MP1-PP1.sh",
"chars": 1125,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C1/imagen_SR256_bs1_fp32_DP1-MP1-PP1.sh",
"chars": 1117,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_2B_text2im_64_bs8_fp32_DP1-Sharding8.sh",
"chars": 1225,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_397M_text2im_64_bs8_fp32_DP8-MP1-PP1.sh",
"chars": 1125,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_SR256_bs8_fp32_DP8-MP1-PP1.sh",
"chars": 1117,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/N1C8/imagen_text2im_64_debertav2_bs8_fp32_DP8-MP1-PP1.sh",
"chars": 1135,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/benchmark_common/prepare.sh",
"chars": 1916,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "benchmarks/test_tipc/imagen/dygraph/benchmark_common/run_benchmark.sh",
"chars": 5938,
"preview": "#!/usr/bin/env bash\n\n# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache Licen"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/finetune/N1C8/ViT_large_patch16_384_ft_fused_False_bs512_fp16_DP.sh",
"chars": 395,
"preview": "model_item=ViT_large_patch16_384_ft_fused_False\nfp_item=fp16\nbs_item=512\nrun_mode=DP\ndevice_num=N1C8\nuse_fused_attn=Fals"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/finetune/N1C8/ViT_large_patch16_384_ft_fused_True_bs512_fp16_DP.sh",
"chars": 393,
"preview": "model_item=ViT_large_patch16_384_ft_fused_True\nfp_item=fp16\nbs_item=512\nrun_mode=DP\ndevice_num=N1C8\nuse_fused_attn=True\n"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/finetune/benchmark_common/prepare.sh",
"chars": 982,
"preview": "# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/finetune/benchmark_common/run_benchmark.sh",
"chars": 4014,
"preview": "#!/usr/bin/env bash\n# Test training benchmark for a model.\n# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_it"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/pretrained/N2C16/ViT_large_patch16_224_pt_fused_False_bs128_fp16_DP.sh",
"chars": 400,
"preview": "model_item=ViT_large_patch16_224_pt_fused_False\nfp_item=fp16\nbs_item=128\nrun_mode=DP\ndevice_num=N2C16\nuse_fused_attn=Fal"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/pretrained/N2C16/ViT_large_patch16_224_pt_fused_True_bs128_fp16_DP.sh",
"chars": 398,
"preview": "model_item=ViT_large_patch16_224_pt_fused_True\nfp_item=fp16\nbs_item=128\nrun_mode=DP\ndevice_num=N2C16\nuse_fused_attn=True"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/pretrained/benchmark_common/prepare.sh",
"chars": 811,
"preview": "# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "benchmarks/test_tipc/vit/dygraph/pretrained/benchmark_common/run_benchmark.sh",
"chars": 4277,
"preview": "#!/usr/bin/env bash\n# Test training benchmark for a model.\n# Usage:bash benchmark/run_benchmark.sh ${model_item} ${fp_it"
},
{
"path": "codestyle/.gitignore",
"chars": 6,
"preview": "*.pyc\n"
},
{
"path": "codestyle/clang_format.hook",
"chars": 528,
"preview": "#!/bin/bash\nset -e\n\nreadonly VERSION=\"13.0.0\"\n\nversion=$(clang-format -version)\n\nif ! [[ $(python -V 2>&1 | awk '{print "
},
{
"path": "codestyle/copyright.hook",
"chars": 4260,
"preview": "# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "codestyle/cpplint_pre_commit.hook",
"chars": 932,
"preview": "#!/bin/bash\n\nTOTAL_ERRORS=0\n\nreadonly VERSION=\"1.6.0\"\n\nversion=$(cpplint --version)\n\nif [[ ! $TRAVIS_BRANCH ]]; then\n #"
},
{
"path": "codestyle/docstring_checker.py",
"chars": 10389,
"preview": "# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (th"
},
{
"path": "codestyle/pylint_pre_commit.hook",
"chars": 784,
"preview": "#!/bin/bash\n\nTOTAL_ERRORS=0\n\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" && pwd )\"\nexport PYTHONPATH=$DIR:$PYTHONPATH"
},
{
"path": "codestyle/test_docstring_checker.py",
"chars": 7640,
"preview": "# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (th"
},
{
"path": "docs/cluster_deployment.md",
"chars": 3841,
"preview": "\n## 集群部署\n\n本文档介绍在集群上使用分布式进行大模型训练的方法,包括在 Kubernetes 上使用 PaddlePaddle 分布式和在云上使用的方法。\n\n### 1. Kubernetes部署\n\n在 Kubernetes 上部署分"
},
{
"path": "docs/compression.md",
"chars": 3043,
"preview": "# 模型压缩\n\n------------------------------------------------------------------------------------------\n\n## **简介**\n\nPaddleFle"
},
{
"path": "docs/deployment_faq.md",
"chars": 25902,
"preview": "## 环境验证和常见问题\n\n本文为环境问题排查指引,包括环境正确性验证的方法和常见的一些问题解决方法。\n\n### 1. 单机环境验证\n\n以下验证不区分本机环境和 Docker 环境。\n\n**GPU验证**\n\n当使用 GPU 时,使用 `nv"
},
{
"path": "docs/docker_install.md",
"chars": 1539,
"preview": "\n## Docker 环境安装\n\n使用 Docker 首先需要安装 Docker 环境,安装的完整流程请参考[文档](https://docs.docker.com/engine/install/),基础安装流程如下所述。\n另外在 Doc"
},
{
"path": "docs/quick_start.md",
"chars": 7937,
"preview": "\n# 快速开始\n\n## 1. 环境准备\n\n这里介绍使用裸机或者 Docker 环境使用 PaddleFleetX 的方法,用户根据具体情况选择一种安装部署方式即可。\n使用多机训练时,需要在每台机器上都部署相应的环境。\n\n### 1.1 Do"
},
{
"path": "docs/standard.md",
"chars": 8979,
"preview": "## 模型接入规范\n\n本文讲述在PaddleFleetX repo接入一个新模型,该如何添加和修改文件,以及相应的规范化流程。\n\n### 1.PaddleFleetX 介绍\nPaddleFleetX是飞桨大模型训练推理一站式工具组件。与Pa"
},
{
"path": "examples/transformer/__init__.py",
"chars": 610,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/docs/README.md",
"chars": 9620,
"preview": "# GPT\n\n## 模型介绍\nGPT-[2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners"
},
{
"path": "examples/transformer/models/GPT/docs/hybrid_parallel.md",
"chars": 10695,
"preview": "# GPT 混合并行模型训练\n\n当训练超大模型时,就必须借助混合并行策略,混合并行策略分别指数据并行、张量模型并行、流水线并行和分组切片并行。其中数据并行保存完整的模型参数并独立处理一份子数据集,以加速模型训练过程;张量模型并行将网络中的张"
},
{
"path": "examples/transformer/models/GPT/docs/hybrid_profiler.md",
"chars": 10888,
"preview": "# Profiler\n\n本文档主要包括在 GPT 中开启 Profiler 并分析调试分析结果的方法,在模型开发中使用 Profiler 分析工具的方法请参考[教程](https://www.paddlepaddle.org.cn/docu"
},
{
"path": "examples/transformer/models/GPT/docs/inference.md",
"chars": 3737,
"preview": "\n# 推理部署\n\n模型训练完成后,可使用飞桨高性能推理引擎Paddle Inference通过如下方式进行推理部署。\n\n## 1. 模型导出\n\n首先需要安装`ppfleetx-ops`\n\n```bash\ncd PaddleFleetX/ #"
},
{
"path": "examples/transformer/models/GPT/docs/quantization_aware_training.md",
"chars": 4110,
"preview": "\n# GPT模型量化训练\n\n本项目对语言模型 GPT 进行量化训练。目前,PaddleFleetX 提供了 [GPT-345M量化模型](https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GP"
},
{
"path": "examples/transformer/models/GPT/docs/single_card.md",
"chars": 10021,
"preview": "# GPT 单卡模型训练\n\n## 运行方式\n\n本文档按照345M和1.3B规模大小,给出32G V100环境下GPT模型单卡训练的策略配置如下:\n\n| 模型规模 | 训练策略 | yaml文件 "
},
{
"path": "examples/transformer/models/GPT/docs/single_finetune.md",
"chars": 13753,
"preview": "# GPT2 微调\n\n本教程主要针对于 GLUE (General Language Understanding Evaluation) benchmark 中的数据集进行微调,涉及到分类和回归任务。\n\n## 下载 GPT345M 预训练模"
},
{
"path": "examples/transformer/models/GPT/docs/structured_pruning.md",
"chars": 2122,
"preview": "# GPT模型结构化稀疏\n\n本项目对语言模型 GPT 进行结构化稀疏(以下简称稀疏)。在 GPT 模型中,我们对 fused-qkv、out-linear、ffn1 和 ffn2 四层的权重进行了通道稀疏,其中,fused-qkv 和 ff"
},
{
"path": "examples/transformer/models/GPT/finetune/configs/finetune_gpt_345M_single_card_glue.yaml",
"chars": 2084,
"preview": "_base_: ./finetune_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 32\n micro_batch_size: 32\n \n run_m"
},
{
"path": "examples/transformer/models/GPT/finetune/configs/finetune_gpt_base.yaml",
"chars": 368,
"preview": "Global:\n device: gpu\n seed: 42\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\n run_mode: epoch\n "
},
{
"path": "examples/transformer/models/GPT/finetune/impls.py",
"chars": 7431,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/finetune/run.py",
"chars": 11063,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/finetune/run_task.sh",
"chars": 4692,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_gpt_345M_dp8.yaml",
"chars": 383,
"preview": "_base_: ./generation_gpt_base.yaml\n\nGeneration:\n top_k: 50\n top_p: 0.75\n temperature: 1.0\n min_dec_len: 1\n max_dec_"
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_gpt_345M_single_card.yaml",
"chars": 185,
"preview": "_base_: ./generation_gpt_base.yaml\n\nGeneration:\n top_k: 50\n top_p: 0.75\n temperature: 1.0\n min_dec_len: 1\n max_dec_"
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_gpt_base.yaml",
"chars": 1238,
"preview": "Global:\n device: gpu\n seed: 1024\n\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n max_steps: 5000"
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_pruned_gpt_345M_single_card.yaml",
"chars": 262,
"preview": "_base_: ./generation_gpt_base.yaml\n\nCompress:\n Prune:\n enable: True\n criterion: l1_norm\n ratio: 0.125\n\nGenerat"
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_qat_gpt_345M_single_card.yaml",
"chars": 527,
"preview": "_base_: ./generation_gpt_base.yaml\n\nGeneration:\n top_k: 50\n top_p: 0.75\n temperature: 1.0\n min_dec_len: 1\n max_dec_"
},
{
"path": "examples/transformer/models/GPT/generation/configs/generation_qat_gpt_6.7B_single_card.yaml",
"chars": 857,
"preview": "_base_: ./generation_gpt_base.yaml\n\nModel:\n vocab_size: 50304\n hidden_size: 1024\n num_layers: 32\n num_attention_head"
},
{
"path": "examples/transformer/models/GPT/generation/configs/inference_gpt_345M_dp8.yaml",
"chars": 288,
"preview": "_base_: ./generation_gpt_345M_dp8.yaml\n\n\nInference:\n model_dir: ./output\n mp_degree: 1\n\n\nDistributed:\n dp_degree: \n "
},
{
"path": "examples/transformer/models/GPT/generation/configs/inference_gpt_345M_single_card.yaml",
"chars": 296,
"preview": "_base_: ./generation_gpt_345M_single_card.yaml\n\n\nInference:\n model_dir: ./output\n mp_degree: 1\n\n\nDistributed:\n dp_deg"
},
{
"path": "examples/transformer/models/GPT/generation/export.py",
"chars": 3075,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "examples/transformer/models/GPT/generation/impls.py",
"chars": 3524,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/generation/inference.py",
"chars": 2632,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "examples/transformer/models/GPT/generation/run.py",
"chars": 3370,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/offline-eval/configs/eval_gpt_345M_single_card.yaml",
"chars": 189,
"preview": "_base_: ./eval_gpt_base.yaml\n\n\nOffline_Eval:\n eval_path: ./wikitext-103/wiki.valid.tokens\n cloze_eval: False\n overlap"
},
{
"path": "examples/transformer/models/GPT/offline-eval/configs/eval_gpt_base.yaml",
"chars": 1730,
"preview": "Global:\n device: gpu\n seed: 1024\n\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n max_steps: 5000"
},
{
"path": "examples/transformer/models/GPT/offline-eval/configs/eval_pruned_gpt_345M_single_card.yaml",
"chars": 326,
"preview": "_base_: ./eval_gpt_base.yaml\n\n\nModel:\n hidden_dropout_prob: 0.0\n attention_probs_dropout_prob: 0.0\n\n\nCompress:\n Prune"
},
{
"path": "examples/transformer/models/GPT/offline-eval/configs/eval_qat_gpt_345M_single_card.yaml",
"chars": 740,
"preview": "_base_: ./eval_gpt_base.yaml\n\n\nCompress:\n pretrained:\n Quantization:\n enable: True\n weight_quantize_type: 'abs_m"
},
{
"path": "examples/transformer/models/GPT/offline-eval/impls.py",
"chars": 8717,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/offline-eval/run.py",
"chars": 8271,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/export_qat_gpt_345M_single_card.yaml",
"chars": 930,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_1.3B_dp8.yaml",
"chars": 635,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_1.3B_single_card.yaml",
"chars": 636,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_175B_mp8_pp16.yaml",
"chars": 719,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 1536\n micro_batch_size: 1\n\n\nModel:\n"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_345M_single_card.yaml",
"chars": 640,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_6.7B_sharding16.yaml",
"chars": 705,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n logging_f"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_base.yaml",
"chars": 1877,
"preview": "Global:\n device: gpu\n seed: 1024\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\n max_steps: 5000"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/pretrain_gpt_cn_345M_single_card.yaml",
"chars": 657,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n n"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/prune_gpt_345M_single_card.yaml",
"chars": 919,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n save_load"
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/qat_gpt_345M_mp8.yaml",
"chars": 1171,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 1\n\n\nModel:\n "
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/qat_gpt_345M_single_card.yaml",
"chars": 1210,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "examples/transformer/models/GPT/pretrain/configs/qat_gpt_6.7B_sharding16.yaml",
"chars": 1042,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n logging_f"
},
{
"path": "examples/transformer/models/GPT/pretrain/export.py",
"chars": 3150,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "examples/transformer/models/GPT/pretrain/impls.py",
"chars": 10111,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/pretrain/run.py",
"chars": 10897,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/pretrain_moe/configs/pretrain_moe_345M_single_card.yaml",
"chars": 1104,
"preview": "_base_: ./pretrain_moe_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 2\n max_steps:"
},
{
"path": "examples/transformer/models/GPT/pretrain_moe/configs/pretrain_moe_base.yaml",
"chars": 1712,
"preview": "Global:\n device: gpu\n seed: 1234\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\n max_steps: 5000"
},
{
"path": "examples/transformer/models/GPT/pretrain_moe/impls.py",
"chars": 10115,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/models/GPT/pretrain_moe/run.py",
"chars": 9427,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/utils/__init__.py",
"chars": 610,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/utils/components.py",
"chars": 7439,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "examples/transformer/utils/config.py",
"chars": 20377,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n# \n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "examples/transformer/utils/qat.py",
"chars": 2017,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "ppfleetx/__init__.py",
"chars": 610,
"preview": "# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_397M_text2im_64x64.yaml",
"chars": 1225,
"preview": "_base_: ./imagen_base.yaml\n\nGlobal:\n global_batch_size:\n local_batch_size: 1\n micro_batch_size: 1\n\n\nModel:\n name: im"
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_base.yaml",
"chars": 1465,
"preview": "Global:\n device: gpu\n seed: 1024\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\n\nEngine:\n max_st"
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_super_resolution_1024.yaml",
"chars": 1697,
"preview": "_base_: ./imagen_base.yaml\n\nGlobal:\n global_batch_size:\n local_batch_size: 1\n micro_batch_size: 1\n\n\nModel:\n name: im"
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_super_resolution_256.yaml",
"chars": 1295,
"preview": "_base_: ./imagen_base.yaml\n\nGlobal:\n global_batch_size:\n local_batch_size: 1\n micro_batch_size: 1\n\n\nModel:\n name: im"
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_text2im_64x64_DebertaV2.yaml",
"chars": 1242,
"preview": "_base_: ./imagen_base.yaml\n\nGlobal:\n global_batch_size:\n local_batch_size: 1\n micro_batch_size: 1\n\n\nModel:\n name: im"
},
{
"path": "ppfleetx/configs/multimodal/imagen/imagen_text2im_64x64_T5-11B.yaml",
"chars": 1219,
"preview": "_base_: ./imagen_base.yaml\n\nGlobal:\n global_batch_size:\n local_batch_size: 1\n micro_batch_size: 1\n\n\nModel:\n name: im"
},
{
"path": "ppfleetx/configs/nlp/ernie/auto/finetune_ernie_345M_single_card.yaml",
"chars": 609,
"preview": "_base_: ./finetune_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/ernie/auto/finetune_ernie_base.yaml",
"chars": 1697,
"preview": "Global:\n device: gpu\n seed: 1024\n binary_head: True\n \n global_batch_size: \n local_batch_size: 16\n micro_batch_siz"
},
{
"path": "ppfleetx/configs/nlp/ernie/auto/pretrain_ernie_base.yaml",
"chars": 2127,
"preview": "Global:\n device: gpu\n seed: 1024\n binary_head: True\n \n global_batch_size: \n local_batch_size: 1\n micro_batch_size"
},
{
"path": "ppfleetx/configs/nlp/ernie/auto/pretrain_ernie_base_345M_single_card.yaml",
"chars": 747,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/ernie/finetune_ernie_345M_single_card.yaml",
"chars": 692,
"preview": "_base_: ./finetune_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/ernie/finetune_ernie_base.yaml",
"chars": 2115,
"preview": "Global:\n device: gpu\n seed: 1024\n binary_head: True\n \n global_batch_size: \n local_batch_size: 16\n micro_batch_siz"
},
{
"path": "ppfleetx/configs/nlp/ernie/inference_ernie_345M_single_card.yaml",
"chars": 296,
"preview": "_base_: ./finetune_ernie_345M_single_card.yaml\n\n\nInference:\n model_dir: ./output\n mp_degree: 1\n\n\nDistributed:\n dp_deg"
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_base.yaml",
"chars": 2556,
"preview": "Global:\n device: gpu\n seed: 1024\n binary_head: True\n \n global_batch_size: \n local_batch_size: 1\n micro_batch_size"
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_base_175B_mp8_pp16.yaml",
"chars": 778,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 512\n micro_batch_size: 1\n\n\nModel:"
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_base_345M_single_card.yaml",
"chars": 775,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_base_3D.yaml",
"chars": 772,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 1\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_base_6.7B_sharding16.yaml",
"chars": 776,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 512\n micro_batch_size: 1\n\n\nModel:"
},
{
"path": "ppfleetx/configs/nlp/ernie/pretrain_ernie_large_single_card.yaml",
"chars": 725,
"preview": "_base_: ./pretrain_ernie_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n"
},
{
"path": "ppfleetx/configs/nlp/ernie/qat_ernie_base.yaml",
"chars": 2892,
"preview": "Global:\n device: gpu\n seed: 1024\n binary_head: True\n \n global_batch_size: \n local_batch_size: 1\n micro_batch_size"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/export_gpt_fp16_single_card.yaml",
"chars": 636,
"preview": "Global:\n device: gpu\n seed: 1024\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\nEngine:\n max_step"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/generation_gpt_175B_mp8.yaml",
"chars": 997,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n enable: True\n dtype: \"float16\"\n level: \"o2\"\n sc"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_mp2.yaml",
"chars": 974,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n enable: True\n dtype: \"float16\"\n level: \"o2\"\n sc"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_single_card.yaml",
"chars": 988,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n enable: True\n dtype: \"float16\"\n level: \"o2\"\n sc"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/generation_gpt_6.7B_mp1.yaml",
"chars": 989,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n enable: True\n dtype: \"float16\"\n level: \"o2\"\n sc"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_1.3B_dp8.yaml",
"chars": 572,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_1.3B_dp8_tuning.yaml",
"chars": 680,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_1.3B_single_card.yaml",
"chars": 573,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_345M_single_card.yaml",
"chars": 529,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_6.7B_sharding16.yaml",
"chars": 570,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n v"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/pretrain_gpt_base.yaml",
"chars": 1233,
"preview": "Global:\n device: gpu\n seed: 1024\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n\n\nEngine:\n max_st"
},
{
"path": "ppfleetx/configs/nlp/gpt/auto/qat_generation_gpt_345M_mp2.yaml",
"chars": 1046,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n enable: True\n dtype: \"float16\"\n level: \"o2\"\n sc"
},
{
"path": "ppfleetx/configs/nlp/gpt/eval_gpt_345M_single_card.yaml",
"chars": 238,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\n\nModel:\n module: GPTEvalModule\n\n\nOffline_Eval:\n eval_path: ./wikitext-10"
},
{
"path": "ppfleetx/configs/nlp/gpt/eval_pruned_gpt_345M_single_card.yaml",
"chars": 403,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\n\nEngine:\n save_load:\n ckpt_dir:\n\n\nModel:\n module: GPTEvalModule\n hid"
},
{
"path": "ppfleetx/configs/nlp/gpt/eval_qat_gpt_345M_single_card.yaml",
"chars": 787,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\n\nModel:\n module: GPTEvalModule\n\nCompress:\n pretrained:\n Quantization:\n "
},
{
"path": "ppfleetx/configs/nlp/gpt/export_qat_gpt_345M_single_card.yaml",
"chars": 930,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\nGlobal:\n global_batch_size: 8\n local_batch_size: 8\n micro_batch_size: 8\n\n\nModel:\n "
},
{
"path": "ppfleetx/configs/nlp/gpt/finetune_gpt_345M_single_card_glue.yaml",
"chars": 2123,
"preview": "_base_: ./finetune_gpt_base.yaml\n\nGlobal:\n global_batch_size: \n local_batch_size: 32\n micro_batch_size: 32\n \n\nEngine"
},
{
"path": "ppfleetx/configs/nlp/gpt/finetune_gpt_base.yaml",
"chars": 410,
"preview": "Global:\n device: gpu\n seed: 42\n\n global_batch_size: \n local_batch_size: 1\n micro_batch_size: 1\n \nEngine:\n run_mod"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_gpt_345M_dp8.yaml",
"chars": 431,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\nModel:\n module: GPTGenerationModule\n\nGeneration:\n top_k: 50\n top_p: 0.7"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_gpt_345M_mp1.yaml",
"chars": 677,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n level:\n\n\nGeneration:\n top_k: 50\n top_p: 0.75\n tempera"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_gpt_345M_single_card.yaml",
"chars": 233,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\nModel:\n module: GPTGenerationModule\n\nGeneration:\n top_k: 50\n top_p: 0.7"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_gpt_6.7B_single_mp1.yaml",
"chars": 930,
"preview": "_base_: ./pretrain_gpt_base.yaml\n\n\nEngine:\n mix_precision:\n level: \"o2\"\n scale_loss: 32768.0\n custom_black_lis"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_pruned_gpt_345M_single_card.yaml",
"chars": 310,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\nModel:\n module: GPTGenerationModule\n\nCompress:\n Prune:\n enable: True\n"
},
{
"path": "ppfleetx/configs/nlp/gpt/generation_qat_gpt_345M_single_card.yaml",
"chars": 574,
"preview": "_base_: ./pretrain_gpt_345M_single_card.yaml\n\nModel:\n module: GPTGenerationModule\n\nGeneration:\n top_k: 50\n top_p: 0.7"
}
]
// ... and 307 more files (download for full content)
About this extraction
This page contains the full source code of the PaddlePaddle/PaddleFleetX GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 507 files (2.6 MB), approximately 700.9k tokens, and a symbol index with 2117 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.