gitextract_ufof6x83/

├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README-zh.md
├── README.md
├── docker/
│   ├── Dockerfile.cu124
│   └── Dockerfile.cu126
├── docs/
│   ├── Makefile
│   ├── conf.py
│   ├── examples/
│   │   ├── config.rst
│   │   ├── cpgd_example.rst
│   │   ├── deepscaler_example.rst
│   │   ├── embodied_srpo_example.rst
│   │   ├── megatron_backend_example.rst
│   │   └── mm_eureka_example.rst
│   ├── hardware_tutorial/
│   │   ├── ascend_profiling_en.rst
│   │   ├── ascend_quickstart.rst
│   │   └── metax_quickstart.rst
│   ├── index.rst
│   ├── preparation/
│   │   ├── prepare_data.rst
│   │   └── reward_function.rst
│   ├── programming_guide/
│   │   ├── code_structure.rst
│   │   ├── siiRL_code_explained.rst
│   │   ├── siirl_architecture_guide.rst
│   │   └── srpo_code_explained.rst
│   ├── requirements-docs.txt
│   ├── start/
│   │   ├── install.rst
│   │   └── quickstart.rst
│   └── user_interface/
│       ├── filter_interface.rst
│       ├── metrics_interface.rst
│       ├── pipeline_interface.rst
│       └── reward_interface.rst
├── examples/
│   ├── cpgd_trainer/
│   │   ├── run_qwen2_5-7b.sh
│   │   ├── run_qwen2_5_vl-72b.sh
│   │   ├── run_qwen2_5_vl-7b.sh
│   │   ├── run_qwen3-1.7b.sh
│   │   └── run_qwen3-8b.sh
│   ├── custom_pipeline_example/
│   │   └── custom_grpo.py
│   ├── custom_reward/
│   │   ├── rewardfunc_gsm8k.py
│   │   └── run_qwen2_5-7b-custom_reward.sh
│   ├── dapo_trainer/
│   │   ├── run_qwen2_5-7b.sh
│   │   ├── run_qwen3-235b-megatron-gspo.sh
│   │   └── run_qwen3-8b.sh
│   ├── data_preprocess/
│   │   ├── deepscaler.py
│   │   ├── geo3k.py
│   │   ├── gsm8k.py
│   │   ├── math_dataset.py
│   │   └── mm_eureka.py
│   ├── embodied_srpo_trainer/
│   │   ├── run_openvla_oft_libero_goal.sh
│   │   ├── run_openvla_oft_libero_long.sh
│   │   ├── run_openvla_oft_libero_object.sh
│   │   └── run_openvla_oft_libero_spatial.sh
│   ├── experimental/
│   │   ├── marft/
│   │   │   ├── config/
│   │   │   │   ├── code_env.py
│   │   │   │   ├── math_env.py
│   │   │   │   ├── process.py
│   │   │   │   ├── workflow_marft.yaml
│   │   │   │   └── workflow_marft_code.yaml
│   │   │   └── run_qwen2_5-3b_marft.sh
│   │   └── multiturn_server/
│   │       └── run_qwen2_5-3b_grpo_multiturn_vllm.sh
│   ├── grpo_trainer/
│   │   ├── run_qwen2_5-32b-metax.sh
│   │   ├── run_qwen2_5-32b-npu.sh
│   │   ├── run_qwen2_5-72b-npu.sh
│   │   ├── run_qwen2_5-7b-npu-e2e_prof.sh
│   │   ├── run_qwen2_5-7b-npu-mindspeed.sh
│   │   ├── run_qwen2_5-7b-npu.sh
│   │   ├── run_qwen2_5-7b.sh
│   │   ├── run_qwen2_5_vl-72b.sh
│   │   ├── run_qwen2_5_vl-7b-npu.sh
│   │   ├── run_qwen2_5_vl-7b.sh
│   │   ├── run_qwen3-235b-megatron.sh
│   │   ├── run_qwen3-235b-npu-mindspeed.sh
│   │   ├── run_qwen3-30b-npu-mindspeed.sh
│   │   ├── run_qwen3-8b-megatron.sh
│   │   └── run_qwen3-8b.sh
│   ├── gspo_trainer/
│   │   ├── run_qwen3-1.7b.sh
│   │   ├── run_qwen3-235b-megatron.sh
│   │   └── run_qwen3-30b-gspo-megatron.sh
│   ├── multi_turn/
│   │   ├── config/
│   │   │   ├── interaction_config/
│   │   │   │   └── gsm8k_interaction_config.yaml
│   │   │   └── tool_config/
│   │   │       └── gsm8k_tool_config.yaml
│   │   └── gsm8k/
│   │       └── run_qwen2_5-3b_grpo_multiturn_sglang.sh
│   └── ppo_trainer/
│       ├── run_qwen2_5-72b.sh
│       ├── run_qwen3-8b-megatron.sh
│       └── run_qwen3-8b.sh
├── pyproject.toml
├── requirements-npu.txt
├── requirements.txt
├── setup.py
├── siirl/
│   ├── __init__.py
│   ├── dag_worker/
│   │   ├── __init__.py
│   │   ├── checkpoint_manager.py
│   │   ├── constants.py
│   │   ├── core_algos.py
│   │   ├── dag_utils.py
│   │   ├── dagworker.py
│   │   ├── data_structures.py
│   │   ├── metric_aggregator.py
│   │   ├── metrics_collector.py
│   │   └── validator.py
│   ├── data_coordinator/
│   │   ├── __init__.py
│   │   ├── data_buffer.py
│   │   ├── dataloader/
│   │   │   ├── __init__.py
│   │   │   ├── data_loader_node.py
│   │   │   ├── embodied_preprocess.py
│   │   │   ├── partitioned_dataset.py
│   │   │   └── vision_utils.py
│   │   ├── protocol.py
│   │   └── sample.py
│   ├── engine/
│   │   ├── __init__.py
│   │   ├── actor/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── dp_actor.py
│   │   │   ├── embodied_actor.py
│   │   │   └── megatron_actor.py
│   │   ├── base_worker/
│   │   │   ├── __init__.py
│   │   │   ├── base/
│   │   │   │   ├── __init__.py
│   │   │   │   └── worker.py
│   │   │   ├── megatron/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── npu_mbridge_patch.py
│   │   │   │   └── worker.py
│   │   │   ├── register_center/
│   │   │   │   ├── __init__.py
│   │   │   │   └── register_center.py
│   │   │   └── resouce_pool.py
│   │   ├── critic/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── dp_critic.py
│   │   │   └── megatron_critic.py
│   │   ├── fsdp_workers.py
│   │   ├── megatron_workers.py
│   │   ├── reward_manager/
│   │   │   ├── __init__.py
│   │   │   ├── dapo.py
│   │   │   ├── embodied.py
│   │   │   ├── naive.py
│   │   │   └── parallel.py
│   │   ├── reward_model/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── megatron/
│   │   │       ├── __init__.py
│   │   │       └── reward_model.py
│   │   ├── rollout/
│   │   │   ├── __init__.py
│   │   │   ├── async_server.py
│   │   │   ├── base.py
│   │   │   ├── embodied_rollout.py
│   │   │   ├── hf_rollout.py
│   │   │   ├── schemas.py
│   │   │   ├── sglang_rollout/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── async_sglang_server.py
│   │   │   │   ├── sglang_rollout.py
│   │   │   │   └── utils.py
│   │   │   └── vllm_rollout/
│   │   │       ├── __init__.py
│   │   │       ├── vllm_async_server.py
│   │   │       └── vllm_rollout_spmd.py
│   │   └── sharding_manager/
│   │       ├── __init__.py
│   │       ├── base.py
│   │       ├── fsdp_hf.py
│   │       ├── fsdp_sglang.py
│   │       ├── fsdp_ulysses.py
│   │       ├── fsdp_vllm.py
│   │       ├── megatron_sglang.py
│   │       └── megatron_vllm.py
│   ├── environment/
│   │   └── embodied/
│   │       ├── __init__.py
│   │       ├── adapters/
│   │       │   ├── __init__.py
│   │       │   └── libero.py
│   │       ├── base.py
│   │       └── venv.py
│   ├── execution/
│   │   ├── dag/
│   │   │   ├── __init__.py
│   │   │   ├── builtin_pipelines.py
│   │   │   ├── config_loader.py
│   │   │   ├── node.py
│   │   │   ├── pipeline.py
│   │   │   ├── task_graph.py
│   │   │   └── task_loader.py
│   │   ├── metric_worker/
│   │   │   ├── metric_worker.py
│   │   │   └── utils.py
│   │   ├── rollout_flow/
│   │   │   ├── multi_agent/
│   │   │   │   ├── multiagent_generate.py
│   │   │   │   └── utils.py
│   │   │   └── multiturn/
│   │   │       ├── __init__.py
│   │   │       ├── agent_loop/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── agent_loop.py
│   │   │       │   ├── single_turn_agent_loop.py
│   │   │       │   └── tool_agent_loop.py
│   │   │       ├── interactions/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── base.py
│   │   │       │   ├── gsm8k_interaction.py
│   │   │       │   └── utils/
│   │   │       │       ├── __init__.py
│   │   │       │       └── interaction_registry.py
│   │   │       └── tools/
│   │   │           ├── __init__.py
│   │   │           ├── base_tool.py
│   │   │           ├── geo3k_tool.py
│   │   │           ├── gsm8k_tool.py
│   │   │           ├── mcp_base_tool.py
│   │   │           ├── mcp_search_tool.py
│   │   │           ├── sandbox_fusion_tools.py
│   │   │           ├── schemas.py
│   │   │           ├── search_tool.py
│   │   │           └── utils/
│   │   │               ├── __init__.py
│   │   │               ├── mcp_clients/
│   │   │               │   ├── McpClientManager.py
│   │   │               │   ├── __init__.py
│   │   │               │   └── utils.py
│   │   │               ├── search_r1_like_utils.py
│   │   │               └── tool_registry.py
│   │   └── scheduler/
│   │       ├── __init__.py
│   │       ├── enums.py
│   │       ├── graph_updater.py
│   │       ├── launch.py
│   │       ├── process_group_manager.py
│   │       ├── ray_actor_manager.py
│   │       ├── resource_manager.py
│   │       ├── reward.py
│   │       └── task_scheduler.py
│   ├── main_dag.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── embodied/
│   │   │   ├── openvla/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_prismatic.py
│   │   │   │   ├── modeling_prismatic.py
│   │   │   │   └── processing_prismatic.py
│   │   │   └── openvla_oft/
│   │   │       ├── __init__.py
│   │   │       ├── configuration_prismatic.py
│   │   │       ├── constants.py
│   │   │       ├── modeling_prismatic.py
│   │   │       ├── processing_prismatic.py
│   │   │       └── train_utils.py
│   │   ├── llama/
│   │   │   ├── __init__.py
│   │   │   └── megatron/
│   │   │       ├── __init__.py
│   │   │       ├── checkpoint_utils/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── llama_loader.py
│   │   │       │   ├── llama_loader_depracated.py
│   │   │       │   └── llama_saver.py
│   │   │       ├── layers/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── parallel_attention.py
│   │   │       │   ├── parallel_decoder.py
│   │   │       │   ├── parallel_linear.py
│   │   │       │   ├── parallel_mlp.py
│   │   │       │   └── parallel_rmsnorm.py
│   │   │       └── modeling_llama_megatron.py
│   │   ├── loader.py
│   │   ├── mcore/
│   │   │   ├── __init__.py
│   │   │   ├── config_converter.py
│   │   │   ├── loader.py
│   │   │   ├── mbridge.py
│   │   │   ├── model_forward.py
│   │   │   ├── model_forward_fused.py
│   │   │   ├── model_initializer.py
│   │   │   ├── patch_v012.py
│   │   │   ├── registry.py
│   │   │   ├── saver.py
│   │   │   ├── util.py
│   │   │   └── weight_converter.py
│   │   ├── model_utils/
│   │   │   ├── __init__.py
│   │   │   └── visual.py
│   │   ├── patcher.py
│   │   ├── qwen2/
│   │   │   ├── __init__.py
│   │   │   └── megatron/
│   │   │       ├── __init__.py
│   │   │       ├── checkpoint_utils/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── qwen2_loader.py
│   │   │       │   ├── qwen2_loader_depracated.py
│   │   │       │   └── qwen2_saver.py
│   │   │       ├── layers/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── parallel_attention.py
│   │   │       │   ├── parallel_decoder.py
│   │   │       │   ├── parallel_linear.py
│   │   │       │   ├── parallel_mlp.py
│   │   │       │   └── parallel_rmsnorm.py
│   │   │       └── modeling_qwen2_megatron.py
│   │   ├── registry.py
│   │   ├── transformers/
│   │   │   ├── __init__.py
│   │   │   ├── internvl.py
│   │   │   ├── internvl_chat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   ├── configuration_internlm2.py
│   │   │   │   ├── configuration_internvl_chat.py
│   │   │   │   ├── modeling_intern_vit.py
│   │   │   │   ├── modeling_internlm2.py
│   │   │   │   ├── modeling_internvl_chat.py
│   │   │   │   ├── tokenization_internlm2.py
│   │   │   │   └── tokenization_internlm2_fast.py
│   │   │   ├── kimi_vl.py
│   │   │   ├── llama.py
│   │   │   ├── monkey_patch.py
│   │   │   ├── npu_patch.py
│   │   │   ├── qwen2.py
│   │   │   ├── qwen2_5_vl.py
│   │   │   ├── qwen2_vl.py
│   │   │   └── transformers_compat.py
│   │   └── weight_loader_registry.py
│   ├── params/
│   │   ├── __init__.py
│   │   ├── dag_args.py
│   │   ├── data_args.py
│   │   ├── display_dict.py
│   │   ├── embodied_args.py
│   │   ├── model_args.py
│   │   ├── parser.py
│   │   ├── profiler_args.py
│   │   └── training_args.py
│   ├── third_party/
│   │   ├── __init__.py
│   │   └── sglang/
│   │       ├── __init__.py
│   │       └── parallel_state.py
│   ├── user_interface/
│   │   ├── filter_interface/
│   │   │   ├── __init__.py
│   │   │   ├── dapo.py
│   │   │   └── embodied.py
│   │   └── rewards_interface/
│   │       └── custom_gsm8k_reward.py
│   └── utils/
│       ├── __init__.py
│       ├── checkpoint/
│       │   ├── __init__.py
│       │   ├── checkpoint_manager.py
│       │   ├── fsdp_checkpoint_manager.py
│       │   └── megatron_checkpoint_manager.py
│       ├── debug/
│       │   ├── __init__.py
│       │   ├── mstx_profile.py
│       │   ├── performance.py
│       │   └── profile.py
│       ├── embodied/
│       │   ├── __init__.py
│       │   ├── libero_utils.py
│       │   ├── openvla_utils.py
│       │   └── video_emb.py
│       ├── experimental/
│       │   ├── __init__.py
│       │   └── torch_functional.py
│       ├── extras/
│       │   ├── __init__.py
│       │   ├── device.py
│       │   ├── fs.py
│       │   ├── hdfs_io.py
│       │   ├── import_utils.py
│       │   ├── misc.py
│       │   ├── net_utils.py
│       │   ├── packages.py
│       │   ├── patch.py
│       │   ├── py_functional.py
│       │   └── ray_utils.py
│       ├── import_string.py
│       ├── kernel/
│       │   ├── __init__.py
│       │   ├── kernels.py
│       │   └── linear_cross_entropy.py
│       ├── logger/
│       │   ├── __init__.py
│       │   ├── aggregate_logger.py
│       │   ├── logging_utils.py
│       │   └── tracking.py
│       ├── megatron/
│       │   ├── __init__.py
│       │   ├── dist_checkpointing.py
│       │   ├── megatron_utils.py
│       │   ├── memory.py
│       │   ├── memory_buffer.py
│       │   ├── optimizer.py
│       │   ├── pipeline_parallel.py
│       │   ├── sequence_parallel.py
│       │   └── tensor_parallel.py
│       ├── memory_utils.py
│       ├── metrics/
│       │   ├── __init__.py
│       │   └── metric_utils.py
│       ├── model_utils/
│       │   ├── __init__.py
│       │   ├── activation_offload.py
│       │   ├── attention_utils.py
│       │   ├── flops_counter.py
│       │   ├── fsdp_utils.py
│       │   ├── model.py
│       │   ├── npu_utils.py
│       │   ├── seqlen_balancing.py
│       │   ├── tensordict_utils.py
│       │   ├── torch_dtypes.py
│       │   ├── torch_functional.py
│       │   ├── ulysses.py
│       │   └── vllm_utils.py
│       └── reward_score/
│           ├── __init__.py
│           ├── embodied.py
│           ├── geo3k.py
│           ├── gsm8k.py
│           ├── math.py
│           ├── math_batch.py
│           ├── math_dapo.py
│           ├── math_verify.py
│           ├── mm_eureka.py
│           ├── prime_code/
│           │   ├── __init__.py
│           │   ├── testing_util.py
│           │   └── utils.py
│           ├── prime_math/
│           │   ├── __init__.py
│           │   ├── grader.py
│           │   └── math_normalize.py
│           ├── sandbox_fusion/
│           │   ├── __init__.py
│           │   └── utils.py
│           └── search_r1_like_qa_em.py
└── tests/
    ├── __init__.py
    ├── dag/
    │   ├── test_config_loader.py
    │   ├── test_node.py
    │   ├── test_task_graph.py
    │   └── test_task_loader.py
    ├── dag_worker/
    │   ├── test_dag_worker.py
    │   ├── test_dapo_merge.py
    │   └── test_dapo_pipeline.py
    ├── data_buffer/
    │   ├── detailed_put_performance_test.py
    │   ├── performance_test_data_buffer.py
    │   └── test_data_buffer.py
    └── scheduler/
        ├── test_process_group_manager.py
        └── test_task_scheduler.py