gitextract_g7clz9ov/

├── data/
│   └── torl_data/
│       ├── test.parquet
│       ├── test_0524.parquet
│       └── train.parquet
├── readme.md
├── requirements.txt
├── scripts/
│   └── torl_1.5b.sh
└── verl/
    ├── __init__.py
    ├── models/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── llama/
    │   │   ├── __init__.py
    │   │   └── megatron/
    │   │       ├── __init__.py
    │   │       ├── checkpoint_utils/
    │   │       │   ├── __init__.py
    │   │       │   ├── llama_loader.py
    │   │       │   └── llama_saver.py
    │   │       ├── layers/
    │   │       │   ├── __init__.py
    │   │       │   ├── parallel_attention.py
    │   │       │   ├── parallel_decoder.py
    │   │       │   ├── parallel_linear.py
    │   │       │   ├── parallel_mlp.py
    │   │       │   └── parallel_rmsnorm.py
    │   │       └── modeling_llama_megatron.py
    │   ├── qwen2/
    │   │   ├── __init__.py
    │   │   └── megatron/
    │   │       ├── __init__.py
    │   │       ├── checkpoint_utils/
    │   │       │   ├── __init__.py
    │   │       │   ├── qwen2_loader.py
    │   │       │   └── qwen2_saver.py
    │   │       ├── layers/
    │   │       │   ├── __init__.py
    │   │       │   ├── parallel_attention.py
    │   │       │   ├── parallel_decoder.py
    │   │       │   ├── parallel_linear.py
    │   │       │   ├── parallel_mlp.py
    │   │       │   └── parallel_rmsnorm.py
    │   │       └── modeling_qwen2_megatron.py
    │   ├── registry.py
    │   ├── transformers/
    │   │   ├── __init__.py
    │   │   ├── llama.py
    │   │   ├── monkey_patch.py
    │   │   ├── qwen2.py
    │   │   └── qwen2_vl.py
    │   └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller/
    │   ├── __init__.py
    │   ├── base/
    │   │   ├── __init__.py
    │   │   ├── decorator.py
    │   │   ├── megatron/
    │   │   │   ├── __init__.py
    │   │   │   ├── worker.py
    │   │   │   └── worker_group.py
    │   │   ├── register_center/
    │   │   │   ├── __init__.py
    │   │   │   └── ray.py
    │   │   ├── worker.py
    │   │   └── worker_group.py
    │   └── ray/
    │       ├── __init__.py
    │       ├── base.py
    │       └── megatron.py
    ├── third_party/
    │   ├── __init__.py
    │   └── vllm/
    │       ├── __init__.py
    │       ├── vllm_spmd/
    │       │   ├── __init__.py
    │       │   └── dtensor_weight_loaders.py
    │       ├── vllm_v_0_3_1/
    │       │   ├── __init__.py
    │       │   ├── arg_utils.py
    │       │   ├── config.py
    │       │   ├── llm.py
    │       │   ├── llm_engine_sp.py
    │       │   ├── model_loader.py
    │       │   ├── model_runner.py
    │       │   ├── parallel_state.py
    │       │   ├── tokenizer.py
    │       │   ├── weight_loaders.py
    │       │   └── worker.py
    │       ├── vllm_v_0_4_2/
    │       │   ├── __init__.py
    │       │   ├── arg_utils.py
    │       │   ├── config.py
    │       │   ├── dtensor_weight_loaders.py
    │       │   ├── hf_weight_loader.py
    │       │   ├── llm.py
    │       │   ├── llm_engine_sp.py
    │       │   ├── megatron_weight_loaders.py
    │       │   ├── model_loader.py
    │       │   ├── model_runner.py
    │       │   ├── parallel_state.py
    │       │   ├── spmd_gpu_executor.py
    │       │   ├── tokenizer.py
    │       │   └── worker.py
    │       ├── vllm_v_0_5_4/
    │       │   ├── __init__.py
    │       │   ├── arg_utils.py
    │       │   ├── config.py
    │       │   ├── dtensor_weight_loaders.py
    │       │   ├── hf_weight_loader.py
    │       │   ├── llm.py
    │       │   ├── llm_engine_sp.py
    │       │   ├── megatron_weight_loaders.py
    │       │   ├── model_loader.py
    │       │   ├── model_runner.py
    │       │   ├── parallel_state.py
    │       │   ├── spmd_gpu_executor.py
    │       │   ├── tokenizer.py
    │       │   └── worker.py
    │       └── vllm_v_0_6_3/
    │           ├── __init__.py
    │           ├── arg_utils.py
    │           ├── config.py
    │           ├── dtensor_weight_loaders.py
    │           ├── hf_weight_loader.py
    │           ├── llm.py
    │           ├── llm_engine_sp.py
    │           ├── megatron_weight_loaders.py
    │           ├── model_loader.py
    │           ├── model_runner.py
    │           ├── parallel_state.py
    │           ├── spmd_gpu_executor.py
    │           ├── tokenizer.py
    │           └── worker.py
    ├── trainer/
    │   ├── __init__.py
    │   ├── config/
    │   │   ├── evaluation.yaml
    │   │   ├── generation.yaml
    │   │   ├── ppo_megatron_trainer.yaml
    │   │   ├── ppo_trainer.yaml
    │   │   └── sft_trainer.yaml
    │   ├── fsdp_sft_trainer.py
    │   ├── main_eval.py
    │   ├── main_generation.py
    │   ├── main_ppo.py
    │   ├── ppo/
    │   │   ├── __init__.py
    │   │   ├── core_algos.py
    │   │   └── ray_trainer.py
    │   └── runtime_env.yaml
    ├── utils/
    │   ├── __init__.py
    │   ├── checkpoint/
    │   │   ├── __init__.py
    │   │   ├── checkpoint_manager.py
    │   │   └── fsdp_checkpoint_manager.py
    │   ├── config.py
    │   ├── dataset/
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── rl_dataset.py
    │   │   ├── rm_dataset.py
    │   │   └── sft_dataset.py
    │   ├── debug/
    │   │   ├── __init__.py
    │   │   ├── performance.py
    │   │   └── trajectory_tracker.py
    │   ├── distributed.py
    │   ├── flops_counter.py
    │   ├── fs.py
    │   ├── fsdp_utils.py
    │   ├── hdfs_io.py
    │   ├── import_utils.py
    │   ├── logger/
    │   │   ├── __init__.py
    │   │   └── aggregate_logger.py
    │   ├── logging_utils.py
    │   ├── megatron/
    │   │   ├── __init__.py
    │   │   ├── memory.py
    │   │   ├── optimizer.py
    │   │   ├── pipeline_parallel.py
    │   │   ├── sequence_parallel.py
    │   │   └── tensor_parallel.py
    │   ├── megatron_utils.py
    │   ├── memory_buffer.py
    │   ├── model.py
    │   ├── py_functional.py
    │   ├── ray_utils.py
    │   ├── rendezvous/
    │   │   ├── __init__.py
    │   │   └── ray_backend.py
    │   ├── reward_score/
    │   │   ├── __init__.py
    │   │   ├── eval.py
    │   │   ├── geo3k.py
    │   │   ├── gsm8k.py
    │   │   ├── math.py
    │   │   ├── math_verifier.py
    │   │   ├── prime_code/
    │   │   │   ├── __init__.py
    │   │   │   ├── testing_util.py
    │   │   │   └── utils.py
    │   │   └── prime_math/
    │   │       ├── __init__.py
    │   │       ├── grader.py
    │   │       └── math_normalize.py
    │   ├── seqlen_balancing.py
    │   ├── tokenizer.py
    │   ├── torch_dtypes.py
    │   ├── torch_functional.py
    │   ├── tracking.py
    │   └── ulysses.py
    ├── version/
    │   └── version
    └── workers/
        ├── __init__.py
        ├── actor/
        │   ├── __init__.py
        │   ├── base.py
        │   ├── dp_actor.py
        │   └── megatron_actor.py
        ├── critic/
        │   ├── __init__.py
        │   ├── base.py
        │   ├── dp_critic.py
        │   └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_manager/
        │   ├── __init__.py
        │   ├── naive.py
        │   └── prime.py
        ├── reward_model/
        │   ├── __init__.py
        │   ├── base.py
        │   └── megatron/
        │       ├── __init__.py
        │       └── reward_model.py
        ├── rollout/
        │   ├── __init__.py
        │   ├── base.py
        │   ├── hf_rollout.py
        │   ├── naive/
        │   │   ├── __init__.py
        │   │   └── naive_rollout.py
        │   ├── tokenizer.py
        │   └── vllm_rollout/
        │       ├── __init__.py
        │       ├── fire_vllm_rollout.py
        │       ├── qwen_agent/
        │       │   ├── code/
        │       │   │   ├── code_interpreter.py
        │       │   │   └── utils/
        │       │   │       └── code_utils.py
        │       │   ├── llm/
        │       │   │   └── schema.py
        │       │   ├── log.py
        │       │   ├── settings.py
        │       │   ├── tools/
        │       │   │   ├── base.py
        │       │   │   ├── code_interpreter.py
        │       │   │   └── python_executor.py
        │       │   └── utils/
        │       │       └── utils.py
        │       ├── vllm_rollout.py
        │       └── vllm_rollout_spmd.py
        └── sharding_manager/
            ├── __init__.py
            ├── base.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            └── megatron_vllm.py