SYMBOL INDEX (4974 symbols across 527 files) FILE: docs/source/conf.py function get_version (line 29) | def get_version(): FILE: docs/source_en/conf.py function get_version (line 29) | def get_version(): FILE: examples/custom/dataset.py class CustomPreprocessor (line 7) | class CustomPreprocessor(ResponsePreprocessor): method preprocess (line 13) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: FILE: examples/custom/model_hf.py class MyModelLoader (line 22) | class MyModelLoader(ModelLoader): method get_config (line 24) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_processor (line 27) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 30) | def get_model(self, model_dir: str, config: PretrainedConfig, processo... FILE: examples/custom/my_qwen2_5_omni/my_register.py class Qwen2_5OmniLoader (line 35) | class Qwen2_5OmniLoader(ModelLoader): method get_config (line 37) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_processor (line 45) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 54) | def get_model(self, model_dir: str, config: PretrainedConfig, processo... class Qwen2_5OmniTemplate (line 105) | class Qwen2_5OmniTemplate(Template): method init_processor (line 115) | def init_processor(self, processor) -> None: method replace_tag (line 129) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 160) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_bbox (line 167) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method packing_row (line 174) | def packing_row(self, row: List[Dict[str, Any]]) -> Dict[str, Any]: method _get_new_tokens_use_audio_in_video (line 187) | def _get_new_tokens_use_audio_in_video(self, i, *, video_grid_thw, vid... method _encode (line 214) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 306) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _get_position_ids (line 358) | def _get_position_ids(self, inputs: Dict[str, Any]): method _data_collator (line 381) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... method _data_collator_mm_data (line 396) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... method generate (line 412) | def generate(self, model, *args, **kwargs): FILE: examples/custom/my_qwen2_5_omni/test_register.py function infer_hf (line 13) | def infer_hf(): function test_my_qwen2_5_omni (line 63) | def test_my_qwen2_5_omni(): FILE: examples/deploy/agent/client.py function get_infer_request (line 8) | def get_infer_request(): function infer (line 31) | def infer(client, model: str, messages, tools): function infer_stream (line 49) | def infer_stream(client, model: str, messages, tools): FILE: examples/deploy/bert/client.py function infer_batch (line 6) | def infer_batch(engine: InferClient, infer_requests: List[InferRequest]): FILE: examples/deploy/client/llm/base/openai_client.py function infer (line 8) | def infer(client, model: str, messages): function run_client (line 21) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/client/llm/base/swift_client.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... function run_client (line 18) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/client/llm/chat/openai_client.py function infer (line 8) | def infer(client, model: str, messages): function infer_stream (line 18) | def infer_stream(client, model: str, messages): function run_client (line 28) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/client/llm/chat/swift_client.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... function infer_stream (line 25) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): function run_client (line 39) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/client/mllm/openai_client.py function infer (line 9) | def infer(client, model: str, messages): function infer_stream (line 19) | def infer_stream(client, model: str, messages): function get_message (line 29) | def get_message(mm_type: Literal['text', 'image', 'video', 'audio']): function run_client (line 79) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/client/mllm/swift_client.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... function infer_stream (line 18) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): function get_message (line 32) | def get_message(mm_type: Literal['text', 'image', 'video', 'audio']): function get_data (line 86) | def get_data(mm_type: Literal['text', 'image', 'video', 'audio']): function run_client (line 105) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/embedding/client.py function infer (line 8) | def infer(client, model: str, messages): function run_client (line 22) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/lora/client.py function infer_multilora (line 4) | def infer_multilora(engine: InferClient, infer_request: InferRequest): FILE: examples/deploy/reranker/client.py function infer (line 8) | def infer(client, model: str, messages): function run_client (line 16) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/reranker/client_generative.py function infer (line 8) | def infer(client, model: str, messages): function run_client (line 16) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/deploy/seq_cls/client.py function infer (line 8) | def infer(client, model: str, messages): function run_client (line 16) | def run_client(host: str = '127.0.0.1', port: int = 8000): FILE: examples/infer/demo.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... function infer_stream (line 19) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): FILE: examples/infer/demo_agent.py function infer (line 8) | def infer(engine: 'InferEngine', infer_request: 'InferRequest'): function infer_stream (line 26) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): function get_infer_request (line 53) | def get_infer_request(): function infer_continue_generate (line 79) | def infer_continue_generate(engine): FILE: examples/infer/demo_bert.py function infer_batch (line 9) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... FILE: examples/infer/demo_embedding.py function run_qwen3_emb (line 6) | def run_qwen3_emb(): function run_qwen3_vl_emb (line 33) | def run_qwen3_vl_emb(): FILE: examples/infer/demo_grounding.py function draw_bbox_qwen2_vl (line 9) | def draw_bbox_qwen2_vl(image, response, norm_bbox: Literal['norm1000', '... function infer_grounding (line 21) | def infer_grounding(): FILE: examples/infer/demo_hf.py function infer_hf (line 1) | def infer_hf(): function infer_swift (line 33) | def infer_swift(): FILE: examples/infer/demo_lora.py function infer_multilora (line 7) | def infer_multilora(infer_request: 'InferRequest', infer_backend: Litera... function infer_lora (line 37) | def infer_lora(infer_request: 'InferRequest'): FILE: examples/infer/demo_mllm.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... function infer_stream (line 19) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): function get_message (line 33) | def get_message(mm_type: Literal['text', 'image', 'video', 'audio']): function get_data (line 80) | def get_data(mm_type: Literal['text', 'image', 'video', 'audio']): FILE: examples/infer/demo_reranker.py function run_qwen3_reranker (line 6) | def run_qwen3_reranker(): function run_qwen3_vl_reranker (line 29) | def run_qwen3_vl_reranker(): FILE: examples/infer/demo_reward_model.py function infer_batch (line 8) | def infer_batch(engine: 'InferEngine', infer_requests: List['InferReques... FILE: examples/infer/demo_vllm_reasoning_parser.py function main (line 10) | def main(engine: VllmEngine): function streaming_example (line 43) | def streaming_example(engine: VllmEngine): FILE: examples/train/agent/loss_scale/infer_lora.py function infer (line 8) | def infer(engine: 'InferEngine', infer_request: 'InferRequest'): function infer_stream (line 26) | def infer_stream(engine: 'InferEngine', infer_request: 'InferRequest'): function get_infer_request (line 53) | def get_infer_request(): FILE: examples/train/embedding/qwen3/infer.py function run_qwen3_emb (line 9) | def run_qwen3_emb(): FILE: examples/train/grpo/plugin/deepeyes/deepeyes_plugin.py function extract_answer (line 63) | def extract_answer(action_string: str) -> Dict[str, any]: function extract_action (line 68) | def extract_action(action_string: str) -> Dict[str, Any]: function get_chat_template (line 73) | def get_chat_template(): function get_gpt4_score_ICE (line 82) | def get_gpt4_score_ICE(): function get_prompt (line 135) | def get_prompt(predict_str, ground_truth, question): function load_pil_image (line 151) | def load_pil_image(img): function rule_math_verify (line 182) | def rule_math_verify(ground_truth, model_answer): class DeepEyesReward (line 188) | class DeepEyesReward(ORM): method __init__ (line 190) | def __init__(self, args, **kwargs): method __call__ (line 202) | def __call__(self, completions, reward_model, extra_info, data_source,... method compute_score (line 220) | def compute_score(self, predict_str: str, ground_truth: str, extra_inf... method compute_score_math (line 290) | def compute_score_math(self, predict_str: str, ground_truth: str, extr... class VisualToolBoxScheduler (line 348) | class VisualToolBoxScheduler(MultiTurnScheduler): method __init__ (line 353) | def __init__(self, infer_engine=None, max_turns=None, *args, **kwargs): method check_finished (line 356) | def check_finished(self, infer_request, response_choice, current_turn): method step (line 370) | def step(self, infer_request, response_choice, current_turn): method validate_bbox (line 408) | def validate_bbox(self, left, top, right, bottom): method maybe_resize_bbox (line 417) | def maybe_resize_bbox(self, bbox, origin_width, origin_height): FILE: examples/train/grpo/plugin/gsm8k/gsm8k_plugin.py class GSM8KAccuracy (line 7) | class GSM8KAccuracy(ORM): method extract_answer (line 10) | def extract_answer(text: str) -> str: method __call__ (line 23) | def __call__(self, completions, solution, **kwargs) -> List[float]: class GSM8KFormat (line 38) | class GSM8KFormat(ORM): method __call__ (line 40) | def __call__(self, completions, **kwargs) -> List[float]: FILE: examples/train/grpo/plugin/plugin.py class CountdownORM (line 40) | class CountdownORM(ORM): method __call__ (line 42) | def __call__(self, completions, target, nums, **kwargs) -> List[float]: class MultiModalAccuracyORM (line 95) | class MultiModalAccuracyORM(ORM): method __call__ (line 97) | def __call__(self, completions, solution, **kwargs) -> List[float]: class MultiTurnThinkingTips (line 142) | class MultiTurnThinkingTips(ORM): method __init__ (line 157) | def __init__(self, args=None, **kwargs): method __call__ (line 162) | def __call__(self, completions, **kwargs) -> List[float]: class CodeReward (line 185) | class CodeReward(ORM): method __init__ (line 187) | def __init__(self, args=None, **kwargs): method extract_code (line 197) | def extract_code(completion: str, language: str) -> str: method run_async_from_sync (line 203) | def run_async_from_sync(self, scripts: List[str], languages: List[str]... method run_async (line 217) | async def run_async(self, scripts: List[str], languages: List[str]) ->... method run_script (line 238) | async def run_script(self, sbx, script: str, language: str) -> float: method __call__ (line 249) | def __call__(self, completions, **kwargs) -> List[float]: class CodeFormat (line 310) | class CodeFormat(ORM): method __call__ (line 312) | def __call__(self, completions, **kwargs) -> List[float]: class CodeRewardByJudge0 (line 326) | class CodeRewardByJudge0(ORM): method __init__ (line 373) | def __init__(self, args, **kwargs): method extract_code (line 384) | def extract_code(completion: str, language: str) -> str: method get_language_id (line 391) | def get_language_id(cls, language): method _evaluate_code (line 396) | async def _evaluate_code(self, code, test_cases, language_id): method run_async_from_sync (line 426) | def run_async_from_sync(self): method run_async (line 435) | async def run_async(self): method __call__ (line 444) | def __call__(self, completions, **kwargs) -> List[float]: class AsyncGenRMReward (line 463) | class AsyncGenRMReward(AsyncORM): method __init__ (line 494) | def __init__(self, args, **kwargs): method _build_eval_prompt (line 531) | def _build_eval_prompt(self, question: str, completion: str) -> str: method _extract_score (line 544) | def _extract_score(self, response: str) -> float: method _score_single (line 562) | async def _score_single(self, session, question: str, completion: str)... method __call__ (line 602) | async def __call__(self, completions, messages, **kwargs) -> List[float]: class ToolUseFormatReward (line 642) | class ToolUseFormatReward(ORM): method __init__ (line 644) | def __init__(self, args=None, **kwargs): method __call__ (line 649) | def __call__(self, completions, solution, **kwargs) -> List[float]: class ToolUseLengthReward (line 706) | class ToolUseLengthReward(ORM): method __init__ (line 708) | def __init__(self, args=None, **kwargs): method __call__ (line 714) | def __call__(self, completions, solution, **kwargs): class ToolUseCorrectnessReward (line 746) | class ToolUseCorrectnessReward(ORM): method __init__ (line 748) | def __init__(self, args=None, **kwargs): method match_score (line 757) | def match_score(self, list1, list2): method compute_tool_call_reward (line 776) | def compute_tool_call_reward(self, gt_tools, pd_tools, max_possible_re... method __call__ (line 838) | def __call__(self, completions, solution, **kwargs): class CustomizedRMPlugin (line 914) | class CustomizedRMPlugin: method __init__ (line 922) | def __init__(self, model, template): method __call__ (line 926) | def __call__(self, inputs, **kwargs): class QwenLongPlugin (line 934) | class QwenLongPlugin(DefaultRMPlugin): method __init__ (line 940) | def __init__(self, model, template, accuracy_orm=None): method __call__ (line 965) | def __call__(self, inputs, **kwargs): method prepare_rm_inputs (line 981) | def prepare_rm_inputs(self, inputs: List[Dict], completions, ground_tr... method extract_reward (line 1002) | def extract_reward(model_output: str) -> float: method compute_rewards (line 1017) | def compute_rewards(self, results: List[ChatCompletionResponse]) -> Li... class ToolCallScheduler (line 1076) | class ToolCallScheduler(MultiTurnScheduler): method __init__ (line 1079) | def __init__(self, *args, **kwargs): method _calculator_tool (line 1086) | def _calculator_tool(self, expression: str) -> str: method _extract_tool_calls (line 1157) | def _extract_tool_calls(self, text: str): method _execute_tools (line 1170) | def _execute_tools(self, tool_calls): method check_finished (line 1185) | def check_finished(self, infer_request: 'RolloutInferRequest', respons... method step (line 1194) | def step(self, infer_request: 'RolloutInferRequest', response_choice: ... class CustomEnv (line 1225) | class CustomEnv(Env): class CustomCtxManager (line 1232) | class CustomCtxManager(ContextManager): FILE: examples/train/grpo/plugin/treepo/tree_rollout.py class SampleStatus (line 12) | class SampleStatus(Enum): class FinishedReason (line 20) | class FinishedReason(Enum): class DataSampleTree (line 27) | class DataSampleTree: method root_node (line 51) | def root_node(self): method depth (line 55) | def depth(self): method response_num (line 59) | def response_num(self): method response_truncate (line 62) | def response_truncate(self, truncate_len: int): method extend_response (line 75) | def extend_response(self, choice: ChatCompletionResponseChoice): method extend_response_text (line 85) | def extend_response_text(self, response_text: str): method extend_logprobs (line 88) | def extend_logprobs(self, logprobs: List[float]): function _repeat_list_interleave (line 92) | def _repeat_list_interleave(any_list, repeat_times): function _increment_tree_idx_depth (line 97) | def _increment_tree_idx_depth( function extract_last_boxed (line 106) | def extract_last_boxed(text): class AbstractDivergence (line 115) | class AbstractDivergence: method calc_weights (line 118) | def calc_weights(cls, root_idx, samples_to_go_deeper, **kwargs) -> Lis... method allocate_with_weights (line 122) | def allocate_with_weights(cls, weights, budget, max_divergence) -> Lis... method apply (line 164) | def apply(cls, root_idx, samples_to_go_deeper, divergence_budget, max_... class LogProbDivergence (line 183) | class LogProbDivergence(AbstractDivergence): method calc_weights (line 186) | def calc_weights(cls, root_idx, samples_to_go_deeper, **kwargs) -> Lis... class AvgDivergence (line 204) | class AvgDivergence(AbstractDivergence): method calc_weights (line 207) | def calc_weights(cls, root_idx, samples_to_go_deeper, **kwargs) -> Lis... FILE: examples/train/grpo/plugin/treepo/tree_rollout_plugin.py class TreeRolloutScheduler (line 15) | class TreeRolloutScheduler(MultiTurnScheduler): method __init__ (line 40) | def __init__(self, infer_engine=None, max_turns=None, *args, **kwargs): method async_infer (line 50) | async def async_infer(self, method run (line 85) | async def run(self, infer_request: Union[List[RolloutInferRequest], Ro... method step (line 189) | def step(self, sample: DataSampleTree, **kwargs): method check_finished (line 205) | def check_finished(self, sample: DataSampleTree, output: ChatCompletio... method roll_back_to_divergence (line 225) | def roll_back_to_divergence( FILE: examples/train/reranker/qwen3/infer.py function run_qwen3_reranker (line 7) | def run_qwen3_reranker(): FILE: examples/train/rft/rft.py function do_sample (line 16) | def do_sample(model: str, model_type: str, dataset: List[str], iter: int): function do_train (line 101) | def do_train(model: str, model_type: str, datasets: List[str], iter, cmd... function do_eval (line 146) | def do_eval(model, model_type: str, iter): function replace_math_dataset (line 180) | def replace_math_dataset(): function main (line 193) | def main(): FILE: examples/train/rlhf/opsd/opsd_plugin.py class OpenThoughtsOPSDPreprocessor (line 21) | class OpenThoughtsOPSDPreprocessor(RowPreprocessor): method preprocess (line 28) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: FILE: scripts/benchmark/exp.py function parse_args (line 12) | def parse_args(): function llm_exp (line 33) | def llm_exp(): FILE: scripts/benchmark/exp_utils.py class Experiment (line 18) | class Experiment: method __init__ (line 46) | def __init__(self, method load (line 70) | def load(self, _json): method priority (line 80) | def priority(self): method to_dict (line 83) | def to_dict(self): class ExpManager (line 90) | class ExpManager: method __init__ (line 94) | def __init__(self): method assert_gpu_not_overlap (line 97) | def assert_gpu_not_overlap(self): method run (line 105) | def run(self, exp: Experiment): method _build_eval_cmd (line 148) | def _build_eval_cmd(self, exp: Experiment): method _build_cmd (line 174) | def _build_cmd(self, exp: Experiment): method _find_free_gpu (line 226) | def _find_free_gpu(self, n): method prepare_experiments (line 236) | def prepare_experiments(self, args: Any): method _get_metric (line 273) | def _get_metric(exp: Experiment): method write_record (line 309) | def write_record(exp: Experiment): method _poll (line 315) | def _poll(self): method begin (line 350) | def begin(self, args: Any): function find_all_config (line 380) | def find_all_config(dir_or_file: str): FILE: scripts/benchmark/generate_report.py class ModelOutput (line 13) | class ModelOutput: method tuner_hyper_params (line 64) | def tuner_hyper_params(self): method hyper_parameters (line 94) | def hyper_parameters(self): method train_speed (line 101) | def train_speed(self): method infer_speed (line 108) | def infer_speed(self): function generate_sft_report (line 114) | def generate_sft_report(outputs: List[ModelOutput]): function generate_export_report (line 213) | def generate_export_report(outputs: List[ModelOutput]): function parse_output (line 285) | def parse_output(file): function generate_reports (line 409) | def generate_reports(): FILE: scripts/utils/run_dataset_info.py function get_cache_mapping (line 13) | def get_cache_mapping(fpath): function get_dataset_id (line 33) | def get_dataset_id(key): function run_dataset (line 40) | def run_dataset(key, template, cache_mapping): function write_dataset_info (line 79) | def write_dataset_info() -> None: FILE: scripts/utils/run_model_info.py function get_url_suffix (line 8) | def get_url_suffix(model_id): function get_cache_mapping (line 14) | def get_cache_mapping(fpath): function get_model_info_table (line 32) | def get_model_info_table(): FILE: scripts/utils/test_link_valid.py function check_link (line 10) | def check_link(url): function extract_links_from_md (line 18) | def extract_links_from_md(file_path): function check_links_in_folder (line 25) | def check_links_in_folder(folder_path): FILE: setup.py function readme (line 9) | def readme(): function get_version (line 18) | def get_version(): function parse_requirements (line 24) | def parse_requirements(fname='requirements.txt', with_version=True): FILE: swift/agent_template/base.py class AgentKeyword (line 20) | class AgentKeyword: class ToolDesc (line 27) | class ToolDesc: class ReactCompatMixin (line 35) | class ReactCompatMixin: method _split_action_action_input (line 45) | def _split_action_action_input(response: str, keyword: AgentKeyword) -... method get_toolcall (line 60) | def get_toolcall(self, response: str) -> List[Function]: method _format_tool_responses (line 76) | def _format_tool_responses( method _parse_tool_call (line 113) | def _parse_tool_call(content) -> Dict[str, Any]: method _format_tool_calls (line 123) | def _format_tool_calls(self, tool_call_messages) -> str: class BaseAgentTemplate (line 143) | class BaseAgentTemplate(ReactCompatMixin, ABC): method _get_tool_name (line 157) | def _get_tool_name(tool): method unwrap_tool (line 161) | def unwrap_tool(tool): method wrap_tool (line 168) | def wrap_tool(tool): method _parse_tool (line 175) | def _parse_tool(tool, lang: Literal['zh', 'en']) -> ToolDesc: method _parse_json (line 196) | def _parse_json(json_str: str) -> Optional[Any]: method _format_tools (line 218) | def _format_tools(self, FILE: swift/agent_template/deepseek_v3_1.py class DeepSeekV31AgentTemplate (line 11) | class DeepSeekV31AgentTemplate(BaseAgentTemplate): method get_toolcall (line 13) | def get_toolcall(self, response: str) -> List[Function]: method _get_tool_responses (line 30) | def _get_tool_responses(self, tool_messages): method _get_tool_calls (line 34) | def _get_tool_calls(self, tool_calls: List[str]): method _format_tool_responses (line 37) | def _format_tool_responses( method _format_tools (line 48) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 80) | def _format_tool_calls(self, tool_call_messages): FILE: swift/agent_template/extra.py class ReactGRPOAgentTemplate (line 7) | class ReactGRPOAgentTemplate(BaseAgentTemplate): method _format_tools (line 9) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... FILE: swift/agent_template/glm4.py class ChatGLM4AgentTemplate (line 11) | class ChatGLM4AgentTemplate(BaseAgentTemplate): method _find_function_call (line 15) | def _find_function_call(single_content: str) -> Optional[Function]: method get_toolcall (line 24) | def get_toolcall(self, response: str) -> List[Function]: method _format_tools (line 36) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_responses (line 48) | def _format_tool_responses( method _format_tool_calls (line 65) | def _format_tool_calls(self, tool_call_messages) -> str: class GLM4AgentTemplate (line 73) | class GLM4AgentTemplate(ChatGLM4AgentTemplate): class GLM4_5AgentTemplate (line 77) | class GLM4_5AgentTemplate(BaseAgentTemplate): method _find_function_call (line 81) | def _find_function_call(single_content: str) -> Optional[Function]: method get_toolcall (line 94) | def get_toolcall(self, response: str) -> List[Function]: method _format_tools (line 106) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_responses (line 133) | def _format_tool_responses( method _format_tool_calls (line 155) | def _format_tool_calls(self, tool_call_messages) -> str: class GLM4_7AgentTemplate (line 171) | class GLM4_7AgentTemplate(GLM4_5AgentTemplate): FILE: swift/agent_template/hermes.py class HermesAgentTemplate (line 11) | class HermesAgentTemplate(BaseAgentTemplate): method get_toolcall (line 13) | def get_toolcall(self, response: str) -> List[Function]: method _get_tool_responses (line 25) | def _get_tool_responses(self, tool_messages): method _get_tool_calls (line 32) | def _get_tool_calls(self, tool_calls: List[str]): method _format_tool_responses (line 35) | def _format_tool_responses( method _format_tools (line 57) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 76) | def _format_tool_calls(self, tool_call_messages): class HunyuanHermesAgentTemplate (line 84) | class HunyuanHermesAgentTemplate(HermesAgentTemplate): method get_toolcall (line 86) | def get_toolcall(self, response: str) -> List[Function]: method _get_tool_responses (line 97) | def _get_tool_responses(self, tool_messages): method _get_tool_calls (line 105) | def _get_tool_calls(self, tool_calls: List[str]): method _format_tools (line 109) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... FILE: swift/agent_template/llama.py class Llama3AgentTemplate (line 11) | class Llama3AgentTemplate(BaseAgentTemplate): method get_toolcall (line 17) | def get_toolcall(self, response: str) -> List[Function]: method _format_tool_responses (line 31) | def _format_tool_responses( method _format_tools (line 46) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 60) | def _format_tool_calls(self, tool_call_messages) -> str: class Llama4AgentTemplate (line 69) | class Llama4AgentTemplate(Llama3AgentTemplate): FILE: swift/agent_template/minimax_m2.py class MinimaxM2AgentTemplate (line 11) | class MinimaxM2AgentTemplate(BaseAgentTemplate): method get_toolcall (line 23) | def get_toolcall(self, response: str) -> List[Function]: method _format_tool_responses (line 62) | def _format_tool_responses( method _format_tools (line 106) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 138) | def _format_tool_calls(self, tool_call_messages): FILE: swift/agent_template/mistral.py class MistralAgentTemplate (line 11) | class MistralAgentTemplate(BaseAgentTemplate): method get_toolcall (line 13) | def get_toolcall(self, response: str) -> List[Function]: method _format_tool_responses (line 32) | def _format_tool_responses( method _format_tools (line 55) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 60) | def _format_tool_calls(self, tool_call_messages): FILE: swift/agent_template/qwen.py class QwenEnAgentTemplate (line 13) | class QwenEnAgentTemplate(BaseAgentTemplate): method _get_tool_names_descs (line 16) | def _get_tool_names_descs(self, tools): method _format_tools (line 27) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... class QwenZhAgentTemplate (line 46) | class QwenZhAgentTemplate(BaseAgentTemplate): method _get_tool_names_descs (line 49) | def _get_tool_names_descs(self, tools): method _format_tools (line 60) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... class QwenEnParallelAgentTemplate (line 79) | class QwenEnParallelAgentTemplate(QwenEnAgentTemplate): method _format_tools (line 81) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... class QwenZhParallelAgentTemplate (line 108) | class QwenZhParallelAgentTemplate(QwenZhAgentTemplate): method _format_tools (line 110) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... FILE: swift/agent_template/qwen3_coder.py function render_extra_keys (line 10) | def render_extra_keys(obj, handled_keys): class Qwen3CoderAgentTemplate (line 33) | class Qwen3CoderAgentTemplate(HermesAgentTemplate): method _find_function_call (line 36) | def _find_function_call(single_content: str) -> Optional[Function]: method get_toolcall (line 62) | def get_toolcall(self, response: str) -> List[Function]: method _format_tools (line 75) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 134) | def _format_tool_calls(self, tool_call_messages): method _get_tool_responses (line 158) | def _get_tool_responses(self, tool_messages): class Qwen3_5AgentTemplate (line 166) | class Qwen3_5AgentTemplate(Qwen3CoderAgentTemplate): method _format_tools (line 168) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _get_tool_responses (line 178) | def _get_tool_responses(self, tool_messages): FILE: swift/agent_template/react.py class ReactEnAgentTemplate (line 7) | class ReactEnAgentTemplate(BaseAgentTemplate): method _format_tools (line 9) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... class ReactZnAgentTemplate (line 39) | class ReactZnAgentTemplate(BaseAgentTemplate): method _format_tools (line 41) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... FILE: swift/agent_template/seed_oss.py class SeedAgentTemplate (line 11) | class SeedAgentTemplate(BaseAgentTemplate): method _py_type (line 26) | def _py_type(t: str) -> str: method get_toolcall (line 29) | def get_toolcall(self, response: str) -> List[Function]: method _get_tool_responses (line 47) | def _get_tool_responses(self, tool_messages: List[dict]) -> str: method _format_tool_responses (line 51) | def _format_tool_responses( method _build_tool_def_string (line 63) | def _build_tool_def_string(self, tool: dict) -> str: method _format_tools (line 104) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 136) | def _format_tool_calls(self, tool_call_messages: List[dict]) -> str: FILE: swift/agent_template/toolbench.py class ToolBenchAgentTemplate (line 8) | class ToolBenchAgentTemplate(BaseAgentTemplate): method _format_tools (line 10) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... FILE: swift/agent_template/youtu.py class YoutuAgentTemplate (line 11) | class YoutuAgentTemplate(HermesAgentTemplate): method _get_tool_responses (line 19) | def _get_tool_responses(self, tool_messages): method _format_tool_responses (line 26) | def _format_tool_responses( method _format_tools (line 49) | def _format_tools(self, tools: List[Union[str, dict]], system: Optiona... method _format_tool_calls (line 61) | def _format_tool_calls(self, tool_call_messages): FILE: swift/arguments/app_args.py class AppArguments (line 15) | class AppArguments(WebUIArguments, DeployArguments): method _init_torch_dtype (line 41) | def _init_torch_dtype(self) -> None: method __post_init__ (line 48) | def __post_init__(self): FILE: swift/arguments/base_args/base_args.py function get_supported_tuners (line 25) | def get_supported_tuners(): class BaseArguments (line 31) | class BaseArguments(GenerationArguments, QuantizeArguments, DataArgument... method _prepare_training_args (line 100) | def _prepare_training_args(self, training_args: Dict[str, Any]) -> None: method _init_lazy_tokenize (line 103) | def _init_lazy_tokenize(self): method _import_external_plugins (line 119) | def _import_external_plugins(self): method _check_is_adapter (line 135) | def _check_is_adapter(adapter_dir: str) -> bool: method _init_adapters (line 142) | def _init_adapters(self): method __post_init__ (line 149) | def __post_init__(self): method _init_model_kwargs (line 181) | def _init_model_kwargs(self): method is_adapter (line 189) | def is_adapter(self) -> bool: method supported_tuners (line 193) | def supported_tuners(self): method adapters_can_be_merged (line 197) | def adapters_can_be_merged(self): method from_pretrained (line 201) | def from_pretrained(cls, checkpoint_dir: str): method _init_ckpt_dir (line 212) | def _init_ckpt_dir(self, adapters=None): method load_args_from_ckpt (line 222) | def load_args_from_ckpt(self) -> None: method save_args (line 279) | def save_args(self, output_dir=None) -> None: method _init_device (line 288) | def _init_device(self): method get_template (line 292) | def get_template(self, processor: Optional[Processor] = None, **kwargs... method get_model_processor (line 304) | def get_model_processor(self, FILE: swift/arguments/base_args/data_args.py class DataArguments (line 12) | class DataArguments: method _init_custom_dataset_info (line 99) | def _init_custom_dataset_info(self): method __post_init__ (line 106) | def __post_init__(self): method _init_val_dataset_exists (line 120) | def _init_val_dataset_exists(self): method get_dataset_kwargs (line 124) | def get_dataset_kwargs(self): FILE: swift/arguments/base_args/generation_args.py class GenerationArguments (line 12) | class GenerationArguments: method _init_stream (line 57) | def _init_stream(self): method get_request_config (line 61) | def get_request_config(self): FILE: swift/arguments/base_args/model_args.py class ModelArguments (line 17) | class ModelArguments: method _init_device_map (line 93) | def _init_device_map(self): method _init_max_memory (line 104) | def _init_max_memory(self): method _init_torch_dtype (line 118) | def _init_torch_dtype(self) -> None: method _init_mixed_precision (line 128) | def _init_mixed_precision(self): method _init_rope_scaling (line 142) | def _init_rope_scaling(self): method _init_model_info (line 189) | def _init_model_info(self) -> torch.dtype: method _init_new_special_tokens (line 203) | def _init_new_special_tokens(self): method __post_init__ (line 217) | def __post_init__(self): method get_model_kwargs (line 226) | def get_model_kwargs(self): FILE: swift/arguments/base_args/quant_args.py class QuantizeArguments (line 11) | class QuantizeArguments: method get_quantization_config (line 45) | def get_quantization_config(self): method get_modules_to_not_convert (line 99) | def get_modules_to_not_convert(self): method __post_init__ (line 116) | def __post_init__(self): FILE: swift/arguments/base_args/template_args.py class TemplateArguments (line 13) | class TemplateArguments: method __post_init__ (line 137) | def __post_init__(self): method get_template_kwargs (line 155) | def get_template_kwargs(self): FILE: swift/arguments/deploy_args.py class DeployArguments (line 13) | class DeployArguments(InferArguments): method __post_init__ (line 51) | def __post_init__(self): method _init_adapters (line 58) | def _init_adapters(self): method _init_ckpt_dir (line 75) | def _init_ckpt_dir(self, adapters=None): method _init_stream (line 78) | def _init_stream(self): class RolloutArguments (line 83) | class RolloutArguments(DeployArguments): method __post_init__ (line 111) | def __post_init__(self): method _set_default_engine_type (line 117) | def _set_default_engine_type(self): method _check_args (line 127) | def _check_args(self): method _check_device_count (line 138) | def _check_device_count(self): FILE: swift/arguments/eval_args.py class EvalArguments (line 15) | class EvalArguments(DeployArguments): method __post_init__ (line 65) | def __post_init__(self): method _init_eval_url (line 75) | def _init_eval_url(self): method list_eval_dataset (line 81) | def list_eval_dataset(eval_backend=None): method _init_eval_dataset (line 99) | def _init_eval_dataset(self): method _init_result_path (line 117) | def _init_result_path(self, folder_name: str) -> None: method _init_torch_dtype (line 125) | def _init_torch_dtype(self) -> None: FILE: swift/arguments/export_args.py class ExportArguments (line 16) | class ExportArguments(MergeArguments, BaseArguments): method load_args_from_ckpt (line 85) | def load_args_from_ckpt(self) -> None: method _init_output_dir (line 90) | def _init_output_dir(self): method __post_init__ (line 120) | def __post_init__(self): FILE: swift/arguments/infer_args.py class LmdeployArguments (line 17) | class LmdeployArguments: method get_lmdeploy_engine_kwargs (line 37) | def get_lmdeploy_engine_kwargs(self): class SglangArguments (line 51) | class SglangArguments: method get_sglang_engine_kwargs (line 103) | def get_sglang_engine_kwargs(self): class InferArguments (line 128) | class InferArguments(MergeArguments, LmdeployArguments, SglangArguments,... method _get_result_path (line 166) | def _get_result_path(self, folder_name: str) -> str: method _init_result_path (line 174) | def _init_result_path(self, folder_name: str) -> None: method _init_stream (line 184) | def _init_stream(self): method _init_ddp (line 193) | def _init_ddp(self): method __post_init__ (line 203) | def __post_init__(self) -> None: method _init_vllm_async_engine (line 216) | def _init_vllm_async_engine(self): FILE: swift/arguments/merge_args.py class MergeArguments (line 10) | class MergeArguments: FILE: swift/arguments/pretrain_args.py class PretrainArguments (line 8) | class PretrainArguments(SftArguments): FILE: swift/arguments/rlhf_args.py class RewardModelArguments (line 16) | class RewardModelArguments: class TeacherModelArguments (line 37) | class TeacherModelArguments: class PPOArguments (line 85) | class PPOArguments: class GRPOArguments (line 121) | class GRPOArguments(GRPOArgumentsMixin): class RLHFArguments (line 166) | class RLHFArguments(TeacherModelArguments, GRPOArguments, PPOArguments, ... method _prepare_training_args (line 262) | def _prepare_training_args(self, training_args: Dict[str, Any]) -> None: method __post_init__ (line 266) | def __post_init__(self): method _process_loss_type (line 306) | def _process_loss_type(self): method _init_grpo (line 330) | def _init_grpo(self): method _init_rollout (line 378) | def _init_rollout(self): method _init_padding_side (line 404) | def _init_padding_side(self): method _init_max_completion_length (line 409) | def _init_max_completion_length(self): method _init_metric_for_best_model (line 413) | def _init_metric_for_best_model(self): method _init_simpo (line 419) | def _init_simpo(self): method _init_rm (line 429) | def _init_rm(self): method _init_external_vllm (line 434) | def _init_external_vllm(self): method _set_default (line 451) | def _set_default(self): method _check_grpo (line 469) | def _check_grpo(self): method _external_vllm_warning (line 507) | def _external_vllm_warning(self): method _check_padding_free (line 517) | def _check_padding_free(self): method _check_sequence_parallel (line 526) | def _check_sequence_parallel(self): method _init_teacher_deepspeed (line 534) | def _init_teacher_deepspeed(self): method _check_gkd (line 556) | def _check_gkd(self): FILE: swift/arguments/sampling_args.py class SamplingArguments (line 15) | class SamplingArguments(BaseArguments): method _init_model_info (line 86) | def _init_model_info(self): method __post_init__ (line 95) | def __post_init__(self): FILE: swift/arguments/sft_args.py class SwanlabArguments (line 17) | class SwanlabArguments: method _init_swanlab (line 64) | def _init_swanlab(self): class SftArguments (line 123) | class SftArguments(SwanlabArguments, TunerArguments, BaseArguments, Seq2... method _check_padding_free (line 184) | def _check_padding_free(self): method __post_init__ (line 195) | def __post_init__(self) -> None: method _init_override (line 235) | def _init_override(self): method _init_deepspeed (line 246) | def _init_deepspeed(self): method _init_fsdp (line 277) | def _init_fsdp(self): method _check_fsdp2_compatibility (line 324) | def _check_fsdp2_compatibility(self): method _handle_pai_compat (line 354) | def _handle_pai_compat(self) -> None: method _add_version (line 366) | def _add_version(self): method _init_output_dir (line 385) | def _init_output_dir(self): method _init_eval_strategy (line 390) | def _init_eval_strategy(self): method _init_metric (line 402) | def _init_metric(self): FILE: swift/arguments/tuner_args.py class TunerArguments (line 12) | class TunerArguments: method __post_init__ (line 197) | def __post_init__(self): method _init_multimodal_full (line 204) | def _init_multimodal_full(self): FILE: swift/arguments/webui_args.py class WebUIArguments (line 6) | class WebUIArguments: FILE: swift/callbacks/activation_cpu_offload.py function is_torch_npu_available (line 16) | def is_torch_npu_available() -> bool: function _get_unique_tensor_key (line 30) | def _get_unique_tensor_key(tensor): function get_device_name (line 35) | def get_device_name() -> str: class FSDPParameterFilter (line 50) | class FSDPParameterFilter: method __init__ (line 52) | def __init__(self): method __call__ (line 55) | def __call__(self, tensor): method update_model_parameters (line 58) | def update_model_parameters(self, model): function get_torch_device (line 65) | def get_torch_device() -> Any: class CpuOffloadHookWithOffloadHandler (line 78) | class CpuOffloadHookWithOffloadHandler: method __init__ (line 86) | def __init__( method __enter__ (line 97) | def __enter__(self): method __exit__ (line 101) | def __exit__(self, *args: Any): method on_save_for_backward (line 105) | def on_save_for_backward(self, tensor: torch.Tensor) -> Any: method on_get_saved_tensor (line 109) | def on_get_saved_tensor(self, saved_state: Any) -> torch.Tensor: class OffloadHandler (line 114) | class OffloadHandler: method __init__ (line 117) | def __init__(self) -> None: method tensor_push (line 120) | def tensor_push(self, tensor: torch.Tensor, **kwargs) -> Any: method tensor_pop (line 126) | def tensor_pop(self, tensor_tag: Any, **kwargs): class GroupCommitFunction (line 133) | class GroupCommitFunction(torch.autograd.Function): method forward (line 141) | def forward(ctx, tensor, cpu_offload_handler): method backward (line 149) | def backward(ctx, grad_output): class SynchronizedGroupOffloadHandler (line 159) | class SynchronizedGroupOffloadHandler(OffloadHandler): method __init__ (line 165) | def __init__(self, num_offload_group, tensor_need_offloading_checker=(... method groupid_reset (line 173) | def groupid_reset(self): method on_group_commit_forward (line 183) | def on_group_commit_forward(self): method on_group_commit_backward (line 189) | def on_group_commit_backward(self): method offload (line 195) | def offload(src_tensor, pin_memory=True): method reload (line 220) | def reload(state, non_blocking=None): method tensor_push (line 227) | def tensor_push(self, tensor: torch.Tensor, **kwargs): method tensor_pop (line 242) | def tensor_pop(self, tensor_tag, **kwargs): class AsyncDoubleBufferGroupOffloadHandler (line 253) | class AsyncDoubleBufferGroupOffloadHandler(SynchronizedGroupOffloadHandl... method __init__ (line 260) | def __init__( method tensor_push (line 295) | def tensor_push(self, tensor: torch.Tensor, **kwargs) -> Any: method tensor_pop (line 317) | def tensor_pop(self, tensor_tag, **kwargs): method bulk_offload_group (line 330) | def bulk_offload_group(self, group_to_offload): method synchronize_on_group_commit_forward (line 351) | def synchronize_on_group_commit_forward(self, current_group): method on_group_commit_forward (line 379) | def on_group_commit_forward(self): method bulk_reload_group (line 387) | def bulk_reload_group(self, group_to_reload): method on_group_commit_backward (line 405) | def on_group_commit_backward(self): function get_activation_offload_context (line 430) | def get_activation_offload_context(num_layers: int = 1, class ActivationHandler (line 448) | class ActivationHandler: method __init__ (line 450) | def __init__(self, offload_ctx, sync_func, tensor_filter, enable_ckpt): method pre_forward (line 461) | def pre_forward(self, module): method post_forward (line 466) | def post_forward(self, module): method _pack_kwargs (line 470) | def _pack_kwargs(self, *args, **kwargs): method _unpack_kwargs (line 479) | def _unpack_kwargs(self, flat_args, kwarg_keys): method _ckpt_forward (line 487) | def _ckpt_forward(self, forward_method, *args, **kwargs): method forward (line 501) | def forward(self, module, forward_method, *args, **kwargs): method wrap_module_forward_method (line 517) | def wrap_module_forward_method(self, module): function enable_activation_offloading (line 531) | def enable_activation_offloading(model, strategy, enable_ckpt=False): class ActivationCpuOffloadCallBack (line 592) | class ActivationCpuOffloadCallBack(TrainerCallback): method __init__ (line 594) | def __init__(self, args: TrainingArguments, trainer): method on_train_begin (line 597) | def on_train_begin(self, args: TrainingArguments, state: TrainerState,... FILE: swift/callbacks/adalora.py class AdaloraCallback (line 11) | class AdaloraCallback(TrainerCallback): method __init__ (line 13) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method on_train_begin (line 19) | def on_train_begin(self, _args, state, control, **kwargs): method on_step_end (line 30) | def on_step_end(self, _args, state, control, **kwargs): FILE: swift/callbacks/base.py class TrainerCallback (line 9) | class TrainerCallback(HfTrainerCallback): method __init__ (line 11) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): FILE: swift/callbacks/deepspeed_elastic.py class DeepspeedElasticCallback (line 10) | class DeepspeedElasticCallback(TrainerCallback): method __init__ (line 12) | def __init__(self, args=None, trainer=None): method on_init_end (line 16) | def on_init_end(self, args: TrainingArguments, state: TrainerState, co... class GracefulExitCallback (line 49) | class GracefulExitCallback(TrainerCallback): method __init__ (line 51) | def __init__(self, args=None, trainer=None): method on_step_end (line 59) | def on_step_end(self, args, state, control, **kwargs): method on_save (line 77) | def on_save(self, args, state, control, **kwargs): FILE: swift/callbacks/early_stop.py class EarlyStopCallback (line 15) | class EarlyStopCallback(TrainerCallback): method __init__ (line 18) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method on_save (line 24) | def on_save(self, args: 'TrainingArguments', state: TrainerState, cont... FILE: swift/callbacks/lisa.py class LISACallback (line 12) | class LISACallback(TrainerCallback): method __init__ (line 14) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method freeze_all_layers (line 36) | def freeze_all_layers(self): method on_step_begin (line 42) | def on_step_begin(self, args, state, control, **kwargs): method switch_active_layers (line 47) | def switch_active_layers(self): FILE: swift/callbacks/perf_log.py class PerfMetricsLogCallback (line 35) | class PerfMetricsLogCallback(TrainerCallback): method __init__ (line 38) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method on_init_end (line 44) | def on_init_end(self, args: 'TrainingArguments', state: TrainerState, ... method on_step_begin (line 62) | def on_step_begin(self, args: 'TrainingArguments', state: TrainerState... method on_step_end (line 65) | def on_step_end(self, args: 'TrainingArguments', state: TrainerState, ... method on_log (line 68) | def on_log(self, args: 'TrainingArguments', state: TrainerState, contr... method _estimate_device_tflops_by_dtype (line 77) | def _estimate_device_tflops_by_dtype(device: torch.device, dtype: torc... method _retrieve_flops_from_map (line 121) | def _retrieve_flops_from_map(device): FILE: swift/cli/_megatron/main.py function cli_main (line 17) | def cli_main(): FILE: swift/cli/main.py function use_torchrun (line 29) | def use_torchrun() -> bool: function get_torchrun_args (line 37) | def get_torchrun_args() -> Optional[List[str]]: function prepare_config_args (line 49) | def prepare_config_args(argv): function cli_main (line 84) | def cli_main(route_mapping: Optional[Dict[str, str]] = None, is_megatron... FILE: swift/cli/merge_lora.py class SwiftMergeLoRA (line 6) | class SwiftMergeLoRA(SwiftPipeline): method run (line 10) | def run(self): FILE: swift/cli/sft.py function try_init_unsloth (line 4) | def try_init_unsloth(): FILE: swift/cli/utils.py function try_use_single_device_mode (line 5) | def try_use_single_device_mode(): FILE: swift/dataloader/dispatcher.py class DataLoaderDispatcher (line 8) | class DataLoaderDispatcher: method __init__ (line 10) | def __init__(self, base_dataloader, device=None, skip_batches: int = 0): method rank (line 16) | def rank(self): method world_size (line 20) | def world_size(self): method group (line 24) | def group(self): method _scatter_object_list (line 27) | def _scatter_object_list(self, inputs): method _skip_batches (line 35) | def _skip_batches(self, base_iter): method __iter__ (line 40) | def __iter__(self): FILE: swift/dataloader/shard.py class BatchSamplerShard (line 10) | class BatchSamplerShard: method __init__ (line 12) | def __init__( method rank (line 38) | def rank(self): method world_size (line 42) | def world_size(self): method __iter__ (line 45) | def __iter__(self): method set_epoch (line 70) | def set_epoch(self, epoch: int): method __len__ (line 73) | def __len__(self) -> int: class DataLoaderShard (line 80) | class DataLoaderShard(DataLoader): method __init__ (line 82) | def __init__(self, dataset, device=None, **dataloader_params): method set_epoch (line 86) | def set_epoch(self, epoch: int): method __iter__ (line 92) | def __iter__(self): FILE: swift/dataset/dataset/llm.py class AlpacaZhPreprocessor (line 15) | class AlpacaZhPreprocessor(AlpacaPreprocessor): method concat_inst_input (line 18) | def concat_inst_input(cls, instruction, input_): class LongAlpacaPreprocessor (line 33) | class LongAlpacaPreprocessor(AlpacaPreprocessor): method preprocess (line 35) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class RuozhibaPreprocessor (line 53) | class RuozhibaPreprocessor(RowPreprocessor): method preprocess (line 55) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class MathTrnPreprocessor (line 77) | class MathTrnPreprocessor(ResponsePreprocessor): method preprocess (line 79) | def preprocess(self, row): function _repair_ms_bench (line 93) | def _repair_ms_bench(messages: str) -> Optional[List[Dict[str, str]]]: function _repair_agent_messages (line 115) | def _repair_agent_messages(messages: List[Dict[str, str]], use_mini: boo... class FireflyPreprocessor (line 159) | class FireflyPreprocessor(ResponsePreprocessor): method preprocess (line 167) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class SyntheticText2SqlPreprocessor (line 214) | class SyntheticText2SqlPreprocessor(ResponsePreprocessor): method preprocess (line 216) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: function _repair_toolbench (line 234) | def _repair_toolbench(conversations: List[Dict[str, str]]) -> List[Dict[... class BlossomMathPreprocessor (line 250) | class BlossomMathPreprocessor(ResponsePreprocessor): method preprocess (line 252) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class TigerBotLawPreprocessor (line 276) | class TigerBotLawPreprocessor(ResponsePreprocessor): method preprocess (line 278) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class LeetcodePythonPreprocessor (line 305) | class LeetcodePythonPreprocessor(ResponsePreprocessor): method preprocess (line 307) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class StsbPreprocessor (line 325) | class StsbPreprocessor(RowPreprocessor): method __init__ (line 327) | def __init__(self, sim_threshold: Optional[float] = None): method preprocess (line 331) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class StsbGeneratePreprocessor (line 349) | class StsbGeneratePreprocessor(ResponsePreprocessor): method preprocess (line 355) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class StsbRegressionPreprocessor (line 362) | class StsbRegressionPreprocessor(StsbGeneratePreprocessor): method preprocess (line 364) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class MTEBRerankPreprocessor (line 386) | class MTEBRerankPreprocessor(RowPreprocessor): method preprocess (line 388) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: function _repair_conversations_agent_instruct (line 417) | def _repair_conversations_agent_instruct(s: str) -> List[Dict[str, Any]]: class MultiRoleAgentPreprocessor (line 432) | class MultiRoleAgentPreprocessor(RowPreprocessor): method preprocess (line 434) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class CoundownTaskPreprocessor (line 495) | class CoundownTaskPreprocessor(ResponsePreprocessor): method preprocess (line 497) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class HC3Preprocessor (line 516) | class HC3Preprocessor(ResponsePreprocessor): method preprocess (line 523) | def preprocess(self, row): class HC3ClsPreprocessor (line 532) | class HC3ClsPreprocessor(HC3Preprocessor): method preprocess (line 534) | def preprocess(self, row): class DureaderPreprocessor (line 591) | class DureaderPreprocessor(RowPreprocessor): method preprocess (line 593) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class HHRLHFPreprocessor (line 618) | class HHRLHFPreprocessor(RowPreprocessor): method _to_messages (line 621) | def _to_messages(data): method preprocess (line 628) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class XlamFunctionCallingPreprocessor (line 653) | class XlamFunctionCallingPreprocessor(RowPreprocessor): method preprocess (line 655) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class XlamFunctionCallingGRPOPreprocessor (line 663) | class XlamFunctionCallingGRPOPreprocessor(ResponsePreprocessor): method preprocess (line 665) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class HHRLHFCNPreprocessor (line 689) | class HHRLHFCNPreprocessor(MessagesPreprocessor): method preprocess (line 691) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: function repair_conversations (line 706) | def repair_conversations(s: Union[str, Any]) -> Any: class EmojiPreprocessr (line 724) | class EmojiPreprocessr(ResponsePreprocessor): method preprocess (line 726) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class GuanacoPreprocessor (line 762) | class GuanacoPreprocessor(RowPreprocessor): method preprocess (line 764) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class FunctionCallChatmlPreprocessor (line 805) | class FunctionCallChatmlPreprocessor(MessagesPreprocessor): method preprocess (line 807) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class Dolly15kPreprocessor (line 826) | class Dolly15kPreprocessor(RowPreprocessor): method preprocess (line 828) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class OrpoDPOMix40kPreprocessor (line 857) | class OrpoDPOMix40kPreprocessor(MessagesPreprocessor): method preprocess (line 859) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class SelfCognitionPreprocessor (line 882) | class SelfCognitionPreprocessor(ResponsePreprocessor): method __init__ (line 884) | def __init__(self, *args, query_suffix: str = '', response_prefix: str... method set_name_author (line 891) | def set_name_author(self, name, author): method preprocess (line 895) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: FILE: swift/dataset/dataset/mllm.py class ShareGPT4oPreprocessor (line 16) | class ShareGPT4oPreprocessor(MessagesPreprocessor): method preprocess (line 18) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: method prepare_dataset (line 29) | def prepare_dataset(self, dataset): class GPT4vDataset (line 51) | class GPT4vDataset(ResponsePreprocessor): method preprocess (line 53) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class GarbagePreprocessor (line 84) | class GarbagePreprocessor(ResponsePreprocessor): method preprocess (line 86) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class SA1BPairedCaptionPreprocessor (line 102) | class SA1BPairedCaptionPreprocessor(RowPreprocessor): method preprocess (line 104) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class SA1BDenseCaptionPreprocessor (line 129) | class SA1BDenseCaptionPreprocessor(RowPreprocessor): method preprocess (line 134) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class COCO2014Preprocess (line 161) | class COCO2014Preprocess(ResponsePreprocessor): method preprocess (line 163) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class MantisPreprocessor (line 185) | class MantisPreprocessor(MessagesPreprocessor): method __init__ (line 187) | def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] =... method prepare_dataset (line 191) | def prepare_dataset(self, dataset: HfDataset) -> HfDataset: method preprocess (line 201) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class LLaVADataPreprocessor (line 231) | class LLaVADataPreprocessor(MessagesPreprocessor): method prepare_dataset (line 233) | def prepare_dataset(self, dataset): method preprocess (line 239) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class PixelProsePreprocessor (line 276) | class PixelProsePreprocessor(RowPreprocessor): method preprocess (line 278) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class AIShell1Preprocessor (line 309) | class AIShell1Preprocessor(ResponsePreprocessor): method preprocess (line 311) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class EmoSchemaPreprocessor (line 330) | class EmoSchemaPreprocessor(ResponsePreprocessor): method prepare_dataset (line 332) | def prepare_dataset(self, dataset: HfDataset) -> HfDataset: method preprocess (line 344) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class EmoSchemaClsPreprocessor (line 362) | class EmoSchemaClsPreprocessor(EmoSchemaPreprocessor): method preprocess (line 364) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: function _generate_url_list (line 388) | def _generate_url_list(_url, _range): class LLaVAVideo178KPreprocessor (line 395) | class LLaVAVideo178KPreprocessor(MessagesPreprocessor): method __init__ (line 397) | def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] =... method prepare_dataset (line 448) | def prepare_dataset(self, dataset: HfDataset) -> HfDataset: method preprocess (line 453) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class MovieChat1KPreprocessor (line 483) | class MovieChat1KPreprocessor(ResponsePreprocessor): method prepare_dataset (line 485) | def prepare_dataset(self, dataset: HfDataset) -> HfDataset: method preprocess (line 511) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class VideoChatGPTPreprocessor (line 531) | class VideoChatGPTPreprocessor(ResponsePreprocessor): method prepare_dataset (line 533) | def prepare_dataset(self, dataset: HfDataset) -> HfDataset: method preprocess (line 542) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: function preprocess_mind2web (line 567) | def preprocess_mind2web(dataset, **kwargs): class ShareGPT4VPreprocessor (line 698) | class ShareGPT4VPreprocessor(MessagesPreprocessor): method prepare_dataset (line 700) | def prepare_dataset(self, dataset): method preprocess (line 715) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class TextCapsPreprocessor (line 751) | class TextCapsPreprocessor(ResponsePreprocessor): method preprocess (line 753) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class TextCapsEmbPreprocessor (line 760) | class TextCapsEmbPreprocessor(RowPreprocessor): method preprocess (line 762) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class TextCapsReRankPreprocessor (line 778) | class TextCapsReRankPreprocessor(RowPreprocessor): method __init__ (line 780) | def __init__(self, method prepare_dataset (line 791) | def prepare_dataset(self, dataset): method preprocess (line 811) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class RefCOCOPreprocessor (line 866) | class RefCOCOPreprocessor(ResponsePreprocessor, GroundingMixin): method __init__ (line 869) | def __init__(self, task_type, **kwargs): method prepare_dataset (line 873) | def prepare_dataset(self, dataset): method preprocess (line 879) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class LLaVAInstructPreprocessor (line 944) | class LLaVAInstructPreprocessor(MessagesPreprocessor): method prepare_dataset (line 946) | def prepare_dataset(self, dataset): method preprocess (line 952) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class LLaVAPretrainPreprocessor (line 983) | class LLaVAPretrainPreprocessor(MessagesPreprocessor): method prepare_dataset (line 985) | def prepare_dataset(self, dataset): method preprocess (line 997) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class OcrvqaPreprocessor (line 1041) | class OcrvqaPreprocessor(RowPreprocessor): method preprocess (line 1043) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class ScienceQAPreprocessor (line 1067) | class ScienceQAPreprocessor(RowPreprocessor): method preprocess (line 1069) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class GritPreprocessor (line 1086) | class GritPreprocessor(RowPreprocessor, GroundingMixin): method __init__ (line 1088) | def __init__(self, task_type, **kwargs): method has_overlap (line 1093) | def has_overlap(start_ends): method replace_intervals_with_tags (line 1100) | def replace_intervals_with_tags(response, start_ends): method preprocess (line 1110) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class GQAPreprocessor (line 1170) | class GQAPreprocessor(RowPreprocessor): method prepare_dataset (line 1172) | def prepare_dataset(self, dataset): method preprocess (line 1176) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class CocoPreprocessor (line 1202) | class CocoPreprocessor(ResponsePreprocessor): method preprocess (line 1214) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class LLaVAMixSFTPreprocessor (line 1231) | class LLaVAMixSFTPreprocessor(RowPreprocessor): method preprocess (line 1233) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class LatexocrPreprocessor (line 1260) | class LatexocrPreprocessor(ResponsePreprocessor): method preprocess (line 1262) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class CapchaImagesPreprocessor (line 1278) | class CapchaImagesPreprocessor(ResponsePreprocessor): method preprocess (line 1280) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class ClevrPreprocessor (line 1293) | class ClevrPreprocessor(ResponsePreprocessor): method preprocess (line 1295) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class Voc2007MultilabelPreprocessor (line 1311) | class Voc2007MultilabelPreprocessor(ResponsePreprocessor): method preprocess (line 1315) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class Geometry3KPreprocessor (line 1330) | class Geometry3KPreprocessor(ResponsePreprocessor): method preprocess (line 1332) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: FILE: swift/dataset/dataset_meta.py class SubsetDataset (line 27) | class SubsetDataset: method __post_init__ (line 40) | def __post_init__(self): method set_default (line 44) | def set_default(self, dataset_meta: 'DatasetMeta') -> 'SubsetDataset': class BaseDatasetLoader (line 53) | class BaseDatasetLoader(ABC): method load (line 56) | def load( method download_ms_dataset (line 66) | def download_ms_dataset(ms_dataset_id: str, files: List[str], force_do... method concat_datasets (line 96) | def concat_datasets(datasets: List[HfDataset]) -> Optional[HfDataset]: method interleave_datasets (line 104) | def interleave_datasets(datasets, *args, **kwargs): method shuffle_dataset (line 112) | def shuffle_dataset(dataset, seed: int, buffer_size: int = 1000): method post_process (line 120) | def post_process( class DatasetMeta (line 174) | class DatasetMeta: method __post_init__ (line 193) | def __post_init__(self): FILE: swift/dataset/dataset_syntax.py class DatasetSyntax (line 14) | class DatasetSyntax: method __post_init__ (line 20) | def __post_init__(self): method get_raw (line 26) | def get_raw(self): method _safe_split (line 32) | def _safe_split(s: str, method parse (line 56) | def parse(cls, dataset: str) -> 'DatasetSyntax': method get_dataset_meta (line 81) | def get_dataset_meta(self, use_hf: bool): method _get_dataset_meta_mapping (line 92) | def _get_dataset_meta_mapping() -> Dict[Tuple[str, str], DatasetMeta]: method get_dataset_name (line 108) | def get_dataset_name(dataset_id: str) -> str: method _get_matched_dataset_meta (line 120) | def _get_matched_dataset_meta(self, dataset_meta_mapping): FILE: swift/dataset/indexed_dataset.py class IndexedDatasetBuilder (line 13) | class IndexedDatasetBuilder: method __init__ (line 16) | def __init__(self, dataset_name: str): method _write_worker (line 30) | def _write_worker(self): method add_items (line 52) | def add_items(self, items: List[Any]) -> None: method finalize (line 58) | def finalize(self): class BinReader (line 73) | class BinReader: method __init__ (line 75) | def __init__(self, bin_path: str): method read_buffer (line 84) | def read_buffer(self, offset: int, size: int) -> bytes: method __del__ (line 89) | def __del__(self): class IndexedDataset (line 95) | class IndexedDataset(Dataset): method get_cache_dir (line 100) | def get_cache_dir(dataset_name: str): method __init__ (line 107) | def __init__(self, dataset_name: str): method __getitem__ (line 122) | def __getitem__(self, index: int): method __len__ (line 131) | def __len__(self): FILE: swift/dataset/loader.py class DatasetLoader (line 21) | class DatasetLoader(BaseDatasetLoader): method __init__ (line 23) | def __init__( method _load_dataset_path (line 43) | def _load_dataset_path( method _load_repo_dataset (line 64) | def _load_repo_dataset( method _select_subsets (line 132) | def _select_subsets(subsets: List[str], dataset_meta: DatasetMeta) -> ... method load (line 151) | def load( function init_self_cognition_preprocessor (line 179) | def init_self_cognition_preprocessor( function load_dataset (line 206) | def load_dataset( FILE: swift/dataset/media.py class MediaResource (line 13) | class MediaResource: method get_url (line 27) | def get_url(media_type): method download (line 33) | def download(media_type_or_url: Union[str, List[str]], method move_directory_contents (line 57) | def move_directory_contents(src_dir, dst_dir): method _safe_download (line 74) | def _safe_download(media_type: Union[str, List[str]], method safe_save (line 121) | def safe_save(image, file_name, folder, format='JPEG'): FILE: swift/dataset/packing.py function calculate_matched_group (line 16) | def calculate_matched_group(sequences, packing_length: int, is_finished:... class PackingDataset (line 29) | class PackingDataset(Dataset): method __init__ (line 32) | def __init__( method create_packed_idx (line 89) | def create_packed_idx(self, rank, offset, lengths): method __getitem__ (line 104) | def __getitem__(self, index): method __len__ (line 109) | def __len__(self): class IterablePackingDataset (line 113) | class IterablePackingDataset(IterableDataset): method __init__ (line 115) | def __init__( method _processor (line 145) | def _processor(self): method _put_data_in_queue (line 156) | def _put_data_in_queue(self, iterator) -> int: method _fetch_data_out_queue (line 165) | def _fetch_data_out_queue(self, last_res, num_samples): method cyclic_iter (line 177) | def cyclic_iter(iterable): method __iter__ (line 182) | def __iter__(self): FILE: swift/dataset/preprocessor/core.py class RowPreprocessor (line 25) | class RowPreprocessor: method __init__ (line 36) | def __init__(self, method _check_messages (line 61) | def _check_messages(row: Dict[str, Any]) -> None: method _cast_mm_data (line 79) | def _cast_mm_data(row: Dict[str, Any]) -> None: method _check_rejected_response (line 102) | def _check_rejected_response(row: Dict[str, Any]) -> None: method preprocess (line 110) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: method prepare_dataset (line 113) | def prepare_dataset(self, dataset: DATASET_TYPE) -> DATASET_TYPE: method batched_to_rows (line 117) | def batched_to_rows(batched_row: Dict[str, Any]): method rows_to_batched (line 123) | def rows_to_batched(rows: List[Dict[str, Any]]): method _remove_prefix_keys (line 136) | def _remove_prefix_keys(row, prefix: str): method _check_objects (line 145) | def _check_objects(row): method batched_preprocess (line 167) | def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: b... method get_features_dataset (line 210) | def get_features_dataset(dataset: DATASET_TYPE) -> DATASET_TYPE: method safe_rename_columns (line 217) | def safe_rename_columns(dataset, columns): method remove_useless_columns (line 237) | def remove_useless_columns(dataset: DATASET_TYPE) -> DATASET_TYPE: method _patch_arrow_writer (line 247) | def _patch_arrow_writer(): method _cast_pil_image (line 284) | def _cast_pil_image(self, dataset): method __call__ (line 291) | def __call__( class ResponsePreprocessor (line 356) | class ResponsePreprocessor(RowPreprocessor): method __init__ (line 359) | def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwar... method preprocess (line 372) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class AlpacaPreprocessor (line 394) | class AlpacaPreprocessor(ResponsePreprocessor): method concat_inst_input (line 397) | def concat_inst_input(cls, instruction, input_): method preprocess (line 405) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: function default_repair_messages (line 415) | def default_repair_messages(s: Union[str, Any]) -> Any: class MessagesPreprocessor (line 421) | class MessagesPreprocessor(RowPreprocessor): method __init__ (line 423) | def __init__( method _is_sharegpt_format (line 461) | def _is_sharegpt_format(message: Dict[str, str]) -> bool: method sharegpt_to_messages (line 466) | def sharegpt_to_messages(self, messages: List[Dict[str, str]], system:... method to_std_messages (line 479) | def to_std_messages(self, messages: List[Dict[str, str]], system: Opti... method _to_std_key (line 496) | def _to_std_key(messages: List[Dict[str, str]], std_key: str, optional... method preprocess (line 502) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class ClsPreprocessor (line 523) | class ClsPreprocessor(ResponsePreprocessor): method preprocess (line 525) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class AutoPreprocessor (line 531) | class AutoPreprocessor: method __init__ (line 533) | def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwar... method _get_preprocessor (line 537) | def _get_preprocessor(self, dataset: DATASET_TYPE) -> RowPreprocessor: method __call__ (line 546) | def __call__( FILE: swift/dataset/preprocessor/extra.py class GroundingMixin (line 8) | class GroundingMixin: method construct_grounding_prompt (line 47) | def construct_grounding_prompt(self): class TextGenerationPreprocessor (line 55) | class TextGenerationPreprocessor(ResponsePreprocessor): method __init__ (line 57) | def __init__(self, method preprocess (line 67) | def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class ClsGenerationPreprocessor (line 72) | class ClsGenerationPreprocessor(ResponsePreprocessor): method __init__ (line 74) | def __init__(self, method preprocess (line 100) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: FILE: swift/dataset/register.py function get_dataset_list (line 14) | def get_dataset_list(): function register_dataset (line 26) | def register_dataset(dataset_meta: DatasetMeta, *, exist_ok: bool = Fals... function _preprocess_d_info (line 43) | def _preprocess_d_info(d_info: Dict[str, Any], *, base_dir: Optional[str... function _register_d_info (line 72) | def _register_d_info(d_info: Dict[str, Any], *, base_dir: Optional[str] ... function register_dataset_info (line 84) | def register_dataset_info(dataset_info: Union[str, List[str], None] = No... FILE: swift/dataset/utils.py function sample_dataset (line 18) | def sample_dataset( class LazyLLMDataset (line 57) | class LazyLLMDataset(Dataset): method __init__ (line 60) | def __init__(self, method __getitem__ (line 85) | def __getitem__(self, idx: int) -> Dict[str, Any]: method __len__ (line 111) | def __len__(self) -> int: class EncodePreprocessor (line 115) | class EncodePreprocessor(RowPreprocessor): method __init__ (line 117) | def __init__(self, template: 'Template'): method preprocess (line 121) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class AddLengthPreprocessor (line 125) | class AddLengthPreprocessor(EncodePreprocessor): method preprocess (line 127) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: function get_temporary_cache_files_directory (line 136) | def get_temporary_cache_files_directory(prefix=None): FILE: swift/hub/hub.py class HubOperation (line 23) | class HubOperation: method patch_hub (line 27) | def patch_hub(cls): method try_login (line 31) | def try_login(cls, token: Optional[str] = None) -> bool: method create_model_repo (line 43) | def create_model_repo(cls, repo_id: str, token: Optional[str] = None, ... method push_to_hub (line 54) | def push_to_hub(cls, method load_dataset (line 81) | def load_dataset(cls, method download_model (line 102) | def download_model(cls, class MSHub (line 124) | class MSHub(HubOperation): method create_repo (line 128) | def create_repo(repo_id: str, method upload_folder (line 150) | def upload_folder( method patch_hub (line 175) | def patch_hub(cls): method try_login (line 194) | def try_login(cls, token: Optional[str] = None) -> bool: method create_model_repo (line 205) | def create_model_repo(cls, repo_id: str, token: Optional[str] = None, ... method push_to_hub (line 244) | def push_to_hub(cls, method load_dataset (line 283) | def load_dataset(cls, method download_model (line 311) | def download_model(cls, method add_patterns_to_file (line 325) | def add_patterns_to_file(repo, method add_patterns_to_gitignore (line 365) | def add_patterns_to_gitignore(repo, patterns: List[str], commit_messag... method add_patterns_to_gitattributes (line 369) | def add_patterns_to_gitattributes(repo, patterns: List[str], commit_me... class HFHub (line 382) | class HFHub(HubOperation): method try_login (line 385) | def try_login(cls, token: Optional[str] = None) -> bool: method create_model_repo (line 389) | def create_model_repo(cls, repo_id: str, token: Optional[str] = None, ... method push_to_hub (line 393) | def push_to_hub(cls, method load_dataset (line 419) | def load_dataset(cls, method download_model (line 442) | def download_model(cls, function get_hub (line 459) | def get_hub(use_hf: Optional[bool] = None): FILE: swift/infer_engine/base.py class BaseInferEngine (line 9) | class BaseInferEngine(ABC): method infer (line 12) | def infer(self, method infer_async (line 39) | async def infer_async(self, FILE: swift/infer_engine/grpo_vllm_engine.py class GRPOVllmEngine (line 23) | class GRPOVllmEngine(VllmEngine): method infer (line 25) | def infer( method async_infer (line 61) | async def async_infer(self, method _batch_infer_stream (line 85) | async def _batch_infer_stream(self, method _create_chat_completion_response (line 107) | def _create_chat_completion_response(self, result, inputs, request_con... method _add_adapter (line 142) | def _add_adapter(self, adapter_request: Optional[Union[AdapterRequest,... FILE: swift/infer_engine/infer_client.py class InferClient (line 16) | class InferClient(InferEngine): method __init__ (line 18) | def __init__(self, method models (line 44) | def models(self): method get_model_list (line 53) | def get_model_list(self) -> ModelList: method _get_request_kwargs (line 59) | def _get_request_kwargs(self) -> Dict[str, Any]: method get_model_list_async (line 67) | async def get_model_list_async(self) -> ModelList: method infer (line 74) | def infer( method _prepare_request_data (line 100) | def _prepare_request_data(model: str, infer_request: InferRequest, req... method _parse_stream_data (line 112) | def _parse_stream_data(data: bytes) -> Optional[str]: method infer_async (line 120) | async def infer_async( FILE: swift/infer_engine/infer_engine.py class InferEngine (line 21) | class InferEngine(BaseInferEngine, ProcessorMixin): method __init__ (line 23) | def __init__(self, template: Template): method _get_template (line 37) | def _get_template(self, processor: Processor, template_type: Optional[... method _get_stop_words (line 48) | def _get_stop_words(self, stop_words: List[Union[str, List[int], None]... method _get_stop_token_ids (line 60) | def _get_stop_token_ids(self, stop_words: List[Union[str, List[int], N... method async_iter_to_iter (line 79) | def async_iter_to_iter(self, async_iter, prog_bar, metrics) -> Iterator: method batch_run (line 111) | async def batch_run(tasks): method _batch_infer_stream (line 114) | def _batch_infer_stream( method _get_usage_info (line 147) | def _get_usage_info(num_prompt_tokens: int, num_generated_tokens: int)... method _update_usage_info (line 155) | def _update_usage_info(origin_use_info: UsageInfo, num_generated_token... method _update_metrics (line 163) | def _update_metrics(result, metrics: Optional[List[Metric]] = None): method infer (line 176) | def infer(self, method _get_toolcall (line 190) | def _get_toolcall(self, response: str) -> Optional[List[ChatCompletion... method _get_num_tokens (line 199) | def _get_num_tokens(inputs: Dict[str, Any]) -> int: method set_default_max_tokens (line 210) | def set_default_max_tokens(self, request_config: RequestConfig, inputs... method _get_logprobs (line 228) | def _get_logprobs(self, method _get_finish_reason (line 253) | def _get_finish_reason(max_tokens: int, completion_tokens: int, is_fin... method thread_run (line 264) | def thread_run(target, args=(), kwargs=None): method safe_asyncio_run (line 283) | def safe_asyncio_run(coro): method _batch_encode (line 290) | def _batch_encode(self, infer_requests: List[InferRequest], strict: bo... method _add_error_list (line 311) | def _add_error_list(outputs, error_list): FILE: swift/infer_engine/lmdeploy_engine.py class LmdeployEngine (line 38) | class LmdeployEngine(InferEngine): method __init__ (line 40) | def __init__( method _get_processor (line 92) | def _get_processor(self): method _prepare_engine_kwargs (line 102) | def _prepare_engine_kwargs(self, engine_kwargs): method _patch_pipeline (line 128) | def _patch_pipeline(self): method _prepare_engine (line 140) | def _prepare_engine(self): method _load_generation_config (line 145) | def _load_generation_config(self): method _add_stop_words (line 161) | def _add_stop_words(self, generation_config: LmdeployGenerationConfig,... method _prepare_generation_config (line 168) | def _prepare_generation_config(self, request_config: RequestConfig) ->... method _infer_stream_async (line 191) | async def _infer_stream_async( method _infer_full_async (line 240) | async def _infer_full_async( method infer_async (line 293) | async def infer_async(self, method _batch_infer_stream (line 333) | def _batch_infer_stream(self, *args, **kwargs): method infer (line 340) | def infer( FILE: swift/infer_engine/patch.py function patch_auto_tokenizer (line 8) | def patch_auto_tokenizer(tokenizer: PreTrainedTokenizerBase): function patch_auto_config (line 23) | def patch_auto_config(config: PretrainedConfig): FILE: swift/infer_engine/protocol.py class InferRequest (line 19) | class InferRequest: method __post_init__ (line 69) | def __post_init__(self): method remove_response (line 77) | def remove_response(messages) -> Optional[str]: method _to_printable (line 81) | def _to_printable(obj, key: Optional[str] = None): method to_printable (line 96) | def to_printable(self): class RolloutInferRequest (line 101) | class RolloutInferRequest(InferRequest): function random_uuid (line 134) | def random_uuid() -> str: class Model (line 139) | class Model: class ModelList (line 148) | class ModelList: class RequestConfig (line 154) | class RequestConfig: method __post_init__ (line 186) | def __post_init__(self): class CompletionRequestMixin (line 192) | class CompletionRequestMixin: class EmbeddingRequestMixin (line 198) | class EmbeddingRequestMixin: class ChatCompletionRequestMixin (line 205) | class ChatCompletionRequestMixin: method __post_init__ (line 211) | def __post_init__(self): class MultiModalRequestMixin (line 227) | class MultiModalRequestMixin: method to_base64 (line 234) | def to_base64(mm_data: Union[str, Image.Image, bytes]) -> str: method __post_init__ (line 253) | def __post_init__(self): class CompletionRequest (line 264) | class CompletionRequest(RequestConfig, MultiModalRequestMixin, Completio... method __post_init__ (line 266) | def __post_init__(self): class EmbeddingRequest (line 272) | class EmbeddingRequest(RequestConfig, MultiModalRequestMixin, EmbeddingR... method __post_init__ (line 274) | def __post_init__(self): method parse (line 278) | def parse(self) -> Tuple['InferRequest', 'RequestConfig']: class ChatCompletionRequest (line 289) | class ChatCompletionRequest(RequestConfig, MultiModalRequestMixin, ChatC... method __post_init__ (line 291) | def __post_init__(self): method convert_to_base64 (line 297) | def convert_to_base64(self): method parse (line 332) | def parse(self) -> Tuple['InferRequest', 'RequestConfig']: method from_cmpl_request (line 342) | def from_cmpl_request(cls, cmpl_request: Union[CompletionRequest, Embe... class UsageInfo (line 355) | class UsageInfo: class Function (line 362) | class Function: method __post_init__ (line 366) | def __post_init__(self): class ChatCompletionMessageToolCall (line 374) | class ChatCompletionMessageToolCall: class ChatMessage (line 381) | class ChatMessage: class ChatCompletionResponseChoice (line 389) | class ChatCompletionResponseChoice: method to_cmpl_choice (line 396) | def to_cmpl_choice(self) -> 'CompletionResponseChoice': class EmbeddingResponseData (line 403) | class EmbeddingResponseData: class EmbeddingResponse (line 410) | class EmbeddingResponse: class CompletionResponseChoice (line 420) | class CompletionResponseChoice: class ChatCompletionResponse (line 428) | class ChatCompletionResponse: method to_cmpl_response (line 438) | def to_cmpl_response(self) -> 'CompletionResponse': class RolloutOutput (line 445) | class RolloutOutput(BaseModel): method _wrap_flat_list (line 483) | def _wrap_flat_list(cls, v): method model_post_init (line 488) | def model_post_init(self, __context): method mminfo_to_serializable (line 493) | def mminfo_to_serializable(self): class CompletionResponse (line 506) | class CompletionResponse: class DeltaMessage (line 516) | class DeltaMessage: class ChatCompletionResponseStreamChoice (line 524) | class ChatCompletionResponseStreamChoice: method to_cmpl_choice (line 530) | def to_cmpl_choice(self) -> 'CompletionResponseStreamChoice': class CompletionResponseStreamChoice (line 537) | class CompletionResponseStreamChoice: class ChatCompletionStreamResponse (line 545) | class ChatCompletionStreamResponse: method to_cmpl_response (line 553) | def to_cmpl_response(self) -> 'CompletionStreamResponse': class CompletionStreamResponse (line 561) | class CompletionStreamResponse: class InitCommunicatorRequest (line 570) | class InitCommunicatorRequest(BaseModel): class UpdateWeightsRequest (line 576) | class UpdateWeightsRequest(BaseModel): FILE: swift/infer_engine/sglang_engine.py class SglangEngine (line 27) | class SglangEngine(InferEngine): method __init__ (line 29) | def __init__( method _get_processor (line 103) | def _get_processor(self): method _prepare_server_args (line 114) | def _prepare_server_args(self, engine_kwargs): method _load_generation_config (line 152) | def _load_generation_config(self) -> None: method _prepare_generation_config (line 166) | def _prepare_generation_config(self, request_config: RequestConfig) ->... method _add_stop_words (line 179) | def _add_stop_words(self, generation_config: Dict[str, Any], request_c... method _create_chat_completion_response (line 184) | def _create_chat_completion_response(self, output, inputs, return_deta... method infer (line 212) | def infer( method infer_async (line 222) | async def infer_async(self, method _infer_embedding_async (line 249) | async def _infer_embedding_async(self, inputs: Dict[str, Any], **kwarg... method _infer_full_async (line 262) | async def _infer_full_async(self, inputs: Dict[str, Any], generation_c... method _infer_stream_async (line 269) | async def _infer_stream_async(self, inputs: Dict[str, Any], generation... method _create_chat_completion_stream_response (line 281) | def _create_chat_completion_stream_response(self, output, infer_stream... FILE: swift/infer_engine/transformers_engine.py class _GenerationConfig (line 32) | class _GenerationConfig(GenerationConfig): method __repr__ (line 34) | def __repr__(self) -> str: class TransformersEngine (line 44) | class TransformersEngine(InferEngine): method __init__ (line 46) | def __init__( method _get_model_processor (line 104) | def _get_model_processor(self, model_id_or_path, **kwargs): method _start_infer_worker (line 120) | def _start_infer_worker(self): method _fetch_infer_requests (line 124) | def _fetch_infer_requests(self): method _infer_worker (line 147) | def _infer_worker(self): method _add_adapter (line 169) | def _add_adapter(self, adapter_path: str, adapter_name: Optional[str] ... method _prepare_generation_config (line 172) | def _prepare_generation_config(self, request_config: RequestConfig) ->... method _add_stop_words (line 180) | def _add_stop_words(self, generation_config: _GenerationConfig, reques... method preprocess_logits (line 186) | def preprocess_logits(batched_logits: Optional[List[torch.Tensor]], ba... method _update_batched_logprobs (line 205) | def _update_batched_logprobs(batched_logprobs: List[torch.Tensor], log... method _infer_stream (line 218) | def _infer_stream(self, inputs: Dict[str, Any], *, generation_config: ... method _get_adapter_names (line 318) | def _get_adapter_names(self, adapter_request: Optional[AdapterRequest]... method _infer_forward (line 329) | def _infer_forward(self, inputs: Dict[str, Any], adapter_request: Opti... method _infer_full (line 387) | def _infer_full(self, inputs: Dict[str, Any], *, generation_config: Ge... method infer_async (line 455) | async def infer_async( method _infer (line 490) | def _infer( method infer (line 551) | def infer( FILE: swift/infer_engine/utils.py class AdapterRequest (line 24) | class AdapterRequest: class InferTools (line 29) | class InferTools: method _is_chinese_char (line 32) | def _is_chinese_char(cp: int) -> bool: class InferStreamer (line 43) | class InferStreamer(InferTools): method __init__ (line 45) | def __init__(self, template, **decode_kwargs): method _align_blank_suffix (line 55) | def _align_blank_suffix(self, response: str) -> str: method _get_response (line 66) | def _get_response(self, response: str, is_finished: bool, token_len: i... method get_printable_text (line 87) | def get_printable_text(self, raw_tokens: List[int], is_finished: bool)... class StreamerMixin (line 97) | class StreamerMixin: method __init__ (line 99) | def __init__(self): method __iter__ (line 102) | def __iter__(self): method __next__ (line 105) | def __next__(self) -> torch.Tensor: class TokensIteratorStreamer (line 113) | class TokensIteratorStreamer(StreamerMixin, BaseStreamer): method put (line 115) | def put(self, value: torch.Tensor) -> None: method end (line 118) | def end(self) -> None: class LogitsStreamer (line 122) | class LogitsStreamer(LogitsProcessor): method __init__ (line 124) | def __init__(self): method __call__ (line 127) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function _set_generation_config_default_value (line 132) | def _set_generation_config_default_value(model_generation_config: Genera... function prepare_generation_config (line 143) | def prepare_generation_config(model_generation_config: Optional[Generati... function patch_lmdeploy (line 177) | def patch_lmdeploy(load_weights=False): function patch_npu_vllm (line 359) | def patch_npu_vllm(vllm_device: str): function patch_vllm_memory_leak (line 377) | def patch_vllm_memory_leak(): FILE: swift/infer_engine/vllm_engine.py class VllmEngine (line 51) | class VllmEngine(InferEngine): method __init__ (line 53) | def __init__( method _get_processor (line 168) | def _get_processor(self): method _prepare_engine (line 180) | def _prepare_engine(self) -> None: method _prepare_engine_kwargs (line 186) | def _prepare_engine_kwargs(self, max_model_len, engine_kwargs) -> None: method _prepare_reasoning_parser (line 257) | def _prepare_reasoning_parser(self, reasoning_parser: Optional[str]) -... method _fix_vllm_bug (line 275) | def _fix_vllm_bug(self) -> None: method _load_generation_config (line 291) | def _load_generation_config(self) -> None: method _add_stop_words (line 310) | def _add_stop_words(self, generation_config: SamplingParams, request_c... method _version_ge (line 318) | def _version_ge(base_version: str): method _add_adapter (line 324) | def _add_adapter(self, adapter_request: Optional[AdapterRequest] = None): method _add_request (line 337) | def _add_request(self, method _get_logprobs (line 397) | def _get_logprobs(self, method _prepare_generation_config (line 410) | def _prepare_generation_config(self, request_config: RequestConfig) ->... method inner_model (line 459) | def inner_model(self): method inner_model_executor (line 463) | def inner_model_executor(self): method _infer_stream_async (line 466) | async def _infer_stream_async( method _create_chat_completion_stream_response (line 484) | def _create_chat_completion_stream_response(self, result, request_conf... method _create_embedding_response (line 554) | def _create_embedding_response(self, result, generation_config, reques... method _create_chat_completion_response (line 561) | def _create_chat_completion_response( method _create_seq_cls_response (line 616) | def _create_seq_cls_response( method _infer_full_async (line 653) | async def _infer_full_async( method _batch_infer_stream (line 674) | def _batch_infer_stream(self, *args, **kwargs): method infer (line 679) | def infer( method infer_async (line 758) | async def infer_async( method patch_remove_log (line 793) | def patch_remove_log(): FILE: swift/loss/base.py class BaseLoss (line 10) | class BaseLoss(ABC): method __init__ (line 23) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method __call__ (line 36) | def __call__(self, outputs, labels, *, num_items_in_batch=None, loss_s... FILE: swift/loss/causal_lm.py class CustomCrossEntropyLoss (line 5) | class CustomCrossEntropyLoss(BaseLoss): method __call__ (line 7) | def __call__(self, outputs, labels, *, num_items_in_batch=None, loss_s... FILE: swift/loss/embedding.py class SiameseDistanceMetric (line 19) | class SiameseDistanceMetric(Enum): function _parse_pair_sentence (line 27) | def _parse_pair_sentence(outputs): class CosineSimilarityLoss (line 45) | class CosineSimilarityLoss(BaseLoss): method __call__ (line 47) | def __call__(self, outputs, labels, **kwargs) -> torch.Tensor: class ContrastiveLoss (line 56) | class ContrastiveLoss(BaseLoss): method __call__ (line 58) | def __call__(self, outputs, labels, **kwargs) -> torch.Tensor: class OnlineContrastiveLoss (line 68) | class OnlineContrastiveLoss(BaseLoss): method __call__ (line 70) | def __call__(self, outputs, labels, **kwargs) -> torch.Tensor: function _parse_multi_negative_sentences (line 88) | def _parse_multi_negative_sentences(sentences, labels, hard_negatives=No... class InfonceLoss (line 113) | class InfonceLoss(BaseLoss): method __call__ (line 115) | def __call__(self, outputs, labels, **kwargs) -> torch.Tensor: FILE: swift/loss/reranker.py class PointwiseRerankerLoss (line 12) | class PointwiseRerankerLoss(BaseLoss): method __call__ (line 14) | def __call__(self, outputs, labels, **kwargs) -> torch.Tensor: class ListwiseRerankerLoss (line 23) | class ListwiseRerankerLoss(BaseLoss): method __call__ (line 25) | def __call__(self, outputs, labels, **kwargs): FILE: swift/loss_scale/agent.py class AgentFlanLossScale (line 9) | class AgentFlanLossScale(ConfigLossScale): method get_loss_scale (line 13) | def get_loss_scale(self, context: str, *, query: Optional[str] = None): class REACTLossScale (line 19) | class REACTLossScale(ConfigLossScale): class QwenLossScale (line 23) | class QwenLossScale(ConfigLossScale): class HermesLossScale (line 27) | class HermesLossScale(ConfigLossScale): class AlphaUmiLossScale (line 31) | class AlphaUmiLossScale(ConfigLossScale): FILE: swift/loss_scale/base.py class LossScale (line 12) | class LossScale: method __init__ (line 34) | def __init__(self, base_strategy: Literal['default', 'last_round', 'al... method get_loss_scale (line 48) | def get_loss_scale(self, context: str, **kwargs) -> Tuple[List[str], L... method __call__ (line 66) | def __call__(self, context_list: List[str], context_types: List[Contex... method is_loss_scale_binary (line 120) | def is_loss_scale_binary(self): class ConfigLossScale (line 125) | class ConfigLossScale(LossScale): method __init__ (line 141) | def __init__(self, base_strategy: Literal['default', 'last_round', 'al... method is_loss_scale_binary (line 160) | def is_loss_scale_binary(self): method get_loss_scale (line 167) | def get_loss_scale(self, context: str, *, query: Optional[str] = None): FILE: swift/loss_scale/mapping.py function get_loss_scale (line 19) | def get_loss_scale(loss_scale: str) -> LossScale: FILE: swift/loss_scale/other.py class IgnoreEmptyThinkLossScale (line 5) | class IgnoreEmptyThinkLossScale(ConfigLossScale): FILE: swift/loss_scale/utils.py function calculate_loss_scale (line 7) | def calculate_loss_scale(query: str, FILE: swift/megatron/arguments/export_args.py class MegatronExportArguments (line 14) | class MegatronExportArguments(MegatronBaseArguments): method _init_output_dir (line 22) | def _init_output_dir(self): method _init_megatron_args (line 37) | def _init_megatron_args(self): method _init_convert (line 55) | def _init_convert(self): FILE: swift/megatron/arguments/megatron_args.py class RLHFMegatronArgumentsMixin (line 25) | class RLHFMegatronArgumentsMixin: method _init_kto (line 182) | def _init_kto(self): method __post_init__ (line 189) | def __post_init__(self): method _init_grpo (line 235) | def _init_grpo(self): class MegatronTunerMixin (line 322) | class MegatronTunerMixin: method __post_init__ (line 346) | def __post_init__(self): class MegatronArguments (line 354) | class MegatronArguments(RLHFMegatronArgumentsMixin, MegatronTunerMixin): method load_args_config (line 560) | def load_args_config(ckpt_dir: Optional[str]) -> Dict[str, Any]: method _set_default (line 579) | def _set_default(self): method _init_mixed_precision (line 594) | def _init_mixed_precision(self): method __post_init__ (line 601) | def __post_init__(self): method _init_teacher_model (line 715) | def _init_teacher_model(self): method _init_vpp_size (line 726) | def _init_vpp_size(self): method _load_adapter_config (line 746) | def _load_adapter_config(self): method init_iters (line 765) | def init_iters(self, train_dataset, val_dataset): method _init_multimodal_full (line 804) | def _init_multimodal_full(self): method _map_dtype (line 824) | def _map_dtype(self): method _init_weigh_decay (line 843) | def _init_weigh_decay(self): FILE: swift/megatron/arguments/megatron_base_args.py class MegatronBaseArguments (line 13) | class MegatronBaseArguments(MegatronArguments, BaseArguments): method _init_megatron_args (line 15) | def _init_megatron_args(self): method __post_init__ (line 18) | def __post_init__(self): FILE: swift/megatron/arguments/pretrain_args.py class MegatronPretrainArguments (line 4) | class MegatronPretrainArguments(MegatronSftArguments): FILE: swift/megatron/arguments/rlhf_args.py class MegatronRLHFArguments (line 9) | class MegatronRLHFArguments(MegatronSftArguments): method __post_init__ (line 16) | def __post_init__(self): method _init_truncation_strategy (line 23) | def _init_truncation_strategy(self): FILE: swift/megatron/arguments/sft_args.py class MegatronSftArguments (line 13) | class MegatronSftArguments(MegatronBaseArguments): method _init_output_dir (line 18) | def _init_output_dir(self): method _init_ckpt_dir (line 28) | def _init_ckpt_dir(self, adapters=None): method _init_megatron_args (line 38) | def _init_megatron_args(self): method __post_init__ (line 42) | def __post_init__(self): FILE: swift/megatron/callbacks/base.py class MegatronCallback (line 8) | class MegatronCallback: method __init__ (line 10) | def __init__(self, trainer: 'BaseMegatronTrainer'): method on_train_begin (line 15) | def on_train_begin(self): method on_train_end (line 18) | def on_train_end(self): method on_step_begin (line 21) | def on_step_begin(self): method on_step_end (line 24) | def on_step_end(self): method on_log (line 27) | def on_log(self, logs): method on_eval_begin (line 30) | def on_eval_begin(self): method on_eval_end (line 33) | def on_eval_end(self): method on_eval_step (line 36) | def on_eval_step(self): method on_save (line 39) | def on_save(self, output_dir): FILE: swift/megatron/callbacks/default_flow.py class DefaultFlowCallback (line 7) | class DefaultFlowCallback(MegatronCallback): method on_train_begin (line 9) | def on_train_begin(self): method on_step_end (line 15) | def on_step_end(self): method on_eval_begin (line 35) | def on_eval_begin(self): method on_eval_end (line 40) | def on_eval_end(self): FILE: swift/megatron/callbacks/print.py class PrintCallback (line 14) | class PrintCallback(MegatronCallback): method __init__ (line 16) | def __init__(self, trainer): method on_train_begin (line 23) | def on_train_begin(self): method on_train_end (line 34) | def on_train_end(self): method on_step_end (line 38) | def on_step_end(self): method on_eval_begin (line 43) | def on_eval_begin(self): method on_eval_end (line 47) | def on_eval_end(self): method on_eval_step (line 51) | def on_eval_step(self): method on_log (line 54) | def on_log(self, logs): FILE: swift/megatron/callbacks/swanlab.py class SwanlabCallback (line 9) | class SwanlabCallback(MegatronCallback): method __init__ (line 11) | def __init__(self, trainer): method setup (line 21) | def setup(self): method on_log (line 32) | def on_log(self, logs): FILE: swift/megatron/callbacks/tensorboard.py class TensorboardCallback (line 7) | class TensorboardCallback(MegatronCallback): method __init__ (line 9) | def __init__(self, trainer): method on_log (line 23) | def on_log(self, logs): method on_train_end (line 29) | def on_train_end(self): FILE: swift/megatron/callbacks/utils.py function rewrite_logs (line 4) | def rewrite_logs(logs): FILE: swift/megatron/callbacks/wandb.py class WandbCallback (line 9) | class WandbCallback(MegatronCallback): method __init__ (line 11) | def __init__(self, trainer): method setup (line 21) | def setup(self): method on_log (line 28) | def on_log(self, logs): FILE: swift/megatron/convert.py function convert_hf2mcore (line 31) | def convert_hf2mcore(args: ExportArguments) -> None: function convert_mcore2hf (line 65) | def convert_mcore2hf(args: ExportArguments) -> None: FILE: swift/megatron/init.py function _patch_transformer_engine (line 26) | def _patch_transformer_engine(): function _patch__batched_p2p_ops (line 45) | def _patch__batched_p2p_ops(): function _patch_mla_attention (line 57) | def _patch_mla_attention(): function _patch_peft_BaseTuner (line 357) | def _patch_peft_BaseTuner(): function _patch_TEGroupedLinear (line 375) | def _patch_TEGroupedLinear(): function _patch_mtp (line 389) | def _patch_mtp(): function _patch_peft_ModulesToSaveWrapper (line 487) | def _patch_peft_ModulesToSaveWrapper(): function _patch_TransformerLayer (line 525) | def _patch_TransformerLayer(): function _patch_compile_helpers (line 557) | def _patch_compile_helpers(): function _patch_flash_attn (line 568) | def _patch_flash_attn(): function _patch_torch_FileSystemReader (line 575) | def _patch_torch_FileSystemReader(): function _patch_validate_non_overlapping_shards_metadata (line 618) | def _patch_validate_non_overlapping_shards_metadata(): function _patch_TELinear (line 636) | def _patch_TELinear(): function _patch__write_item (line 646) | def _patch__write_item(): function _patch_mrope (line 665) | def _patch_mrope(): function _patch_unified_memory (line 778) | def _patch_unified_memory(): function _patch_dsa (line 804) | def _patch_dsa(): function init_megatron_env (line 955) | def init_megatron_env(): FILE: swift/megatron/model/constant.py class LLMMegatronModelType (line 2) | class LLMMegatronModelType: class MLLMMegatronModelType (line 12) | class MLLMMegatronModelType: class MegatronModelType (line 29) | class MegatronModelType(LLMMegatronModelType, MLLMMegatronModelType): FILE: swift/megatron/model/gpt_bridge.py class GPTBridge (line 34) | class GPTBridge: method __init__ (line 44) | def __init__(self, args, attr_prefix: Literal['', 'teacher_'] = ''): method get_hf_mlp_prefix (line 115) | def get_hf_mlp_prefix(self, layer_idx): method _get_hf_mlp (line 121) | def _get_hf_mlp(self, layer_idx): method _init_meta_hf_model (line 124) | def _init_meta_hf_model(self): method _get_tp_split_dim (line 129) | def _get_tp_split_dim(self, mg_key: Optional[str]) -> Optional[int]: method _split_tp (line 180) | def _split_tp(self, hf_weight, tp_dim, is_expert, is_embedding: bool): method _set_weight (line 196) | def _set_weight( method _copy_scale_inv (line 240) | def _copy_scale_inv(tensor, scale_inv): method fp8_quantizer (line 251) | def fp8_quantizer(self): method _is_fp8_param (line 259) | def _is_fp8_param(param): method _set_module (line 266) | def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mco... method _all_gather_tp (line 326) | def _all_gather_tp(self, tensor, tp_dim, is_expert): method _broadcast_ep_pp (line 353) | def _broadcast_ep_pp(self, tensor, is_expert): method _get_weight (line 380) | def _get_weight( method _set_state_dict (line 440) | def _set_state_dict(self, method _remove_prefix (line 517) | def _remove_prefix(state_dict, prefix: str): method _add_prefix (line 523) | def _add_prefix(state_dict, prefix: str): method _filter_prefix (line 529) | def _filter_prefix(state_dict, prefix: str): method _is_moe (line 535) | def _is_moe(state_dict): method _set_attn_state (line 541) | def _set_attn_state(self, mg_attn, hf_state_dict, hf_prefix: str, laye... method _set_qk_layernorm (line 667) | def _set_qk_layernorm(self, mg_attn, hf_attn, hf_state_dict, to_mcore): method get_e_score_correction_bias_key (line 673) | def get_e_score_correction_bias_key(self, hf_mlp): method _set_moe_state (line 680) | def _set_moe_state( method _get_hf_grouped (line 746) | def _get_hf_grouped(self): method _get_transpose (line 755) | def _get_transpose(self): method _set_mlp_state (line 761) | def _set_mlp_state( method _set_indexer (line 1270) | def _set_indexer(self, mg_indexer, hf_state_dict, hf_prefix: str, to_m... method _set_linear_attn_state (line 1286) | def _set_linear_attn_state(self, mg_attn, hf_state_dict, hf_prefix: st... method _set_mla_attn_state (line 1412) | def _set_mla_attn_state( method _set_layer_attn (line 1452) | def _set_layer_attn(self, mg_layer, hf_state_dict, layer_idx: int, to_... method _set_layer_mlp (line 1463) | def _set_layer_mlp(self, mg_layer, hf_state_dict, layer_idx: int, to_m... method _set_layer_state (line 1478) | def _set_layer_state(self, mg_layer, hf_state_dict, hf_prefix: str, la... method _convert_pre_process (line 1492) | def _convert_pre_process(self, mg_model, hf_state_dict, hf_prefix: str... method _convert_post_process (line 1509) | def _convert_post_process(self, mg_model, hf_state_dict, hf_prefix: st... method _convert_hf_state_dict (line 1532) | def _convert_hf_state_dict(self, hf_state_dict, to_mcore): method _convert (line 1544) | def _convert(self, mg_models, hf_state_dict, hf_prefix: str, to_mcore:... method _convert_mtp_extra (line 1621) | def _convert_mtp_extra(self, mtp_layer, hf_state_dict, to_mcore, origi... method _convert_mtp_layer (line 1626) | def _convert_mtp_layer(self, lm_model, hf_state_dict, hf_prefix: str, ... method load_weights (line 1663) | def load_weights(self, mg_models, hf_model_dir: str, is_peft_format: b... method export_weights (line 1686) | def export_weights(self, method save_weights (line 1725) | def save_weights(self, class MultimodalGPTBridge (line 1818) | class MultimodalGPTBridge(GPTBridge): FILE: swift/megatron/model/gpt_model.py class OutputLayerLinear (line 36) | class OutputLayerLinear(TELinear): method forward (line 38) | def forward(self, hidden_states, *args, **kwargs): method sharded_state_dict (line 41) | def sharded_state_dict( class GPTModel (line 55) | class GPTModel(McoreGPTModel): method __init__ (line 58) | def __init__( method _patch_apply_rotary_pos_emb (line 151) | def _patch_apply_rotary_pos_emb(self): method _preprocess (line 191) | def _preprocess( method forward (line 278) | def forward( method _postprocess (line 355) | def _postprocess( method get_input_tensor (line 515) | def get_input_tensor(self): FILE: swift/megatron/model/gpts/glm4.py class Glm4SelfAttention (line 22) | class Glm4SelfAttention(SelfAttention): method __init__ (line 24) | def __init__( method forward (line 38) | def forward(self, hidden_states, *args, **kwargs): class Glm4MLP (line 45) | class Glm4MLP(MLP): method __init__ (line 47) | def __init__( method forward (line 61) | def forward(self, hidden_states, *args, **kwargs): method sharded_state_dict (line 67) | def sharded_state_dict(self, class Glm4Bridge (line 84) | class Glm4Bridge(GPTBridge): method _set_layer_attn (line 86) | def _set_layer_attn(self, mg_layer, hf_state_dict, layer_idx: int, to_... class Glm4Loader (line 95) | class Glm4Loader(MegatronModelLoader): method get_transformer_layer_spec (line 97) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): FILE: swift/megatron/model/gpts/minimax_m2.py class MinimaxM2SelfAttention (line 22) | class MinimaxM2SelfAttention(SelfAttention): method __init__ (line 24) | def __init__( method get_query_key_value_tensors (line 51) | def get_query_key_value_tensors(self, *_args, **kwargs): class MinimaxM2Bridge (line 69) | class MinimaxM2Bridge(GPTBridge): method _set_qk_layernorm (line 71) | def _set_qk_layernorm(self, mg_attn, hf_attn, hf_state_dict, to_mcore): method get_hf_mlp_prefix (line 75) | def get_hf_mlp_prefix(self, layer_idx): method get_e_score_correction_bias_key (line 78) | def get_e_score_correction_bias_key(self, hf_mlp): method _set_moe_state (line 81) | def _set_moe_state( class MinimaxM2Loader (line 103) | class MinimaxM2Loader(MegatronModelLoader): method get_transformer_layer_spec (line 105) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): FILE: swift/megatron/model/gpts/olmoe.py class OLMoESelfAttention (line 25) | class OLMoESelfAttention(SelfAttentionBase): method __init__ (line 27) | def __init__(self, config: MegatronModelConfig, submodules: SelfAttent... method get_query_key_value_tensors (line 42) | def get_query_key_value_tensors(self, hidden_states, key_value_states=... function get_olmoe_decoder_block_spec (line 75) | def get_olmoe_decoder_block_spec( class OLMoEBridge (line 113) | class OLMoEBridge(GPTBridge): method _set_attn_state (line 115) | def _set_attn_state(self, mg_attn, hf_state_dict, hf_prefix: str, laye... class OlMoELoader (line 218) | class OlMoELoader(MegatronModelLoader): method get_transformer_layer_spec (line 220) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): FILE: swift/megatron/model/gpts/qwen3_emb.py class Qwen3EmbBridge (line 8) | class Qwen3EmbBridge(GPTBridge): method _convert_hf_state_dict (line 10) | def _convert_hf_state_dict(self, hf_state_dict, to_mcore): FILE: swift/megatron/model/gpts/qwen3_next.py class Qwen3NextRMSNorm (line 62) | class Qwen3NextRMSNorm(torch.nn.Module): method __init__ (line 71) | def __init__(self, config: MegatronModelConfig, hidden_size: int, eps:... method _norm (line 78) | def _norm(self, x): method forward (line 81) | def forward(self, hidden_states): class Qwen3NextSelfAttention (line 89) | class Qwen3NextSelfAttention(SelfAttention): method __init__ (line 91) | def __init__(self, config: MegatronModelConfig, submodules: SelfAttent... method forward (line 133) | def forward( method get_query_key_value_tensors (line 385) | def get_query_key_value_tensors(self, hidden_states, key_value_states=... class Qwen3NextGatedDeltaNet (line 457) | class Qwen3NextGatedDeltaNet(_HuggingFaceModule, _Qwen3NextGatedDeltaNet): method __init__ (line 459) | def __init__(self, config: MegatronModelConfig, submodules: SelfAttent... method forward (line 467) | def forward(self, hidden_states: torch.Tensor, **kwargs): class Qwen3NextBridge (line 504) | class Qwen3NextBridge(GPTBridge): method _set_layer_attn (line 510) | def _set_layer_attn(self, mg_layer, hf_state_dict, layer_idx: int, to_... method _set_layer_mlp (line 520) | def _set_layer_mlp(self, mg_layer, hf_state_dict, layer_idx: int, to_m... method _convert_mtp_extra (line 531) | def _convert_mtp_extra(self, mtp_layer, hf_state_dict, to_mcore, origi... class Qwen3NextLoader (line 541) | class Qwen3NextLoader(MegatronModelLoader): method get_transformer_layer_spec (line 544) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): method get_mtp_block_spec (line 590) | def get_mtp_block_spec(self, *args, **kwargs): FILE: swift/megatron/model/mm_gpt_model.py class MultimodalGPTModel (line 20) | class MultimodalGPTModel(MegatronModule): method __init__ (line 22) | def __init__(self, method _patch_word_embeddings (line 47) | def _patch_word_embeddings(self, kwargs): method forward (line 81) | def forward( method set_input_tensor (line 114) | def set_input_tensor(self, input_tensor: torch.Tensor) -> None: method get_input_tensor (line 117) | def get_input_tensor(self): method shared_embedding_or_output_weight (line 120) | def shared_embedding_or_output_weight(self) -> torch.Tensor: FILE: swift/megatron/model/mm_gpts/glm.py class Glm4vVit (line 11) | class Glm4vVit(HuggingFaceModule): method __init__ (line 16) | def __init__(self, config): method get_inputs_embeds (line 20) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Glm4vBridge (line 35) | class Glm4vBridge(Glm4Bridge, MultimodalGPTBridge): FILE: swift/megatron/model/mm_gpts/internvl.py class Internvl3Bridge (line 11) | class Internvl3Bridge(GPTBridge): method _init_meta_hf_model (line 18) | def _init_meta_hf_model(self): class Internvl3Vit (line 25) | class Internvl3Vit(HuggingFaceModule): method __init__ (line 30) | def __init__(self, config): method get_inputs_embeds (line 46) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class InternvlHfBridge (line 75) | class InternvlHfBridge(MultimodalGPTBridge): class InternvlHfVit (line 84) | class InternvlHfVit(HuggingFaceModule): method __init__ (line 89) | def __init__(self, config): method get_inputs_embeds (line 105) | def get_inputs_embeds(self, inputs_embeds, **kwargs): FILE: swift/megatron/model/mm_gpts/kimi_vl.py class KimiVLBridge (line 13) | class KimiVLBridge(MultimodalGPTBridge): class KimiVLVit (line 21) | class KimiVLVit(HuggingFaceModule): method __init__ (line 26) | def __init__(self, config): method get_inputs_embeds (line 31) | def get_inputs_embeds(self, inputs_embeds, **kwargs): FILE: swift/megatron/model/mm_gpts/llama4.py class Llama4Vit (line 19) | class Llama4Vit(HuggingFaceModule): method __init__ (line 24) | def __init__(self, config): method get_inputs_embeds (line 28) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Llama4Bridge (line 51) | class Llama4Bridge(GPTBridge): class Llama4Loader (line 59) | class Llama4Loader(MegatronModelLoader): method get_transformer_layer_spec (line 61) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): FILE: swift/megatron/model/mm_gpts/qwen.py class Qwen2_5VL_Vit (line 14) | class Qwen2_5VL_Vit(HuggingFaceModule): method __init__ (line 20) | def __init__(self, config): method get_inputs_embeds (line 35) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Qwen2_5VLBridge (line 39) | class Qwen2_5VLBridge(MultimodalGPTBridge): class Qwen2VL_Vit (line 60) | class Qwen2VL_Vit(Qwen2_5VL_Vit): class Qwen2_5OmniBridge (line 75) | class Qwen2_5OmniBridge(GPTBridge): class Qwen2_5Omni_Vit (line 83) | class Qwen2_5Omni_Vit(HuggingFaceModule): method __init__ (line 89) | def __init__(self, config): method prepare_model (line 93) | def prepare_model(self, hf_model): method get_inputs_embeds (line 97) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Ovis2_5Bridge (line 138) | class Ovis2_5Bridge(GPTBridge): class Ovis2_5Vit (line 146) | class Ovis2_5Vit(HuggingFaceModule): method __init__ (line 151) | def __init__(self, config): method get_inputs_embeds (line 157) | def get_inputs_embeds(self, inputs_embeds, **kwargs): FILE: swift/megatron/model/mm_gpts/qwen3_5.py class Qwen3_5MoeGatedDeltaNet (line 24) | class Qwen3_5MoeGatedDeltaNet(_HuggingFaceModule, _Qwen3_5MoeGatedDeltaN... method __init__ (line 26) | def __init__(self, config: TransformerConfig, submodules: SelfAttentio... method forward (line 37) | def forward(self, hidden_states: torch.Tensor, **kwargs): class Qwen3_5Vit (line 74) | class Qwen3_5Vit(HuggingFaceModule): method __init__ (line 79) | def __init__(self, config): method get_inputs_embeds (line 84) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Qwen3_5Bridge (line 88) | class Qwen3_5Bridge(Qwen3NextBridge): class Qwen3_5Loader (line 94) | class Qwen3_5Loader(Qwen3NextLoader): FILE: swift/megatron/model/mm_gpts/qwen3_5_gdn.py class Qwen3_5Bridge (line 14) | class Qwen3_5Bridge(MultimodalGPTBridge): method _set_layer_attn (line 17) | def _set_layer_attn(self, mg_layer, hf_state_dict, layer_idx: int, to_... method _convert_mtp_extra (line 31) | def _convert_mtp_extra(self, mtp_layer, hf_state_dict, to_mcore, origi... class Qwen3_5Loader (line 35) | class Qwen3_5Loader(MegatronModelLoader): method get_transformer_layer_spec (line 37) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): method build_model (line 49) | def build_model( FILE: swift/megatron/model/mm_gpts/qwen3_vl.py class Qwen3Omni_Vit (line 35) | class Qwen3Omni_Vit(HuggingFaceModule): method __init__ (line 43) | def __init__(self, config): method prepare_model (line 47) | def prepare_model(self, hf_model): method _get_inputs_embeds (line 51) | def _get_inputs_embeds(self, inputs_embeds, inputs, visual, processor,... method get_inputs_embeds (line 154) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Qwen3VLTransformerBlock (line 185) | class Qwen3VLTransformerBlock(gpt_model.TransformerBlock): method _checkpointed_forward (line 188) | def _checkpointed_forward( method forward (line 298) | def forward( method _deepstack_process (line 449) | def _deepstack_process(self, hidden_states: torch.Tensor, visual_pos_m... class Qwen3OmniBridge (line 460) | class Qwen3OmniBridge(GPTBridge): class Qwen3VL_Vit (line 468) | class Qwen3VL_Vit(HuggingFaceModule): method __init__ (line 473) | def __init__(self, config): method get_inputs_embeds (line 478) | def get_inputs_embeds(self, inputs_embeds, **kwargs): class Qwen3VLLoader (line 483) | class Qwen3VLLoader(MegatronModelLoader): method _patch_transformer_block (line 485) | def _patch_transformer_block(self): method __init__ (line 491) | def __init__(self, args, hf_config): FILE: swift/megatron/model/mm_gpts/utils.py function patch_hf_initialize_weight (line 14) | def patch_hf_initialize_weight(): function patch_device_map_meta (line 29) | def patch_device_map_meta(model_cls): class HuggingFaceModule (line 44) | class HuggingFaceModule(_HuggingFaceModule, ABC): method __init__ (line 47) | def __init__(self, config, ignore_init_model_cls=None): method prepare_model (line 68) | def prepare_model(self, hf_model): method get_inputs_embeds (line 72) | def get_inputs_embeds(self, inputs_embeds, **kwargs): FILE: swift/megatron/model/model_config.py function _eval_pattern (line 17) | def _eval_pattern(pattern): function no_rope_freq_type (line 29) | def no_rope_freq_type(x): function linear_attn_freq_type (line 53) | def linear_attn_freq_type(x): function moe_freq_type (line 72) | def moe_freq_type(x): function tuple_type (line 99) | def tuple_type(x): class MegatronModelConfig (line 113) | class MegatronModelConfig(TransformerConfig): method _augment_mindspeed_defaults (line 213) | def _augment_mindspeed_defaults(self): method __post_init__ (line 249) | def __post_init__(self): method _format_config (line 289) | def _format_config(self): method _check_npu (line 305) | def _check_npu(self): function _convert_config (line 379) | def _convert_config(config, _internal_call=False) -> Dict[str, Any]: function convert_hf_config (line 414) | def convert_hf_config(config) -> Dict[str, Any]: function _check_attention_backend (line 541) | def _check_attention_backend(args, config): function _check_padding_free (line 548) | def _check_padding_free(args, config): function get_mcore_model_config (line 566) | def get_mcore_model_config(args, hf_config): FILE: swift/megatron/model/modules/gated_delta_net.py class GatedDeltaNet (line 24) | class GatedDeltaNet(_GatedDeltaNet): method forward (line 26) | def forward( FILE: swift/megatron/model/modules/gated_self_attention.py class GatedSelfAttention (line 13) | class GatedSelfAttention(SelfAttention): method get_query_key_value_tensors (line 15) | def get_query_key_value_tensors(self, hidden_states, key_value_states=... FILE: swift/megatron/model/register.py class MegatronModelMeta (line 28) | class MegatronModelMeta: method __post_init__ (line 37) | def __post_init__(self): function register_megatron_model (line 44) | def register_megatron_model(megatron_model_meta: MegatronModelMeta, *, e... function get_megatron_model_meta (line 57) | def get_megatron_model_meta(model_type: str) -> Optional[MegatronModelMe... class MegatronModelLoader (line 69) | class MegatronModelLoader: method __init__ (line 72) | def __init__(self, args, hf_config): method _replace_spec_dsa (line 81) | def _replace_spec_dsa(self, layer_spec): method get_transformer_layer_spec (line 95) | def get_transformer_layer_spec(self, vp_stage: Optional[int] = None): method _get_transformer_layer_spec (line 107) | def _get_transformer_layer_spec(self): method get_mtp_block_spec (line 119) | def get_mtp_block_spec(self, transformer_layer_spec, vp_stage: Optiona... method _set_shared_expert_gate (line 131) | def _set_shared_expert_gate(self, transformer_layer_spec): method build_model (line 139) | def build_model( function get_mcore_model (line 161) | def get_mcore_model(args, hf_config): FILE: swift/megatron/model/rope.py class DummyConfig (line 20) | class DummyConfig(RotaryEmbeddingConfigMixin): method __init__ (line 22) | def __init__(self, **kwargs): function _get_dummy_config (line 27) | def _get_dummy_config(config): function _compute_default_rope_parameters (line 53) | def _compute_default_rope_parameters( function _get_rope_type (line 100) | def _get_rope_type(rope_scaling: Optional[Dict[str, Any]]): function get_rope_inv_freq (line 109) | def get_rope_inv_freq(config, seq_len=None): function longrope_frequency_update (line 121) | def longrope_frequency_update(config, model, inv_freq, seq_len: int): function dynamic_frequency_update (line 138) | def dynamic_frequency_update(config, model, inv_freq, seq_len: int): function dynamic_rope_update (line 155) | def dynamic_rope_update(model, inv_freq, seq_len: int): function _compute_dynamic_alpha_ntk_parameters (line 166) | def _compute_dynamic_alpha_ntk_parameters( FILE: swift/megatron/pipelines/export/export.py class MegatronExport (line 18) | class MegatronExport(SwiftPipeline): method run (line 22) | def run(self): method convert_mcore2hf (line 30) | def convert_mcore2hf(self) -> None: method convert_hf2mcore (line 80) | def convert_hf2mcore(self) -> None: function megatron_export_main (line 127) | def megatron_export_main(args: Optional[Union[List[str], MegatronExportA... FILE: swift/megatron/pipelines/train/pretrain.py class MegatronPretrain (line 11) | class MegatronPretrain(MegatronSft): function megatron_pretrain_main (line 16) | def megatron_pretrain_main(args: Optional[Union[List[str], MegatronPretr... FILE: swift/megatron/pipelines/train/rlhf.py class MegatronRLHF (line 13) | class MegatronRLHF(MegatronSft): method prepare_trainer (line 17) | def prepare_trainer(self): method _prepare_template (line 36) | def _prepare_template(self) -> None: method _get_dataset (line 41) | def _get_dataset(self): method _prepare_vllm_client (line 48) | def _prepare_vllm_client(self): function megatron_rlhf_main (line 72) | def megatron_rlhf_main(args: Optional[Union[List[str], MegatronRLHFArgum... FILE: swift/megatron/pipelines/train/sft.py class MegatronSft (line 22) | class MegatronSft(SwiftSft): method prepare_trainer (line 26) | def prepare_trainer(self): method _set_seed (line 35) | def _set_seed(self): method __init__ (line 38) | def __init__(self, args: Optional[Union[List[str], MegatronSftArgument... method run (line 61) | def run(self): function megatron_sft_main (line 87) | def megatron_sft_main(args: Optional[Union[List[str], MegatronSftArgumen... FILE: swift/megatron/trainers/base.py class BaseMegatronTrainer (line 53) | class BaseMegatronTrainer(ABC): method __init__ (line 55) | def __init__(self, args, template: Template): method _load_checkpoint (line 100) | def _load_checkpoint(self): method call_event (line 112) | def call_event(self, event, **kwargs): method on_log (line 116) | def on_log(self, logs, prefix=''): method _log_callback (line 123) | def _log_callback(self, logs, n_steps): method prepare_model (line 177) | def prepare_model(self): method _prepare_peft_model (line 184) | def _prepare_peft_model(self, models): method get_optimizer_and_scheduler (line 194) | def get_optimizer_and_scheduler(self): method _get_data_collator (line 210) | def _get_data_collator(self): method cyclic_iter (line 217) | def cyclic_iter(self, iterable, use_origin_cyclic: bool = False): method _get_param_groups_mcore_016 (line 242) | def _get_param_groups_mcore_016( method _get_param_groups (line 260) | def _get_param_groups( method _patch_get_param_groups (line 416) | def _patch_get_param_groups(self): method _load_iteration (line 429) | def _load_iteration(self): method _prepare_vit_gradient_checkpointing (line 456) | def _prepare_vit_gradient_checkpointing(self, model): method _initialize_embedding (line 471) | def _initialize_embedding(model): method _all_reduce_metric (line 492) | def _all_reduce_metric(self, method merge_lora_adapters (line 502) | def merge_lora_adapters(self, adapter_name='default'): method unmerge_lora_adapters (line 511) | def unmerge_lora_adapters(self): method copy_path (line 521) | def copy_path(src_path: str, tgt_path: str): method _prepare_data_iterator (line 535) | def _prepare_data_iterator(self, train_dataset, val_dataset=None, use_... method train (line 543) | def train(self, train_dataset, val_dataset): method _determine_best_metric (line 654) | def _determine_best_metric(self, metrics) -> bool: method save_checkpoint (line 672) | def save_checkpoint(self): method _rotate_checkpoints (line 727) | def _rotate_checkpoints(self, output_dir: str): method _sorted_checkpoints (line 744) | def _sorted_checkpoints(self, output_dir: str): method training_log (line 760) | def training_log(self, metrics, grad_norm): method evaluate (line 768) | def evaluate(self, val_data_iterator): method compute_eval_metrics (line 794) | def compute_eval_metrics(self, metrics): method _replace_data_iterator (line 801) | def _replace_data_iterator(self, data_iterator): method train_step (line 804) | def train_step(self, train_data_iterator): method _aggregated_metrics (line 830) | def _aggregated_metrics(self, metrics, total_metrics): method _prepare_dataloader (line 851) | def _prepare_dataloader(self, train_dataset, val_dataset=None): method _create_dataloader (line 882) | def _create_dataloader(self, dataset, batch_sampler): method forward_step (line 897) | def forward_step(self, data_iterator, model): method _prepare_batch (line 900) | def _prepare_batch(self, data, vp_stage=None, num_samples=None): method get_batch (line 916) | def get_batch(self, data_iterator, vp_stage=None): method _collect_config_info (line 920) | def _collect_config_info(self) -> Dict[str, str]: method get_last_tokens (line 941) | def get_last_tokens(self, output_tensor, packed_seq_params=None, atten... FILE: swift/megatron/trainers/batch_sampler.py class MegatronPretrainingSampler (line 10) | class MegatronPretrainingSampler: method __init__ (line 12) | def __init__(self, method __len__ (line 40) | def __len__(self): method get_start_end_idx (line 43) | def get_start_end_idx(self): method __iter__ (line 48) | def __iter__(self): class MegatronPretrainingRandomSampler (line 65) | class MegatronPretrainingRandomSampler: method __init__ (line 67) | def __init__( method __len__ (line 110) | def __len__(self): method __iter__ (line 113) | def __iter__(self): FILE: swift/megatron/trainers/dpo_trainer.py class DummyDPOTrainer (line 15) | class DummyDPOTrainer(DPOTrainer): method __init__ (line 17) | def __init__(self, args): class MegatronDPOTrainer (line 29) | class MegatronDPOTrainer(MegatronRLHFTrainer): method __init__ (line 31) | def __init__(self, args, template): method loss_func (line 35) | def loss_func(self, output_tensor: torch.Tensor, *, labels: torch.Tens... method forward_step (line 78) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/embedding_trainer.py class MegatronEmbeddingTrainer (line 13) | class MegatronEmbeddingTrainer(BaseMegatronTrainer): method __init__ (line 15) | def __init__(self, args, template): method loss_func (line 25) | def loss_func(self, output_tensor: torch.Tensor, *, labels: torch.Tens... method forward_step (line 35) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/gkd_trainer.py class DataSource (line 27) | class DataSource(str, Enum): class MegatronGKDTrainer (line 34) | class MegatronGKDTrainer(MegatronRolloutMixin, MegatronRLHFTrainer): method __init__ (line 36) | def __init__(self, args: MegatronArguments, template, **kwargs): method train (line 77) | def train(self, train_dataset, val_dataset): method prepare_model (line 82) | def prepare_model(self): method _offload_teacher_models (line 116) | def _offload_teacher_models(self): method _load_teacher_models_to_gpu (line 121) | def _load_teacher_models_to_gpu(self): method load_teacher_model_context (line 127) | def load_teacher_model_context(self): method _template_context (line 147) | def _template_context(self, template: Template, max_length: Optional[i... method _build_opsd_teacher_data (line 156) | def _build_opsd_teacher_data(self, inputs: List[Dict]) -> Optional[Lis... method _encode_batch (line 172) | def _encode_batch(self, batch: List[Dict]) -> Dict[str, torch.Tensor]: method _get_random_num (line 185) | def _get_random_num(self) -> float: method _determine_data_source (line 198) | def _determine_data_source(self) -> DataSource: method _init_resample_data_iterator (line 230) | def _init_resample_data_iterator(self, train_dataset): method resample_encode_failed_inputs (line 258) | def resample_encode_failed_inputs(self, inputs: List[Dict], max_resamp... method _compute_teacher_logits (line 297) | def _compute_teacher_logits(self, encoded_batches: List[Dict], vp_stag... method _compute_teacher_logits_local (line 303) | def _compute_teacher_logits_local(self, encoded_batches: List[Dict], v... method _compute_teacher_logits_from_api (line 331) | def _compute_teacher_logits_from_api(self, encoded_batches: List[Dict]... method _replace_data_iterator (line 362) | def _replace_data_iterator(self, data_iterator): method _align_vocab_size (line 412) | def _align_vocab_size( method generalized_jsd_loss (line 446) | def generalized_jsd_loss( method _vocab_parallel_topk (line 545) | def _vocab_parallel_topk(self, logits: torch.Tensor, k: int) -> tuple: method _tp_gather_topk (line 582) | def _tp_gather_topk(self, logits: torch.Tensor, indices: torch.Tensor)... method _jsd_topk (line 609) | def _jsd_topk(self, student_logits, teacher_topk_logprobs, teacher_top... method loss_func (line 642) | def loss_func(self, method forward_step (line 714) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/grpo_trainer.py class MegatronGRPOTrainer (line 44) | class MegatronGRPOTrainer(MegatronRolloutMixin, MegatronRLHFTrainer): method __init__ (line 46) | def __init__(self, args: MegatronArguments, template: Template, **kwar... method train (line 59) | def train(self, train_dataset, val_dataset): method _init_grpo_params (line 64) | def _init_grpo_params(self): method _init_rollout_engine (line 120) | def _init_rollout_engine(self): method _prepare_rewards (line 136) | def _prepare_rewards(self): method _prepare_scheduler (line 192) | def _prepare_scheduler(self): method _init_resample_data_iterator (line 209) | def _init_resample_data_iterator(self, train_dataset): method _replace_data_iterator (line 238) | def _replace_data_iterator(self, data_iterator): method _batch_encode (line 255) | def _batch_encode(self, infer_requests: List[Dict], template: Template... method _get_encoded_batch (line 273) | def _get_encoded_batch(self, encoded_list, rollout_batch, template): method _generate_and_score_completions (line 365) | def _generate_and_score_completions(self, batch): method _generate_completions (line 444) | def _generate_completions(self, batch): method _rollout (line 492) | def _rollout(self, batch) -> List[RolloutOutput]: method postprocess_rollout_data (line 507) | def postprocess_rollout_data(self, batch, outputs): method _get_request_config (line 565) | def _get_request_config(self) -> RequestConfig: method _server_rollout (line 581) | def _server_rollout(self, method _score_completions (line 616) | def _score_completions(self, inputs: DataType) -> torch.Tensor: method _compute_rewards_per_func (line 630) | def _compute_rewards_per_func(self, batch: DataType) -> torch.Tensor: method _compute_advantages (line 694) | def _compute_advantages(self, method _dynamic_sampling (line 854) | def _dynamic_sampling(self, rollout_batch: DataType, method _maybe_compute_logps (line 930) | def _maybe_compute_logps(self, batch: Dict[str, Any]) -> Dict[str, Any]: method _compute_kl_from_batches (line 969) | def _compute_kl_from_batches(self, mini_batch_data: List[Dict[str, Any... method _disable_maxlength_template_context (line 1006) | def _disable_maxlength_template_context(self, template: Template): method _maybe_replace_response_token (line 1015) | def _maybe_replace_response_token(self, batch): method on_policy (line 1029) | def on_policy(self): method forward_step (line 1033) | def forward_step(self, data_iterator, model): method loss_func (line 1122) | def loss_func(self, output_tensor: torch.Tensor, data: Dict[str, Any]): method model_forward (line 1412) | def model_forward(self, model, data_iterator, no_grad=True, per_token=... method inputs2requests (line 1444) | def inputs2requests(self, inputs: Union[DataType, List[RolloutInferReq... method _preprocess_inputs (line 1512) | def _preprocess_inputs(self, inputs: DataType) -> DataType: method resample_encode_failed_inputs (line 1520) | def resample_encode_failed_inputs(self, inputs: DataType, max_resample... method _add_prompt_id_to_inputs (line 1580) | def _add_prompt_id_to_inputs(self, inputs: DataType) -> DataType: method get_num_iters_per_step (line 1602) | def get_num_iters_per_step(self): method get_local_rollout_batch (line 1628) | def get_local_rollout_batch(self, batch): method _template_context (line 1645) | def _template_context(self, template: Template): method _prepare_metrics (line 1654) | def _prepare_metrics(self): method _apply_chat_template_to_messages_list (line 1671) | def _apply_chat_template_to_messages_list(self, messages_list: DataType): method _set_inputs_system (line 1680) | def _set_inputs_system(self, batch: DataType) -> DataType: method _compute_sequence_level_ratios (line 1717) | def _compute_sequence_level_ratios(self, is_ratio: torch.Tensor, compl... method _apply_rollout_importance_sampling (line 1734) | def _apply_rollout_importance_sampling(self, rollout_log_ratio: torch.... method _compute_off_policy_sequence_mask (line 1785) | def _compute_off_policy_sequence_mask( method _compute_rollout_offpolicy_metrics (line 1827) | def _compute_rollout_offpolicy_metrics( method _compute_is_correction_metrics (line 1911) | def _compute_is_correction_metrics( method _prepare_model_inputs (line 1970) | def _prepare_model_inputs(self, inputs: 'DataType') -> Dict[str, Any]: method _collect_config_info (line 1980) | def _collect_config_info(self) -> Dict[str, str]: FILE: swift/megatron/trainers/kto_trainer.py class DummyKTOTrainer (line 15) | class DummyKTOTrainer(KTOTrainer): method gather_for_metrics (line 18) | def gather_for_metrics(self, input_data, *args, **kwargs): method __init__ (line 27) | def __init__(self, args): class MegatronKTOTrainer (line 37) | class MegatronKTOTrainer(MegatronRLHFTrainer): method __init__ (line 39) | def __init__(self, args, template): method _kto_get_logps (line 43) | def _kto_get_logps(self, output_tensor, data, is_KL: bool, is_ref: boo... method _get_kto_length (line 50) | def _get_kto_length(self, data: Dict[str, Any]) -> int: method loss_func (line 56) | def loss_func(self, output_tensor, *, data, kl_data, label): method _get_input_tensor (line 100) | def _get_input_tensor(input_tensor, is_KL: bool, is_ref: bool, length:... method forward_step (line 112) | def forward_step(self, data_iterator, model): method _prepare_batch (line 157) | def _prepare_batch(self, data, vp_stage=None, num_samples=None): method _log_callback (line 170) | def _log_callback(self, logs, n_steps): FILE: swift/megatron/trainers/reranker_trainer.py class MegatronRerankerTrainer (line 15) | class MegatronRerankerTrainer(BaseMegatronTrainer): method __init__ (line 17) | def __init__(self, args, template): method _get_listwise_reranker_preds (line 28) | def _get_listwise_reranker_preds(logits, labels): method loss_func (line 39) | def loss_func(self, output_tensor: torch.Tensor, *, labels: torch.Tens... method prepare_model (line 56) | def prepare_model(self): method forward_step (line 62) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/reward_trainer.py class MegatronRewardTrainer (line 12) | class MegatronRewardTrainer(MegatronRLHFTrainer): method __init__ (line 14) | def __init__(self, args, template): method loss_func (line 18) | def loss_func(self, output_tensor, *, data): method forward_step (line 45) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/rlhf_mixin.py class MegatronRLHFTrainer (line 17) | class MegatronRLHFTrainer(BaseMegatronTrainer): method _load_checkpoint (line 19) | def _load_checkpoint(self): method prepare_model (line 27) | def prepare_model(self): method _get_data_collator (line 44) | def _get_data_collator(self): method null_ref_context (line 50) | def null_ref_context(self): method get_logps (line 70) | def get_logps(self, output_tensor, labels, packed_seq_params, num_samp... method _postprocess_packed_tensor_cp (line 94) | def _postprocess_packed_tensor_cp(self, tensor, packed_seq_params, num... FILE: swift/megatron/trainers/rollout_mixin.py function create_rollout_group (line 32) | def create_rollout_group(trainer) -> torch.distributed.ProcessGroup: class MegatronRolloutMixin (line 102) | class MegatronRolloutMixin: method _init_rollout_params (line 104) | def _init_rollout_params(self): method _get_rollout_group (line 136) | def _get_rollout_group(self): method _get_local_rollout_batch (line 140) | def _get_local_rollout_batch(self, batch: List[Dict]) -> List[Dict]: method _gather_rollout_results (line 174) | def _gather_rollout_results(self, local_batch: List[Dict]) -> List[Dict]: method _init_rollout_engine (line 186) | def _init_rollout_engine(self): method _prepare_vllm_engine (line 229) | def _prepare_vllm_engine(self): method _move_model_to_vllm (line 276) | def _move_model_to_vllm(self): method _export_and_load_weights (line 296) | def _export_and_load_weights(self): method _load_weights_to_server_in_buckets (line 311) | def _load_weights_to_server_in_buckets(self, weight_iterator): method _sync_bucket_to_server (line 332) | def _sync_bucket_to_server(self, bucket_params: List[Tuple[str, torch.... method _generate_completions (line 346) | def _generate_completions(self, batch: DataType) -> DataType: method _rollout (line 391) | def _rollout(self, batch: DataType) -> List[RolloutOutput]: method _get_request_config (line 401) | def _get_request_config(self) -> RequestConfig: method _server_rollout (line 412) | def _server_rollout(self, inputs: DataType, request_config: RequestCon... method _colocate_rollout (line 438) | def _colocate_rollout(self, batch: DataType, request_config: RequestCo... method _preprocess_rollout_inputs (line 465) | def _preprocess_rollout_inputs(self, inputs: DataType) -> DataType: method _set_inputs_system (line 474) | def _set_inputs_system(self, inputs: DataType) -> DataType: method _inputs_to_requests (line 486) | def _inputs_to_requests(self, inputs: DataType) -> List[RolloutInferRe... method _postprocess_rollout_outputs (line 529) | def _postprocess_rollout_outputs(self, inputs: DataType, outputs: List... method offload_context (line 569) | def offload_context(self): FILE: swift/megatron/trainers/trainer.py class MegatronTrainer (line 18) | class MegatronTrainer(BaseMegatronTrainer): method seq_cls_loss_func (line 20) | def seq_cls_loss_func(self, output_tensor, *, labels: torch.Tensor, pa... method loss_func (line 50) | def loss_func(self, method _compute_channel_loss (line 87) | def _compute_channel_loss(self, losses, loss_mask, channels, packed_se... method forward_step (line 115) | def forward_step(self, data_iterator, model): FILE: swift/megatron/trainers/utils.py function get_batch_on_this_pp_rank (line 26) | def get_batch_on_this_pp_rank(args, data, vp_stage=None): function get_packed_seq_params (line 49) | def get_packed_seq_params(position_ids: torch.Tensor) -> PackedSeqParams: function get_batch_on_this_cp_rank (line 65) | def get_batch_on_this_cp_rank(args, batch: Dict[str, Any]): function gather (line 95) | def gather(tensor, group: Optional[torch.distributed.ProcessGroup] = None): function gather_object (line 105) | def gather_object(object: Any, group: Optional[torch.distributed.Process... function load_megatron_model_to_gpu (line 116) | def load_megatron_model_to_gpu(models, load_grad=True): function offload_megatron_model_to_cpu (line 143) | def offload_megatron_model_to_cpu(models): function load_megatron_copy_params (line 179) | def load_megatron_copy_params(optimizers): function offload_megatron_copy_params (line 220) | def offload_megatron_copy_params(optimizers): function load_megatron_optimizer (line 261) | def load_megatron_optimizer(optimizers): function offload_megatron_optimizer (line 285) | def offload_megatron_optimizer(optimizers): function log_gpu_memory (line 304) | def log_gpu_memory(prefix: str = '', info_once: bool = False): class TrainerState (line 314) | class TrainerState: method global_step (line 330) | def global_step(self) -> int: class MegatronDataLoaderDispatcher (line 334) | class MegatronDataLoaderDispatcher(DataLoaderDispatcher): method group (line 337) | def group(self): function build_streaming_dataloader (line 341) | def build_streaming_dataloader(args, dataset, collate_fn): FILE: swift/megatron/trainers/vocab_parallel_utils.py function vocab_parallel_log_softmax (line 19) | def vocab_parallel_log_softmax(logits: torch.Tensor) -> torch.Tensor: function vocab_parallel_entropy (line 56) | def vocab_parallel_entropy(log_probs: torch.Tensor, chunk_size: int = 51... function vocab_parallel_kl_div (line 105) | def vocab_parallel_kl_div(input_log_probs: torch.Tensor, target_log_prob... function vocab_parallel_gather_logps (line 134) | def vocab_parallel_gather_logps( function compute_logps_and_entropy_from_logits (line 196) | def compute_logps_and_entropy_from_logits( FILE: swift/megatron/tuners/lora.py class LoraParallelLinear (line 36) | class LoraParallelLinear(MegatronModule, LoraLayer): method __init__ (line 38) | def __init__( method update_layer (line 85) | def update_layer(self, adapter_name, r, *, lora_alpha, lora_dropout, i... method _get_rng_context (line 224) | def _get_rng_context(self, lora): method reset_lora_parameters (line 233) | def reset_lora_parameters(self, adapter_name, init_lora_weights): method _patch_router_gating (line 267) | def _patch_router_gating(self): method forward (line 298) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any): method sharded_state_dict (line 372) | def sharded_state_dict( method get_delta_weights (line 400) | def get_delta_weights(self, adapter) -> List[torch.Tensor]: method merge (line 423) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list... method unmerge (line 475) | def unmerge(self) -> None: function dispatch_megatron (line 510) | def dispatch_megatron( FILE: swift/megatron/utils/convert_utils.py function _test_params_sum (line 18) | def _test_params_sum(model): function _find_modules (line 38) | def _find_modules(model, recurse: bool = True, prefix='', ignore_modules... function _model_cpu_forward_context (line 60) | def _model_cpu_forward_context(modules, function get_examples (line 95) | def get_examples(is_multimodal: bool) -> Dict[str, Any]: function broadcast_mg_logits (line 149) | def broadcast_mg_logits(mg_logits=None, src_rank=None): function test_convert_precision (line 172) | def test_convert_precision(args, hf_model, mg_model, template, test_conv... FILE: swift/megatron/utils/megatron_lm_utils.py function _patch_megatron_timeout (line 40) | def _patch_megatron_timeout(distributed_timeout_minutes): function _initialize_mpu (line 57) | def _initialize_mpu(args): function set_random_seed (line 85) | def set_random_seed( function initialize_megatron (line 107) | def initialize_megatron(args): function _get_rng_state (line 121) | def _get_rng_state(): function _generate_state_dict (line 145) | def _generate_state_dict(args, function _filter_adapter_state_dict (line 180) | def _filter_adapter_state_dict(state_dict, is_peft_format: bool, adapter... function _preprocess_common_before_consistancy_check (line 216) | def _preprocess_common_before_consistancy_check(common_state_dict): function get_sharded_sd_metadata (line 226) | def get_sharded_sd_metadata(args): function save_mcore_checkpoint (line 246) | def save_mcore_checkpoint( function init_persistent_async_worker (line 325) | def init_persistent_async_worker(): function schedule_async_save (line 332) | def schedule_async_save(async_request: AsyncRequest): function maybe_finalize_async_save (line 341) | def maybe_finalize_async_save(args, blocking: bool = False, terminate=Fa... function is_empty_async_queue (line 360) | def is_empty_async_queue() -> bool: function _load_iteration (line 365) | def _load_iteration(tracker_path: str): function load_mcore_checkpoint (line 378) | def load_mcore_checkpoint(args, function wrap_model (line 496) | def wrap_model(args, models, wrap_with_ddp: bool = True): function get_optimizer_param_scheduler (line 552) | def get_optimizer_param_scheduler(args, optimizer): function unwrap_model (line 585) | def unwrap_model(models, module_instances=None): function should_disable_forward_pre_hook (line 610) | def should_disable_forward_pre_hook(args): function enable_forward_pre_hook (line 615) | def enable_forward_pre_hook(model_chunks): function disable_forward_pre_hook (line 621) | def disable_forward_pre_hook(model_chunks, param_sync=True): function initialize_tp_communicators (line 627) | def initialize_tp_communicators(args, config): function warmup_jit_function (line 657) | def warmup_jit_function(config, args): FILE: swift/megatron/utils/parallel_utils.py function reduce_max_stat_across_model_parallel_group (line 7) | def reduce_max_stat_across_model_parallel_group(stat: float) -> float: function logical_and_across_model_parallel_group (line 20) | def logical_and_across_model_parallel_group(input: bool) -> bool: function split_cp_inputs (line 30) | def split_cp_inputs(inputs: torch.Tensor, cu_seqlens: Optional[torch.Ten... FILE: swift/megatron/utils/patcher.py function patch_torch_dist_shard (line 12) | def patch_torch_dist_shard(thread_count): function patch_merge_fn (line 22) | def patch_merge_fn(state_dict_model): FILE: swift/megatron/utils/utils.py function find_all_linears (line 30) | def find_all_linears(model, extra_layers=None): function find_router (line 41) | def find_router(model): function find_embedding (line 45) | def find_embedding(model): function get_multimodal_target_regex (line 49) | def get_multimodal_target_regex( function get_target_modules (line 100) | def get_target_modules(args, model): function get_modules_to_save (line 137) | def get_modules_to_save(args, model): function set_linear_is_expert (line 147) | def set_linear_is_expert(model): function _patch_deepcopy (line 155) | def _patch_deepcopy(): function prepare_adapter (line 183) | def prepare_adapter(args, model): function _prepare_full_vit (line 219) | def _prepare_full_vit(args, model): function prepare_mcore_model (line 229) | def prepare_mcore_model(args, model): function tuners_sharded_state_dict (line 246) | def tuners_sharded_state_dict( function copy_original_module_weight (line 269) | def copy_original_module_weight(model): function copy_ref_adapter_weight (line 279) | def copy_ref_adapter_weight(model, ref_adapter_name: str): function forward_step_helper (line 297) | def forward_step_helper(args, model, inputs, dtype=None): function get_padding_to (line 329) | def get_padding_to(args): function get_local_layer_specs (line 346) | def get_local_layer_specs(config, layer_specs, vp_stage=None): FILE: swift/metrics/acc.py function compute_acc (line 10) | def compute_acc(preds, class AccMetrics (line 44) | class AccMetrics(EvalMetrics): method compute_metrics (line 46) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... method preprocess_logits_for_metrics (line 56) | def preprocess_logits_for_metrics(self, logits: torch.Tensor, labels: ... FILE: swift/metrics/base.py class EvalMetrics (line 11) | class EvalMetrics(ABC): method __init__ (line 13) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method compute_metrics (line 18) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... method preprocess_logits_for_metrics (line 21) | def preprocess_logits_for_metrics(self, logits: torch.Tensor, labels: ... FILE: swift/metrics/embedding.py class EmbedddingMetricMixin (line 14) | class EmbedddingMetricMixin(Metric): method __init__ (line 16) | def __init__(self): method update (line 21) | def update(self, last_hidden_state, labels): method compute (line 25) | def compute(self): class PairedMetrics (line 31) | class PairedMetrics(EvalMetrics, EmbedddingMetricMixin): method __init__ (line 33) | def __init__(self, *args, **kwargs): method compute_metrics (line 37) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... method _calculate_metrics (line 42) | def _calculate_metrics(self, predictions, labels): class InfonceMetrics (line 76) | class InfonceMetrics(EvalMetrics, EmbedddingMetricMixin): method __init__ (line 78) | def __init__(self, *args, **kwargs): method compute_metrics (line 82) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... method _calculate_metrics (line 87) | def _calculate_metrics(self, predictions, labels): FILE: swift/metrics/nlg.py function compute_rouge_bleu (line 12) | def compute_rouge_bleu(preds: List[str], labels: List[str]): class NlgMetrics (line 33) | class NlgMetrics(EvalMetrics): method compute_metrics (line 35) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... FILE: swift/metrics/reranker.py class RerankerMetrics (line 10) | class RerankerMetrics(EvalMetrics, Metric): method __init__ (line 12) | def __init__(self, *args, **kwargs): method update (line 18) | def update(self, logits, labels): method compute (line 22) | def compute(self): method compute_metrics (line 27) | def compute_metrics(self, eval_prediction: EvalPrediction) -> Dict[str... method _calculate_metrics (line 30) | def _calculate_metrics(self, logits, labels): FILE: swift/metrics/utils.py class Metric (line 13) | class Metric(ABC): method __init__ (line 15) | def __init__(self): method add_state (line 19) | def add_state(self, name: str, default=None, default_factory=None) -> ... method reset (line 31) | def reset(self): method update (line 38) | def update(self, *args, **kwargs): method compute (line 42) | def compute(self): class InferStats (line 46) | class InferStats(Metric): method __init__ (line 48) | def __init__(self): method update (line 54) | def update(self, output): method compute (line 59) | def compute(self): class MeanMetric (line 73) | class MeanMetric(Metric): method __init__ (line 75) | def __init__(self, nan_value=0, device=None, group=None): method update (line 85) | def update(self, state: torch.Tensor): method compute (line 102) | def compute(self): FILE: swift/model/constant.py class LLMModelType (line 6) | class LLMModelType: class BertModelType (line 122) | class BertModelType: class RMModelType (line 128) | class RMModelType: class MLLMModelType (line 136) | class MLLMModelType: class ModelType (line 265) | class ModelType(LLMModelType, MLLMModelType, BertModelType, RMModelType): method get_model_name_list (line 268) | def get_model_name_list(cls) -> List[str]: FILE: swift/model/model_arch.py class LLMModelArch (line 10) | class LLMModelArch: class MLLMModelArch (line 27) | class MLLMModelArch: class ModelArch (line 93) | class ModelArch(LLMModelArch, MLLMModelArch): class ModelKeys (line 100) | class ModelKeys: class MultiModelKeys (line 127) | class MultiModelKeys(ModelKeys): method __post_init__ (line 134) | def __post_init__(self): function register_model_arch (line 146) | def register_model_arch(model_arch: ModelKeys, *, exist_ok: bool = False... function get_model_arch (line 764) | def get_model_arch(arch_name: Optional[str]) -> Optional[MultiModelKeys]: FILE: swift/model/model_meta.py class Model (line 21) | class Model: class ModelGroup (line 31) | class ModelGroup: method __post_init__ (line 40) | def __post_init__(self): class BaseModelLoader (line 45) | class BaseModelLoader(ABC): method __init__ (line 48) | def __init__(self, model_info, model_meta, *args, **kwargs): method load (line 52) | def load(self) -> Tuple[Optional[PreTrainedModel], PreTrainedTokenizer... class ModelMeta (line 57) | class ModelMeta: method __post_init__ (line 81) | def __post_init__(self): method get_matched_model_group (line 96) | def get_matched_model_group(self, model_name: str) -> Optional[ModelGr... method check_requires (line 105) | def check_requires(self, model_info=None): class ModelInfo (line 125) | class ModelInfo: method __post_init__ (line 141) | def __post_init__(self): function get_model_name (line 145) | def get_model_name(model_id_or_path: str) -> Optional[str]: function get_matched_model_meta (line 161) | def get_matched_model_meta(model_id_or_path: str) -> Optional[ModelMeta]: function _get_arch_mapping (line 173) | def _get_arch_mapping(): function get_matched_model_types (line 186) | def get_matched_model_types(architectures: Optional[List[str]]) -> List[... function _read_args_json_model_type (line 195) | def _read_args_json_model_type(model_dir): function _get_model_info (line 203) | def _get_model_info(model_dir: str, model_type: Optional[str], quantizat... function get_model_info_meta (line 246) | def get_model_info_meta( FILE: swift/model/models/baai.py class Emu3GenLoader (line 15) | class Emu3GenLoader(ModelLoader): method get_processor (line 17) | def get_processor(self, model_dir, config) -> Processor: method get_model (line 33) | def get_model(self, model_dir: str, config, processor, model_kwargs): class Emu3ChatLoader (line 54) | class Emu3ChatLoader(ModelLoader): method get_processor (line 56) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class BgeRerankerLoader (line 92) | class BgeRerankerLoader(ModelLoader): method get_model (line 94) | def get_model(self, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/baichuan.py class BaichuanLoader (line 17) | class BaichuanLoader(ModelLoader): method get_model (line 19) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class BaichuanM1Loader (line 46) | class BaichuanM1Loader(BaichuanLoader): method get_model (line 48) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: function patch_baichuan2_lm_head_forward (line 76) | def patch_baichuan2_lm_head_forward(self, hidden_states: Tensor) -> Tensor: class Baichuan2Loader (line 89) | class Baichuan2Loader(ModelLoader): method get_model (line 91) | def get_model(self, model_dir: str, config, *args, **kwargs) -> PreTra... FILE: swift/model/models/baidu.py class ErnieVLLoader (line 57) | class ErnieVLLoader(ModelLoader): method get_model (line 59) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class PaddleOCR1_5Loader (line 88) | class PaddleOCR1_5Loader(ModelLoader): method get_config (line 90) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_model (line 95) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/bert.py class ModernBertLoader (line 14) | class ModernBertLoader(ModelLoader): method get_model (line 16) | def get_model(self, model_dir: str, config, *args, **kwargs) -> PreTra... class GTEBertLoader (line 36) | class GTEBertLoader(ModelLoader): method get_model (line 38) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class GTEBertReranker (line 63) | class GTEBertReranker(ModelLoader): method get_model (line 65) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/codefuse.py class CodeLlamaLoader (line 41) | class CodeLlamaLoader(ModelLoader): method get_processor (line 43) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... FILE: swift/model/models/deepseek.py class DeepseekLoader (line 16) | class DeepseekLoader(ModelLoader): method get_model (line 18) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class DeepseekV32Loader (line 129) | class DeepseekV32Loader(ModelLoader): method get_config (line 131) | def get_config(self, model_dir: str): method get_model (line 138) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class DeepseekVLLoader (line 169) | class DeepseekVLLoader(ModelLoader): method get_config (line 171) | def get_config(self, model_dir: str): method _get_model (line 186) | def _get_model(self, model_dir: str, llm_prefix, *args, **kwargs) -> P... method get_model (line 195) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class DeepseekJanusLoader (line 216) | class DeepseekJanusLoader(DeepseekVLLoader): method get_model (line 218) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_config (line 221) | def get_config(self, model_dir: str): class DeepseekVL2Loader (line 261) | class DeepseekVL2Loader(DeepseekVLLoader): method get_config (line 263) | def get_config(self, model_dir: str): method get_model (line 278) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class DeepseekOCRLoader (line 300) | class DeepseekOCRLoader(ModelLoader): method get_model (line 303) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 312) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class DeepseekOCR2Loader (line 326) | class DeepseekOCR2Loader(DeepseekOCRLoader): FILE: swift/model/models/gemma.py class PaligemmaVisionLoader (line 12) | class PaligemmaVisionLoader(ModelLoader): method get_model (line 14) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Gemma3TextLoader (line 94) | class Gemma3TextLoader(ModelLoader): method get_config (line 96) | def get_config(self, model_dir): class Gemma3VisionLoader (line 122) | class Gemma3VisionLoader(ModelLoader): method get_config (line 124) | def get_config(self, model_dir): method get_model (line 129) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Gemma3nLoader (line 159) | class Gemma3nLoader(ModelLoader): method get_model (line 161) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/glm.py function remove_property (line 22) | def remove_property(tokenizer_cls: Type[PreTrainedTokenizerBase], tokeni... function _patch_tokenizer (line 28) | def _patch_tokenizer(tokenizer): class ChatGLMLoader (line 43) | class ChatGLMLoader(ModelLoader): method get_model (line 45) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method get_processor (line 59) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class ChatGLM4Loader (line 110) | class ChatGLM4Loader(ChatGLMLoader): method get_processor (line 112) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class ChatGLM4vLoader (line 174) | class ChatGLM4vLoader(ChatGLMLoader): method get_model (line 176) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 190) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class GLM4vLoader (line 220) | class GLM4vLoader(ModelLoader): method get_model (line 222) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class CogVLMLoader (line 265) | class CogVLMLoader(ModelLoader): method get_model (line 267) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 273) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class CogVLM2Loader (line 317) | class CogVLM2Loader(ModelLoader): method get_model (line 319) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class GLMEdgeVLoader (line 377) | class GLMEdgeVLoader(ModelLoader): method get_processor (line 379) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class Glm4vMoeLoader (line 440) | class Glm4vMoeLoader(ModelLoader): method get_model (line 442) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class GLMOCRLoader (line 472) | class GLMOCRLoader(ModelLoader): method get_model (line 474) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/internlm.py class InternVLLoader (line 88) | class InternVLLoader(ModelLoader): method get_processor (line 90) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 94) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Interns1Loader (line 374) | class Interns1Loader(ModelLoader): method get_model (line 376) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class InternVLHfLoader (line 397) | class InternVLHfLoader(Interns1Loader): method get_model (line 399) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Xcomposer2Loader (line 474) | class Xcomposer2Loader(ModelLoader): method get_model (line 477) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Xcomposer2_4khdLoader (line 517) | class Xcomposer2_4khdLoader(Xcomposer2Loader): class Xcomposer2_5Loader (line 537) | class Xcomposer2_5Loader(Xcomposer2Loader): FILE: swift/model/models/llama.py class LlamaLoader (line 14) | class LlamaLoader(ModelLoader): method get_config (line 16) | def get_config(self, model_dir): class Llama3_2VisionLoader (line 242) | class Llama3_2VisionLoader(ModelLoader): method get_model (line 244) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Llama4Loader (line 270) | class Llama4Loader(ModelLoader): method get_model (line 272) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Llama3OmniLoader (line 301) | class Llama3OmniLoader(ModelLoader): method get_model (line 303) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... FILE: swift/model/models/llava.py class LlavaLlamaHfLoader (line 15) | class LlavaLlamaHfLoader(ModelLoader): method get_config (line 17) | def get_config(self, model_dir: str): method get_model (line 22) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: function _patch_llava (line 45) | def _patch_llava(model): class LlavahfLoader (line 61) | class LlavahfLoader(ModelLoader): method get_model (line 63) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class LlavaOnevisionHfLoader (line 87) | class LlavaOnevisionHfLoader(ModelLoader): method get_model (line 89) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class LlavaNextHfLoader (line 114) | class LlavaNextHfLoader(ModelLoader): method get_model (line 116) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class LlavaNextYiHfLoader (line 205) | class LlavaNextYiHfLoader(LlavaNextHfLoader): method get_config (line 207) | def get_config(self, model_dir: str) -> PretrainedConfig: class LlavaNextVideoHfLoader (line 230) | class LlavaNextVideoHfLoader(ModelLoader): method get_model (line 232) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class LlavaNextVideoYiHfLoader (line 257) | class LlavaNextVideoYiHfLoader(LlavaNextVideoHfLoader): method get_config (line 259) | def get_config(self, model_dir: str) -> PretrainedConfig: class LlavaLoader (line 283) | class LlavaLoader(ModelLoader): method get_config (line 286) | def get_config(self, model_dir: str): method get_model (line 306) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Llama3LlavaNextLoader (line 340) | class Llama3LlavaNextLoader(LlavaLoader): class LlavaMistralLoader (line 361) | class LlavaMistralLoader(LlavaLoader): class LlavaLlamaLoader (line 382) | class LlavaLlamaLoader(LlavaLoader): class LlavaNextQwenLoader (line 401) | class LlavaNextQwenLoader(LlavaLoader): class LlavaOnevisionLoader (line 421) | class LlavaOnevisionLoader(ModelLoader): method get_config (line 423) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_model (line 428) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/llm.py class GrokLoader (line 15) | class GrokLoader(ModelLoader): method get_processor (line 17) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class PolyLMLoader (line 36) | class PolyLMLoader(ModelLoader): method get_processor (line 38) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class YuanLoader (line 58) | class YuanLoader(ModelLoader): method get_processor (line 60) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... FILE: swift/model/models/mamba.py class MambaLoader (line 13) | class MambaLoader(ModelLoader): method get_model (line 15) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/microsoft.py class Phi3VisionLoader (line 16) | class Phi3VisionLoader(ModelLoader): method get_processor (line 19) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 25) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Phi4MultimodalLoader (line 49) | class Phi4MultimodalLoader(ModelLoader): method get_processor (line 51) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 63) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class FlorenceLoader (line 84) | class FlorenceLoader(ModelLoader): method get_model (line 86) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Phi3SmallLoader (line 117) | class Phi3SmallLoader(ModelLoader): method get_model (line 119) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/minicpm.py function _patch_minicpmv_device_map (line 33) | def _patch_minicpmv_device_map(model) -> None: class MiniCPMVLoader (line 58) | class MiniCPMVLoader(ModelLoader): method get_model (line 60) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class MiniCPMV2Loader (line 90) | class MiniCPMV2Loader(MiniCPMVLoader): method get_model (line 92) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class MiniCPMO2Loader (line 133) | class MiniCPMO2Loader(MiniCPMV2Loader): method get_model (line 135) | def get_model(self, model_dir: str, config, *args, **kwargs) -> PreTra... FILE: swift/model/models/minimax.py class MiniMaxVLLoader (line 17) | class MiniMaxVLLoader(ModelLoader): method get_model (line 19) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method get_processor (line 66) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class MinimaxTextLoader (line 92) | class MinimaxTextLoader(ModelLoader): method get_model (line 94) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... FILE: swift/model/models/mistral.py class DevstralLoader (line 119) | class DevstralLoader(ModelLoader): method get_processor (line 121) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class Mistral3Loader (line 142) | class Mistral3Loader(ModelLoader): method get_model (line 144) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Mistral3_2506Loader (line 189) | class Mistral3_2506Loader(Mistral3Loader): method get_processor (line 191) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... FILE: swift/model/models/mllm.py class Idefics3Loader (line 20) | class Idefics3Loader(ModelLoader): method get_model (line 22) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class PixtralLoader (line 45) | class PixtralLoader(ModelLoader): method get_model (line 47) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class MolMoeLoader (line 70) | class MolMoeLoader(ModelLoader): method get_model (line 72) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class MolmoLoader (line 107) | class MolmoLoader(ModelLoader): method get_model (line 109) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class MegrezOmniLoader (line 136) | class MegrezOmniLoader(ModelLoader): method get_model (line 138) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method _get_model_processor (line 148) | def _get_model_processor(self, model_dir, config): class JinaRerankerM0Loader (line 185) | class JinaRerankerM0Loader(ModelLoader): method get_model (line 187) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class KeyeVLLoader (line 269) | class KeyeVLLoader(ModelLoader): method get_processor (line 271) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class DotsOCRLoader (line 312) | class DotsOCRLoader(ModelLoader): method get_model (line 314) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Sail2VLLoader (line 334) | class Sail2VLLoader(ModelLoader): method get_model (line 336) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/moonshot.py class KimiVLLoader (line 13) | class KimiVLLoader(ModelLoader): method get_model (line 15) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/mplug.py class MplugOwl2Loader (line 21) | class MplugOwl2Loader(ModelLoader): method _get_model (line 23) | def _get_model(self, model_dir: str, vocab_size, *args, **kwargs) -> P... method get_model (line 38) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 41) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class MplugOwl2_1Loader (line 59) | class MplugOwl2_1Loader(QwenLoader, MplugOwl2Loader): method get_model (line 61) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class MplugOwl3Loader (line 77) | class MplugOwl3Loader(ModelLoader): method get_model (line 79) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method _get_model_processor (line 104) | def _get_model_processor(self, model_dir, config): class DocOwl2Loader (line 142) | class DocOwl2Loader(ModelLoader): method _get_model_processor (line 144) | def _get_model_processor(self, model_dir, config): FILE: swift/model/models/qwen.py class QwenLoader (line 30) | class QwenLoader(ModelLoader): method get_model (line 32) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method _update_attn_impl (line 51) | def _update_attn_impl(self, config): method get_processor (line 55) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... function _qwen_vl_audio_decode (line 115) | def _qwen_vl_audio_decode(self, *args, skip_special_tokens=False, **kwar... function fix_qwen_inplace_bug (line 125) | def fix_qwen_inplace_bug(model) -> None: class QwenAudioLoader (line 133) | class QwenAudioLoader(QwenLoader): method get_model (line 135) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 140) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... function _qwen_vl_visual_block_forward (line 169) | def _qwen_vl_visual_block_forward( class QwenVLLoader (line 185) | class QwenVLLoader(QwenLoader): method get_model (line 187) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method get_processor (line 217) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... function _get_new_read_video_func (line 639) | def _get_new_read_video_func(read_video_func, read_backend): function patch_qwen_vl_utils (line 659) | def patch_qwen_vl_utils(vision_process): function compat_qwen_vl_utils (line 709) | def compat_qwen_vl_utils(image_patch_size: int): class Qwen2VLLoader (line 724) | class Qwen2VLLoader(ModelLoader): method get_model (line 726) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method _check_qwen_vl_utils (line 734) | def _check_qwen_vl_utils(self): method get_processor (line 745) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class Qwen2_5VLLoader (line 803) | class Qwen2_5VLLoader(Qwen2VLLoader): method get_model (line 805) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: function patch_Qwen3VLMoeTextExperts_dtype (line 838) | def patch_Qwen3VLMoeTextExperts_dtype(): function _forward_qwen3_vl_or_qwen3_omni (line 852) | def _forward_qwen3_vl_or_qwen3_omni( function _patch_deepstack_process (line 935) | def _patch_deepstack_process(model): function _compat_qwen3_vl_mixed_data (line 958) | def _compat_qwen3_vl_mixed_data(model, processor, is_moe: bool = False): class Qwen3VLLoader (line 1052) | class Qwen3VLLoader(Qwen2VLLoader): method _check_qwen_vl_utils (line 1054) | def _check_qwen_vl_utils(self): method get_model (line 1058) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Qwen3VLMoeLoader (line 1095) | class Qwen3VLMoeLoader(Qwen3VLLoader): method get_model (line 1097) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Qwen3_5MoeLoader (line 1125) | class Qwen3_5MoeLoader(Qwen3VLLoader): method get_model (line 1127) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Qwen3_5Loader (line 1157) | class Qwen3_5Loader(Qwen3VLLoader): method get_model (line 1159) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class Qwen2_5OmniLoader (line 1193) | class Qwen2_5OmniLoader(ModelLoader): method _check_qwen_omni_utils (line 1195) | def _check_qwen_omni_utils(self): method get_config (line 1204) | def get_config(self, model_dir): method get_model (line 1214) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: method get_processor (line 1225) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... function _compat_qwen3_omni_mixed_data (line 1253) | def _compat_qwen3_omni_mixed_data(model, processor): class Qwen3OmniLoader (line 1407) | class Qwen3OmniLoader(ModelLoader): method _check_qwen_omni_utils (line 1409) | def _check_qwen_omni_utils(self): method get_config (line 1413) | def get_config(self, model_dir: str): method get_model (line 1423) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... method get_processor (line 1436) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... class MidashengLMLoader (line 1464) | class MidashengLMLoader(ModelLoader): method get_model (line 1466) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Qwen2AudioLoader (line 1487) | class Qwen2AudioLoader(ModelLoader): method get_model (line 1489) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class OvisLoader (line 1512) | class OvisLoader(ModelLoader): method get_processor (line 1514) | def get_processor(self, model_dir, config) -> Processor: method get_model (line 1518) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Ovis2_5Loader (line 1596) | class Ovis2_5Loader(ModelLoader): method get_model (line 1598) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Qwen3VLEmbLoader (line 1689) | class Qwen3VLEmbLoader(Qwen3VLLoader): method _check_qwen_vl_utils (line 1691) | def _check_qwen_vl_utils(self): class Qwen3VLRerankerLoader (line 1714) | class Qwen3VLRerankerLoader(Qwen3VLLoader): method _check_qwen_vl_utils (line 1716) | def _check_qwen_vl_utils(self): FILE: swift/model/models/skywork.py class SkyworkLoader (line 14) | class SkyworkLoader(ModelLoader): method get_processor (line 16) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... FILE: swift/model/models/stepfun.py class GotOCR2Loader (line 15) | class GotOCR2Loader(ModelLoader): method get_model (line 17) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class GotOCR2HfLoader (line 36) | class GotOCR2HfLoader(ModelLoader): method get_model (line 38) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class StepAudioLoader (line 58) | class StepAudioLoader(ModelLoader): method get_model (line 60) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: function _patch_step_audio2_mini (line 91) | def _patch_step_audio2_mini(model): class StepAudio2MiniLoader (line 109) | class StepAudio2MiniLoader(ModelLoader): method get_model (line 111) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: class Step3VLLoader (line 133) | class Step3VLLoader(ModelLoader): method get_config (line 135) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_processor (line 140) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 144) | def get_model(self, model_dir: str, config: PretrainedConfig, processo... FILE: swift/model/models/telechat.py class TeleChatLoader (line 12) | class TeleChatLoader(ModelLoader): method get_model (line 14) | def get_model(self, model_dir: str, config, processor, **kwargs) -> Pr... FILE: swift/model/models/tencent.py class HunyuanVLLoader (line 12) | class HunyuanVLLoader(ModelLoader): method get_config (line 14) | def get_config(self, model_dir: str): method get_model (line 18) | def get_model(self, model_dir: str, *args, **kwargs) -> PreTrainedModel: FILE: swift/model/models/valley.py class ValleyLoader (line 15) | class ValleyLoader(ModelLoader): method get_config (line 17) | def get_config(self, model_dir: str): method get_model (line 27) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... FILE: swift/model/models/yi.py class YiVLLoader (line 17) | class YiVLLoader(ModelLoader): method get_config (line 19) | def get_config(self, model_dir: str) -> PretrainedConfig: method get_processor (line 33) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 36) | def get_model(self, model_dir: str, config, processor, **kwargs) -> Pr... FILE: swift/model/npu_patcher.py function _set_default_hccl_connect_timeout_for_npu (line 25) | def _set_default_hccl_connect_timeout_for_npu() -> None: class NPUCastError (line 36) | class NPUCastError(RuntimeError): function _get_first_parameter (line 40) | def _get_first_parameter(module: torch.nn.Module) -> torch.nn.Parameter ... function _needs_fp32_cast_for_npu (line 46) | def _needs_fp32_cast_for_npu( function _cast_to_fp32 (line 60) | def _cast_to_fp32(module: torch.nn.Module) -> torch.nn.Module: function wrapped_fsdp2_prepare_model (line 81) | def wrapped_fsdp2_prepare_model( function wrapped_prepare_fsdp2 (line 101) | def wrapped_prepare_fsdp2( class NpuRMSNorm (line 117) | class NpuRMSNorm(nn.Module): method __init__ (line 119) | def __init__(self, hidden_size, eps=1e-6): method forward (line 124) | def forward(self, hidden_states): method extra_repr (line 127) | def extra_repr(self): function npu_apply_rotary_pos_emb (line 131) | def npu_apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueez... function npu_swiglu_forward (line 140) | def npu_swiglu_forward(self, hidden_state): class NpuGmmFunction (line 145) | class NpuGmmFunction(torch.autograd.Function): method forward (line 148) | def forward(ctx, x, weight, group_list, split_size): method backward (line 157) | def backward(ctx, grad_outputs): function npu_moe_block_forward (line 172) | def npu_moe_block_forward(self, hidden_states: torch.Tensor) -> torch.Te... class GmmFunction (line 236) | class GmmFunction(torch.autograd.Function): method forward (line 239) | def forward(ctx, x, weight, group_list): method backward (line 252) | def backward(ctx, grad_output): class NpuMoeFused (line 275) | class NpuMoeFused: method npu_moe_experts_forward (line 278) | def npu_moe_experts_forward(self, hidden_states: torch.Tensor, routing... method npu_moe_sparse_block_forward (line 293) | def npu_moe_sparse_block_forward(self, hidden_states: torch.Tensor) ->... function _setattr_path (line 306) | def _setattr_path(root: Any, path: str, value: Any) -> None: function _apply_patch_map (line 314) | def _apply_patch_map(root: Any, patch_map: dict[str, Any]) -> None: FILE: swift/model/patcher.py function patch_fixed_float_dtype (line 29) | def patch_fixed_float_dtype(module: torch.nn.Module, dtype): function patch_fixed_device (line 42) | def patch_fixed_device(module: torch.nn.Module, device): function patch_output_clone (line 55) | def patch_output_clone(module: torch.nn.Module): function patch_get_input_embeddings (line 64) | def patch_get_input_embeddings(model, embedding_keys: str): function patch_output_normalizer (line 72) | def patch_output_normalizer(module: torch.nn.Module, model_meta): function patch_output_to_input_device (line 102) | def patch_output_to_input_device(module: torch.nn.Module): function patch_device_map (line 117) | def patch_device_map(): function patch_ignore_check_imports (line 134) | def patch_ignore_check_imports(): function get_lm_head_model (line 148) | def get_lm_head_model(model, model_meta=None, lm_heads=None): function transformers_seq_cls_forward (line 168) | def transformers_seq_cls_forward(self, *args, origin_forward, padding_si... function _patch_sequence_classification (line 243) | def _patch_sequence_classification(model, model_meta): function patch_automodel_for_sequence_classification (line 270) | def patch_automodel_for_sequence_classification(model_info=None, function patch_automodel (line 370) | def patch_automodel(model_info, model_meta, auto_model_cls, return_dummy... function _get_max_memory (line 398) | def _get_max_memory(device_ids: List[int]) -> Dict[Union[int, str], int]: function _sync_max_memory (line 416) | def _sync_max_memory(max_memory: Dict[Union[int, str], int]) -> Dict[Uni... function patch_mp_ddp (line 436) | def patch_mp_ddp(): function patch_get_dynamic_module (line 477) | def patch_get_dynamic_module(): function patch_tp_plan (line 492) | def patch_tp_plan(load_model: bool): function revert_padding_free (line 505) | def revert_padding_free(outputs: Dict[str, Any], inputs: Dict[str, Any],... function gather_sequence_parallel_outputs (line 560) | def gather_sequence_parallel_outputs( function patch_attach_align_device_hook_on_blocks (line 586) | def patch_attach_align_device_hook_on_blocks(): function patch_module_forward (line 600) | def patch_module_forward(module, new_forward): FILE: swift/model/register.py function register_model (line 31) | def register_model(model_meta: ModelMeta, *, exist_ok: bool = False) -> ... function load_by_unsloth (line 45) | def load_by_unsloth(args): function _patch_awq_compat (line 98) | def _patch_awq_compat(model_info): function _set_property (line 120) | def _set_property(model, key): function fix_do_sample_warning (line 133) | def fix_do_sample_warning(generation_config: GenerationConfig) -> None: function get_model_list (line 143) | def get_model_list() -> List[str]: class ModelLoader (line 160) | class ModelLoader(BaseModelLoader): method __init__ (line 162) | def __init__( method _postprocess_config (line 213) | def _postprocess_config(self, config): method get_config (line 244) | def get_config(self, model_dir: str) -> PretrainedConfig: method _get_tokenizer (line 248) | def _get_tokenizer(self, processor): method get_processor (line 256) | def get_processor(self, model_dir: str, config: PretrainedConfig) -> P... method get_model (line 266) | def get_model(self, model_dir: str, config: PretrainedConfig, processo... method _patch_generative_reranker (line 325) | def _patch_generative_reranker(self, model, processor): method _postprocess_model (line 334) | def _postprocess_model(self, model_dir, model): method _add_new_special_tokens (line 349) | def _add_new_special_tokens(self, model, processor, config): method _postprocess_processor (line 365) | def _postprocess_processor(self, processor: Processor): method _compat_transformers5 (line 380) | def _compat_transformers5(self, model): method _update_attn_impl (line 385) | def _update_attn_impl(self, config): method _deepspeed_set_z3_leaf_modules (line 388) | def _deepspeed_set_z3_leaf_modules(self, model, z3_leaf_modules): method _init_generation_config (line 442) | def _init_generation_config(self, model, model_dir): method _get_model_processor (line 451) | def _get_model_processor(self, model_dir, config): method load (line 458) | def load(self) -> Tuple[Optional[PreTrainedModel], Processor]: class SentenceTransformersLoader (line 472) | class SentenceTransformersLoader(ModelLoader): method get_model (line 474) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... class RewardModelLoader (line 493) | class RewardModelLoader(ModelLoader): method get_model (line 495) | def get_model(self, model_dir: str, config, processor, model_kwargs) -... function get_model_processor (line 501) | def get_model_processor( function get_processor (line 618) | def get_processor( FILE: swift/model/utils.py class AttnImpl (line 26) | class AttnImpl: method to_use_flash_attn (line 31) | def to_use_flash_attn(attn_impl: Optional[str], auto_value: _T = None)... method update_attn_impl (line 37) | def update_attn_impl(config: PretrainedConfig, function get_llm_model (line 55) | def get_llm_model(model: torch.nn.Module, model_meta=None, inner_backbon... function use_submodel_func (line 90) | def use_submodel_func(model, submodel_name: str, func_list: Optional[Lis... class InitModelStrategy (line 124) | class InitModelStrategy: method is_uninitialized (line 127) | def is_uninitialized(param: torch.Tensor) -> bool: method constant_init (line 153) | def constant_init(param: torch.Tensor, c: float = 0) -> None: method uniform_init (line 157) | def uniform_init(param: torch.Tensor, a: float = -0.1, b: float = 0.1)... method normal_init (line 161) | def normal_init(param: torch.Tensor, mean: float = 0.0, std: float = 0... method _init_high_dim (line 165) | def _init_high_dim(param: torch.Tensor, init_func, *args, **kwargs) ->... method xavier_uniform_init (line 173) | def xavier_uniform_init(param: torch.Tensor) -> None: method xavier_normal_init (line 177) | def xavier_normal_init(param: torch.Tensor) -> None: method kaiming_uniform_init (line 181) | def kaiming_uniform_init(param: torch.Tensor) -> None: method kaiming_normal_init (line 186) | def kaiming_normal_init(param: torch.Tensor) -> None: method orthogonal_init (line 190) | def orthogonal_init(param: torch.Tensor) -> None: method init_parameters (line 205) | def init_parameters(model: nn.Module, init_strategy: str) -> None: function get_default_device_map (line 224) | def get_default_device_map(): function get_default_torch_dtype (line 240) | def get_default_torch_dtype(torch_dtype: Optional[torch.dtype]): function _patch_conv3d (line 261) | def _patch_conv3d(): function save_checkpoint (line 289) | def save_checkpoint(model: Optional[PreTrainedModel], function get_ckpt_dir (line 328) | def get_ckpt_dir(model_dir: str, adapters_dir: Optional[List[str]]) -> str: FILE: swift/optimizers/base.py class OptimizerCallback (line 13) | class OptimizerCallback: method __init__ (line 28) | def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): method create_optimizer_and_scheduler (line 32) | def create_optimizer_and_scheduler(self, num_training_steps: int) -> N... method create_optimizer (line 50) | def create_optimizer(self) -> Optimizer: method create_scheduler (line 53) | def create_scheduler(self, num_training_steps: int, optimizer: Optimiz... FILE: swift/optimizers/galore/adafactor.py class Adafactor (line 11) | class Adafactor(Optimizer): method __init__ (line 96) | def __init__( method _get_lr (line 129) | def _get_lr(param_group, param_state): method _get_options (line 140) | def _get_options(param_group, param_shape): method _rms (line 146) | def _rms(tensor): method _approx_sq_grad (line 150) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col): method step (line 158) | def step(self, closure=None): FILE: swift/optimizers/galore/adamw.py class AdamW (line 13) | class AdamW(Optimizer): method __init__ (line 35) | def __init__( method step (line 58) | def step(self, closure: Callable = None): FILE: swift/optimizers/galore/adamw8bit.py class AdamW8bit (line 9) | class AdamW8bit(Optimizer2State): method __init__ (line 11) | def __init__(self, method step (line 39) | def step(self, closure=None): FILE: swift/optimizers/galore/galore_projector.py class GaLoreProjector (line 6) | class GaLoreProjector: method __init__ (line 8) | def __init__(self, rank, verbose=False, update_proj_gap=200, scale=1.0... method project (line 16) | def project(self, full_rank_grad, iter): method project_back (line 51) | def project_back(self, low_rank_grad): method get_orthogonal_matrix (line 73) | def get_orthogonal_matrix(self, weights, rank, type): FILE: swift/optimizers/galore/utils.py class GaLoreConfig (line 27) | class GaLoreConfig: class GaloreOptimizerWrapper (line 59) | class GaloreOptimizerWrapper(Optimizer): method __init__ (line 61) | def __init__(self, optimizers: Dict[Any, Optimizer]): method zero_grad (line 65) | def zero_grad(self, *args, **kwargs) -> None: method step (line 69) | def step(self, *args, **kwargs) -> None: class GaloreSchedulerWrapper (line 74) | class GaloreSchedulerWrapper(LRScheduler): method __init__ (line 76) | def __init__(self, lr_schedulers: Dict[Any, LRScheduler]): method step (line 79) | def step(self, *args, **kwargs) -> None: function _create_optimizer_and_scheduler (line 85) | def _create_optimizer_and_scheduler(model: nn.Module, args: 'TrainingArg... function get_optimizer (line 175) | def get_optimizer(args: 'TrainingArguments', config: GaLoreConfig) -> Tu... class GaloreOptimizerCallback (line 222) | class GaloreOptimizerCallback(OptimizerCallback): method create_optimizer_and_scheduler (line 224) | def create_optimizer_and_scheduler(self, num_training_steps: int): FILE: swift/optimizers/lorap.py class LorapOptimizerCallback (line 7) | class LorapOptimizerCallback(OptimizerCallback): method create_optimizer (line 9) | def create_optimizer(self) -> Optimizer: FILE: swift/optimizers/multimodal.py function get_param_startswith (line 13) | def get_param_startswith(model, class MultimodalOptimizerCallback (line 43) | class MultimodalOptimizerCallback(OptimizerCallback): method create_optimizer (line 45) | def create_optimizer(self): FILE: swift/optimizers/muon.py class MuonOptimizerCallback (line 8) | class MuonOptimizerCallback(OptimizerCallback): method create_optimizer (line 10) | def create_optimizer(self): FILE: swift/optimizers/muonclip.py class _MaxLogitsTracker (line 15) | class _MaxLogitsTracker: method _get_and_reset (line 38) | def _get_and_reset(cls) -> Optional[float]: method _update (line 44) | def _update(cls, v: float): method enable_softmax (line 52) | def enable_softmax(cls): method enable_sdpa (line 86) | def enable_sdpa(cls): method enable_flash_attn (line 123) | def enable_flash_attn(cls): method enable_all (line 170) | def enable_all(cls): method consume (line 179) | def consume(cls) -> Optional[float]: class MuonClip (line 183) | class MuonClip(Optimizer): method __init__ (line 192) | def __init__( method newton_schulz (line 219) | def newton_schulz(G: torch.Tensor, steps: int = 5, eps: float = 1e-7) ... method _is_qk_weight (line 243) | def _is_qk_weight(self, group) -> bool: method step (line 247) | def step(self, closure=None, max_logits: Optional[float] = None): class MuonClipOptimizerCallback (line 316) | class MuonClipOptimizerCallback(OptimizerCallback): method create_optimizer (line 318) | def create_optimizer(self): FILE: swift/pipelines/app/app.py class SwiftApp (line 16) | class SwiftApp(SwiftPipeline): method run (line 20) | def run(self): function app_main (line 42) | def app_main(args: Optional[Union[List[str], AppArguments]] = None): FILE: swift/pipelines/app/build_ui.py function clear_session (line 12) | def clear_session(): function modify_system_session (line 16) | def modify_system_session(system: str): function _history_to_messages (line 21) | def _history_to_messages(history: History, system: Optional[str]): function _parse_text (line 46) | def _parse_text(text: str) -> str: function model_chat (line 53) | async def model_chat(history: History, real_history: History, system: Op... function add_text (line 80) | def add_text(history: History, real_history: History, query: str): function add_file (line 88) | def add_file(history: History, real_history: History, file): function build_ui (line 96) | def build_ui(base_url: str, FILE: swift/pipelines/base.py class SwiftPipeline (line 14) | class SwiftPipeline(ABC, ProcessorMixin): method __init__ (line 17) | def __init__(self, args: Optional[Union[List[str], args_class]] = None): method _set_seed (line 24) | def _set_seed(self): method _parse_args (line 31) | def _parse_args(self, args: Optional[Union[List[str], args_class]] = N... method _compat_dsw_gradio (line 44) | def _compat_dsw_gradio(args) -> None: method main (line 49) | def main(self): method run (line 57) | def run(self): FILE: swift/pipelines/eval/eval.py class SwiftEval (line 18) | class SwiftEval(SwiftPipeline): method run (line 22) | def run(self): method get_task_result (line 47) | def get_task_result(self, task_cfg: TaskConfig): method get_task_cfg (line 67) | def get_task_cfg(self, dataset: List[str], eval_backend: str, url: str): method get_native_task_cfg (line 91) | def get_native_task_cfg(self, dataset: List[str], url: str): method get_opencompass_task_cfg (line 107) | def get_opencompass_task_cfg(self, dataset: List[str], url: str): method get_vlmeval_task_cfg (line 133) | def get_vlmeval_task_cfg(self, dataset: List[str], url: str): function eval_main (line 159) | def eval_main(args: Optional[Union[List[str], EvalArguments]] = None): FILE: swift/pipelines/eval/utils.py class BatchInferInput (line 24) | class BatchInferInput: class _QueueItem (line 38) | class _QueueItem: class EvalModel (line 55) | class EvalModel(ModelAPI): method __init__ (line 63) | def __init__( method generate (line 104) | def generate( function _process_batches (line 150) | def _process_batches() -> None: function convert_config (line 214) | def convert_config(config: GenerateConfig) -> RequestConfig: function convert_request (line 240) | def convert_request(messages: List[EvalChatMessage], tools: List[ToolInf... FILE: swift/pipelines/export/cached_dataset.py class ExportCachedDataset (line 13) | class ExportCachedDataset(SwiftSft): method __init__ (line 17) | def __init__(self, args: Optional[Union[List[str], ExportArguments]] =... method _post_process_datasets (line 31) | def _post_process_datasets(self, datasets: List) -> List: method main (line 34) | def main(self): function export_cached_dataset (line 46) | def export_cached_dataset(args: Optional[Union[List[str], ExportArgument... FILE: swift/pipelines/export/export.py class SwiftExport (line 16) | class SwiftExport(SwiftPipeline): method run (line 20) | def run(self): function export_main (line 53) | def export_main(args: Optional[Union[List[str], ExportArguments]] = None): FILE: swift/pipelines/export/merge_lora.py function check_tie_word_embeddings (line 13) | def check_tie_word_embeddings(model): function merge_lora (line 27) | def merge_lora(args: ExportArguments, device_map=None, replace_if_exists... FILE: swift/pipelines/export/ollama.py function replace_and_concat (line 14) | def replace_and_concat(template: 'Template', template_list: List, placeh... function export_to_ollama (line 33) | def export_to_ollama(args: ExportArguments): FILE: swift/pipelines/export/quant.py class QuantEngine (line 21) | class QuantEngine(ProcessorMixin): method __init__ (line 23) | def __init__(self, args: ExportArguments): method quantize (line 36) | def quantize(self): method _prepare_gptq_dataset (line 74) | def _prepare_gptq_dataset(self, examples: List[Dict[str, torch.LongTen... method _get_quant_dataset (line 85) | def _get_quant_dataset(self, *args, **kwargs): method _patch_awq_move_embed (line 134) | def _patch_awq_move_embed(awq_model): method awq_model_quantize (line 148) | def awq_model_quantize(self) -> None: method _patch_gptq (line 176) | def _patch_gptq(self): method get_block_name_to_quantize (line 189) | def get_block_name_to_quantize(model: nn.Module) -> Optional[str]: method _get_experts (line 208) | def _get_experts(block): method get_modules_in_block_to_quantize (line 214) | def get_modules_in_block_to_quantize(model, block_name: str): method _patch_gptq_block (line 240) | def _patch_gptq_block(self, model, block_name_to_quantize): method gptq_model_quantize (line 262) | def gptq_model_quantize(self, v2: bool = False): function quantize_model (line 289) | def quantize_model(args: ExportArguments): FILE: swift/pipelines/infer/deploy.py class SwiftDeploy (line 28) | class SwiftDeploy(SwiftInfer): method get_infer_engine (line 33) | def get_infer_engine(args: InferArguments, template=None, **kwargs): method _register_app (line 43) | def _register_app(self): method __init__ (line 52) | def __init__(self, args: Optional[Union[List[str], DeployArguments]] =... method _log_stats_hook (line 60) | async def _log_stats_hook(self): method _compute_infer_stats (line 66) | def _compute_infer_stats(self): method lifespan (line 72) | def lifespan(self, app: FastAPI): method _get_model_list (line 83) | def _get_model_list(self): method health (line 90) | async def health(self) -> Response: method ping (line 97) | async def ping(self) -> Response: method get_available_models (line 101) | async def get_available_models(self): method _check_model (line 106) | async def _check_model(self, request: ChatCompletionRequest) -> Option... method _check_api_key (line 112) | def _check_api_key(self, raw_request: Request) -> Optional[str]: method _check_max_logprobs (line 124) | def _check_max_logprobs(self, request): method create_error_response (line 131) | def create_error_response(status_code: Union[int, str, HTTPStatus], me... method _post_process (line 135) | def _post_process(self, request_info, response, return_cmpl_response: ... method _set_request_config (line 163) | def _set_request_config(self, request_config) -> None: method create_chat_completion (line 172) | async def create_chat_completion(self, method create_completion (line 217) | async def create_completion(self, request: CompletionRequest, raw_requ... method create_embedding (line 221) | async def create_embedding(self, request: EmbeddingRequest, raw_reques... method run (line 225) | def run(self): function deploy_main (line 238) | def deploy_main(args: Optional[Union[List[str], DeployArguments]] = None... function is_accessible (line 242) | def is_accessible(port: int): function _deploy_main (line 251) | def _deploy_main(args): function run_deploy (line 257) | def run_deploy(args: DeployArguments, return_url: bool = False): FILE: swift/pipelines/infer/infer.py class SwiftInfer (line 20) | class SwiftInfer(SwiftPipeline): method __init__ (line 24) | def __init__(self, args: Optional[Union[List[str], InferArguments]] = ... method __getattr__ (line 42) | def __getattr__(self, key: str): method get_infer_engine (line 51) | def get_infer_engine(args: InferArguments, template=None, **kwargs): method run (line 90) | def run(self) -> List[Dict[str, Any]]: method parse_data_from_response (line 102) | def parse_data_from_response(response): method infer_single (line 113) | def infer_single(self, infer_request: Union[InferRequest, Dict[str, An... method infer_cli (line 128) | def infer_cli(self) -> List[Dict[str, Any]]: method _prepare_val_dataset (line 176) | def _prepare_val_dataset(self) -> HfDataset: method _calc_metric (line 200) | def _calc_metric(self): method infer_dataset (line 218) | def infer_dataset(self) -> List[Dict[str, Any]]: method _batch_infer (line 271) | def _batch_infer(self, val_dataset, request_config): function infer_main (line 306) | def infer_main(args: Optional[Union[List[str], InferArguments]] = None): FILE: swift/pipelines/infer/rollout.py class WeightSyncWorkerExtension (line 82) | class WeightSyncWorkerExtension: method init_communicator (line 88) | def init_communicator(self, host: str, port: int, world_size: int) -> ... method update_named_param (line 126) | def update_named_param(self, name: str, dtype: str, shape: Sequence[in... method update_adapter_flattened_param (line 156) | def update_adapter_flattened_param(self, lora_int_id: int, peft_config... method update_adapter_param (line 179) | def update_adapter_param(self, lora_int_id: int, peft_config: Dict, lo... method update_flattened_params (line 213) | def update_flattened_params(self, metadatas: list[Dict]) -> None: method close_communicator (line 240) | def close_communicator(self) -> None: function safe_set_start_method (line 256) | def safe_set_start_method(): function get_rollout_engine_type (line 261) | def get_rollout_engine_type(args: RolloutArguments, engine: GRPOVllmEngi... function llm_worker (line 286) | def llm_worker(args: RolloutArguments, data_parallel_rank: int, master_p... function async_llm_worker (line 323) | async def async_llm_worker(args: RolloutArguments, data_parallel_rank: i... function llm_worker_entry (line 359) | def llm_worker_entry(*args, **kwargs): class SwiftRolloutDeploy (line 363) | class SwiftRolloutDeploy(SwiftPipeline): method _register_rl_rollout_app (line 367) | def _register_rl_rollout_app(self): method __init__ (line 380) | def __init__(self, args: Optional[Union[List[str], RolloutArguments]] ... method _start_data_parallel_workers (line 393) | def _start_data_parallel_workers(self): method lifespan (line 403) | async def lifespan(self, app: FastAPI): method get_infer_engine (line 424) | def get_infer_engine(args: RolloutArguments, template=None, **kwargs): method health (line 460) | async def health(self): method get_world_size (line 466) | async def get_world_size(self): method init_communicator (line 481) | async def init_communicator(self, request: InitCommunicatorRequest): method update_named_param (line 503) | async def update_named_param(self, request: UpdateWeightsRequest): method update_adapter_flattened_param (line 525) | async def update_adapter_flattened_param(self, request: UpdateFlattene... method update_adapter_param (line 537) | async def update_adapter_param(self, request: UpdateAdapterRequest): method update_flattened_params (line 559) | async def update_flattened_params(self, request: UpdateFlattenedParams... method reset_prefix_cache (line 578) | async def reset_prefix_cache(self): method get_engine_type (line 589) | async def get_engine_type(self): method close_communicator (line 621) | async def close_communicator(self): method infer (line 630) | async def infer( method run (line 657) | def run(self): function rollout_main (line 662) | def rollout_main(args: Optional[Union[List[str], RolloutArguments]] = No... function is_accessible (line 666) | def is_accessible(port: int): function run_rollout (line 676) | def run_rollout(args: RolloutArguments, return_url: bool = False): FILE: swift/pipelines/infer/utils.py class InferCliState (line 14) | class InferCliState: method clear (line 26) | def clear(self): method add_query (line 32) | def add_query(self, query: str) -> None: method add_response (line 39) | def add_response(self, response: str) -> None: method to_dict (line 42) | def to_dict(self): method input_mm_data (line 53) | def input_mm_data(self) -> None: method _input_multiline (line 70) | def _input_multiline(prompt: str) -> str: method input_text (line 82) | def input_text(self) -> str: method check_query (line 91) | def check_query(self, query: str) -> Optional[str]: FILE: swift/pipelines/sampling/base.py class Sampler (line 12) | class Sampler: method __init__ (line 14) | def __init__(self, input_args: SamplingArguments): method _prepare_model_tokenizer (line 25) | def _prepare_model_tokenizer(self): method _prepare_prm (line 30) | def _prepare_prm(self): method _prepare_orm (line 40) | def _prepare_orm(self): method _prepare_template (line 49) | def _prepare_template(self) -> None: method truncate_input (line 54) | def truncate_input(self, slices: List[Dict[str, Any]]): method do_sample (line 58) | def do_sample(self, data): FILE: swift/pipelines/sampling/distill_sampler.py class OpenAIEngine (line 12) | class OpenAIEngine: method __init__ (line 14) | def __init__( method infer (line 26) | def infer( class DistillSampler (line 71) | class DistillSampler(VanillaSampler): method __init__ (line 73) | def __init__(self, *args, **kwargs): method _prepare_sampler (line 80) | def _prepare_sampler(self): method _prepare_model_tokenizer (line 84) | def _prepare_model_tokenizer(self): method _prepare_template (line 87) | def _prepare_template(self): method extract_choice (line 90) | def extract_choice(self, resp): method generate (line 105) | def generate(self, data): FILE: swift/pipelines/sampling/sampling.py class SwiftSampling (line 18) | class SwiftSampling(SwiftPipeline): method __init__ (line 22) | def __init__(self, args: Optional[Union[List[str], SamplingArguments]]... method _get_dataset (line 39) | def _get_dataset(self): method run (line 50) | def run(self): function sampling_main (line 103) | def sampling_main(args: Optional[Union[List[str], SamplingArguments]] = ... FILE: swift/pipelines/sampling/utils.py function get_messages_md5 (line 14) | def get_messages_md5(row: Dict[str, Any]): function get_reward (line 21) | def get_reward(model: Any, FILE: swift/pipelines/sampling/vanilla_sampler.py class VanillaSampler (line 17) | class VanillaSampler(Sampler): method __init__ (line 19) | def __init__(self, *args, **kwargs): method _prepare_sampler (line 25) | def _prepare_sampler(self): method read_cache (line 45) | def read_cache(self): method convert_data_to_rows (line 68) | def convert_data_to_rows(data): method check_row_valid (line 81) | def check_row_valid(rows): method generate (line 94) | def generate(self, data): method get_orm_score (line 154) | def get_orm_score(self, infer_requests, ground_truth): method get_prm_score (line 159) | def get_prm_score(self, infer_requests, ground_truth): method do_sample (line 166) | def do_sample(self, data): FILE: swift/pipelines/train/kto.py class KTOPreprocessor (line 12) | class KTOPreprocessor(RowPreprocessor): method batched_preprocess (line 14) | def batched_preprocess(self, batched_row: Dict[str, Any], **kwargs) ->... function _get_kl_dataset (line 31) | def _get_kl_dataset(dataset: Optional[HfDataset], function prepare_kto_dataset (line 42) | def prepare_kto_dataset(args, train_dataset, val_dataset): FILE: swift/pipelines/train/pretrain.py class SwiftPretrain (line 11) | class SwiftPretrain(SwiftSft): function pretrain_main (line 16) | def pretrain_main(args: Optional[Union[List[str], PretrainArguments]] = ... FILE: swift/pipelines/train/rlhf.py class SwiftRLHF (line 23) | class SwiftRLHF(SwiftSft): method _get_model_task_type (line 28) | def _get_model_task_type(model_dir): method _prepare_single_model (line 55) | def _prepare_single_model(self, key, origin_key, model_type, model_rev... method _prepare_model_tokenizer (line 111) | def _prepare_model_tokenizer(self): method prepare_model (line 168) | def prepare_model(cls, args, model, *, template=None, train_dataset=No... method _prepare_template (line 186) | def _prepare_template(self) -> None: method _get_dataset (line 195) | def _get_dataset(self): method _prepare_chord_sft_dataset (line 202) | def _prepare_chord_sft_dataset(self): method _get_trainer_kwargs (line 218) | def _get_trainer_kwargs(self): function rlhf_main (line 243) | def rlhf_main(args: Optional[Union[List[str], RLHFArguments]] = None): FILE: swift/pipelines/train/sft.py class SwiftSft (line 22) | class SwiftSft(SwiftPipeline, TunerMixin): method __init__ (line 26) | def __init__(self, args: Optional[Union[List[str], SftArguments]] = No... method _prepare_flash_ckpt (line 34) | def _prepare_flash_ckpt(self): method _prepare_generation_config (line 41) | def _prepare_generation_config(self): method _prepare_model_tokenizer (line 49) | def _prepare_model_tokenizer(self, **kwargs): method _prepare_template (line 65) | def _prepare_template(self) -> None: method _get_dataset (line 78) | def _get_dataset(self): method _save_val_dataset (line 100) | def _save_val_dataset(self, val_dataset): method _prepare_dataset (line 110) | def _prepare_dataset(self): method _post_process_datasets (line 138) | def _post_process_datasets(self, datasets: List) -> List: method run (line 172) | def run(self): method _get_trainer_kwargs (line 199) | def _get_trainer_kwargs(self): method _handle_trainer_state (line 202) | def _handle_trainer_state(self, trainer, is_write_rank: bool): method _save_trainer_state (line 218) | def _save_trainer_state(self, trainer): method _get_resume_checkpoint (line 245) | def _get_resume_checkpoint(self, trainer): method train (line 265) | def train(self, trainer): method _stat_dataset (line 279) | def _stat_dataset(dataset: Union[HfDataset, PackingDataset, LazyLLMDat... method _show_dataset (line 291) | def _show_dataset(self, train_dataset, val_dataset): method _encode_dataset (line 309) | def _encode_dataset(self, train_dataset, val_dataset, pre_process=True): function sft_main (line 353) | def sft_main(args: Optional[Union[List[str], SftArguments]] = None): FILE: swift/pipelines/train/tuner.py function apply_liger (line 19) | def apply_liger(model_type: str): function get_target_modules (line 89) | def get_target_modules(args, model) -> Union[str, List[str]]: function get_modules_to_save (line 111) | def get_modules_to_save(args, model, task_type=None): function get_vera_target_modules (line 124) | def get_vera_target_modules(model, config): function prepare_adapter (line 144) | def prepare_adapter(args: SftArguments, model, *, template=None, train_d... class TunerMixin (line 319) | class TunerMixin: method prepare_model (line 322) | def prepare_model(cls, args, model, *, template=None, train_dataset=No... FILE: swift/pipelines/utils.py function prepare_adapter (line 15) | def prepare_adapter(args, model, adapters=None): function prepare_model_template (line 37) | def prepare_model_template(args, **kwargs): function _select_dataset (line 49) | def _select_dataset(dataset, max_length): function get_cached_dataset (line 63) | def get_cached_dataset(args): FILE: swift/ray/__init__.py function try_init_ray (line 6) | def try_init_ray(): FILE: swift/ray/arguments.py class RayArguments (line 9) | class RayArguments: method __post_init__ (line 26) | def __post_init__(self): FILE: swift/ray/base.py function get_args (line 17) | def get_args(): class RayHelper (line 23) | class RayHelper: method initialize (line 37) | def initialize(device_groups: Dict[str, Any]): method teardown (line 56) | def teardown(): method is_called_from_init (line 62) | def is_called_from_init(): method ray_inited (line 77) | def ray_inited(): method is_worker (line 86) | def is_worker(): method worker (line 91) | def worker(group: Union[str, List[str]]): method collect_func (line 121) | def collect_func(method: Union[Literal['none', 'flatten'], Callable], ... method function (line 142) | def function(group: str, method execute_all_sync (line 194) | def execute_all_sync(group, dispatch, execute, method_name: str, *args... method execute_all_async (line 199) | def execute_all_async(group, dispatch, execute, method_name: str, *arg... method _create_workers (line 238) | def _create_workers(group: Union[str, List[str]], *args, **kwargs): FILE: swift/ray/resource_manager.py class NodeGroup (line 10) | class NodeGroup: function get_node_rank (line 15) | def get_node_rank(): class ResourceManager (line 19) | class ResourceManager: method __init__ (line 23) | def __init__(self, groups: Dict[str, Any]): method resource (line 129) | def resource(self, worker): method destroy_placement_group (line 132) | def destroy_placement_group(self): FILE: swift/rewards/orm.py class ORM (line 16) | class ORM: method __init__ (line 27) | def __init__(self, args: Optional[Union['GRPOConfig', 'MegatronArgumen... method __call__ (line 30) | def __call__(self, **kwargs) -> List[float]: class AsyncORM (line 34) | class AsyncORM: method __init__ (line 62) | def __init__(self, args: Optional[Union['GRPOConfig', 'MegatronArgumen... method __call__ (line 65) | async def __call__(self, **kwargs) -> List[float]: class MathAccuracy (line 69) | class MathAccuracy(ORM): method __init__ (line 71) | def __init__(self, args=None, **kwargs): method __call__ (line 78) | def __call__(self, completions, solution, **kwargs) -> List[float]: class Format (line 123) | class Format(ORM): method __call__ (line 125) | def __call__(self, completions, **kwargs) -> List[float]: class ReActFormat (line 132) | class ReActFormat(ORM): method __call__ (line 134) | def __call__(self, completions, **kwargs) -> List[float]: class CosineReward (line 141) | class CosineReward(ORM): method __init__ (line 143) | def __init__(self, args: Optional[Union['GRPOConfig', 'MegatronArgumen... method cosfn (line 153) | def cosfn(t, T, min_value, max_value): method __call__ (line 157) | def __call__(self, completions, solution, **kwargs) -> List[float]: class RepetitionPenalty (line 176) | class RepetitionPenalty(ORM): method __init__ (line 178) | def __init__(self, args: Optional[Union['GRPOConfig', 'MegatronArgumen... method zipngram (line 184) | def zipngram(text: str, ngram_size: int): method __call__ (line 188) | def __call__(self, completions, **kwargs) -> List[float]: class SoftOverlong (line 216) | class SoftOverlong(ORM): method __init__ (line 218) | def __init__(self, args: Optional[Union['GRPOConfig', 'MegatronArgumen... method __call__ (line 224) | def __call__(self, completions, **kwargs) -> List[float]: class ReactORM (line 235) | class ReactORM(ORM): method evaluate_action_reward (line 238) | def evaluate_action_reward(action_pred: list, action_ref: list, cand_l... method parse_action (line 309) | def parse_action(text): method parse_output (line 327) | def parse_output(text): method __call__ (line 331) | def __call__(self, infer_requests: List[Union['InferRequest', Dict]], ... method evaluate_rougel (line 365) | def evaluate_rougel(cand_list: list, ref_list: list): class MathORM (line 378) | class MathORM(ORM): method __init__ (line 380) | def __init__(self, args=None, **kwargs): method check_terminate (line 389) | def check_terminate(answers: Union[str, List[str]]) -> List[bool]: method extract_boxed_result (line 398) | def extract_boxed_result(text): method clean_latex (line 407) | def clean_latex(latex_str): method parse_expression (line 413) | def parse_expression(latex_str): method compare_consecutive (line 423) | def compare_consecutive(first, second): method __call__ (line 434) | def __call__(self, infer_requests: List[Union['InferRequest', Dict]], ... FILE: swift/rewards/prm.py class PRM (line 12) | class PRM: method __call__ (line 14) | def __call__(self, **kwargs) -> List[Any]: class QwenMaxPRM (line 45) | class QwenMaxPRM(PRM): method __call__ (line 47) | def __call__(self, infer_requests: List[Union['InferRequest', Dict]], ... class ClientPRM (line 96) | class ClientPRM(PRM): method __init__ (line 98) | def __init__(self, api_key=None, base_url=None, model=None): method __call__ (line 112) | def __call__(self, infer_requests: List[Union['InferRequest', Dict]], ... FILE: swift/rewards/rm_plugin.py class DefaultRMPlugin (line 20) | class DefaultRMPlugin: method __init__ (line 29) | def __init__(self, model, template): method __call__ (line 33) | def __call__(self, inputs, **kwargs): class GenRMPlugin (line 41) | class GenRMPlugin(DefaultRMPlugin): method __init__ (line 43) | def __init__(self, model, template): method __call__ (line 71) | def __call__(self, inputs, **kwargs): method prepare_rm_inputs (line 94) | def prepare_rm_inputs(self, inputs: List[Dict]) -> List[Dict]: method extract_reward (line 122) | def extract_reward(model_output: str) -> float: method messages_to_query (line 143) | def messages_to_query(messages): method compute_rewards (line 198) | def compute_rewards(self, results: List['ChatCompletionResponse']) -> ... FILE: swift/rlhf_trainers/args_mixin.py class VllmArguments (line 8) | class VllmArguments: method __post_init__ (line 58) | def __post_init__(self): method get_vllm_engine_kwargs (line 65) | def get_vllm_engine_kwargs(self): class RolloutTrainerArgumentsMixin (line 100) | class RolloutTrainerArgumentsMixin(VllmArguments): class GRPOArgumentsMixin (line 187) | class GRPOArgumentsMixin(RolloutTrainerArgumentsMixin): FILE: swift/rlhf_trainers/arguments.py class DPOConfig (line 29) | class DPOConfig(TrainArgumentsMixin, HfDPOConfig): method __post_init__ (line 32) | def __post_init__(self): class CPOConfig (line 38) | class CPOConfig(TrainArgumentsMixin, HfCPOConfig): method __post_init__ (line 40) | def __post_init__(self): class ORPOConfig (line 46) | class ORPOConfig(TrainArgumentsMixin, HfORPOConfig): method __post_init__ (line 48) | def __post_init__(self): class KTOConfig (line 54) | class KTOConfig(TrainArgumentsMixin, HfKTOConfig): method __post_init__ (line 56) | def __post_init__(self): class RewardConfig (line 62) | class RewardConfig(TrainArgumentsMixin, HfRewardConfig): method __post_init__ (line 64) | def __post_init__(self): class PPOConfig (line 70) | class PPOConfig(TrainArgumentsMixin, HfPPOConfig): method __post_init__ (line 72) | def __post_init__(self): class GKDConfig (line 78) | class GKDConfig(RolloutTrainerArgumentsMixin, TrainArgumentsMixin, HfGKD... method __post_init__ (line 85) | def __post_init__(self): class GRPOConfig (line 92) | class GRPOConfig(GRPOArgumentsMixin, TrainArgumentsMixin, HfGRPOConfig): method __post_init__ (line 94) | def __post_init__(self): method check_num_generations (line 116) | def check_num_generations(self): FILE: swift/rlhf_trainers/cpo_trainer.py class CPOTrainer (line 20) | class CPOTrainer(RLHFTrainerMixin, SwiftMixin, HFCPOTrainer): method __init__ (line 22) | def __init__(self, model: Optional[Union[PreTrainedModel, nn.Module, s... FILE: swift/rlhf_trainers/dpo_trainer.py function new_gather_function (line 22) | def new_gather_function(tensor): class DPOTrainer (line 28) | class DPOTrainer(RLHFTrainerMixin, SwiftMixin, DataLoaderMixin, HFDPOTra... method __init__ (line 30) | def __init__(self, method concatenated_forward (line 77) | def concatenated_forward( method training_step (line 178) | def training_step(self, model, inputs, *args, **kwargs): method prediction_step (line 182) | def prediction_step(self, model, inputs, *args, **kwargs): FILE: swift/rlhf_trainers/gkd_trainer.py class DataSource (line 48) | class DataSource(str, Enum): class GKDTrainer (line 57) | class GKDTrainer(RolloutTrainerMixin, SwiftMixin, HFGKDTrainer): method __init__ (line 59) | def __init__(self, model: Optional[Union[PreTrainedModel, nn.Module, s... method _get_data_collator (line 128) | def _get_data_collator(self, args, template): method _build_opsd_teacher_data (line 131) | def _build_opsd_teacher_data(self, inputs): method _compute_jsd_loss (line 152) | def _compute_jsd_loss(self, outputs_student, outputs_teacher, inputs, ... method generate_on_policy_outputs (line 204) | def generate_on_policy_outputs(self, model, inputs, generation_config,... method compute_loss (line 244) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... method _prepare_batch_inputs (line 416) | def _prepare_batch_inputs(self, inputs: list, encode_prompt_only: bool... method training_step (line 451) | def training_step(self, method _fetch_teacher_logprobs_from_api (line 587) | def _fetch_teacher_logprobs_from_api(self, encoded_inputs: Dict[str, t... method prediction_step (line 598) | def prediction_step(self, model, inputs, *args, **kwargs): method offload_context (line 612) | def offload_context(self): method _get_random_num (line 635) | def _get_random_num(self) -> float: method load_teacher_model_context (line 651) | def load_teacher_model_context(self): method _prepare_liger_loss (line 663) | def _prepare_liger_loss(self): method generalized_jsd_loss (line 681) | def generalized_jsd_loss( method _prepare_logging (line 760) | def _prepare_logging(self): method _apply_chat_template_to_messages_list (line 773) | def _apply_chat_template_to_messages_list(self, messages_list: DataType): method log (line 783) | def log(self, logs: Dict[str, float], start_time: Optional[float] = No... function _build_teacher_session (line 829) | def _build_teacher_session(max_retries=5): function fetch_teacher_logprobs (line 854) | def fetch_teacher_logprobs(base_url, input_ids, topk=20, timeout=300.0): FILE: swift/rlhf_trainers/grpo_trainer.py class GRPOTrainer (line 84) | class GRPOTrainer(RolloutTrainerMixin, SwiftMixin, HFGRPOTrainer): method __init__ (line 87) | def __init__(self, method _get_data_collator (line 166) | def _get_data_collator(self, args, template): method _get_train_sampler (line 169) | def _get_train_sampler(self, train_dataset=None): method _prepare_inputs (line 183) | def _prepare_inputs(self, generation_batch: Dict[str, Union[torch.Tensor, method _generate_completions (line 211) | def _generate_completions(self, inputs: DataType) -> DataType: method _generate_and_score_completions (line 231) | def _generate_and_score_completions(self, inputs: DataType) -> DataType: method _score_completions (line 303) | def _score_completions(self, inputs: DataType) -> torch.Tensor: method _compute_rewards_per_func (line 335) | def _compute_rewards_per_func(self, inputs: DataType) -> torch.Tensor: method _compute_advantages (line 404) | def _compute_advantages(self, inputs: DataType, rewards_per_func: torc... method _dynamic_sampling (line 684) | def _dynamic_sampling(self, inputs, rewards_per_func): method compute_std (line 735) | def compute_std(self, inputs: DataType, rewards_per_func: torch.Tensor... method split_by_mini_batches (line 775) | def split_by_mini_batches(self, inputs: DataType) -> List[DataType]: method null_ref_context (line 848) | def null_ref_context(self): method _prepare_batch_inputs (line 859) | def _prepare_batch_inputs(self, inputs: DataType) -> List[DataType]: method _apply_chat_template_to_messages_list (line 1052) | def _apply_chat_template_to_messages_list(self, messages_list: DataType): method compute_loss (line 1062) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... method _compute_loss (line 1073) | def _compute_loss(self, model, inputs): method _compute_loss_single (line 1087) | def _compute_loss_single(self, model, inputs): method _compute_loss_and_metrics (line 1093) | def _compute_loss_and_metrics(self, model, inputs): method _update_metrics (line 1309) | def _update_metrics(self, metrics_data): method _compute_loss_chunked (line 1347) | def _compute_loss_chunked(self, model, inputs: DataType): method _aggregate_and_update_metrics (line 1405) | def _aggregate_and_update_metrics(self, all_metrics_data, mode): method _unpad_logps_and_entropies (line 1488) | def _unpad_logps_and_entropies(self, method _get_logps_via_sp (line 1519) | def _get_logps_via_sp(self, method _get_logps_via_local_forward (line 1613) | def _get_logps_via_local_forward(self, method _get_per_token_logps_and_entropies (line 1657) | def _get_per_token_logps_and_entropies(self, method _get_per_token_logps_and_entropies_single (line 1676) | def _get_per_token_logps_and_entropies_single(self, method _get_per_token_logps_and_entropies_chunked (line 1728) | def _get_per_token_logps_and_entropies_chunked(self, method _get_last_hidden_state (line 1793) | def _get_last_hidden_state(self, unwrapped_model, inputs, logits_to_ke... method _get_rollout_is_correction (line 1812) | def _get_rollout_is_correction(self, old_per_token_logps, rollout_per_... method compute_liger_loss (line 1826) | def compute_liger_loss(self, unwrapped_model, inputs): method evaluation_loop (line 1865) | def evaluation_loop(self, dataloader, *args, **kwargs): method training_step (line 1876) | def training_step(self, model: nn.Module, inputs: DataType, num_items_... method old_policy (line 1883) | def old_policy(self): method offload_context (line 1892) | def offload_context(self): method log (line 1911) | def log(self, logs: Dict[str, float], start_time: Optional[float] = No... method is_async_generate_eval_rollout_done (line 1979) | def is_async_generate_eval_rollout_done(self): method is_async_generate_train_rollout_done (line 1982) | def is_async_generate_train_rollout_done(self): method _gather_and_flatten (line 1985) | def _gather_and_flatten(self, local_list, dtype=None, device=None, fla... method _group_inputs_by_request_id (line 2026) | def _group_inputs_by_request_id(self, inputs: DataType) -> Dict[str, L... method _get_trajectory_inputs (line 2052) | def _get_trajectory_inputs(self, inputs: DataType) -> Dict[str, List[D... method _get_last_indices (line 2087) | def _get_last_indices(self, request_ids: List[str]) -> torch.Tensor: method get_chunked_inputs (line 2093) | def get_chunked_inputs(self, inputs, start_idx, end_idx): method _prepare_liger_loss (line 2111) | def _prepare_liger_loss(self): method _prepare_metrics (line 2130) | def _prepare_metrics(self): method _collect_config_info (line 2147) | def _collect_config_info(self) -> Dict[str, str]: method _prepare_algorithm_params (line 2159) | def _prepare_algorithm_params(self): method _prepare_chord_dataset (line 2201) | def _prepare_chord_dataset(self): method _prepare_rewards (line 2207) | def _prepare_rewards(self, reward_funcs, reward_model=None, reward_tem... method _prepare_resample_data_iterator (line 2290) | def _prepare_resample_data_iterator(self): method _compute_sequence_level_ratios (line 2331) | def _compute_sequence_level_ratios(self, is_ratio: torch.Tensor, compl... method _apply_rollout_importance_sampling (line 2348) | def _apply_rollout_importance_sampling(self, rollout_log_ratio: torch.... method _compute_off_policy_sequence_mask (line 2398) | def _compute_off_policy_sequence_mask( method _compute_rollout_offpolicy_metrics (line 2440) | def _compute_rollout_offpolicy_metrics( method _compute_is_correction_metrics (line 2560) | def _compute_is_correction_metrics( method _prepare_model_inputs (line 2623) | def _prepare_model_inputs(self, inputs: 'DataType') -> Dict[str, Any]: method _get_eval_sampler (line 2633) | def _get_eval_sampler(self, eval_dataset): FILE: swift/rlhf_trainers/kto_trainer.py class KTOTrainer (line 24) | class KTOTrainer(RLHFTrainerMixin, SwiftMixin, HFKTOTrainer): method __init__ (line 26) | def __init__(self, method forward (line 51) | def forward( method _get_model_kwargs (line 82) | def _get_model_kwargs(self, inputs, prefix: str): method get_batch_logps (line 92) | def get_batch_logps( method _compute_kl_logps (line 125) | def _compute_kl_logps(self, model, batch): FILE: swift/rlhf_trainers/orpo_trainer.py class ORPOTrainer (line 19) | class ORPOTrainer(RLHFTrainerMixin, SwiftMixin, HFORPOTrainer): method __init__ (line 21) | def __init__(self, model: Optional[Union[PreTrainedModel, nn.Module, s... FILE: swift/rlhf_trainers/ppo_trainer.py class PPOTrainer (line 23) | class PPOTrainer(SwiftMixin, HFPPOTrainer): method _patch_dataloader (line 27) | def _patch_dataloader(collate_fn): method __init__ (line 40) | def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel,... method create_loss_and_eval_metric (line 68) | def create_loss_and_eval_metric(self, args): method train (line 71) | def train(self, *args, **kwargs): method _save_checkpoint (line 75) | def _save_checkpoint(self, *args, **kwargs): method save_model (line 86) | def save_model(self, output_dir: Optional[str] = None, _internal_call:... method _save (line 98) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method _prepare_gradient_checkpointing (line 107) | def _prepare_gradient_checkpointing(self, model): method generate_completions (line 114) | def generate_completions(self, *args, **kwargs): FILE: swift/rlhf_trainers/reward_trainer.py class RewardTrainer (line 28) | class RewardTrainer(RLHFTrainerMixin, SwiftMixin, HFRewardTrainer): method __init__ (line 30) | def __init__(self, *args, **kwargs): method compute_loss (line 46) | def compute_loss(self, method visualize_samples (line 82) | def visualize_samples(self, num_print_samples: int): FILE: swift/rlhf_trainers/rlhf_mixin.py class RLHFTrainerMixin (line 19) | class RLHFTrainerMixin: method __init__ (line 21) | def __init__(self, method create_loss_and_eval_metric (line 59) | def create_loss_and_eval_metric(self, args): method _prepare_inputs (line 62) | def _prepare_inputs(self, inputs): method get_train_dataloader (line 68) | def get_train_dataloader(self, *args, **kwargs): method concatenated_forward (line 82) | def concatenated_forward( method compute_loss (line 119) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... method _get_train_sampler (line 128) | def _get_train_sampler(self, train_dataset=None): method get_per_token_logps (line 134) | def get_per_token_logps( FILE: swift/rlhf_trainers/rollout_mixin.py class DataCache (line 49) | class DataCache: class AsyncGenerateCallback (line 54) | class AsyncGenerateCallback(TrainerCallback): method __init__ (line 57) | def __init__(self, trainer): method on_train_begin (line 60) | def on_train_begin(self, args, state, control, **kwargs): class RolloutTrainerMixin (line 66) | class RolloutTrainerMixin(RLHFTrainerMixin): method __init__ (line 79) | def __init__(self, method prepare_rollout (line 90) | def prepare_rollout(self): method _prepare_rollout_params (line 96) | def _prepare_rollout_params(self): method _prepare_vllm (line 128) | def _prepare_vllm(self): method _prepare_vllm_engine (line 192) | def _prepare_vllm_engine(self): method split_batches (line 267) | def split_batches(self): method _move_model_to_vllm (line 376) | def _move_model_to_vllm(self, skip_async_check=False): method _move_adapter_to_vllm (line 396) | def _move_adapter_to_vllm(self): method _load_state_dict_to_vllm (line 456) | def _load_state_dict_to_vllm(self, state_dict): method _fix_param_name_to_vllm (line 477) | def _fix_param_name_to_vllm(self, name: str, extra_prefixes: Optional[... method _process_state_dict_for_vllm (line 484) | def _process_state_dict_for_vllm(self, method _merge_lora_into_state_dict (line 529) | def _merge_lora_into_state_dict(self, state_dict: Dict[str, torch.Tens... method _collect_state_dict_for_vllm (line 598) | def _collect_state_dict_for_vllm(self, parameter_group=None, parameter... method _move_full_model_to_vllm (line 652) | def _move_full_model_to_vllm(self): method _rollout (line 694) | def _rollout(self, method _get_request_config (line 706) | def _get_request_config(self) -> RequestConfig: method _set_inputs_system (line 721) | def _set_inputs_system(self, inputs: DataType) -> DataType: method _infer_single_or_multi_turn (line 733) | def _infer_single_or_multi_turn(self, method _colocate_multi_turn_infer (line 746) | def _colocate_multi_turn_infer(self, inputs: DataType, first_turn_roll... method _fast_infer (line 916) | def _fast_infer(self, inputs: DataType) -> DataType: method _preprocess_inputs (line 966) | def _preprocess_inputs(self, inputs: DataType) -> DataType: method _add_prompt_id_to_inputs (line 973) | def _add_prompt_id_to_inputs(self, inputs: DataType) -> DataType: method _server_rollout (line 995) | def _server_rollout(self, inputs: DataType, request_config: RequestCon... method _colocate_rollout (line 1047) | def _colocate_rollout(self, inputs: DataType, request_config: RequestC... method _engine_infer (line 1069) | def _engine_infer( method _extract_logprobs_from_choice (line 1091) | def _extract_logprobs_from_choice(response_choice) -> List[float]: method _postprocess_rollout_outputs (line 1106) | def _postprocess_rollout_outputs(self, inputs: DataType, outputs: List... method offload_model (line 1179) | def offload_model(self, model): method load_model (line 1197) | def load_model(self, model): method offload_optimizer (line 1214) | def offload_optimizer(self): method load_optimizer (line 1252) | def load_optimizer(self): method offload_context (line 1284) | def offload_context(self): method _prepare_scheduler (line 1293) | def _prepare_scheduler(self): method multi_turn_completion_length_context (line 1315) | def multi_turn_completion_length_context(self): method inputs2requests (line 1350) | def inputs2requests(self, inputs: Union[DataType, List[RolloutInferReq... method async_generate_rollout (line 1418) | def async_generate_rollout(self, all_inputs): method _prepare_async_generate (line 1441) | def _prepare_async_generate(self): method _queue (line 1451) | def _queue(self): method _wait_queue (line 1458) | def _wait_queue(self): method _sort_by_request_id (line 1463) | def _sort_by_request_id(self, all_outputs: List[RolloutOutput]) -> Lis... method _prefetch (line 1471) | def _prefetch(self, dataloader: DataLoader): method _disable_sp_context (line 1484) | def _disable_sp_context(self, template: Optional[Template] = None): method _template_context (line 1524) | def _template_context(self, method _prepare_resample_data_iterator (line 1543) | def _prepare_resample_data_iterator(self): method resample_encode_failed_inputs (line 1567) | def resample_encode_failed_inputs(self, inputs: DataType, max_resample... FILE: swift/rlhf_trainers/utils.py class TensorLoRARequest (line 44) | class TensorLoRARequest(LoRARequest): method config (line 50) | def config(self): method embeddings (line 54) | def embeddings(self): function chunk_list (line 60) | def chunk_list(lst: list, n: int) -> list[list]: function is_valid_ipv6_address (line 80) | def is_valid_ipv6_address(address: str) -> bool: function format_host_for_url (line 89) | def format_host_for_url(host: str) -> str: function resolve_hostname (line 96) | def resolve_hostname(hostname: str) -> str: function patch_stateless_process_group_for_ipv6 (line 122) | def patch_stateless_process_group_for_ipv6(): function nanstd (line 209) | def nanstd(tensor: torch.Tensor, function aggressive_empty_cache (line 249) | def aggressive_empty_cache(force_sync: bool = True, max_retries: int = 3... function prepare_deepspeed (line 295) | def prepare_deepspeed(model, accelerator, deepspeed_config=None, deepspe... function memory_time_profiling_context (line 392) | def memory_time_profiling_context( function round_robin (line 464) | def round_robin(num_reqs, num_workers): function patch_lora_merge (line 483) | def patch_lora_merge(model, parameter_group=None): function patch_lora_unmerge (line 553) | def patch_lora_unmerge(model): function profiling_context (line 581) | def profiling_context(trainer, name: str): function profiling_decorator (line 602) | def profiling_decorator(func): class _ForwardRedirection (line 612) | class _ForwardRedirection: method __call__ (line 619) | def __call__(self, wrapper_module: nn.Module, original_module: nn.Modu... method on_after_inner_forward (line 650) | def on_after_inner_forward(self, wrapper_module: nn.Module, original_m... method on_after_outer_forward (line 653) | def on_after_outer_forward(self, wrapper_module: nn.Module, original_m... function entropy_from_logits (line 657) | def entropy_from_logits(logits, chunk_size: int = 1) -> torch.Tensor: function load_pil_img (line 681) | def load_pil_img(img) -> Image: function replace_assistant_response_with_ids (line 704) | def replace_assistant_response_with_ids(messages: 'Messages', function patch_save_last_checkpoint (line 790) | def patch_save_last_checkpoint(): function get_gather_if_zero3_context (line 807) | def get_gather_if_zero3_context(trainer, is_zero3: Optional[bool] = None): function prepare_fsdp (line 820) | def prepare_fsdp(model, accelerator, evaluation_mode: bool = True): function patch_vllm_moe_model_weight_loader (line 855) | def patch_vllm_moe_model_weight_loader(model): function patch_vllm_load_adapter (line 949) | def patch_vllm_load_adapter(): class FlattenedTensorMetadata (line 1052) | class FlattenedTensorMetadata(BaseModel): method ensure_shape_tuple (line 1063) | def ensure_shape_tuple(cls, v: Any) -> Tuple[int, ...]: method ensure_dtype_str (line 1075) | def ensure_dtype_str(cls, v: Any) -> str: class TensorMetadata (line 1084) | class TensorMetadata(BaseModel): class UpdateFlattenedAdapterRequest (line 1092) | class UpdateFlattenedAdapterRequest(BaseModel): class UpdateFlattenedParamsRequest (line 1098) | class UpdateFlattenedParamsRequest(BaseModel): class UpdateAdapterRequest (line 1102) | class UpdateAdapterRequest(BaseModel): class FlattenedTensorBucket (line 1109) | class FlattenedTensorBucket: method __init__ (line 1115) | def __init__( method get_flattened_tensor (line 1168) | def get_flattened_tensor(self) -> torch.Tensor: method get_metadata (line 1172) | def get_metadata(self) -> List[FlattenedTensorMetadata]: method reconstruct_tensors (line 1176) | def reconstruct_tensors(self) -> Dict[str, torch.Tensor]: function identity_data_collator (line 1196) | def identity_data_collator(features): function mu_schedule_function (line 1201) | def mu_schedule_function(global_step: int, mu_warmup_steps: int, mu_deca... function create_cyclic_iterator (line 1230) | def create_cyclic_iterator(iterable): function get_chord_sft_dataloader (line 1237) | def get_chord_sft_dataloader(trainer, function make_chord_sft_dataset (line 1288) | def make_chord_sft_dataset(trainer, chord_sft_dataset): function compute_chord_loss (line 1309) | def compute_chord_loss(trainer, grpo_loss: torch.Tensor) -> torch.Tensor: function set_expandable_segments (line 1356) | def set_expandable_segments(enable: bool) -> None: function peft_config_to_dict (line 1391) | def peft_config_to_dict(peft_config): function _create_parameter_buckets (line 1401) | def _create_parameter_buckets(named_params, bucket_size_mb=512): function _process_bucket_with_flattened_tensor (line 1427) | def _process_bucket_with_flattened_tensor(trainer, bucket_params): function get_even_process_data (line 1451) | def get_even_process_data(trainer, global_data: List[T]) -> List[T]: function check_vllm_version_ge (line 1489) | def check_vllm_version_ge(min_version: str) -> bool: function pad_logps_back_to_batch (line 1506) | def pad_logps_back_to_batch(logps_rmpad: Optional[torch.Tensor], FILE: swift/rlhf_trainers/vllm_client.py class VLLMClient (line 36) | class VLLMClient: method __init__ (line 38) | def __init__(self, method check_server (line 82) | def check_server(self, total_timeout: float = 0.0, retry_interval: flo... method infer (line 117) | def infer( method init_communicator (line 182) | def init_communicator(self, device: Union[int, str] = 0): method update_named_param (line 213) | def update_named_param(self, name: str, weights: torch.Tensor): method update_adapter_flattened_param (line 252) | def update_adapter_flattened_param(self, peft_config, metadatas, flatt... method update_adapter_param (line 302) | def update_adapter_param(self, peft_config, lora_params): method update_flattened_params (line 366) | def update_flattened_params(self, metadatas, flattened_tensor): method update_model_params (line 409) | def update_model_params(self, model: nn.Module): method reset_prefix_cache (line 413) | def reset_prefix_cache(self): method get_engine_type (line 432) | def get_engine_type(self): method close_communicator (line 445) | def close_communicator(self): FILE: swift/rollout/gym_env.py class ContextManager (line 12) | class ContextManager(ABC): method __init__ (line 15) | def __init__(self, ctx_config): method manage_context (line 19) | def manage_context(self, history: Messages, trajectory_id: str) -> Mes... class DummyContextManager (line 31) | class DummyContextManager(ContextManager): method __init__ (line 33) | def __init__(self, ctx_config): method manage_context (line 36) | def manage_context(self, history: Messages, trajectory_id: str) -> Mes... class Env (line 44) | class Env(ABC): method __init__ (line 47) | def __init__(self, env_config): method reset (line 52) | async def reset(self, config: RolloutInferRequest) -> Tuple[str, Dict[... method step (line 67) | async def step(self, action: Messages) -> Tuple[str, float, bool, Dict... method close (line 83) | async def close(self): function count_qwen_tokens (line 88) | def count_qwen_tokens(messages: List[Dict[str, Any]], max_tokens: int = ... class SimpleMathEnv (line 113) | class SimpleMathEnv(Env): method __init__ (line 116) | def __init__(self, env_config): method reset (line 121) | async def reset(self, config: RolloutInferRequest) -> Tuple[str, Dict[... method step (line 133) | async def step(self, action: Messages) -> Tuple[str, float, bool, Dict... method close (line 151) | async def close(self): FILE: swift/rollout/multi_turn.py class RolloutScheduler (line 16) | class RolloutScheduler(ABC): method __init__ (line 18) | def __init__(self, infer_engine: Optional[GRPOVllmEngine] = None, max_... method async_infer (line 24) | async def async_infer(self, method run (line 96) | async def run(self, infer_request: 'RolloutInferRequest', request_conf... method __getattr__ (line 109) | def __getattr__(self, key: str): method engine (line 126) | def engine(self): method tokenizer (line 130) | def tokenizer(self): class MultiTurnScheduler (line 139) | class MultiTurnScheduler(RolloutScheduler, ABC): method run (line 181) | async def run(self, infer_request: 'RolloutInferRequest', request_conf... method step (line 358) | def step(self, infer_request: 'RolloutInferRequest', response_choice: ... method check_finished (line 382) | def check_finished(self, infer_request: 'RolloutInferRequest', respons... method _extract_logprobs_from_choice (line 412) | def _extract_logprobs_from_choice(response_choice: 'ChatCompletionResp... class ThinkingModelTipsScheduler (line 428) | class ThinkingModelTipsScheduler(MultiTurnScheduler): method __init__ (line 442) | def __init__(self, *args, **kwargs): method run (line 451) | async def run(self, infer_request: 'RolloutInferRequest', request_conf... method check_finished (line 504) | def check_finished(self, infer_request: 'RolloutInferRequest', respons... method step (line 520) | def step(self, infer_request: 'RolloutInferRequest', response_choice: ... method _is_thinking_template (line 526) | def _is_thinking_template(self) -> bool: method _build_messages (line 533) | def _build_messages(self, original_messages: Messages) -> Messages: class MathTipsScheduler (line 582) | class MathTipsScheduler(MultiTurnScheduler): method __init__ (line 585) | def __init__(self, *args, **kwargs): method _get_tips_token_ids (line 592) | def _get_tips_token_ids(self, tokenizer) -> List[int]: method check_finished (line 599) | def check_finished(self, infer_request: 'RolloutInferRequest', respons... method step (line 613) | def step(self, infer_request: 'RolloutInferRequest', response_choice: ... class GYMScheduler (line 682) | class GYMScheduler(RolloutScheduler): method __init__ (line 684) | def __init__(self, infer_engine: GRPOVllmEngine, max_turns: Optional[i... method _create_env (line 689) | async def _create_env(self, env_config: Dict) -> Env: method _create_context_manager (line 696) | async def _create_context_manager(self, ctx_config: Dict) -> ContextMa... method _close_env_async (line 708) | async def _close_env_async(self, env: Env): method run (line 723) | async def run(self, infer_request: 'RolloutInferRequest', request_conf... FILE: swift/sequence_parallel/ulysses.py function _generate_layout_params (line 18) | def _generate_layout_params(scatter_idx, seq_world_size, input): function post_all2all (line 39) | def post_all2all(permute_idx, res_shape): function pre_all2all_fun (line 54) | def pre_all2all_fun(permute_idx, inp_shape, input): function single_all_to_all (line 64) | def single_all_to_all(input, scatter_idx, gather_idx, group, **kwargs): class _SeqAllToAll (line 82) | class _SeqAllToAll(torch.autograd.Function): method forward (line 85) | def forward( method backward (line 99) | def backward(ctx: Any, *grad_output: torch.Tensor) -> Tuple[None, torc... class DistributedAttention (line 103) | class DistributedAttention(torch.nn.Module): method __init__ (line 105) | def __init__( method forward (line 118) | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch... class SequenceParallel (line 163) | class SequenceParallel: method __init__ (line 167) | def __init__(self): method real_position_ids (line 180) | def real_position_ids(self) -> torch.Tensor: method _prepare_flash_attn (line 184) | def _prepare_flash_attn(self, base_model: torch.nn.Module): method _prepare_forward_hook (line 364) | def _prepare_forward_hook(self, base_model: torch.nn.Module): method _prepare_moe_aux_loss (line 394) | def _prepare_moe_aux_loss(self, base_model: torch.nn.Module): method prepare (line 428) | def prepare(self, sp_size: int, model: torch.nn.Module, tokenizer: Pre... method _mask_qkv (line 462) | def _mask_qkv(self, query, key, value, mask): method pad (line 470) | def pad(self, tensor, padding_value, position_ids=None, dim=1): method gather (line 508) | def gather(self, local_output, dim: int, position_ids=None): method _split_packed (line 567) | def _split_packed(self, value, cu_seqlens, dim=1): method split (line 585) | def split(self, input, dim: int, position_ids=None): method pad_and_split_mm_tokens (line 609) | def pad_and_split_mm_tokens(self, visual_mask, mm_embeds): method pad_and_split_inputs (line 630) | def pad_and_split_inputs(self, method _gather_object_dp (line 722) | def _gather_object_dp(self, input_data): method _init_device_mesh (line 728) | def _init_device_mesh(self): method sp_group (line 760) | def sp_group(self): method sp_rank (line 765) | def sp_rank(self): method dp_group (line 770) | def dp_group(self): method dp_rank (line 775) | def dp_rank(self): method rp_group (line 780) | def rp_group(self): method rp_rank (line 786) | def rp_rank(self): method prepare_inputs (line 791) | def prepare_inputs(self, inputs): FILE: swift/sequence_parallel/utils.py class GatherTensor (line 14) | class GatherTensor(torch.autograd.Function): method forward (line 18) | def forward(ctx, tensor, dim=0, position_ids=None): method backward (line 26) | def backward(ctx, grad_output): class GatherLoss (line 31) | class GatherLoss(torch.autograd.Function): method forward (line 35) | def forward(ctx, loss, labels, gather_idx=None, position_ids=None): method backward (line 56) | def backward(ctx, *grad_output): class ChunkedCrossEntropyLoss (line 66) | class ChunkedCrossEntropyLoss(torch.autograd.Function): method forward (line 69) | def forward(ctx, logits, labels, chunk_size): method backward (line 88) | def backward(ctx: Any, *grad_outputs: Any): class SequenceParallelSampler (line 108) | class SequenceParallelSampler(Sampler): method __init__ (line 111) | def __init__(self, sp_instance, dataset, shuffle: bool = True, seed=No... method __iter__ (line 132) | def __iter__(self) -> Iterator[int]: method __len__ (line 146) | def __len__(self) -> int: method set_epoch (line 149) | def set_epoch(self, epoch: int) -> None: class SequenceParallelDispatcher (line 153) | class SequenceParallelDispatcher(DataLoaderDispatcher): method __init__ (line 156) | def __init__(self, dataloader, sp_instance, device=None, skip_batches:... method rank (line 163) | def rank(self): method world_size (line 167) | def world_size(self): method group (line 171) | def group(self): class RingComm (line 175) | class RingComm: method __init__ (line 177) | def __init__(self, process_group: dist.ProcessGroup): method send_recv (line 191) | def send_recv(self, to_send: torch.Tensor, recv_tensor: Optional[torch... method commit (line 203) | def commit(self): method wait (line 208) | def wait(self): method send_recv_kv (line 216) | def send_recv_kv( FILE: swift/sequence_parallel/zigzag_ring_attn.py function get_half_index (line 12) | def get_half_index(cu_seqlens, *, front: bool): function get_half_lse (line 41) | def get_half_lse(lse, cu_seqlens, *, front: bool): function update_out_and_lse (line 68) | def update_out_and_lse(out, lse, block_out, block_lse): function _get_default_args (line 102) | def _get_default_args(func): function get_default_args (line 112) | def get_default_args(func): function squeeze_batch (line 120) | def squeeze_batch(*t): function padding (line 131) | def padding(tensor, cu_seqlens, padding_value, front): function forward (line 164) | def forward(q, k, v, causal, cu_seqlens, max_seqlen, block_seq_len, drop... function backward (line 211) | def backward(dout, q, k, v, out, softmax_lse, causal, cu_seqlens, max_se... function lse_grad (line 262) | def lse_grad(out, lse, block_out, block_lse, sig, grad_out, grad_lse): function zigzag_ring_flash_attn_varlen_forward (line 289) | def zigzag_ring_flash_attn_varlen_forward( function zigzag_ring_flash_attn_varlen_backward (line 372) | def zigzag_ring_flash_attn_varlen_backward( class ZigZagRingFlashAttnVarlenFunc (line 547) | class ZigZagRingFlashAttnVarlenFunc(torch.autograd.Function): method forward (line 550) | def forward( method backward (line 617) | def backward(ctx, dout, *args): function zigzag_ring_flash_attn_varlen_func (line 646) | def zigzag_ring_flash_attn_varlen_func( FILE: swift/template/base.py class MaxLengthError (line 36) | class MaxLengthError(ValueError): class Template (line 40) | class Template(ProcessorMixin): method __init__ (line 73) | def __init__( method init_env_args (line 163) | def init_env_args(self): method init_processor (line 169) | def init_processor(self, processor: Processor) -> None: method _get_model (line 195) | def _get_model(self): method _load_image (line 205) | def _load_image(image, load_images: bool): method _get_height_width (line 222) | def _get_height_width(inputs: StdTemplateInputs) -> None: method normalize_bbox (line 231) | def normalize_bbox(self, inputs: StdTemplateInputs) -> None: method _preprocess_function_call (line 253) | def _preprocess_function_call(self, inputs: StdTemplateInputs) -> None: method prepare_engine_kwargs (line 280) | def prepare_engine_kwargs(self) -> Dict[str, Any]: method _preprocess_inputs (line 283) | def _preprocess_inputs( method _replace_image_tags (line 319) | def _replace_image_tags(inputs: StdTemplateInputs): method _replace_start_image_tags (line 339) | def _replace_start_image_tags(inputs: StdTemplateInputs): method _extend_tokens (line 350) | def _extend_tokens( method forward_context (line 372) | def forward_context(self, model, inputs): method get_base_model (line 378) | def get_base_model(model): method _rlhf_encode (line 384) | def _rlhf_encode(self, inputs: TemplateInputs, check_rejected=True) ->... method _kto_encode (line 404) | def _kto_encode(self, inputs: TemplateInputs) -> Dict[str, Any]: method _embedding_encode (line 409) | def _embedding_encode(self, inputs: TemplateInputs) -> Dict[str, Any]: method _reranker_encode (line 448) | def _reranker_encode(self, inputs: TemplateInputs) -> Dict[str, Any]: method _seq_cls_encode (line 487) | def _seq_cls_encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method encode (line 499) | def encode(self, method packing_row (line 577) | def packing_row(self, row: List[Dict[str, Any]]) -> Dict[str, Any]: method _post_encode (line 600) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _get_seq_cls_logprobs (line 604) | def _get_seq_cls_logprobs(pred: int, logprobs: torch.Tensor, top_logpr... method decode_seq_cls (line 618) | def decode_seq_cls(self, logits: torch.Tensor, top_logprobs: int): method decode (line 634) | def decode(self, generate_ids: List[int], *, is_finished: bool = True,... method decode_prm (line 643) | def decode_prm(self, input_ids: torch.Tensor, logits: torch.Tensor) ->... method generate_context (line 647) | def generate_context(self): method generate (line 661) | def generate(self, model, *args, **kwargs): method skip_stop_tokens (line 668) | def skip_stop_tokens(self, generate_ids: List[int], is_finished: bool ... method prepare_generate_kwargs (line 691) | def prepare_generate_kwargs(self, generate_kwargs: Dict[str, Any], *, ... method _save_pil_image (line 698) | def _save_pil_image(image: Image.Image) -> str: method _concat_context_list (line 710) | def _concat_context_list( method _simplify_context_list (line 741) | def _simplify_context_list(self, context_list: List[Context], loss_sca... method _split_special_tokens (line 772) | def _split_special_tokens(context_list: List[Context], method _tokenize (line 790) | def _tokenize(self, context, **kwargs): method replace_tag (line 793) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 825) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_cot_process (line 839) | def replace_cot_process(self, inputs: StdTemplateInputs) -> List[Conte... method _get_bbox_str (line 852) | def _get_bbox_str(bbox: List[int]) -> str: method replace_bbox (line 858) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method _pre_tokenize_images (line 872) | def _pre_tokenize_images(self, context_list: List[Context], loss_scale... method _pre_tokenize (line 891) | def _pre_tokenize(self, context_list: List[Context], loss_scale_list: ... method _add_default_tags (line 941) | def _add_default_tags(inputs: StdTemplateInputs): method _encode_context_list (line 971) | def _encode_context_list(self, method _add_dynamic_eos (line 996) | def _add_dynamic_eos(input_ids: List[int], labels: List[int], loss_sca... method _get_std_messages (line 1012) | def _get_std_messages(messages): method _jinja_encode (line 1018) | def _jinja_encode(self, inputs: StdTemplateInputs): method _get_system (line 1037) | def _get_system(self, inputs: StdTemplateInputs) -> Optional[str]: method _is_add_non_thinking_round (line 1049) | def _is_add_non_thinking_round(self, messages, i: int, start_idx: int): method _add_non_thinking_prefix (line 1053) | def _add_non_thinking_prefix(self, inputs) -> None: method _remove_thinking_content (line 1072) | def _remove_thinking_content(self, content: str) -> str: method _remove_history_thinking (line 1076) | def _remove_history_thinking(self, inputs) -> None: method _swift_prepare_inputs (line 1088) | def _swift_prepare_inputs(self, inputs: StdTemplateInputs): method _swift_encode (line 1130) | def _swift_encode(self, inputs: StdTemplateInputs): method _truncate (line 1241) | def _truncate(self, input_ids: List[int], labels: Optional[List[int]],... method _get_length (line 1264) | def _get_length(input_ids, labels): method _encode_truncated (line 1274) | def _encode_truncated(self, inputs: StdTemplateInputs): method _encode (line 1331) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _get_megatron_cp_length (line 1376) | def _get_megatron_cp_length(self, length) -> int: method _handle_megatron_cp (line 1382) | def _handle_megatron_cp(self, batch: List[Dict[str, Any]]) -> None: method debug_logger (line 1396) | def debug_logger(self, inputs): method _split_list (line 1409) | def _split_list(inputs: List[int], x: int) -> List[List[int]]: method replace_video2image (line 1419) | def replace_video2image(self, load_video_func, inputs, replace_tag: Ca... method get_generate_ids (line 1434) | def get_generate_ids(self, generate_ids: Union[torch.Tensor, List[int]], method post_process_generate_response (line 1440) | def post_process_generate_response(self, response: str, inputs: StdTem... method pre_forward_hook (line 1443) | def pre_forward_hook(self, model: nn.Module, args, kwargs): method is_training (line 1462) | def is_training(self): method set_mode (line 1465) | def set_mode(self, mode: Literal['transformers', 'vllm', 'lmdeploy', '... method register_post_encode_hook (line 1471) | def register_post_encode_hook(self, models: List[nn.Module]) -> None: method remove_post_encode_hook (line 1496) | def remove_post_encode_hook(self): method data_collator (line 1509) | def data_collator(self, batch: List[Dict[str, Any]], *, padding_to: Op... method _fetch_inputs_startswith (line 1545) | def _fetch_inputs_startswith(batch: List[Dict[str, Any]], prefix: str)... method fetch_inputs (line 1556) | def fetch_inputs(batch: List[Dict[str, Any]], keys: Optional[List[str]... method gather_list (line 1563) | def gather_list(batch: List[Dict[str, Any]], attr_name: str) -> Option... method concat_tensor (line 1572) | def concat_tensor(batch: List[Dict[str, Any]], attr_name: str, dim: in... method _rlhf_data_collator (line 1579) | def _rlhf_data_collator(self, method _kto_data_collator (line 1597) | def _kto_data_collator(self, batch: List[Dict[str, Any]], *, padding_t... method _embedding_data_collator (line 1617) | def _embedding_data_collator(self, method _reranker_data_collator (line 1651) | def _reranker_data_collator(self, method _seq_cls_data_collator (line 1686) | def _seq_cls_data_collator(self, method _data_collator (line 1706) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... method _pad_3d_position_ids (line 1825) | def _pad_3d_position_ids(self, method _data_collator_mm_data (line 1858) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... method _sp_data_collator (line 1879) | def _sp_data_collator(self, res, padding_to, tokenizer, padding_side): method print_inputs (line 1899) | def print_inputs(self, inputs: Dict[str, Any]) -> None: method prepare_lmdeploy_pytorch_inputs (line 1961) | async def prepare_lmdeploy_pytorch_inputs(self, inputs) -> None: method prepare_lmdeploy_turbomind_inputs (line 1978) | async def prepare_lmdeploy_turbomind_inputs(self, inputs: Dict[str, An... method _pad_sequence (line 2001) | def _pad_sequence(self, sequences: List[torch.Tensor], padding_value: ... method safe_decode (line 2027) | def safe_decode(self, input_ids: List[int], **kwargs) -> str: method _patch_flash_attention_forward (line 2066) | def _patch_flash_attention_forward(modeling_module, position_ids, use_... method _get_inputs_embeds_hf (line 2091) | def _get_inputs_embeds_hf(inputs_embeds, inputs, visual, processor, co... method _concat_text_position_ids (line 2147) | def _concat_text_position_ids(position_ids): FILE: swift/template/constant.py class LLMTemplateType (line 6) | class LLMTemplateType: class RMTemplateType (line 125) | class RMTemplateType: class MLLMTemplateType (line 129) | class MLLMTemplateType: class TemplateType (line 258) | class TemplateType(LLMTemplateType, MLLMTemplateType, RMTemplateType): method get_template_name_list (line 261) | def get_template_name_list(cls) -> List[str]: FILE: swift/template/grounding.py function _shuffle_colors (line 12) | def _shuffle_colors(nums: List[Any]) -> List[Any]: function generate_colors (line 29) | def generate_colors(): function download_file (line 36) | def download_file(url: str) -> str: function _calculate_brightness (line 52) | def _calculate_brightness(image, region: List[int]): function draw_bbox (line 60) | def draw_bbox(image: Image.Image, FILE: swift/template/register.py function register_template (line 16) | def register_template(template_meta: TemplateMeta, *, exist_ok: bool = F... function _read_args_json_template_type (line 23) | def _read_args_json_template_type(model_dir): function get_template_meta (line 31) | def get_template_meta(model_info: 'ModelInfo', function get_template (line 55) | def get_template( FILE: swift/template/template_inputs.py class StdTemplateInputs (line 15) | class StdTemplateInputs: method __post_init__ (line 34) | def __post_init__(self): method to_history (line 47) | def to_history(self): method is_multimodal (line 53) | def is_multimodal(self): method from_dict (line 57) | def from_dict(cls, inputs: Dict[str, Any]) -> 'StdTemplateInputs': method remove_messages_media (line 104) | def remove_messages_media(messages: Messages) -> Dict[str, Any]: class TemplateInputs (line 135) | class TemplateInputs: method __post_init__ (line 141) | def __post_init__(self): method _compat_rejected_response (line 162) | def _compat_rejected_response(inputs: Dict[str, Any]): method from_dict (line 188) | def from_dict(cls, inputs: Dict[str, Any]) -> 'TemplateInputs': FILE: swift/template/template_meta.py class TemplateMeta (line 13) | class TemplateMeta: method to_generate_template_meta (line 54) | def to_generate_template_meta(self) -> 'TemplateMeta': method _has_system (line 67) | def _has_system(prefix_or_prompt: Prompt) -> bool: method _replace_system (line 71) | def _replace_system(prefix: Prompt) -> Prompt: method _check_template_meta (line 74) | def _check_template_meta(self): method __post_init__ (line 81) | def __post_init__(self): method _token_attr_to_id (line 102) | def _token_attr_to_id(tokenizer: PreTrainedTokenizerBase, value: Optio... method init (line 116) | def init(self, tokenizer: PreTrainedTokenizerBase) -> None: method check_system (line 142) | def check_system(self, system: Optional[str]) -> None: FILE: swift/template/templates/baai.py class Emu3GenTemplate (line 18) | class Emu3GenTemplate(Template): method init_processor (line 29) | def init_processor(self, processor) -> None: method _encode (line 41) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _process_prompt_train (line 61) | def _process_prompt_train(self, raw_prompt, image_token_ids): method _process_prompt_test (line 71) | def _process_prompt_test(self, raw_prompt): method prepare_for_output (line 88) | def prepare_for_output(self, output: str) -> str: method prepare_generate_kwargs (line 91) | def prepare_generate_kwargs(self, generate_kwargs: Dict[str, Any], *, ... method decode (line 118) | def decode(self, generate_ids: List[int], **kwargs) -> Any: method to_imgstr (line 125) | def to_imgstr(self, image_tokens): method format_image_prompt (line 132) | def format_image_prompt(self, image_tokens): method smart_resize (line 140) | def smart_resize(self, image): class Emu3ChatTemplate (line 156) | class Emu3ChatTemplate(Template): method _encode (line 160) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: FILE: swift/template/templates/baidu.py class ERNIETemplateMeta (line 16) | class ERNIETemplateMeta(TemplateMeta): class ErnieThinkingTemplate (line 27) | class ErnieThinkingTemplate(Template): method _swift_prepare_inputs (line 29) | def _swift_prepare_inputs(self, inputs) -> None: class ERNIEThinkingTemplateMeta (line 44) | class ERNIEThinkingTemplateMeta(TemplateMeta): class PaddleOCRTemplate (line 76) | class PaddleOCRTemplate(Template): method replace_tag (line 82) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 89) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 114) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... class ERNIE_VLTemplate (line 176) | class ERNIE_VLTemplate(Template): method replace_tag (line 179) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 184) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 235) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... method generate (line 242) | def generate(self, model, *args, **kwargs): class PaddleOCR1_5Template (line 263) | class PaddleOCR1_5Template(PaddleOCRTemplate): method _post_encode (line 267) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... FILE: swift/template/templates/deepseek.py class DeepseekTemplateMeta (line 21) | class DeepseekTemplateMeta(TemplateMeta): class DeepseekVLTemplate (line 45) | class DeepseekVLTemplate(Template): method _encode (line 53) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 105) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _data_collator (line 113) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... method generate (line 132) | def generate(self, model, *args, **kwargs): method decode (line 189) | def decode(self, generate_ids: List[int], **kwargs) -> Any: class DeepseekVLTemplateMeta (line 218) | class DeepseekVLTemplateMeta(DeepseekTemplateMeta): class DeepseekJanus (line 230) | class DeepseekJanus(DeepseekVLTemplate): class DeepseekOCR (line 238) | class DeepseekOCR(Template): method init_env_args (line 242) | def init_env_args(self): method image_size (line 254) | def image_size(self): method crop_threshold (line 260) | def crop_threshold(self): method _load_dynamic_modules (line 264) | def _load_dynamic_modules(self): method BasicImageTransform (line 275) | def BasicImageTransform(self): method dynamic_preprocess (line 280) | def dynamic_preprocess(self): method _preprocess_image (line 284) | def _preprocess_image(self, images, image_token_id): method _encode (line 398) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator_mm_data (line 418) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... class DeepseekOCR2 (line 446) | class DeepseekOCR2(DeepseekOCR): class DeepseekV2_5TemplateMeta (line 460) | class DeepseekV2_5TemplateMeta(TemplateMeta): class DeepseekV3_1Template (line 472) | class DeepseekV3_1Template(Template): method _is_add_non_thinking_round (line 475) | def _is_add_non_thinking_round(self, messages, i, start_idx): class DeepseekVL2Template (line 492) | class DeepseekVL2Template(DeepseekVLTemplate): method _encode (line 496) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 527) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... FILE: swift/template/templates/dots.py class DotsOCRTemplate (line 12) | class DotsOCRTemplate(Template): method replace_tag (line 16) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 26) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: FILE: swift/template/templates/gemma.py class GemmaTemplateMeta (line 17) | class GemmaTemplateMeta(TemplateMeta): class PaliGemmaTemplate (line 30) | class PaliGemmaTemplate(Template): method replace_tag (line 33) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 43) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Gemma3TextTemplateMeta (line 71) | class Gemma3TextTemplateMeta(TemplateMeta): class Gemma3Template (line 79) | class Gemma3Template(Template): method _swift_encode (line 81) | def _swift_encode(self, inputs: StdTemplateInputs): class Gemma3VisionTemplate (line 95) | class Gemma3VisionTemplate(Gemma3Template): method replace_tag (line 99) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 104) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Gemma3nTemplate (line 137) | class Gemma3nTemplate(Gemma3Template): method replace_tag (line 142) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 157) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator_mm_data (line 226) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... FILE: swift/template/templates/glm.py class GLMTemplateMeta (line 17) | class GLMTemplateMeta(TemplateMeta): class GLM4Template (line 21) | class GLM4Template(Template): method _swift_encode (line 24) | def _swift_encode(self, inputs: StdTemplateInputs): method decode (line 36) | def decode(self, *args, **kwargs): class ChatGLM4TemplateMeta (line 50) | class ChatGLM4TemplateMeta(GLMTemplateMeta): class GLM4TemplateMeta (line 62) | class GLM4TemplateMeta(ChatGLM4TemplateMeta): class GLM4_5TemplateMeta (line 69) | class GLM4_5TemplateMeta(GLM4TemplateMeta): class ChatGLM4VTemplate (line 76) | class ChatGLM4VTemplate(Template): method replace_tag (line 78) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 85) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 107) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class GLM4vPackingTemplateMixin (line 115) | class GLM4vPackingTemplateMixin: method packing_row (line 119) | def packing_row(self, row: List[Dict[str, Any]]) -> Dict[str, Any]: method _get_position_ids (line 127) | def _get_position_ids(self, inputs: Dict[str, Any]): method _data_collator (line 148) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... method _patch_create_causal_mask (line 161) | def _patch_create_causal_mask(self, modeling_module): class GLM4VTemplate (line 179) | class GLM4VTemplate(GLM4vPackingTemplateMixin, Template): method init_processor (line 186) | def init_processor(self, processor) -> None: method replace_tag (line 197) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 211) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 302) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: class GLM4_5Template (line 316) | class GLM4_5Template(GLM4Template): method _jinja_encode (line 318) | def _jinja_encode(self, inputs: StdTemplateInputs): class GLM4_5VTemplate (line 341) | class GLM4_5VTemplate(GLM4vPackingTemplateMixin, GLM4_5Template): method replace_tag (line 345) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 352) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 378) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method init_processor (line 387) | def init_processor(self, processor) -> None: class CogTemplate (line 447) | class CogTemplate(Template): method replace_tag (line 452) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 456) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 478) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class CogVLMTemplateMeta (line 508) | class CogVLMTemplateMeta(TemplateMeta): class Cog2VideoTemplate (line 519) | class Cog2VideoTemplate(CogTemplate): method _encode (line 522) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class GLMEdgeVTemplate (line 553) | class GLMEdgeVTemplate(Template): method replace_tag (line 556) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 561) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class GLMOCRTemplate (line 580) | class GLMOCRTemplate(Template): method init_processor (line 585) | def init_processor(self, processor) -> None: method replace_tag (line 591) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 598) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 626) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: FILE: swift/template/templates/idefics3.py class Idefics3Template (line 11) | class Idefics3Template(Template): method _encode (line 14) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: FILE: swift/template/templates/internlm.py class InternLMXComposer2Template (line 39) | class InternLMXComposer2Template(Template): method replace_tag (line 45) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 52) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 84) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _data_collator (line 136) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Xcomposer2TemplateMeta (line 144) | class Xcomposer2TemplateMeta(TemplateMeta): class InternLMXComposer2_5Template (line 168) | class InternLMXComposer2_5Template(InternLMXComposer2Template): class InternLMXComposer2_4khdTemplate (line 180) | class InternLMXComposer2_4khdTemplate(InternLMXComposer2Template): FILE: swift/template/templates/internvl.py class InternvlTemplate (line 19) | class InternvlTemplate(Template): method init_env_args (line 25) | def init_env_args(self): method replace_tag (line 30) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 38) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method forward_context (line 63) | def forward_context(self, model, inputs): method _post_encode (line 72) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... class Internvl2Template (line 104) | class Internvl2Template(InternvlTemplate): method init_env_args (line 107) | def init_env_args(self): method replace_tag (line 112) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 121) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_bbox (line 124) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method _encode (line 127) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Internvl3_5GPTTemplate (line 185) | class Internvl3_5GPTTemplate(Internvl2Template, GptTemplate): class InternvlhfTemplate (line 192) | class InternvlhfTemplate(Internvl2Template): method init_env_args (line 194) | def init_env_args(self): method replace_tag (line 197) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 211) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 318) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... FILE: swift/template/templates/kwai.py class KeyeTemplateMeta (line 18) | class KeyeTemplateMeta(ChatmlTemplateMeta): class KeyeVLTemplate (line 23) | class KeyeVLTemplate(Template): method replace_tag (line 28) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 48) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 90) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _data_collator_mm_data (line 280) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... class KeyeVL1_5Template (line 292) | class KeyeVL1_5Template(KeyeVLTemplate): method _post_encode (line 294) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: FILE: swift/template/templates/llama.py class Llama3TemplateMeta (line 35) | class Llama3TemplateMeta(TemplateMeta): function _get_llama3_2_prefix (line 51) | def _get_llama3_2_prefix() -> Prompt: class Llama3_2TemplateMeta (line 60) | class Llama3_2TemplateMeta(Llama3TemplateMeta): class Llama3_2VisionTemplate (line 68) | class Llama3_2VisionTemplate(Template): method replace_tag (line 70) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 75) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 98) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Llama4Template (line 116) | class Llama4Template(Template): method replace_tag (line 119) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 126) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Llama4TemplateMeta (line 148) | class Llama4TemplateMeta(TemplateMeta): class Llama3_1OmniTemplate (line 173) | class Llama3_1OmniTemplate(Template): method _encode (line 177) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 192) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... FILE: swift/template/templates/llava.py class LlavaHfTemplate (line 20) | class LlavaHfTemplate(Template): method image_token_index (line 24) | def image_token_index(self): method replace_tag (line 29) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 34) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class LlavaVideoHfTemplate (line 82) | class LlavaVideoHfTemplate(Template): method replace_tag (line 84) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 96) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Llava1_6HfTemplate (line 124) | class Llava1_6HfTemplate(LlavaHfTemplate): method _data_collator (line 126) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class LlavaMistralTemplateMeta (line 136) | class LlavaMistralTemplateMeta(TemplateMeta): class LLava1_6YiHfTemplate (line 159) | class LLava1_6YiHfTemplate(Llava1_6HfTemplate): method replace_tag (line 161) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... class LlavaOneVisionHfTemplate (line 182) | class LlavaOneVisionHfTemplate(Llava1_6HfTemplate): method _encode (line 184) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class LlavaLlama3_1HfTemplate (line 223) | class LlavaLlama3_1HfTemplate(LlavaHfTemplate): method _encode (line 229) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class LLavaLlama3HfTemplate (line 244) | class LLavaLlama3HfTemplate(Template): method _encode (line 248) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class LLavaTemplate (line 263) | class LLavaTemplate(Template): method replace_tag (line 267) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 272) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 287) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class LLavaOneVision1_5Template (line 312) | class LLavaOneVision1_5Template(Template): method init_env_args (line 319) | def init_env_args(self): method replace_tag (line 323) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 343) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_bbox (line 349) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method _encode (line 355) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 394) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: FILE: swift/template/templates/llm.py class GMETemplate (line 37) | class GMETemplate(Qwen2VLTemplate): method _preprocess_inputs (line 39) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: class JinaRerankerM0Template (line 49) | class JinaRerankerM0Template(Qwen2VLTemplate): method _preprocess_inputs (line 51) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: class TeleChatTemplateMeta (line 185) | class TeleChatTemplateMeta(TemplateMeta): class HunyuanTemplate (line 316) | class HunyuanTemplate(Template): method _remove_thinking_content (line 318) | def _remove_thinking_content(self, content: str) -> str: class GptTemplate (line 339) | class GptTemplate(Template): method _get_gpt_oss_prefix (line 342) | def _get_gpt_oss_prefix(self): method _swift_prepare_inputs (line 349) | def _swift_prepare_inputs(self, inputs: StdTemplateInputs): class GptOssTemplateMeta (line 365) | class GptOssTemplateMeta(TemplateMeta): class YoutuLLMTemplate (line 413) | class YoutuLLMTemplate(Template): method _remove_thinking_content (line 415) | def _remove_thinking_content(self, content: str) -> str: method _add_non_thinking_prefix (line 420) | def _add_non_thinking_prefix(self, inputs) -> None: method _remove_history_thinking (line 432) | def _remove_history_thinking(self, inputs) -> None: FILE: swift/template/templates/megrez.py class MegrezTemplateMeta (line 15) | class MegrezTemplateMeta(TemplateMeta): class MegrezOmniTemplate (line 27) | class MegrezOmniTemplate(Template): method replace_tag (line 31) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 38) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 80) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _data_collator (line 85) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/microsoft.py class FlorenceTemplate (line 16) | class FlorenceTemplate(Template): method _add_default_tags (line 22) | def _add_default_tags(inputs: StdTemplateInputs) -> None: method replace_tag (line 25) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_bbox (line 29) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method _encode (line 32) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 54) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method decode (line 63) | def decode(self, generate_ids: List[int], **kwargs) -> Any: class Phi3TemplateMeta (line 87) | class Phi3TemplateMeta(TemplateMeta): class Phi4TemplateMeta (line 100) | class Phi4TemplateMeta(TemplateMeta): class Phi3VisionTemplate (line 114) | class Phi3VisionTemplate(Template): method replace_tag (line 117) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 124) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Phi4MMTemplate (line 155) | class Phi4MMTemplate(Template): method replace_tag (line 158) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 169) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 190) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/midashenglm.py class MiDashengLMTemplate (line 17) | class MiDashengLMTemplate(Template): method init_env_args (line 21) | def init_env_args(self): method replace_tag (line 25) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 30) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 48) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/minicpm.py class MinicpmTemplateMeta (line 21) | class MinicpmTemplateMeta(TemplateMeta): function _remove_idx (line 31) | def _remove_idx(arr: List[int], idx_list: List[int]) -> List[int]: class MiniCPMVTemplate (line 40) | class MiniCPMVTemplate(Template): method replace_tag (line 46) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method prepare_lmdeploy_turbomind_inputs (line 53) | async def prepare_lmdeploy_turbomind_inputs(self, inputs: Dict[str, An... method _encode (line 82) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 132) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _data_collator (line 136) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class MiniCPMV2_5Template (line 147) | class MiniCPMV2_5Template(MiniCPMVTemplate): class MiniCPMV2_6Template (line 157) | class MiniCPMV2_6Template(MiniCPMVTemplate): method init_env_args (line 159) | def init_env_args(self): method replace_tag (line 165) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 175) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class MiniCPMV4_5Template (line 251) | class MiniCPMV4_5Template(MiniCPMV2_6Template): method _encode (line 253) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 306) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/minimax.py class MinimaxTemplateMeta (line 16) | class MinimaxTemplateMeta(TemplateMeta): class MinimaxVLTemplate (line 40) | class MinimaxVLTemplate(Template): method replace_tag (line 44) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method calc_num_image_tokens (line 49) | def calc_num_image_tokens(self, image_inputs): method _encode (line 94) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 110) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class MinimaxM2TemplateMeta (line 125) | class MinimaxM2TemplateMeta(TemplateMeta): FILE: swift/template/templates/minimind.py class MiniMindTemplateMeta (line 12) | class MiniMindTemplateMeta(TemplateMeta): FILE: swift/template/templates/mistral.py class Mistral3TemplateMeta (line 28) | class Mistral3TemplateMeta(TemplateMeta): class Mistral2503Template (line 39) | class Mistral2503Template(Template): method replace_tag (line 43) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 48) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: class Mistral2506Template (line 147) | class Mistral2506Template(Mistral2503Template): method _get_mistral_system (line 149) | def _get_mistral_system(self): method _swift_encode (line 160) | def _swift_encode(self, inputs: StdTemplateInputs): class Mistral2512Template (line 170) | class Mistral2512Template(Mistral2506Template): method _get_mistral_system (line 172) | def _get_mistral_system(self): class Mistral2512ThinkingTemplate (line 187) | class Mistral2512ThinkingTemplate(Mistral2506Template): method _get_mistral_system (line 189) | def _get_mistral_system(self): FILE: swift/template/templates/molmo.py class MolmoTemplate (line 12) | class MolmoTemplate(Template): method replace_tag (line 15) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 19) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method generate (line 37) | def generate(self, model, **kwargs): method _data_collator (line 46) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/moonshot.py class MoonlightTemplateMeta (line 18) | class MoonlightTemplateMeta(TemplateMeta): class KimiVLTemplate (line 36) | class KimiVLTemplate(Template): method replace_tag (line 41) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 46) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator_mm_data (line 72) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... method _post_encode (line 79) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... FILE: swift/template/templates/mplug.py class mPlugOwl2Template (line 18) | class mPlugOwl2Template(Template): method replace_tag (line 20) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 25) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 44) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class mPlugOwl3Template (line 63) | class mPlugOwl3Template(Template): method init_env_args (line 66) | def init_env_args(self): method _get_image_token_list (line 70) | def _get_image_token_list(self, cut_shape): method replace_tag (line 80) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 89) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 132) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _data_collator (line 154) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class mPlugOwl3_241101Template (line 162) | class mPlugOwl3_241101Template(mPlugOwl3Template): method _post_encode (line 165) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... class mPlugOwl3TemplateMeta (line 175) | class mPlugOwl3TemplateMeta(QwenTemplateMeta): class DocOwl2Template (line 186) | class DocOwl2Template(Template): method replace_tag (line 188) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 193) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 201) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/pixtral.py class PixtralTemplate (line 12) | class PixtralTemplate(Template): method _encode (line 16) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 46) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/qwen.py class QwenTemplateMeta (line 27) | class QwenTemplateMeta(ChatmlTemplateMeta): class Qwen2_5TemplateMeta (line 35) | class Qwen2_5TemplateMeta(QwenTemplateMeta): class Qwen2_5MathTemplateMeta (line 40) | class Qwen2_5MathTemplateMeta(QwenTemplateMeta): class Qwen3MixedTemplateMeta (line 56) | class Qwen3MixedTemplateMeta(QwenTemplateMeta): class Qwen3EmbTemplate (line 95) | class Qwen3EmbTemplate(Template): method _preprocess_inputs (line 97) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: class Qwen3RerankerTemplate (line 117) | class Qwen3RerankerTemplate(Template): method _preprocess_inputs (line 120) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: method prepare_engine_kwargs (line 133) | def prepare_engine_kwargs(self) -> Dict[str, Any]: class QwenPRMTemplate (line 157) | class QwenPRMTemplate(Template): method _preprocess_inputs (line 160) | def _preprocess_inputs( method make_step_rewards (line 170) | def make_step_rewards(logits, token_masks): method decode_prm (line 182) | def decode_prm(self, input_ids: torch.Tensor, logits: torch.Tensor) ->... class QwenVLTemplate (line 191) | class QwenVLTemplate(Template): method _load_image (line 195) | def _load_image(image, load_images: bool): method replace_tag (line 200) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 213) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_bbox (line 216) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... class QwenAudioTemplate (line 223) | class QwenAudioTemplate(Template): method replace_tag (line 225) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _tokenize (line 233) | def _tokenize(self, context, **kwargs): method _encode (line 237) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 247) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Qwen2AudioTemplate (line 257) | class Qwen2AudioTemplate(Template): method init_env_args (line 259) | def init_env_args(self) -> None: method replace_tag (line 263) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 271) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 281) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Qwen2VLTemplate (line 296) | class Qwen2VLTemplate(Template): method init_env_args (line 304) | def init_env_args(self): method replace_tag (line 309) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method replace_ref (line 343) | def replace_ref(self, ref: str, index: int, inputs: StdTemplateInputs)... method replace_bbox (line 349) | def replace_bbox(self, bbox: List[int], index: int, inputs: StdTemplat... method _encode (line 355) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method forward_context (line 399) | def forward_context(self, model, inputs): method _post_encode (line 411) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _data_collator_mm_data (line 423) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... method packing_row (line 430) | def packing_row(self, row: List[Dict[str, Any]]) -> Dict[str, Any]: method _get_position_ids (line 438) | def _get_position_ids(self, inputs: Dict[str, Any]): method _data_collator (line 465) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Qwen2_5VLTemplate (line 490) | class Qwen2_5VLTemplate(Qwen2VLTemplate): class Qwen3VLTemplate (line 504) | class Qwen3VLTemplate(Qwen2VLTemplate): method _encode (line 507) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 552) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: class Qwen3_5Template (line 561) | class Qwen3_5Template(Qwen3VLTemplate): method _post_encode (line 565) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: class Qwen3VLEmbTemplate (line 580) | class Qwen3VLEmbTemplate(Qwen3VLTemplate): method _preprocess_inputs (line 582) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: class Qwen3VLRerankerTemplate (line 597) | class Qwen3VLRerankerTemplate(Qwen3VLTemplate): method _preprocess_inputs (line 600) | def _preprocess_inputs(self, inputs: StdTemplateInputs) -> None: class Qwen2_5OmniTemplate (line 619) | class Qwen2_5OmniTemplate(Qwen2_5VLTemplate): method init_processor (line 623) | def init_processor(self, processor) -> None: method replace_tag (line 638) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _get_feat_extract_output_lengths (line 685) | def _get_feat_extract_output_lengths(self, input_lengths): method _get_new_tokens_use_audio_in_video (line 693) | def _get_new_tokens_use_audio_in_video(self, i, *, video_grid_thw, vid... method _encode (line 735) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 822) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _get_position_ids (line 858) | def _get_position_ids(self, inputs: Dict[str, Any]): method _data_collator_mm_data (line 884) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... method generate (line 904) | def generate(self, model, *args, **kwargs): class Qwen3OmniTemplate (line 913) | class Qwen3OmniTemplate(Qwen2_5OmniTemplate): method _post_encode (line 918) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: class Ovis1_6Template (line 927) | class Ovis1_6Template(Template): method init_env_args (line 931) | def init_env_args(self): method replace_tag (line 935) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 940) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 965) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _data_collator (line 983) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Ovis2Template (line 1009) | class Ovis2Template(Ovis1_6Template): method init_env_args (line 1013) | def init_env_args(self): method replace_tag (line 1017) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... class Ovis2_5Template (line 1034) | class Ovis2_5Template(Template): method init_env_args (line 1039) | def init_env_args(self) -> None: method replace_tag (line 1046) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 1060) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 1101) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... method _data_collator_mm_data (line 1130) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... class MarcoO1TemplateMeta (line 1148) | class MarcoO1TemplateMeta(QwenTemplateMeta): FILE: swift/template/templates/seed.py class SeedTemplate (line 17) | class SeedTemplate(Template): method get_thinking_budget (line 19) | def get_thinking_budget(self, inputs: StdTemplateInputs): method get_reflect_interval (line 57) | def get_reflect_interval(self, inputs: StdTemplateInputs): method insert_budget_markers (line 72) | def insert_budget_markers(text: str, tokenizer, interval: int, total_b... method _prepare_system (line 98) | def _prepare_system(self, inputs): method _swift_prepare_inputs (line 121) | def _swift_prepare_inputs(self, inputs: StdTemplateInputs): method _simplify_context_list (line 147) | def _simplify_context_list(self, context_list, loss_scale_list, inputs): method _jinja_encode (line 157) | def _jinja_encode(self, inputs: StdTemplateInputs): class SeedTemplateMeta (line 162) | class SeedTemplateMeta(TemplateMeta): class SailVLTemplate (line 181) | class SailVLTemplate(Template): method __init__ (line 183) | def __init__(self, *args, **kwargs): method replace_tag (line 189) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 198) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 227) | def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Di... class SailVLTemplateMeta (line 255) | class SailVLTemplateMeta(ChatmlTemplateMeta): FILE: swift/template/templates/stepfun.py class GOTImageEvalProcessor (line 17) | class GOTImageEvalProcessor: method __init__ (line 19) | def __init__(self, image_size=384, mean=None, std=None): method __call__ (line 35) | def __call__(self, item): class GOT_OCR2Template (line 39) | class GOT_OCR2Template(Template): method replace_tag (line 42) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 49) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 59) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class GOT_OCR2HfTemplate (line 75) | class GOT_OCR2HfTemplate(Template): method replace_tag (line 78) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 85) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: # 暂时照抄上面 method _data_collator (line 92) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class StepAudioTemplate (line 111) | class StepAudioTemplate(Template): method replace_tag (line 114) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... class StepAudio2MiniTemplate (line 123) | class StepAudio2MiniTemplate(Template): method load_audio (line 126) | def load_audio(self, file_path, target_rate=16000, max_length=None): method _mel_filters (line 143) | def _mel_filters(self, n_mels: int) -> 'torch.Tensor': method log_mel_spectrogram (line 153) | def log_mel_spectrogram(self, audio, n_mels=128, padding=479): method compute_token_num (line 176) | def compute_token_num(self, max_feature_len): method padding_mels (line 191) | def padding_mels(self, data: List['torch.Tensor']): method audio_process (line 212) | def audio_process(self, audio): method replace_tag (line 223) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 228) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 270) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... class Step3VLTemplate (line 303) | class Step3VLTemplate(Template): method replace_tag (line 309) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 314) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 362) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _data_collator_mm_data (line 395) | def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[... FILE: swift/template/templates/tencent.py class HunYuanVLTemplateMeta (line 14) | class HunYuanVLTemplateMeta(TemplateMeta): class HunYuanVLTemplate (line 23) | class HunYuanVLTemplate(Template): method replace_tag (line 29) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method _encode (line 36) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _post_encode (line 89) | def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]: method _pad_3d_position_ids (line 108) | def _pad_3d_position_ids(self, FILE: swift/template/templates/utils.py class ChatmlTemplateMeta (line 13) | class ChatmlTemplateMeta(TemplateMeta): class EmptyTemplateMeta (line 23) | class EmptyTemplateMeta(TemplateMeta): FILE: swift/template/templates/valley.py class ValleyTemplateMeta (line 17) | class ValleyTemplateMeta(ChatmlTemplateMeta): class ValleyTemplate (line 25) | class ValleyTemplate(Template): method replace_tag (line 29) | def replace_tag(self, media_type: Literal['image', 'video', 'audio'], ... method preprocess_images (line 37) | def preprocess_images(self, image_binary_list): method process_images (line 65) | def process_images(self, inputs, images_binary): method _encode (line 108) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 123) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/templates/yi.py class YiVLTemplate (line 24) | class YiVLTemplate(Template): method _encode (line 28) | def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: method _data_collator (line 45) | def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: O... FILE: swift/template/utils.py class ContextType (line 20) | class ContextType: class StopWordsCriteria (line 26) | class StopWordsCriteria(StoppingCriteria): method __init__ (line 31) | def __init__(self, tokenizer: PreTrainedTokenizerBase, stop_words: Lis... method __call__ (line 38) | def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor, **kw... function fetch_one (line 59) | def fetch_one(element: Union[Tuple, List, Set, Dict, Any], item_type: Op... function findall (line 71) | def findall(token_list: List[int], sub_token_list: Union[int, List[int]]... function align_image_inputs (line 87) | def align_image_inputs(input_ids: List[int], labels: List[int], new_inpu... function _split_str_by_regex (line 124) | def _split_str_by_regex(text: str, regex_delimiters: List[str]) -> List[... function split_str_parts_by (line 137) | def split_str_parts_by(text: str, delimiters: List[str], regex_mode: boo... function get_last_user_round (line 168) | def get_last_user_round(messages): function history_to_messages (line 176) | def history_to_messages(history: History, function messages_to_history (line 200) | def messages_to_history(messages: 'Messages') -> Dict[str, Any]: function update_generation_config_eos_token (line 227) | def update_generation_config_eos_token(generation_config, template): FILE: swift/template/vision_utils.py function _build_transform (line 22) | def _build_transform(input_size): function _find_closest_aspect_ratio (line 35) | def _find_closest_aspect_ratio(aspect_ratio, target_ratios, width, heigh... function _dynamic_preprocess (line 51) | def _dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, us... function rescale_image (line 88) | def rescale_image(img: Image.Image, max_pixels: int) -> Image.Image: function _check_path (line 104) | def _check_path(path: str) -> Union[str, None]: function load_file (line 128) | def load_file(path: Union[str, bytes, _T]) -> Union[BytesIO, _T]: function load_image (line 164) | def load_image(image: Union[str, bytes, Image.Image]) -> Image.Image: function load_batch (line 173) | def load_batch(path_list: List[Union[str, None, Any, BytesIO]], function load_video_hf (line 184) | def load_video_hf(videos: List[str]): function _get_index (line 206) | def _get_index(bound, fps, max_frame, first_idx=0, num_segments=32): function transform_image (line 219) | def transform_image(image, input_size=448, max_num=12): function load_video_internvl (line 227) | def load_video_internvl(video: Union[str, bytes], bound=None, num_segmen... function load_video_cogvlm2 (line 241) | def load_video_cogvlm2(video: Union[str, bytes]) -> np.ndarray: function load_video_llava (line 259) | def load_video_llava(video: Union[str, bytes]) -> np.ndarray: function load_video_minicpmv_mplug_owl3 (line 278) | def load_video_minicpmv_mplug_owl3(video: Union[str, bytes], max_num_fra... function load_audio (line 299) | def load_audio(audio: Union[str, bytes], sampling_rate: int, return_sr: ... function load_video_valley (line 306) | def load_video_valley(video: Union[str, bytes]): function load_video_ovis2 (line 317) | def load_video_ovis2(video_path, num_frames): function load_video_ovis2_5 (line 333) | def load_video_ovis2_5(video_path, num_frames): FILE: swift/trainers/arguments.py class TrainArgumentsMixin (line 17) | class TrainArgumentsMixin: method _patch_liger_kernel (line 206) | def _patch_liger_kernel(): method _init_liger (line 219) | def _init_liger(self): method _init_callbacks (line 227) | def _init_callbacks(self): method __post_init__ (line 238) | def __post_init__(self): class TrainingArguments (line 280) | class TrainingArguments(TrainArgumentsMixin, HfTrainingArguments): method __post_init__ (line 282) | def __post_init__(self): class Seq2SeqTrainingArguments (line 288) | class Seq2SeqTrainingArguments(TrainArgumentsMixin, HfSeq2SeqTrainingArg... method __post_init__ (line 290) | def __post_init__(self): FILE: swift/trainers/embedding_trainer.py class EmbeddingTrainer (line 9) | class EmbeddingTrainer(Trainer): method __init__ (line 11) | def __init__(self, *args, **kwargs): method evaluation_loop (line 15) | def evaluation_loop(self, *args, **kwargs): FILE: swift/trainers/mixin.py class SwiftMixin (line 60) | class SwiftMixin: method __init__ (line 63) | def __init__(self, method _get_data_collator (line 141) | def _get_data_collator(self, args, template): method _add_callbacks (line 145) | def _add_callbacks(self): method _collect_config_info (line 149) | def _collect_config_info(self) -> Dict[str, str]: method tokenizer (line 171) | def tokenizer(self): method _patch_deepspeed_load_checkpoint (line 176) | def _patch_deepspeed_load_checkpoint(self): method get_use_logits_to_keep (line 198) | def get_use_logits_to_keep(self, default_value: bool = True): method _save_initial_model (line 208) | def _save_initial_model(self, output_dir): method _save_converted_model (line 220) | def _save_converted_model(self, output_dir): method _load_rng_state (line 254) | def _load_rng_state(self, *args, **kwargs): method _load_optimizer_and_scheduler (line 259) | def _load_optimizer_and_scheduler(self, *args, **kwargs): method _save_model (line 272) | def _save_model(self, output_dir: Optional[str] = None, state_dict=None): method _save (line 362) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method _rotate_flash_checkpoints (line 393) | def _rotate_flash_checkpoints(self, use_mtime=False, output_dir=None) ... method get_last_checkpoint (line 428) | def get_last_checkpoint(self): method _get_last_checkpoint_step (line 442) | def _get_last_checkpoint_step(self): method get_resume_checkpoint (line 450) | def get_resume_checkpoint(self): method get_resume_checkpoint_until_find_ucp (line 474) | def get_resume_checkpoint_until_find_ucp(self): method wait_latest_checkpoint (line 489) | def wait_latest_checkpoint(self, timeout=None, max_steps=None): method _fix_zero3_gather_all_parameters (line 497) | def _fix_zero3_gather_all_parameters(self) -> None: method _save_checkpoint (line 516) | def _save_checkpoint(self, *args, **kwargs): method _save_flash_checkpoint (line 527) | def _save_flash_checkpoint(self, model, trial, metrics=None): method _fix_grad_norm_nan (line 642) | def _fix_grad_norm_nan(): method _patch_tasks (line 661) | def _patch_tasks(self): method _fix_gradient_checkpointing (line 809) | def _fix_gradient_checkpointing(self): method _prepare_gradient_checkpointing (line 839) | def _prepare_gradient_checkpointing(self, model) -> None: method train (line 870) | def train(self, *args, **kwargs): method push_to_hub (line 900) | def push_to_hub(self, *args, **kwargs): method compute_custom_metrics (line 905) | def compute_custom_metrics(metrics, key_prefix: str = ''): method log (line 934) | def log(self, logs: Dict[str, float], *args, **kwargs) -> None: method _maybe_log_save_evaluate (line 941) | def _maybe_log_save_evaluate(self, tr_loss, *args, **kwargs): method create_loss_and_eval_metric (line 974) | def create_loss_and_eval_metric(self, args): method create_optimizer_and_scheduler (line 984) | def create_optimizer_and_scheduler(self, num_training_steps: int): method _get_listwise_reranker_preds (line 989) | def _get_listwise_reranker_preds(logits, labels): method _compute_acc (line 1000) | def _compute_acc(self, outputs, labels, cu_seqlens=None) -> None: method _evalscope_eval (line 1059) | def _evalscope_eval(self): method prepare_logits_to_keep (line 1095) | def prepare_logits_to_keep(self, inputs): method get_cu_seqlens (line 1118) | def get_cu_seqlens(self, position_ids, logits_to_keep) -> torch.Tensor: method _patch_skip_first_batches (line 1130) | def _patch_skip_first_batches(self): class DataLoaderMixin (line 1148) | class DataLoaderMixin: method get_sp_dataloader (line 1150) | def get_sp_dataloader(self, dataset, batch_size, skip_batches=0): method get_train_dataloader (line 1192) | def get_train_dataloader(self, skip_batches=0): method _disable_group_by_length (line 1244) | def _disable_group_by_length(self): method get_eval_dataloader (line 1252) | def get_eval_dataloader(self, eval_dataset=None): FILE: swift/trainers/patcher.py function add_train_message (line 17) | def add_train_message(logs, state, start_time, start_step) -> None: class ProgressCallbackNew (line 33) | class ProgressCallbackNew(ProgressCallback): method on_train_begin (line 35) | def on_train_begin(self, args, state, control, **kwargs): method on_prediction_step (line 42) | def on_prediction_step(self, args, state: TrainerState, control, eval_... method on_log (line 51) | def on_log(self, args: TrainingArguments, state: TrainerState, control... class DefaultFlowCallbackNew (line 61) | class DefaultFlowCallbackNew(DefaultFlowCallback): method on_step_end (line 63) | def on_step_end(self, args: TrainingArguments, state: TrainerState, co... method on_epoch_end (line 74) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c... class PrinterCallbackNew (line 87) | class PrinterCallbackNew(PrinterCallback): method on_train_begin (line 89) | def on_train_begin(self, args, state, control, **kwargs): method on_log (line 94) | def on_log(self, args, state, control, logs=None, **kwargs): FILE: swift/trainers/reranker_trainer.py class RerankerTrainer (line 11) | class RerankerTrainer(Trainer): method __init__ (line 13) | def __init__(self, *args, **kwargs): method compute_loss (line 17) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... method evaluation_loop (line 47) | def evaluation_loop(self, *args, **kwargs): FILE: swift/trainers/seq2seq_trainer.py class Seq2SeqTrainer (line 25) | class Seq2SeqTrainer(SwiftMixin, DataLoaderMixin, HfSeq2SeqTrainer): method __init__ (line 28) | def __init__(self, *args, **kwargs): method _predict_data_collator (line 37) | def _predict_data_collator(batch): method _patch_predict_with_generate (line 41) | def _patch_predict_with_generate(self): method evaluate (line 55) | def evaluate(self, *args, **kwargs): method prediction_step (line 62) | def prediction_step( method _prepare_inputs (line 98) | def _prepare_inputs(self, inputs): method compute_loss (line 121) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... method training_step (line 225) | def training_step(self, model, inputs, *args, **kwargs): FILE: swift/trainers/trainer.py class Trainer (line 17) | class Trainer(SwiftMixin, DataLoaderMixin, HfTrainer): method _prepare_inputs (line 20) | def _prepare_inputs(self, inputs): method _patch_loss_function (line 38) | def _patch_loss_function(self): method train (line 62) | def train(self, *args, **kwargs): method compute_loss (line 66) | def compute_loss(self, model, inputs, return_outputs=False, num_items_... FILE: swift/trainers/trainer_factory.py class TrainerFactory (line 12) | class TrainerFactory: method get_cls (line 48) | def get_cls(args, mapping: Dict[str, str]): method get_trainer_cls (line 58) | def get_trainer_cls(cls, args): method get_training_args (line 62) | def get_training_args(cls, args): FILE: swift/trainers/utils.py function can_return_loss (line 26) | def can_return_loss(model: Module) -> bool: function find_labels (line 38) | def find_labels(model: Module) -> List[str]: function get_function (line 51) | def get_function(method_or_function: Union[MethodType, FunctionType]) ->... function is_instance_of_ms_model (line 57) | def is_instance_of_ms_model(model: Module) -> bool: function per_token_loss_func_sp (line 67) | def per_token_loss_func_sp(outputs, labels, enable_dft_loss=False, **kwa... function per_token_loss_func (line 99) | def per_token_loss_func(outputs, labels, enable_dft_loss: bool = False, ... function _kwargs_to_args (line 117) | def _kwargs_to_args(func, args, kwargs) -> Optional[List[Any]]: function _add_gradient_checkpointing (line 131) | def _add_gradient_checkpointing(module_list): function find_module_list (line 161) | def find_module_list(model) -> Optional[nn.ModuleList]: function dynamic_gradient_checkpointing (line 173) | def dynamic_gradient_checkpointing(model, including_vit: bool = False) -... function disable_gradient_checkpointing (line 199) | def disable_gradient_checkpointing(model: PreTrainedModel, gradient_chec... function gather_for_unpadded_tensors (line 223) | def gather_for_unpadded_tensors(input_data, use_gather_object=False): function calculate_max_steps (line 242) | def calculate_max_steps(args: 'TrainingArguments', dataset) -> int: function extract_version (line 255) | def extract_version(name: str) -> Optional[int]: function get_previous_version_from_path (line 265) | def get_previous_version_from_path(current_path: str) -> Optional[str]: function get_resume_dir (line 290) | def get_resume_dir(output_dir): function replace_index_file (line 294) | def replace_index_file(output_dir: str): function patch_modelscope_hub_timeout (line 319) | def patch_modelscope_hub_timeout(): FILE: swift/tuner_plugin/base.py class Tuner (line 10) | class Tuner: method prepare_model (line 14) | def prepare_model(args: 'SftArguments', model: torch.nn.Module) -> tor... method save_pretrained (line 27) | def save_pretrained( method from_pretrained (line 47) | def from_pretrained(model: torch.nn.Module, model_id: str, **kwargs) -... class PeftTuner (line 61) | class PeftTuner(Tuner): method save_pretrained (line 65) | def save_pretrained( method from_pretrained (line 79) | def from_pretrained(model: torch.nn.Module, model_id: str, **kwargs) -... FILE: swift/tuner_plugin/dummy.py class DummyTuner (line 11) | class DummyTuner(PeftTuner): method prepare_model (line 14) | def prepare_model(args: 'SftArguments', model: torch.nn.Module) -> tor... FILE: swift/tuner_plugin/ia3.py class IA3Tuner (line 15) | class IA3Tuner(PeftTuner): method prepare_model (line 18) | def prepare_model(args: 'SftArguments', model: torch.nn.Module) -> tor... FILE: swift/tuner_plugin/lora_llm.py function is_vit_aligner_param (line 17) | def is_vit_aligner_param(model_arch, parameter_name: str) -> bool: class LoRALLMTuner (line 24) | class LoRALLMTuner(Tuner): method from_pretrained (line 28) | def from_pretrained(model: torch.nn.Module, model_id: str, **kwargs) -... method save_pretrained (line 35) | def save_pretrained( method prepare_model (line 55) | def prepare_model(args: 'SftArguments', model: torch.nn.Module) -> tor... FILE: swift/tuners/adapter.py class AdapterConfig (line 18) | class AdapterConfig(SwiftConfig): method __post_init__ (line 60) | def __post_init__(self): class Adapter (line 65) | class Adapter(SwiftAdapter): method prepare_model (line 68) | def prepare_model(model: nn.Module, config: AdapterConfig, adapter_nam... method activate_adapter (line 122) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... class AdapterModule (line 131) | class AdapterModule(nn.Module, ActivationMixin): method __init__ (line 143) | def __init__( method init_weights (line 163) | def init_weights(self): method forward (line 172) | def forward(self, x, identity=None): FILE: swift/tuners/base.py class SwiftModel (line 30) | class SwiftModel(nn.Module): method __init__ (line 43) | def __init__(self, method model (line 111) | def model(self): method _deactivate_all_parts (line 114) | def _deactivate_all_parts(self): method load_state_dict (line 123) | def load_state_dict(self, state_dict, strict=True, adapter_name: str =... method state_dict (line 160) | def state_dict(self, method __getattr__ (line 230) | def __getattr__(self, key: str): method load_state_file (line 240) | def load_state_file(path, device: Optional[str] = None): method create_optimizer_param_groups (line 260) | def create_optimizer_param_groups(self, **defaults): method from_pretrained (line 295) | def from_pretrained(cls, method _prepare_model (line 375) | def _prepare_model( method create_or_update_model_card (line 392) | def create_or_update_model_card(self, output_dir: str): method add_weighted_adapter (line 433) | def add_weighted_adapter( method save_pretrained (line 525) | def save_pretrained(self, method _save_state_dict (line 602) | def _save_state_dict(output_state_dict, save_directory, safe_serializa... method disable_adapter (line 611) | def disable_adapter(self): method set_active_adapters (line 618) | def set_active_adapters(self, adapter_names: Union[List[str], str], of... method activate_adapter (line 640) | def activate_adapter(self, adapter_name: str): method deactivate_adapter (line 655) | def deactivate_adapter(self, adapter_name: str, offload: str = None): method get_trainable_parameters (line 671) | def get_trainable_parameters(self): class Swift (line 693) | class Swift: method prepare_model (line 697) | def prepare_model(model: Union[nn.Module, SwiftModel], config: Union[S... method merge_and_unload (line 717) | def merge_and_unload(model: Union[PeftModel, SwiftModel], **kwargs): method grpo_context (line 740) | def grpo_context(model: Union[SwiftModel, torch.nn.Module], processor): method merge (line 764) | def merge(model: Union[PeftModel, SwiftModel], **kwargs): method unmerge (line 776) | def unmerge(model: Union[PeftModel, SwiftModel], **kwargs): method save_to_peft_format (line 788) | def save_to_peft_format(ckpt_dir: str, output_dir: str) -> None: method from_pretrained (line 852) | def from_pretrained(model: Union[nn.Module, SwiftModel, PeftModel], FILE: swift/tuners/llamapro.py class LLaMAProConfig (line 16) | class LLaMAProConfig(SwiftConfig): method __post_init__ (line 38) | def __post_init__(self): class LLaMAPro (line 43) | class LLaMAPro(SwiftAdapter): method prepare_model (line 46) | def prepare_model(model: nn.Module, config: LLaMAProConfig, adapter_na... method _update_module_attr (line 134) | def _update_module_attr(config: LLaMAProConfig, module_list): method get_model_key_mapping (line 168) | def get_model_key_mapping(cls, model_type, config) -> ModelKeys: method search_correct_model_type (line 176) | def search_correct_model_type(cls, module: nn.Module): method _update_module_weight (line 204) | def _update_module_weight(config: LLaMAProConfig, module_list, new_mod... method _set_module_list (line 222) | def _set_module_list(config, module: nn.Module, module_list: nn.Module... method _find_module_list (line 229) | def _find_module_list(config, module: nn.Module) -> nn.ModuleList: method activate_adapter (line 234) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... method has_additional_modules (line 238) | def has_additional_modules(): FILE: swift/tuners/longlora/llama.py function _preprocess_qkv_fa2 (line 18) | def _preprocess_qkv_fa2(attn_module, query_states, key_states, value_sta... function _preprocess_qkv (line 42) | def _preprocess_qkv(attn_module, query_states, key_states, value_states,... function _postprocess_qkv (line 67) | def _postprocess_qkv(attn_module, attn_output, q_len): function _postprocess_qkv_fa2 (line 80) | def _postprocess_qkv_fa2(attn_module, attn_output, q_len): function eager_forward (line 93) | def eager_forward( function fa2_forward (line 190) | def fa2_forward( function sdpa_forward (line 302) | def sdpa_forward( function replace_llama_attn (line 390) | def replace_llama_attn(model: nn.Module): FILE: swift/tuners/longlora/longlora.py class LongLoRAModelType (line 13) | class LongLoRAModelType: class LongLoRAConfig (line 18) | class LongLoRAConfig(LoRAConfig): method __post_init__ (line 41) | def __post_init__(self): class LongLoRA (line 46) | class LongLoRA(LoRA): method prepare_model (line 49) | def prepare_model(model: nn.Module, config: LongLoRAConfig, adapter_na... function mark_embedding_normalizer_as_trainable (line 78) | def mark_embedding_normalizer_as_trainable(model: nn.Module, extra_param... FILE: swift/tuners/lora.py class LoRAConfig (line 18) | class LoRAConfig(LoraConfig, SwiftConfig): method __post_init__ (line 47) | def __post_init__(self): method can_be_saved_to_peft (line 52) | def can_be_saved_to_peft(self) -> bool: method to_peft_config (line 58) | def to_peft_config(self) -> LoraConfig: method save_pretrained (line 70) | def save_pretrained(self, save_directory: str, **kwargs) -> None: class LoRA (line 74) | class LoRA(SwiftAdapter): method prepare_model (line 77) | def prepare_model(model: nn.Module, config: LoRAConfig, adapter_name: ... method activate_adapter (line 159) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... method unpatch_lora (line 168) | def unpatch_lora(model, config: LoRAConfig, adapter_name: str): FILE: swift/tuners/lora_layers.py class LoRAActivationMixin (line 31) | class LoRAActivationMixin(ActivationMixin): method active_adapters (line 34) | def active_adapters(self): method active_adapter (line 38) | def active_adapter(self) -> str: method set_adapter (line 41) | def set_adapter(self, adapter_names, inference_mode: bool = False, off... method save_memory (line 58) | def save_memory(self, adapter_name, activate, offload=None): method merge (line 68) | def merge(self, *args, **kwargs): class Linear8bitLt (line 79) | class Linear8bitLt(LoRAActivationMixin, _Linear8bitLt): method __init__ (line 81) | def __init__( function dispatch_bnb_8bit (line 91) | def dispatch_bnb_8bit(target: torch.nn.Module, adapter_name: str, module... class Linear4bit (line 116) | class Linear4bit(LoRAActivationMixin, _Linear4bit): method __init__ (line 118) | def __init__( function dispatch_bnb_4bit (line 128) | def dispatch_bnb_4bit(target: torch.nn.Module, adapter_name: str, module... function dispatch_default (line 151) | def dispatch_default( class Embedding (line 197) | class Embedding(LoRAActivationMixin, _Embedding): method __init__ (line 199) | def __init__( class Linear (line 210) | class Linear(LoRAActivationMixin, _Linear): method __init__ (line 212) | def __init__(self, *args, module_key: str, **kwargs): class Conv2d (line 218) | class Conv2d(LoRAActivationMixin, _Conv2d): method __init__ (line 220) | def __init__(self, *args, module_key: str, **kwargs): class LoraParallelLinear (line 226) | class LoraParallelLinear(LoRAActivationMixin, _LoraParallelLinear): method __init__ (line 228) | def __init__(self, *args, module_key: str, **kwargs): class LoraModel (line 234) | class LoraModel(_LoraModel): method __init__ (line 238) | def __init__(self, model, config, adapter_name): method _mark_only_adapters_as_trainable (line 245) | def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: method inject_adapter (line 262) | def inject_adapter(self, method _convert_dtype (line 347) | def _convert_dtype(self, target: nn.Module, lora_dtype: str): method _create_and_replace (line 365) | def _create_and_replace( method _replace_module (line 442) | def _replace_module(self, parent, child_name, new_module, child): method _create_new_module (line 477) | def _create_new_module(lora_config, adapter_name, target, **kwargs): class LoRALayer (line 513) | class LoRALayer(ActivationMixin): method __init__ (line 515) | def __init__( class MergedLinear (line 540) | class MergedLinear(nn.Linear, LoRALayer): method __init__ (line 542) | def __init__(self, method reset_parameters (line 591) | def reset_parameters(self): method zero_pad (line 598) | def zero_pad(self, x): method merge_AB (line 603) | def merge_AB(self): method merge (line 611) | def merge(self, **kwargs): method unmerge (line 617) | def unmerge(self, **kwargs): method forward (line 624) | def forward(self, x: torch.Tensor, **kwargs): function mark_lora_as_trainable (line 641) | def mark_lora_as_trainable(model: nn.Module, adapter_name: str, bias: st... function lora_state_dict (line 658) | def lora_state_dict(state_dict, adapter_name: str, bias: str = 'none') -... FILE: swift/tuners/mapping.py class SwiftTuners (line 16) | class SwiftTuners: FILE: swift/tuners/neftune.py class NEFTuneConfig (line 13) | class NEFTuneConfig(SwiftConfig): method __post_init__ (line 25) | def __post_init__(self): class NEFTune (line 30) | class NEFTune(SwiftAdapter): method prepare_model (line 33) | def prepare_model(model: nn.Module, config: NEFTuneConfig, adapter_nam... method activate_adapter (line 61) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... method freeze_model (line 67) | def freeze_model(): method has_additional_modules (line 71) | def has_additional_modules(): FILE: swift/tuners/part.py class PartConfig (line 17) | class PartConfig(SwiftConfig): method __post_init__ (line 27) | def __post_init__(self): class Part (line 32) | class Part(SwiftAdapter): method target_module_matched (line 35) | def target_module_matched(module_key: str, config: PartConfig): method prepare_model (line 39) | def prepare_model(model: nn.Module, config: PartConfig, adapter_name: ... method activate_adapter (line 112) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... FILE: swift/tuners/peft.py class LoraConfig (line 42) | class LoraConfig(peft.LoraConfig): method to_peft_config (line 50) | def to_peft_config(self) -> peft.LoraConfig: method save_pretrained (line 57) | def save_pretrained(self, save_directory: str, **kwargs) -> None: method from_pretrained (line 68) | def from_pretrained(cls, pretrained_model_name_or_path: str, subfolder... function _create_and_replace_hook (line 87) | def _create_and_replace_hook(self, peft_config, adapter_name, target, *a... function _convert_dtype (line 97) | def _convert_dtype(target: torch.nn.Module, adapter_name: str, lora_dtyp... function create_optimizer_param_groups (line 108) | def create_optimizer_param_groups(self: PeftModel, **defaults): function adalora_forward (line 170) | def adalora_forward(self, *args, **kwargs): function adalora_mask_to_budget (line 206) | def adalora_mask_to_budget(self, model, budget): function keep_device_forward (line 262) | def keep_device_forward(self, *args, **kwargs): function hot_patch_peft_module (line 271) | def hot_patch_peft_module(): function get_wrapped_class (line 332) | def get_wrapped_class(module_class): function wrap_module (line 355) | def wrap_module(module): FILE: swift/tuners/prompt.py class PromptConfig (line 17) | class PromptConfig(SwiftConfig): method __post_init__ (line 60) | def __post_init__(self): class Prompt (line 65) | class Prompt(SwiftAdapter): method prepare_model (line 68) | def prepare_model(model: nn.Module, config: PromptConfig, adapter_name... method activate_adapter (line 137) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... class PromptModule (line 146) | class PromptModule(nn.Module, ActivationMixin): method __init__ (line 160) | def __init__(self, dim, layer_num, adapter_name, module_key, prompt_le... method forward (line 173) | def forward(self, x): method patch_attention_mask (line 190) | def patch_attention_mask(self, m): method extract (line 199) | def extract(self, x): FILE: swift/tuners/reft.py class ReftConfig (line 16) | class ReftConfig(SwiftConfig): method __post_init__ (line 42) | def __post_init__(self): class Reft (line 51) | class Reft(SwiftAdapter): method prepare_model (line 54) | def prepare_model(model: nn.Module, config: ReftConfig, adapter_name: ... method has_additional_modules (line 203) | def has_additional_modules(): method activate_adapter (line 207) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... FILE: swift/tuners/restuning.py class ResTuningConfig (line 18) | class ResTuningConfig(SwiftConfig): method __post_init__ (line 89) | def __post_init__(self): class ResTuning (line 95) | class ResTuning(SwiftAdapter): method prepare_model (line 98) | def prepare_model(model: nn.Module, config: ResTuningConfig, adapter_n... method activate_adapter (line 244) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... class ResTuningBypassModule (line 253) | class ResTuningBypassModule(nn.Module, ActivationMixin): method __init__ (line 257) | def __init__( method forward (line 284) | def forward(self, x_list, origin_arg, **kwargs): class ResTunerBypassBlock (line 297) | class ResTunerBypassBlock(nn.Module): method __init__ (line 299) | def __init__(self, dim, layer_num=-1, depth=-1, use_upsample=False, ze... method forward (line 318) | def forward(self, x_stem, x_bypass, target_size=None, **kwargs): FILE: swift/tuners/restuning_components.py class ResTuner (line 13) | class ResTuner(nn.Module): method __init__ (line 15) | def __init__(self, dim=None, layer_num=-1, depth=-1, zero_init_last=Fa... method forward (line 59) | def forward(self, x, *args, **kwargs): class ResAdapter (line 67) | class ResAdapter(nn.Module): method __init__ (line 69) | def __init__(self, method _zero_init_weights (line 111) | def _zero_init_weights(self, m): method _kaiming_init_weights (line 116) | def _kaiming_init_weights(self, m): method _xavier_init_weights (line 121) | def _xavier_init_weights(self, m): method forward (line 126) | def forward(self, x): class ResGroupAdapter (line 154) | class ResGroupAdapter(nn.Module): method __init__ (line 156) | def __init__(self, method _zero_init_weights (line 192) | def _zero_init_weights(self, m): method _kaiming_init_weights (line 197) | def _kaiming_init_weights(self, m): method _xavier_init_weights (line 202) | def _xavier_init_weights(self, m): method forward (line 207) | def forward(self, x): class Identity (line 235) | class Identity(nn.Module): method __init__ (line 237) | def __init__(self): method forward (line 240) | def forward(self, inputs, *args, **kwargs): class Upsample (line 244) | class Upsample(nn.Module): method __init__ (line 253) | def __init__(self, channels, use_conv=False, out_channels=None, paddin... method init_weights (line 262) | def init_weights(self): method forward (line 271) | def forward(self, x, target_size=None, *args, **kwargs): function init_weight_type (line 282) | def init_weight_type(dim, weight_type): function apply_data_weight (line 312) | def apply_data_weight(data, scaling, weight_type): function detach_tensors (line 324) | def detach_tensors(feats): function probe_tensors (line 336) | def probe_tensors(module, feats, name): function probe_input_pre_hook (line 341) | def probe_input_pre_hook(self, args): function probe_output_hook (line 347) | def probe_output_hook(self, args, result): FILE: swift/tuners/scetuning/scetuning.py class SCETuningConfig (line 17) | class SCETuningConfig(SwiftConfig): method __post_init__ (line 52) | def __post_init__(self): class SCETuning (line 57) | class SCETuning(SwiftAdapter): method prepare_model (line 60) | def prepare_model(model: nn.Module, config: SCETuningConfig, adapter_n... method activate_adapter (line 186) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... class SCETunerModule (line 195) | class SCETunerModule(nn.Module, ActivationMixin): method __init__ (line 197) | def __init__(self, method forward (line 225) | def forward(self, x, x_shortcut=None, use_shortcut=True, **kwargs): FILE: swift/tuners/scetuning/scetuning_components.py function detach_tensors (line 11) | def detach_tensors(feats): function probe_tensors (line 23) | def probe_tensors(module, feats, name): function probe_input_pre_hook (line 28) | def probe_input_pre_hook(self, args): function probe_output_hook (line 34) | def probe_output_hook(self, args, result): function choose_weight_type (line 40) | def choose_weight_type(weight_type, dim): function get_weight_value (line 56) | def get_weight_value(weight_type, scaling, x): class SCEAdapter (line 66) | class SCEAdapter(nn.Module): method __init__ (line 68) | def __init__(self, method _zero_init_weights (line 88) | def _zero_init_weights(self, m): method _kaiming_init_weights (line 93) | def _kaiming_init_weights(self, m): method init_weights (line 97) | def init_weights(self): method init_scaling (line 104) | def init_scaling(self): method forward (line 110) | def forward(self, x, x_shortcut=None, use_shortcut=True, **kwargs): FILE: swift/tuners/side.py class SideConfig (line 20) | class SideConfig(SwiftConfig): method __post_init__ (line 53) | def __post_init__(self): class Side (line 58) | class Side(SwiftAdapter): method prepare_model (line 61) | def prepare_model(model: nn.Module, config: SideConfig, adapter_name: ... method activate_adapter (line 118) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... class SideModule (line 127) | class SideModule(nn.Module, ActivationMixin): method __init__ (line 140) | def __init__(self, dim, adapter_name, module_key, side_module_name='fc... method forward (line 161) | def forward(self, x, x_main): class FCN4 (line 170) | class FCN4(nn.Module): method __init__ (line 174) | def __init__(self, out_dims=-1, **kwargs): method forward (line 195) | def forward(self, x): class Mlp (line 207) | class Mlp(nn.Module): method __init__ (line 211) | def __init__( method forward (line 236) | def forward(self, x): FILE: swift/tuners/utils.py class SwiftConfig (line 30) | class SwiftConfig: method __dict__ (line 37) | def __dict__(self): method to_dict (line 40) | def to_dict(self): method save_pretrained (line 43) | def save_pretrained(self, save_directory, **kwargs): method from_pretrained (line 65) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): method from_json_file (line 97) | def from_json_file(cls, path_json_file, **kwargs): class SwiftOutput (line 112) | class SwiftOutput: class ActivationMixin (line 148) | class ActivationMixin: method __init__ (line 154) | def __init__(self, module_key): method mark_all_sub_modules_as_plugin (line 162) | def mark_all_sub_modules_as_plugin(self: torch.nn.Module): method indent (line 169) | def indent(self): method unique_thread (line 173) | def unique_thread(self): method set_activation (line 176) | def set_activation(self, adapter_name, activate=True): method is_activated (line 182) | def is_activated(self, adapter_name): method get_activated_adapters (line 186) | def get_activated_adapters(self): class OffloadHelper (line 190) | class OffloadHelper: method __init__ (line 192) | def __init__(self): method offload_weight (line 201) | def offload_weight(weight, weight_name, offload_folder, index=None): method load_offloaded_weight (line 220) | def load_offloaded_weight(weight_file, weight_info): method offload_disk (line 239) | def offload_disk(self, module: torch.nn.Module, adapter_name, module_k... method load_disk (line 249) | def load_disk(self, module: torch.nn.Module, adapter_name, module_key): class SwiftAdapter (line 278) | class SwiftAdapter: method prepare_model (line 283) | def prepare_model(model: torch.nn.Module, config: SwiftConfig, adapter... method activate_adapter (line 287) | def activate_adapter(module: torch.nn.Module, adapter_name: str, activ... method save_memory (line 291) | def save_memory(module: torch.nn.Module, adapter_name: str, module_key... method offload (line 300) | def offload(module: torch.nn.Module, adapter_name, module_key, offload... method load (line 321) | def load(module: torch.nn.Module, adapter_name, module_key): method get_model_key_mapping (line 334) | def get_model_key_mapping(cls, model_type, config) -> ModelKeys: method state_dict_load_hook (line 350) | def state_dict_load_hook(model: torch.nn.Module, state_dict: Dict[str,... method has_additional_modules (line 354) | def has_additional_modules(): class ModulesToSaveWrapper (line 358) | class ModulesToSaveWrapper(ActivationMixin, _ModulesToSaveWrapper): method __init__ (line 360) | def __init__(self, *args, module_key, **kwargs): method active_adapter (line 366) | def active_adapter(self): method set_adapter (line 374) | def set_adapter(self, adapter_name: str, offload: str = None): method deactivate_adapter (line 382) | def deactivate_adapter(self, adapter_name: str, offload: str = None): method enable_adapters (line 391) | def enable_adapters(self, enabled: bool): function set_adapter (line 399) | def set_adapter(model, adapter_name, activate, offload): function set_trainable (line 408) | def set_trainable(model, adapter_name): function swift_to_peft_format (line 423) | def swift_to_peft_format(ckpt_dir: str, output_dir: str) -> str: FILE: swift/ui/app.py class SwiftWebUI (line 44) | class SwiftWebUI(SwiftPipeline): method run (line 49) | def run(self): function webui_main (line 113) | def webui_main(args: Optional[Union[List[str], WebUIArguments]] = None): FILE: swift/ui/base.py function update_data (line 33) | def update_data(fn): class BaseUI (line 94) | class BaseUI: method build_ui (line 121) | def build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 137) | def after_build_ui(cls, base_tab: Type['BaseUI']): method do_build_ui (line 141) | def do_build_ui(cls, base_tab: Type['BaseUI']): method save_cache (line 146) | def save_cache(cls, key, value): method list_cache (line 154) | def list_cache(cls, key): method load_cache (line 168) | def load_cache(cls, key, timestamp) -> BaseArguments: method clear_cache (line 177) | def clear_cache(cls, key): method choice (line 185) | def choice(cls, elem_id): method default (line 194) | def default(cls, elem_id): method locale (line 205) | def locale(cls, elem_id, lang): method locales (line 210) | def locales(cls, lang): method elements (line 221) | def elements(cls): method valid_elements (line 231) | def valid_elements(cls): method element_keys (line 240) | def element_keys(cls): method valid_element_keys (line 244) | def valid_element_keys(cls): method element (line 251) | def element(cls, elem_id): method argument (line 257) | def argument(cls, elem_id): method set_lang (line 262) | def set_lang(cls, lang): method get_choices_from_dataclass (line 268) | def get_choices_from_dataclass(dataclass): method get_default_value_from_dataclass (line 290) | def get_default_value_from_dataclass(dataclass): method get_argument_names (line 307) | def get_argument_names(dataclass): method update_input_model (line 314) | def update_input_model(cls, method update_all_settings (line 410) | def update_all_settings(cls, model, train_record, base_tab): method update_ddp_num (line 423) | def update_ddp_num(cls, gpu_ids, use_ddp): FILE: swift/ui/llm_eval/eval.py class Eval (line 12) | class Eval(BaseUI): method do_build_ui (line 99) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_eval/llm_eval.py class LLMEval (line 23) | class LLMEval(BaseUI): method do_build_ui (line 70) | def do_build_ui(cls, base_tab: Type['BaseUI']): method eval (line 104) | def eval(cls, *args): method eval_model (line 199) | def eval_model(cls, *args): FILE: swift/ui/llm_eval/model.py class Model (line 12) | class Model(BaseUI): method do_build_ui (line 62) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 74) | def after_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_eval/runtime.py class EvalRuntime (line 13) | class EvalRuntime(Runtime): method do_build_ui (line 83) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_export/export.py class Export (line 9) | class Export(BaseUI): method do_build_ui (line 76) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_export/llm_export.py class LLMExport (line 22) | class LLMExport(BaseUI): method do_build_ui (line 67) | def do_build_ui(cls, base_tab: Type['BaseUI']): method export (line 101) | def export(cls, *args): method export_model (line 201) | def export_model(cls, *args): FILE: swift/ui/llm_export/model.py class Model (line 12) | class Model(BaseUI): method do_build_ui (line 64) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 79) | def after_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_export/runtime.py class ExportRuntime (line 8) | class ExportRuntime(Runtime): FILE: swift/ui/llm_grpo/advanced.py class GRPOAdvanced (line 5) | class GRPOAdvanced(Advanced): FILE: swift/ui/llm_grpo/dataset.py class GRPODataset (line 5) | class GRPODataset(Dataset): FILE: swift/ui/llm_grpo/external_rollout.py class LLMRollout (line 23) | class LLMRollout(BaseUI): method do_build_ui (line 111) | def do_build_ui(cls, base_tab: Type['BaseUI']): method rollout (line 145) | def rollout(cls, *args): method rollout_model (line 246) | def rollout_model(cls, *args): method external_rollout_display (line 256) | def external_rollout_display(cls, mode): FILE: swift/ui/llm_grpo/external_runtime.py class RolloutRuntime (line 16) | class RolloutRuntime(Runtime): method do_build_ui (line 86) | def do_build_ui(cls, base_tab: Type['BaseUI']): method kill_task (line 116) | def kill_task(cls, task): FILE: swift/ui/llm_grpo/grpo_advanced.py class GrpoAdvanced (line 11) | class GrpoAdvanced(BaseUI): method do_build_ui (line 192) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 229) | def after_build_ui(cls, base_tab: Type['BaseUI']): method update_input_models (line 240) | def update_input_models(cls, FILE: swift/ui/llm_grpo/hyper.py class GRPOHyper (line 5) | class GRPOHyper(Hyper): FILE: swift/ui/llm_grpo/llm_grpo.py class LLMGRPO (line 27) | class LLMGRPO(LLMTrain): method do_build_ui (line 216) | def do_build_ui(cls, base_tab: Type['BaseUI']): method prepare_sub_to_filter (line 323) | def prepare_sub_to_filter(cls): FILE: swift/ui/llm_grpo/lora.py class GRPOLoRA (line 5) | class GRPOLoRA(LoRA): FILE: swift/ui/llm_grpo/model.py class GRPOModel (line 5) | class GRPOModel(Model): FILE: swift/ui/llm_grpo/optimizer.py class GRPOOptimizer (line 5) | class GRPOOptimizer(Optimizer): FILE: swift/ui/llm_grpo/quantization.py class GRPOQuantization (line 5) | class GRPOQuantization(Quantization): FILE: swift/ui/llm_grpo/report_to.py class GRPOReportTo (line 5) | class GRPOReportTo(ReportTo): FILE: swift/ui/llm_grpo/reward.py class Reward (line 8) | class Reward(BaseUI): method do_build_ui (line 41) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_grpo/rollout.py class Rollout (line 8) | class Rollout(BaseUI): method do_build_ui (line 202) | def do_build_ui(cls, base_tab: Type['BaseUI']): method update_num_gen (line 232) | def update_num_gen(per_device_batch_size, steps_per_generation, num_pr... FILE: swift/ui/llm_grpo/runtime.py class GRPORuntime (line 11) | class GRPORuntime(Runtime): method save_cmd (line 159) | def save_cmd(cls, cmd): FILE: swift/ui/llm_grpo/save.py class GRPOSave (line 5) | class GRPOSave(Save): FILE: swift/ui/llm_grpo/target.py class GRPOTarget (line 5) | class GRPOTarget(Target): FILE: swift/ui/llm_grpo/tuner.py class GRPOTuner (line 11) | class GRPOTuner(Tuner): method do_build_ui (line 18) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_infer/generate.py class Generate (line 8) | class Generate(BaseUI): method do_build_ui (line 56) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_infer/llm_infer.py class LLMInfer (line 26) | class LLMInfer(BaseUI): method do_build_ui (line 124) | def do_build_ui(cls, base_tab: Type['BaseUI']): method deploy (line 189) | def deploy(cls, *args): method deploy_model (line 293) | def deploy_model(cls, *args): method register_clean_hook (line 302) | def register_clean_hook(cls): method signal_handler (line 308) | def signal_handler(*args, **kwargs): method clear_session (line 313) | def clear_session(cls): method _replace_tag_with_media (line 317) | def _replace_tag_with_media(cls, infer_request: InferRequest): method agent_type (line 338) | def agent_type(cls, response): method parse_text (line 348) | def parse_text(cls, messages): method send_message (line 360) | def send_message(cls, running_task, template_type, prompt: str, image,... FILE: swift/ui/llm_infer/model.py class Model (line 13) | class Model(BaseUI): method do_build_ui (line 103) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 125) | def after_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_infer/runtime.py class Runtime (line 20) | class Runtime(BaseUI): method do_build_ui (line 93) | def do_build_ui(cls, base_tab: Type['BaseUI']): method break_log_event (line 122) | def break_log_event(cls, task): method update_log (line 129) | def update_log(cls): method wait (line 133) | def wait(cls, task): method get_all_ports (line 175) | def get_all_ports(cls): method refresh_tasks (line 193) | def refresh_tasks(cls, running_task=None): method construct_running_task (line 221) | def construct_running_task(proc): method parse_info_from_cmdline (line 231) | def parse_info_from_cmdline(cls, task): method kill_task (line 248) | def kill_task(cls, task): method task_changed (line 265) | def task_changed(cls, task, base_tab): FILE: swift/ui/llm_rlhf/advanced.py class RLHFAdvanced (line 5) | class RLHFAdvanced(Advanced): FILE: swift/ui/llm_rlhf/dataset.py class RLHFDataset (line 5) | class RLHFDataset(Dataset): FILE: swift/ui/llm_rlhf/hyper.py class RLHFHyper (line 5) | class RLHFHyper(Hyper): FILE: swift/ui/llm_rlhf/llm_rlhf.py class LLMRLHF (line 25) | class LLMRLHF(LLMTrain): method do_build_ui (line 232) | def do_build_ui(cls, base_tab: Type['BaseUI']): method prepare_sub_to_filter (line 317) | def prepare_sub_to_filter(cls): method filter_rlhf_args (line 325) | def filter_rlhf_args(cls, uncleaned_kwargs): FILE: swift/ui/llm_rlhf/lora.py class RLHFLoRA (line 5) | class RLHFLoRA(LoRA): FILE: swift/ui/llm_rlhf/model.py class RLHFModel (line 5) | class RLHFModel(Model): FILE: swift/ui/llm_rlhf/optimizer.py class RLHFOptimizer (line 5) | class RLHFOptimizer(Optimizer): FILE: swift/ui/llm_rlhf/quantization.py class RLHFQuantization (line 5) | class RLHFQuantization(Quantization): FILE: swift/ui/llm_rlhf/report_to.py class RLHFReportTo (line 5) | class RLHFReportTo(ReportTo): FILE: swift/ui/llm_rlhf/rlhf.py class RLHF (line 10) | class RLHF(BaseUI): method do_build_ui (line 147) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 192) | def after_build_ui(cls, base_tab: Type['BaseUI']): method update_beta (line 207) | def update_beta(rlhf_type): FILE: swift/ui/llm_rlhf/runtime.py class RLHFRuntime (line 11) | class RLHFRuntime(Runtime): method save_cmd (line 159) | def save_cmd(cls, cmd): FILE: swift/ui/llm_rlhf/save.py class RLHFSave (line 5) | class RLHFSave(Save): FILE: swift/ui/llm_rlhf/target.py class RLHFTarget (line 5) | class RLHFTarget(Target): FILE: swift/ui/llm_rlhf/tuner.py class RLHFTuner (line 11) | class RLHFTuner(Tuner): method do_build_ui (line 18) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_sample/llm_sample.py class LLMSample (line 26) | class LLMSample(BaseUI): method do_build_ui (line 120) | def do_build_ui(cls, base_tab: Type['BaseUI']): method sample (line 166) | def sample(cls, *args): method sample_model (line 266) | def sample_model(cls, *args): FILE: swift/ui/llm_sample/model.py class Model (line 12) | class Model(BaseUI): method do_build_ui (line 82) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 99) | def after_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_sample/runtime.py class SampleRuntime (line 8) | class SampleRuntime(Runtime): FILE: swift/ui/llm_sample/sample.py class Sample (line 8) | class Sample(BaseUI): method do_build_ui (line 70) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/advanced.py class Advanced (line 8) | class Advanced(BaseUI): method do_build_ui (line 102) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/dataset.py class Dataset (line 9) | class Dataset(BaseUI): method do_build_ui (line 63) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/hyper.py class Hyper (line 8) | class Hyper(BaseUI): method do_build_ui (line 118) | def do_build_ui(cls, base_tab: Type['BaseUI']): method update_lr (line 145) | def update_lr(tuner_type): FILE: swift/ui/llm_train/llm_train.py class LLMTrain (line 36) | class LLMTrain(BaseUI): method do_build_ui (line 242) | def do_build_ui(cls, base_tab: Type['BaseUI']): method update_runtime (line 321) | def update_runtime(cls): method train (line 325) | def train(cls, *args): method train_studio (line 497) | def train_studio(cls, *args): method train_local (line 518) | def train_local(cls, *args): method prepare_sub_to_filter (line 542) | def prepare_sub_to_filter(cls): method remove_useless_args (line 551) | def remove_useless_args(cls, uncleaned_kwargs, tabs_relation_dict): FILE: swift/ui/llm_train/lora.py class LoRA (line 8) | class LoRA(BaseUI): method do_build_ui (line 58) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/model.py class Model (line 12) | class Model(BaseUI): method do_build_ui (line 93) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 118) | def after_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/optimizer.py class Optimizer (line 8) | class Optimizer(BaseUI): method do_build_ui (line 121) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/quantization.py class Quantization (line 8) | class Quantization(BaseUI): method do_build_ui (line 60) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/report_to.py class ReportTo (line 8) | class ReportTo(BaseUI): method do_build_ui (line 58) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/runtime.py class Runtime (line 26) | class Runtime(BaseUI): method do_build_ui (line 295) | def do_build_ui(cls, base_tab: Type['BaseUI']): method after_build_ui (line 367) | def after_build_ui(cls, base_tab: Type['BaseUI']): method get_plot (line 374) | def get_plot(cls, task): method update_log (line 390) | def update_log(cls, task): method get_initial (line 399) | def get_initial(cls, line): method wait (line 407) | def wait(cls, logging_dir, task): method break_log_event (line 458) | def break_log_event(cls, task): method show_log (line 465) | def show_log(cls, logging_dir): method start_tb (line 469) | def start_tb(cls, logging_dir): method close_tb (line 492) | def close_tb(logging_dir): method refresh_tasks (line 498) | def refresh_tasks(running_task=None, group=None): method construct_running_task (line 532) | def construct_running_task(proc): method parse_info_from_cmdline (line 542) | def parse_info_from_cmdline(task): method kill_task (line 579) | def kill_task(task): method reset (line 596) | def reset(): method task_changed (line 600) | def task_changed(task, base_tab): method plot (line 626) | def plot(task): method save_cmd (line 685) | def save_cmd(cls, cmd): method show_train_sh (line 695) | def show_train_sh(cmd): method cmd_to_sh_format (line 702) | def cmd_to_sh_format(cmd): method close_cmd_show (line 716) | def close_cmd_show(): FILE: swift/ui/llm_train/save.py class Save (line 8) | class Save(BaseUI): method do_build_ui (line 72) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/self_cog.py class SelfCog (line 8) | class SelfCog(BaseUI): method do_build_ui (line 42) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/target.py class Target (line 8) | class Target(BaseUI): method do_build_ui (line 70) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/task.py class Task (line 8) | class Task(BaseUI): method do_build_ui (line 64) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/tuner.py class Tuner (line 10) | class Tuner(BaseUI): method do_build_ui (line 290) | def do_build_ui(cls, base_tab: Type['BaseUI']): FILE: swift/ui/llm_train/utils.py function run_and_get_log (line 10) | async def run_and_get_log(*args, timeout=None): function run_command_in_subprocess (line 26) | def run_command_in_subprocess(*args, timeout): function close_loop (line 37) | def close_loop(handler): function run_command_in_background_with_popen (line 43) | def run_command_in_background_with_popen(command, all_envs, log_file): FILE: swift/utils/constants.py class Invoke (line 19) | class Invoke(object): FILE: swift/utils/dequantizer.py class Fp8Dequantizer (line 5) | class Fp8Dequantizer: method __init__ (line 7) | def __init__(self, block_size: Tuple[int, int] = (128, 128)): method convert (line 10) | def convert( class MxFp4Dequantizer (line 46) | class MxFp4Dequantizer: method convert (line 48) | def convert( FILE: swift/utils/env.py function use_hf_hub (line 12) | def use_hf_hub(): function get_hf_endpoint (line 16) | def get_hf_endpoint(): function is_deepspeed_enabled (line 23) | def is_deepspeed_enabled(): function get_dist_setting (line 27) | def get_dist_setting() -> Tuple[int, int, int, int]: function get_node_setting (line 37) | def get_node_setting(): function is_local_master (line 43) | def is_local_master(): function is_master (line 48) | def is_master(): function is_last_rank (line 53) | def is_last_rank(): function is_dist (line 58) | def is_dist(): function is_mp (line 64) | def is_mp() -> bool: function is_mp_ddp (line 78) | def is_mp_ddp() -> bool: function is_pai_training_job (line 87) | def is_pai_training_job() -> bool: function get_pai_tensorboard_dir (line 91) | def get_pai_tensorboard_dir() -> Optional[str]: FILE: swift/utils/hf_config.py class HfConfigFactory (line 10) | class HfConfigFactory: method get_torch_dtype (line 16) | def get_torch_dtype(config: Union[PretrainedConfig, Dict[str, Any]], method _get_config_attrs (line 28) | def _get_config_attrs(config: Union[PretrainedConfig, Dict[str, Any]], method is_moe_model (line 55) | def is_moe_model(config) -> bool: method is_multimodal (line 64) | def is_multimodal(config) -> bool: method get_config_attr (line 78) | def get_config_attr(config: Union[PretrainedConfig, Dict[str, Any]], method set_config_attr (line 89) | def set_config_attr(config: Union[PretrainedConfig, Dict[str, Any]], method set_model_config_attr (line 106) | def set_model_config_attr(model, attr_name: str, value: Any) -> None: method get_max_model_len (line 112) | def get_max_model_len(config: Union[PretrainedConfig, Dict[str, Any]])... method set_max_model_len (line 137) | def set_max_model_len(config: Union[PretrainedConfig, Dict[str, Any]],... method compat_zero3 (line 157) | def compat_zero3(config: PretrainedConfig) -> None: method to_torch_dtype (line 166) | def to_torch_dtype(torch_dtype: Union[str, torch.dtype, None]) -> Opti... method get_quant_info (line 174) | def get_quant_info(config: Union[PretrainedConfig, Dict[str, Any]]) ->... FILE: swift/utils/hub_utils.py function safe_snapshot_download (line 16) | def safe_snapshot_download(model_id_or_path: str, function git_clone_github (line 111) | def git_clone_github(github_url: str, function download_ms_file (line 151) | def download_ms_file(url: str, local_path: str, cookies=None) -> None: FILE: swift/utils/import_utils.py function is_vllm_available (line 15) | def is_vllm_available(): function is_vllm_ascend_available (line 19) | def is_vllm_ascend_available(): function is_lmdeploy_available (line 23) | def is_lmdeploy_available(): function is_liger_available (line 27) | def is_liger_available(): function is_swanlab_available (line 31) | def is_swanlab_available(): function is_megatron_available (line 35) | def is_megatron_available(): function is_flash_attn_3_available (line 39) | def is_flash_attn_3_available(): function is_flash_attn_2_available (line 44) | def is_flash_attn_2_available(): function is_unsloth_available (line 48) | def is_unsloth_available() -> bool: function is_pyreft_available (line 52) | def is_pyreft_available() -> bool: function is_wandb_available (line 56) | def is_wandb_available() -> bool: function is_trl_available (line 60) | def is_trl_available() -> bool: class _LazyModule (line 64) | class _LazyModule(ModuleType): method __init__ (line 71) | def __init__(self, name, module_file, import_structure, module_spec=No... method __dir__ (line 88) | def __dir__(self): method __getattr__ (line 97) | def __getattr__(self, name: str) -> Any: method _get_module (line 111) | def _get_module(self, module_name: str): method __reduce__ (line 114) | def __reduce__(self): FILE: swift/utils/io_utils.py function read_from_jsonl (line 17) | def read_from_jsonl(fpath: str, encoding: str = 'utf-8') -> List[Any]: function write_to_jsonl (line 25) | def write_to_jsonl(fpath: str, obj_list: List[Any], encoding: str = 'utf... class JsonlWriter (line 34) | class JsonlWriter: method __init__ (line 36) | def __init__(self, method _append_worker (line 56) | def _append_worker(self): method _append (line 61) | def _append(self, obj: Union[Dict, List[Dict]], gather_obj: bool = Fal... method append (line 75) | def append(self, obj: Union[Dict, List[Dict]], gather_obj: bool = False): method _write_buffer (line 84) | def _write_buffer(self, text: str): function append_to_jsonl (line 98) | def append_to_jsonl(fpath: str, function get_file_mm_type (line 108) | def get_file_mm_type(file_name: str) -> Literal['image', 'video', 'audio']: FILE: swift/utils/logger.py function _is_local_master (line 12) | def _is_local_master(): function info_if (line 27) | def info_if(self, msg, cond, *args, **kwargs): function warning_if (line 33) | def warning_if(self, msg, cond, *args, **kwargs): function info_once (line 39) | def info_once(self, msg, *args, **kwargs): function warning_once (line 47) | def warning_once(self, msg, *args, **kwargs): function get_logger (line 55) | def get_logger(log_file: Optional[str] = None, log_level: Optional[int] ... function logger_context (line 127) | def logger_context(logger, log_leval): function ms_logger_context (line 137) | def ms_logger_context(log_leval): function add_file_handler_if_needed (line 142) | def add_file_handler_if_needed(logger, log_file, file_mode, log_level): FILE: swift/utils/np_utils.py function transform_jsonl_to_df (line 7) | def transform_jsonl_to_df(dict_list: List[Dict[str, Any]]) -> pd.DataFrame: function get_seed (line 20) | def get_seed(random_state: Optional[np.random.RandomState] = None) -> int: function stat_array (line 28) | def stat_array(array: Union[np.ndarray, List[int], 'torch.Tensor']) -> T... FILE: swift/utils/processor_utils.py class ProcessorMixin (line 16) | class ProcessorMixin: method tokenizer (line 19) | def tokenizer(self): method tokenizer (line 26) | def tokenizer(self, value): FILE: swift/utils/safetensors.py class LazyTensor (line 10) | class LazyTensor: method __init__ (line 12) | def __init__(self, tensor=None, loader=None): method load (line 17) | def load(self): class SafetensorLazyLoader (line 23) | class SafetensorLazyLoader: method __init__ (line 25) | def __init__(self, hf_model_dir: str, is_peft_format: bool = False): method _open_file (line 32) | def _open_file(self, filename: str): method _load_index (line 39) | def _load_index(self): method get_state_dict (line 59) | def get_state_dict(self): method _load_tensor (line 65) | def _load_tensor(self, key): method close (line 70) | def close(self): method __enter__ (line 73) | def __enter__(self): method __exit__ (line 76) | def __exit__(self, exc_type, exc_val, exc_tb): class StreamingSafetensorSaver (line 80) | class StreamingSafetensorSaver: method __init__ (line 82) | def __init__( method add_tensor (line 106) | def add_tensor(self, name, tensor): method _save_current_shard (line 117) | def _save_current_shard(self, shard_filename: str = None): method finalize (line 135) | def finalize(self): method _save_index (line 162) | def _save_index(self, weight_map): FILE: swift/utils/shutdown_manager.py class ShutdownManager (line 5) | class ShutdownManager: method __init__ (line 7) | def __init__(self, signals=None, stop_file=None): method _handler (line 16) | def _handler(self, signum, frame): method register (line 19) | def register(self): method unregister (line 24) | def unregister(self): method should_shutdown (line 29) | def should_shutdown(self) -> bool: method reset (line 34) | def reset(self): FILE: swift/utils/tb_utils.py function read_tensorboard_file (line 10) | def read_tensorboard_file(fpath: str) -> Dict[str, List[Item]]: function tensorboard_smoothing (line 27) | def tensorboard_smoothing(values: List[float], smooth: float = 0.9) -> L... function plot_images (line 39) | def plot_images(images_dir: str, FILE: swift/utils/torch_utils.py function _find_local_mac (line 26) | def _find_local_mac() -> str: function synchronize (line 32) | def synchronize(device: Union[torch.device, str, int, None] = None): function time_synchronize (line 41) | def time_synchronize() -> float: function disable_safe_ddp_context_use_barrier (line 50) | def disable_safe_ddp_context_use_barrier(): function safe_ddp_context (line 60) | def safe_ddp_context(hash_id: Optional[str], use_barrier: bool = True): function get_device (line 88) | def get_device(local_rank: Optional[Union[str, int]] = None) -> str: function get_current_device (line 104) | def get_current_device(): function get_torch_device (line 116) | def get_torch_device(): function set_device (line 127) | def set_device(local_rank: Optional[Union[str, int]] = None): function get_device_count (line 136) | def get_device_count() -> int: function empty_cache (line 145) | def empty_cache(): function gc_collect (line 154) | def gc_collect() -> None: function get_last_valid_indices (line 159) | def get_last_valid_indices(attention_mask: torch.Tensor) -> torch.Tensor: class Serializer (line 195) | class Serializer: method to_tensor (line 198) | def to_tensor(obj): method from_tensor (line 206) | def from_tensor(obj): function set_default_ddp_config (line 215) | def set_default_ddp_config(): function init_process_group (line 228) | def init_process_group(backend: Optional[str] = None, timeout: int = 180... function check_shared_disk (line 243) | def check_shared_disk(error, cache_dir: Optional[str] = None): function to_float_dtype (line 270) | def to_float_dtype(data: Any, dtype: torch.dtype) -> Any: function to_device (line 282) | def to_device(data: Any, device: Union[str, torch.device, int], non_bloc... function get_generative_reranker_logits (line 294) | def get_generative_reranker_logits(lm_head_weight, tokenizer, hidden_sta... function get_max_reserved_memory (line 304) | def get_max_reserved_memory() -> float: FILE: swift/utils/transformers_utils.py function get_n_params_grads (line 18) | def get_n_params_grads(model) -> Tuple[List[int], List[int]]: function get_model_parameter_info (line 32) | def get_model_parameter_info(model: nn.Module, name: Optional[str] = Non... function find_sub_module (line 51) | def find_sub_module(module: torch.nn.Module, module_name: str) -> List[t... function show_layers (line 61) | def show_layers(model: nn.Module, max_lines: Optional[int] = 20) -> None: function freeze_parameters (line 70) | def freeze_parameters(model: nn.Module, function activate_parameters (line 101) | def activate_parameters(model: nn.Module, function find_layers (line 133) | def find_layers( function find_norm (line 167) | def find_norm(model: nn.Module) -> List[str]: function find_embedding (line 174) | def find_embedding(model: nn.Module) -> List[str]: function find_all_linears (line 178) | def find_all_linears(model, model_arch=None, extra_layers=None, sub_modu... function get_multimodal_target_regex (line 207) | def get_multimodal_target_regex( function get_cu_seqlens_from_position_ids (line 254) | def get_cu_seqlens_from_position_ids(position_ids: torch.LongTensor): function get_position_ids_from_cu_seqlens (line 263) | def get_position_ids_from_cu_seqlens(cu_seqlens: torch.LongTensor): function seed_worker (line 269) | def seed_worker(worker_id: int, num_workers: int, rank: int): function unwrap_model_for_generation (line 279) | def unwrap_model_for_generation( function disable_deepspeed_zero3 (line 305) | def disable_deepspeed_zero3(): function get_modules_to_not_convert (line 315) | def get_modules_to_not_convert(model): function get_packed_seq_params (line 337) | def get_packed_seq_params(position_ids: torch.Tensor): FILE: swift/utils/utils.py function check_json_format (line 34) | def check_json_format(obj: Any, token_safe: bool = True) -> Any: function _get_version (line 70) | def _get_version(work_dir: str) -> int: function format_time (line 85) | def format_time(seconds): function deep_getattr (line 103) | def deep_getattr(obj, attr: str, default=None): function seed_everything (line 115) | def seed_everything(seed: Optional[int] = None, full_determinism: bool =... function add_version_to_work_dir (line 128) | def add_version_to_work_dir(work_dir: str) -> str: function _patch_args (line 145) | def _patch_args(class_type): function _patch_get_type_hints (line 155) | def _patch_get_type_hints(): function parse_args (line 174) | def parse_args(class_type: Type[_T], argv: Optional[List[str]] = None) -... function lower_bound (line 191) | def lower_bound(lo: int, hi: int, cond: Callable[[int], bool]) -> int: function upper_bound (line 202) | def upper_bound(lo: int, hi: int, cond: Callable[[int], bool]) -> int: function test_time (line 213) | def test_time(func: Callable[[], _T], function read_multi_line (line 239) | def read_multi_line(addi_prompt: str = '') -> str: function subprocess_run (line 252) | def subprocess_run(command: List[str], env: Optional[Dict[str, str]] = N... function get_env_args (line 262) | def get_env_args(args_name: str, type_func: Callable[[str], _T], default... function find_node_ip (line 278) | def find_node_ip() -> Optional[str]: function find_free_port (line 294) | def find_free_port(start_port: Optional[int] = None, retry: int = 100) -... function copy_files_by_pattern (line 308) | def copy_files_by_pattern(source_dir, dest_dir, patterns, exclude_patter... function split_list (line 370) | def split_list(ori_list: List[_T], num_shards: int, contiguous=True) -> ... function patch_getattr (line 383) | def patch_getattr(obj_cls, item_name: str): function import_external_file (line 400) | def import_external_file(file_path: str): function json_parse_to_dict (line 408) | def json_parse_to_dict(value: Union[str, Dict, None], strict: bool = Tru... function retry_decorator (line 435) | def retry_decorator(retry=3): function start_event_loop_in_daemon (line 455) | def start_event_loop_in_daemon(name: str = None) -> Tuple[threading.Thre... function shutdown_event_loop_in_daemon (line 480) | def shutdown_event_loop_in_daemon(thread: threading.Thread = None, loop:... function remove_response (line 495) | def remove_response(messages) -> Optional[str]: function to_abspath (line 513) | def to_abspath(path: Union[str, List[str], None], check_path_exist: bool... FILE: tests/app/test_app.py function test_llm (line 1) | def test_llm(): function test_lora (line 6) | def test_lora(): function test_mllm (line 11) | def test_mllm(): function test_audio (line 16) | def test_audio(): FILE: tests/deploy/test_dataset.py function _test_client (line 1) | def _test_client(port=8000): function _test (line 24) | def _test(infer_backend): function test_vllm (line 35) | def test_vllm(): function test_lmdeploy (line 39) | def test_lmdeploy(): function test_pt (line 43) | def test_pt(): function test_vllm_origin (line 47) | def test_vllm_origin(): FILE: tests/deploy/test_logprobs.py function _test_client (line 1) | def _test_client(port: int, print_logprobs: bool = False, test_vlm: bool... function _test (line 66) | def _test(infer_backend, test_vlm: bool = False): function test_vllm_vlm (line 82) | def test_vllm_vlm(): function test_vllm (line 86) | def test_vllm(): function test_lmdeploy (line 90) | def test_lmdeploy(): function test_pt (line 94) | def test_pt(): function test_vllm_origin (line 98) | def test_vllm_origin(): FILE: tests/eval/test_eval.py function test_eval_native (line 8) | def test_eval_native(): function test_eval_llm (line 25) | def test_eval_llm(): function test_eval_mllm (line 36) | def test_eval_mllm(): function test_eval_url (line 51) | def test_eval_url(): FILE: tests/export/test_quant.py function test_llm_quant (line 7) | def test_llm_quant(quant_method: Literal['gptq', 'awq'] = 'awq'): function test_vlm_quant (line 17) | def test_vlm_quant(quant_method: Literal['gptq', 'awq'] = 'awq'): function test_audio_quant (line 27) | def test_audio_quant(quant_method: Literal['gptq', 'awq'] = 'awq'): function test_vlm_bnb_quant (line 37) | def test_vlm_bnb_quant(): function test_bert (line 44) | def test_bert(): function test_reward_model (line 52) | def test_reward_model(): function test_fp8 (line 63) | def test_fp8(): FILE: tests/general/test_arch.py function test_model_arch (line 1) | def test_model_arch(): FILE: tests/general/test_dataset.py function _test_dataset (line 6) | def _test_dataset(datasets: List[str], num_proc: int = 1, strict: bool =... function test_sft (line 12) | def test_sft(): function test_mllm (line 30) | def test_mllm(): function test_agent (line 52) | def test_agent(): function test_dpo (line 57) | def test_dpo(): function test_kto (line 64) | def test_kto(): function test_pretrain (line 68) | def test_pretrain(): function test_dataset_info (line 72) | def test_dataset_info(): function test_cls (line 77) | def test_cls(): FILE: tests/general/test_model.py function test_qwen2 (line 9) | def test_qwen2(): function test_modelscope_hub (line 23) | def test_modelscope_hub(): FILE: tests/general/test_stream.py function test_local_dataset (line 4) | def test_local_dataset(): function test_hub_dataset (line 12) | def test_hub_dataset(): FILE: tests/general/test_template.py function test_template (line 6) | def test_template(): function test_mllm (line 29) | def test_mllm(): function _test_dataset_map (line 54) | def _test_dataset_map(model_id: str, dataset_id: str): function test_llm_dataset_map (line 67) | def test_llm_dataset_map(): function test_mllm_dataset_map (line 71) | def test_mllm_dataset_map(): FILE: tests/hub/test_check_model.py class TestCheckModel (line 8) | class TestCheckModel(unittest.TestCase): method setUp (line 10) | def setUp(self): method tearDown (line 16) | def tearDown(self): method test_check_model (line 21) | def test_check_model(self): FILE: tests/infer/test_agent.py function test_sft (line 13) | def test_sft(): function test_infer (line 19) | def test_infer(): FILE: tests/infer/test_infer.py function _prepare (line 8) | def _prepare(infer_backend: Literal['vllm', 'transformers', 'lmdeploy']): function test_infer (line 29) | def test_infer(infer_backend): function test_stream (line 43) | def test_stream(infer_backend): FILE: tests/infer/test_logprobs.py function _prepare (line 9) | def _prepare(infer_backend: Literal['vllm', 'transformers', 'lmdeploy']): function test_infer (line 34) | def test_infer(engine, infer_requests): function test_stream (line 48) | def test_stream(engine, infer_requests): FILE: tests/infer/test_main.py function test_cli (line 6) | def test_cli(infer_backend): function test_cli_jinja (line 12) | def test_cli_jinja(infer_backend): function test_dataset (line 18) | def test_dataset(infer_backend): function test_mllm_dataset (line 28) | def test_mllm_dataset(infer_backend): function test_dataset_ddp (line 38) | def test_dataset_ddp(): function test_dataset_mp_ddp (line 46) | def test_dataset_mp_ddp(): function test_emu3_gen (line 54) | def test_emu3_gen(infer_backend): FILE: tests/infer/test_max_memory.py function test_max_memory (line 4) | def test_max_memory(): FILE: tests/infer/test_mllm.py function _prepare (line 8) | def _prepare(infer_backend: Literal['vllm', 'transformers', 'lmdeploy']): function test_infer (line 36) | def test_infer(engine, infer_requests): function test_stream (line 49) | def test_stream(engine, infer_requests): FILE: tests/infer/test_sglang.py function test_engine (line 6) | def test_engine(): function test_engine_stream (line 20) | def test_engine_stream(): function test_infer (line 33) | def test_infer(): function test_eval (line 39) | def test_eval(): FILE: tests/llm/test_custom.py class CustomPreprocessor (line 12) | class CustomPreprocessor(ResponsePreprocessor): method preprocess (line 18) | def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: class TestCustom (line 49) | class TestCustom(unittest.TestCase): method test_custom_model (line 51) | def test_custom_model(self): method test_custom_dataset (line 64) | def test_custom_dataset(self): FILE: tests/llm/test_dataset.py class TestDataset (line 6) | class TestDataset(unittest.TestCase): method test_load_v_dataset (line 8) | def test_load_v_dataset(self): FILE: tests/llm/test_ollama_export.py class TestTemplate (line 14) | class TestTemplate(unittest.TestCase): method setUp (line 16) | def setUp(self): method tearDown (line 20) | def tearDown(self): method test_llama3 (line 26) | def test_llama3(self): method test_chatglm4 (line 43) | def test_chatglm4(self): method test_qwen2 (line 62) | def test_qwen2(self): FILE: tests/llm/test_run.py class TestRun (line 34) | class TestRun(unittest.TestCase): method setUp (line 36) | def setUp(self): method tearDown (line 41) | def tearDown(self): method test_template (line 44) | def test_template(self): method test_hf_hub (line 63) | def test_hf_hub(self): method test_basic (line 85) | def test_basic(self): method test_vl_audio (line 142) | def test_vl_audio(self): method test_custom_dataset (line 178) | def test_custom_dataset(self): method test_rlhf (line 230) | def test_rlhf(self): method test_loss_matching (line 282) | def test_loss_matching(self): method test_pai_compat (line 319) | def test_pai_compat(self): function data_collate_fn (line 346) | def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str,... class BertTrainer (line 358) | class BertTrainer(Trainer): method compute_loss (line 360) | def compute_loss(self, model, inputs, return_outputs=False): class TestTrainer (line 368) | class TestTrainer(unittest.TestCase): method setUp (line 370) | def setUp(self): method tearDown (line 376) | def tearDown(self): method test_trainer (line 383) | def test_trainer(self): FILE: tests/llm/test_template.py function _infer_model (line 16) | def _infer_model(engine, system=None, messages=None): class TestTemplate (line 36) | class TestTemplate(unittest.TestCase): method test_template (line 39) | def test_template(self): method test_tool_message_join (line 46) | def test_tool_message_join(self): FILE: tests/llm/test_utils.py class TestLlmUtils (line 7) | class TestLlmUtils(unittest.TestCase): method test_count_startswith (line 9) | def test_count_startswith(self): method test_count_endswith (line 13) | def test_count_endswith(self): method test_dataset (line 18) | def test_dataset(self): FILE: tests/megatron/export/test_export.py function test_to_mcore (line 8) | def test_to_mcore(): function test_to_hf (line 19) | def test_to_hf(): function test_peft_to_mcore (line 31) | def test_peft_to_mcore(): function test_peft_to_hf (line 44) | def test_peft_to_hf(): FILE: tests/megatron/test_align/test_llm.py function _test_model (line 7) | def _test_model(model_id, **kwargs): function test_qwen2 (line 31) | def test_qwen2(): function test_llama2 (line 35) | def test_llama2(): function test_llama3 (line 39) | def test_llama3(): function test_marco_o1 (line 43) | def test_marco_o1(): function test_deepseek_r1_llama (line 47) | def test_deepseek_r1_llama(): function test_deepseek_r1_qwen (line 51) | def test_deepseek_r1_qwen(): function test_deepseek_r1_qwen_0528 (line 55) | def test_deepseek_r1_qwen_0528(): function test_yi (line 59) | def test_yi(): function test_megrez (line 63) | def test_megrez(): function test_llama3_1 (line 67) | def test_llama3_1(): function test_llama3_2 (line 71) | def test_llama3_2(): function test_qwen3 (line 75) | def test_qwen3(): function test_internlm3 (line 80) | def test_internlm3(): function test_qwen2_moe (line 84) | def test_qwen2_moe(): function test_qwen3_moe (line 88) | def test_qwen3_moe(): function test_mimo (line 92) | def test_mimo(): function test_moonlight (line 97) | def test_moonlight(): function test_gpt_oss (line 101) | def test_gpt_oss(): function test_deepseek_v2 (line 105) | def test_deepseek_v2(): function test_deepseek_moe (line 110) | def test_deepseek_moe(): function test_dots (line 114) | def test_dots(): function test_kimi_dev (line 118) | def test_kimi_dev(): function test_hunyuan (line 122) | def test_hunyuan(): function test_ernie (line 126) | def test_ernie(): function test_glm4_5 (line 131) | def test_glm4_5(): function test_qwen3_next (line 135) | def test_qwen3_next(): function test_tongyi_deepresearch (line 139) | def test_tongyi_deepresearch(): function test_glm4 (line 143) | def test_glm4(): function test_minimax_m2 (line 147) | def test_minimax_m2(): function test_glm4_moe_lite (line 151) | def test_glm4_moe_lite(): function test_olmoe (line 155) | def test_olmoe(): FILE: tests/megatron/test_align/test_mllm.py function _test_model (line 7) | def _test_model(model_id, **kwargs): function test_qwen2_5_vl (line 31) | def test_qwen2_5_vl(): function test_qwen2_vl (line 36) | def test_qwen2_vl(): function test_qwen2_5_omni (line 41) | def test_qwen2_5_omni(): function test_internvl3 (line 46) | def test_internvl3(): function test_internvl3_5 (line 51) | def test_internvl3_5(): function test_internvl3_5_moe (line 55) | def test_internvl3_5_moe(): function test_internvl3_hf (line 60) | def test_internvl3_hf(): function test_internvl3_5_hf (line 64) | def test_internvl3_5_hf(): function test_internvl3_5_moe_hf (line 68) | def test_internvl3_5_moe_hf(): function test_glm4_5v (line 73) | def test_glm4_5v(): function test_glm4_6v_flash (line 77) | def test_glm4_6v_flash(): function test_ovis2_5 (line 82) | def test_ovis2_5(): function test_kimi_vl (line 86) | def test_kimi_vl(): function test_qwen3_vl (line 90) | def test_qwen3_vl(): function test_qwen3_vl_moe (line 94) | def test_qwen3_vl_moe(): function test_qwen3_omni (line 98) | def test_qwen3_omni(): function test_llama4 (line 102) | def test_llama4(): function test_qwen3_5 (line 106) | def test_qwen3_5(): FILE: tests/megatron/test_embedding.py function test_embedding (line 6) | def test_embedding(): function test_reranker (line 32) | def test_reranker(): FILE: tests/megatron/test_export.py function _infer_model (line 6) | def _infer_model(engine, system=None, messages=None): function hf2mcore (line 34) | def hf2mcore(): function mcore2hf (line 41) | def mcore2hf(): function infer_hf_align (line 52) | def infer_hf_align(): FILE: tests/megatron/test_kto.py function test_kto (line 6) | def test_kto(): FILE: tests/megatron/test_lora.py function test_sft (line 6) | def test_sft(): function test_moe (line 31) | def test_moe(): function test_convert (line 58) | def test_convert(): function test_embedding (line 68) | def test_embedding(): function test_resume (line 72) | def test_resume(): FILE: tests/megatron/test_rlhf.py function test_dpo (line 6) | def test_dpo(): function test_hf (line 22) | def test_hf(): FILE: tests/megatron/test_train.py function test_sft (line 6) | def test_sft(): function test_pt (line 24) | def test_pt(): FILE: tests/model_tag.py class ModelTag (line 12) | class ModelTag(object): class ItemResult (line 29) | class ItemResult(object): method __init__ (line 31) | def __init__(self): method to_json (line 36) | def to_json(self): method __init__ (line 39) | def __init__(self): method _post_request (line 53) | def _post_request(self, url, param): method batch_commit_result (line 72) | def batch_commit_result(self): method batch_refresh_stage (line 100) | def batch_refresh_stage(self): method query_model_stage (line 121) | def query_model_stage(self): method commit_ut_result (line 150) | def commit_ut_result(self): function commit_model_ut_result (line 160) | def commit_model_ut_result(model_name, ut_result): FILE: tests/models/test_llm.py function test_llama3 (line 6) | def test_llama3(): FILE: tests/models/test_mllm.py function test_cogvlm (line 6) | def test_cogvlm(): FILE: tests/run.py function test_cases_result_to_df (line 27) | def test_cases_result_to_df(result_list): function statistics_test_result (line 33) | def statistics_test_result(df): function gather_test_suites_in_files (line 77) | def gather_test_suites_in_files(test_dir, case_file_list, list_tests): function gather_test_suites_files (line 92) | def gather_test_suites_files(test_dir, pattern): function collect_test_results (line 102) | def collect_test_results(case_results): function run_command_with_popen (line 125) | def run_command_with_popen(cmd): function async_run_command_with_popen (line 132) | def async_run_command_with_popen(cmd, device_id): function save_test_result (line 147) | def save_test_result(df, args): function run_command (line 156) | def run_command(cmd): function install_packages (line 166) | def install_packages(pkgs): function install_requirements (line 176) | def install_requirements(requirements): function wait_for_free_worker (line 185) | def wait_for_free_worker(workers): function wait_for_workers (line 204) | def wait_for_workers(workers): function parallel_run_case_in_env (line 232) | def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated... function run_case_in_env (line 281) | def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases, r... function run_non_parallelizable_test_suites (line 315) | def run_non_parallelizable_test_suites(suites, result_dir): function get_selected_cases (line 323) | def get_selected_cases(): function run_in_subprocess (line 341) | def run_in_subprocess(args): function get_object_full_name (line 405) | def get_object_full_name(obj): class TimeCostTextTestResult (line 413) | class TimeCostTextTestResult(TextTestResult): method __init__ (line 416) | def __init__(self, stream, descriptions, verbosity): method startTest (line 420) | def startTest(self, test): method stopTest (line 427) | def stopTest(self, test): method addSuccess (line 438) | def addSuccess(self, test): class TimeCostTextTestRunner (line 443) | class TimeCostTextTestRunner(unittest.runner.TextTestRunner): method run (line 446) | def run(self, test): method _makeResult (line 449) | def _makeResult(self): function gather_test_cases (line 454) | def gather_test_cases(test_dir, pattern, list_tests): function print_abnormal_case_info (line 476) | def print_abnormal_case_info(df): function print_table_result (line 482) | def print_table_result(df): function main (line 493) | def main(args): FILE: tests/sample/test_client.py function test_client (line 4) | def test_client(): FILE: tests/test_align/test_cls.py function calc_acc (line 17) | def calc_acc(infer_result): function test_llm (line 25) | def test_llm(): FILE: tests/test_align/test_lmdeploy_vlm.py function _infer_image (line 6) | def _infer_image(model, system=None, images=None): function _infer_image_pipeline (line 19) | def _infer_image_pipeline(model, images=None, prefix='\n'): function test_internvl2_5 (line 32) | def test_internvl2_5(): function test_internvl2 (line 39) | def test_internvl2(): function test_deepseek_vl (line 46) | def test_deepseek_vl(): function test_qwen_vl (line 53) | def test_qwen_vl(): function test_qwen2_vl (line 60) | def test_qwen2_vl(): function test_qwen2_5_vl (line 67) | def test_qwen2_5_vl(): FILE: tests/test_align/test_padding_side.py function calc_acc (line 15) | def calc_acc(infer_result): function calc_diff (line 23) | def calc_diff(infer_result, infer_result2): function test_llm (line 31) | def test_llm(): function test_mllm (line 48) | def test_mllm(): FILE: tests/test_align/test_template/test_agent.py function _infer (line 63) | def _infer(engine, num_tools: int = 1, agent_tools=None, tool_messages=N... function test_react_en (line 92) | def test_react_en(): function test_react_zh (line 119) | def test_react_zh(): function test_qwen_en (line 129) | def test_qwen_en(): function test_qwen_zh (line 155) | def test_qwen_zh(): function test_qwen_en_parallel (line 165) | def test_qwen_en_parallel(): function test_qwen_zh_parallel (line 191) | def test_qwen_zh_parallel(): function test_hermes (line 201) | def test_hermes(): function test_toolbench (line 234) | def test_toolbench(): function test_chatglm4 (line 244) | def test_chatglm4(): function test_glm4 (line 254) | def test_glm4(): function test_llama3 (line 278) | def test_llama3(): function test_llama4 (line 300) | def test_llama4(): function test_hunyuan (line 312) | def test_hunyuan(): function test_glm4_5 (line 334) | def test_glm4_5(): function test_glm4_7 (line 356) | def test_glm4_7(): function test_qwen3_coder (line 376) | def test_qwen3_coder(): function test_qwen3_5 (line 400) | def test_qwen3_5(): function test_deepseek_v3_1 (line 425) | def test_deepseek_v3_1(): function test_youtu (line 490) | def test_youtu(): function test_seed_oss (line 541) | def test_seed_oss(): FILE: tests/test_align/test_template/test_audio.py function _infer_model (line 6) | def _infer_model(engine, system=None, messages=None, audios=None): function test_qwen_audio (line 29) | def test_qwen_audio(): function test_qwen2_audio (line 34) | def test_qwen2_audio(): function test_xcomposer2d5_ol (line 45) | def test_xcomposer2d5_ol(): function test_step_audio_chat (line 52) | def test_step_audio_chat(): function test_qwen2_5_omni (line 58) | def test_qwen2_5_omni(): function test_gemma3n (line 68) | def test_gemma3n(): function test_midashenglm (line 78) | def test_midashenglm(): function test_step_audio2_mini (line 87) | def test_step_audio2_mini(): FILE: tests/test_align/test_template/test_gene.py function test_deepseek_janus_pro_gene (line 7) | def test_deepseek_janus_pro_gene(): function test_emu3_gen (line 13) | def test_emu3_gen(infer_backend): FILE: tests/test_align/test_template/test_llm.py function _infer_model (line 9) | def _infer_model(engine, system=None, messages=None): function test_baichuan_m1 (line 31) | def test_baichuan_m1(): function test_qwen2_5 (line 38) | def test_qwen2_5(): function test_qwen3 (line 46) | def test_qwen3(): function test_qwen3_guard (line 54) | def test_qwen3_guard(): function test_yufeng_xguard (line 63) | def test_yufeng_xguard(): function test_phi4 (line 72) | def test_phi4(): function test_phi4_mini (line 80) | def test_phi4_mini(): function test_qwen1_5 (line 88) | def test_qwen1_5(): function test_chatglm4 (line 95) | def test_chatglm4(): function test_glm4 (line 103) | def test_glm4(): function test_qwq (line 113) | def test_qwq(): function test_internlm (line 121) | def test_internlm(): function test_internlm2 (line 126) | def test_internlm2(): function test_internlm3 (line 133) | def test_internlm3(): function test_yi_coder (line 141) | def test_yi_coder(): function test_yi (line 148) | def test_yi(): function test_deepseek_moe (line 155) | def test_deepseek_moe(): function test_codegeex4 (line 160) | def test_codegeex4(): function test_telechat (line 168) | def test_telechat(): function test_telechat2 (line 178) | def test_telechat2(): function test_glm_edge (line 187) | def test_glm_edge(): function test_llama (line 194) | def test_llama(): function test_openbuddy (line 210) | def test_openbuddy(): function test_megrez (line 220) | def test_megrez(): function test_skywork_o1 (line 228) | def test_skywork_o1(): function test_internlm2_reward (line 246) | def test_internlm2_reward(): function test_qwen2_reward (line 261) | def test_qwen2_reward(): function test_qwen2_5_math (line 291) | def test_qwen2_5_math(): function test_skywork_reward (line 300) | def test_skywork_reward(): function test_deepseek_r1_distill (line 316) | def test_deepseek_r1_distill(): function test_deepseek_prover_v2 (line 324) | def test_deepseek_prover_v2(): function test_qwen2_5_prm (line 332) | def test_qwen2_5_prm(): function test_mistral_small (line 379) | def test_mistral_small(): function test_moonlight (line 387) | def test_moonlight(): function test_ling (line 395) | def test_ling(): function test_gemma3 (line 403) | def test_gemma3(): function test_mimo (line 411) | def test_mimo(): function test_minicpm (line 419) | def test_minicpm(): function test_minimax (line 427) | def test_minimax(): function test_kimi_dev (line 443) | def test_kimi_dev(): function test_hunyuan (line 451) | def test_hunyuan(): function test_ernie (line 460) | def test_ernie(): function test_devstral (line 468) | def test_devstral(): function test_glm4_5 (line 624) | def test_glm4_5(): function test_gpt_oss (line 633) | def test_gpt_oss(): function test_qwen3_next (line 652) | def test_qwen3_next(): function test_ernie_thinking (line 660) | def test_ernie_thinking(): function test_ring2 (line 668) | def test_ring2(): function test_ling2 (line 676) | def test_ling2(): function test_minimind (line 684) | def test_minimind(): function test_medgemma3 (line 692) | def test_medgemma3(): function test_youtu_llm (line 702) | def test_youtu_llm(): function test_glm4_moe_lite (line 711) | def test_glm4_moe_lite(): function test_olmoe (line 719) | def test_olmoe(): FILE: tests/test_align/test_template/test_template.py function test_deepseek_v2_5 (line 5) | def test_deepseek_v2_5(): function test_qwen2_5_math_reward (line 31) | def test_qwen2_5_math_reward(): function test_minimax (line 68) | def test_minimax(): function test_minimax_vl (line 88) | def test_minimax_vl(): function test_deepseek_v3_1 (line 105) | def test_deepseek_v3_1(): FILE: tests/test_align/test_template/test_tool.py function _test_tool (line 26) | def _test_tool(engine, system=None): function test_qwen2_5 (line 48) | def test_qwen2_5(): function test_qwq (line 54) | def test_qwq(): function test_deepseek_r1_distill (line 61) | def test_deepseek_r1_distill(): FILE: tests/test_align/test_template/test_video.py function _infer_model (line 8) | def _infer_model(engine, system=None, messages=None, videos=None, max_to... function test_qwen2_vl (line 31) | def test_qwen2_vl(): function test_internvl2_5 (line 42) | def test_internvl2_5(): function test_internvl2_5_mpo (line 49) | def test_internvl2_5_mpo(): function test_xcomposer2_5 (line 56) | def test_xcomposer2_5(): function test_mplug3 (line 79) | def test_mplug3(): function test_minicpmv (line 87) | def test_minicpmv(): function test_minicpmo (line 94) | def test_minicpmo(): function test_valley (line 112) | def test_valley(): function _run_qwen2_5_vl_hf (line 117) | def _run_qwen2_5_vl_hf(messages, model, template): function test_qwen2_5_vl (line 132) | def test_qwen2_5_vl(): function test_qwen2_5_omni (line 161) | def test_qwen2_5_omni(): function _run_qwen3_omni_hf (line 181) | def _run_qwen3_omni_hf(model, processor, messages): function test_qwen3_omni (line 200) | def test_qwen3_omni(): function test_glm4_1v (line 228) | def test_glm4_1v(): function test_glm4_5v (line 238) | def test_glm4_5v(): function test_keye_vl (line 248) | def test_keye_vl(): function test_keye_vl_1_5 (line 258) | def test_keye_vl_1_5(): function test_ovis2_5 (line 268) | def test_ovis2_5(): function run_hf (line 276) | def run_hf(model, processor, messages): function test_interns1 (line 285) | def test_interns1(): function test_internvl3_5 (line 310) | def test_internvl3_5(): function test_minicpmv4_5 (line 326) | def test_minicpmv4_5(): function _run_qwen3_vl_hf (line 336) | def _run_qwen3_vl_hf(messages, model, template): function test_qwen3_vl (line 364) | def test_qwen3_vl(): function test_qwen3_vl_moe (line 389) | def test_qwen3_vl_moe(): FILE: tests/test_align/test_template/test_vision.py function _infer_model (line 8) | def _infer_model(engine, system=None, messages=None, images=None, **kwar... function test_qwen2_vl (line 31) | def test_qwen2_vl(): function test_qwen2_5_vl_batch_infer (line 39) | def test_qwen2_5_vl_batch_infer(): function test_qwen2_5_omni (line 111) | def test_qwen2_5_omni(): function _run_qwen3_omni_hf (line 119) | def _run_qwen3_omni_hf(model, processor, messages): function test_qwen3_omni (line 131) | def test_qwen3_omni(): function test_qwen3_omni_audio (line 156) | def test_qwen3_omni_audio(): function test_qvq (line 181) | def test_qvq(): function test_internvl2 (line 189) | def test_internvl2(): function test_internvl2_phi3 (line 197) | def test_internvl2_phi3(): function test_internvl3_8b (line 204) | def test_internvl3_8b(): function test_internvl3_9b (line 212) | def test_internvl3_9b(): function test_llava (line 220) | def test_llava(): function test_yi_vl (line 225) | def test_yi_vl(): function test_glm4v (line 230) | def test_glm4v(): function test_cogagent (line 243) | def test_cogagent(): function test_minicpmv (line 264) | def test_minicpmv(): function test_minicpmo (line 273) | def test_minicpmo(): function test_got_ocr (line 295) | def test_got_ocr(): function test_got_ocr_hf (line 307) | def test_got_ocr_hf(): function test_llama_vision (line 322) | def test_llama_vision(): function test_llava_hf (line 330) | def test_llava_hf(): function test_florence (line 338) | def test_florence(): function test_phi3_vision (line 352) | def test_phi3_vision(): function test_qwen_vl (line 360) | def test_qwen_vl(): function test_llava_onevision_hf (line 365) | def test_llava_onevision_hf(): function test_xcomposer2_5 (line 373) | def test_xcomposer2_5(): function test_deepseek_vl (line 382) | def test_deepseek_vl(): function test_deepseek_janus (line 388) | def test_deepseek_janus(): function test_deepseek_vl2 (line 397) | def test_deepseek_vl2(): function test_mplug_owl2 (line 404) | def test_mplug_owl2(): function test_mplug_owl3 (line 410) | def test_mplug_owl3(): function test_ovis1_6 (line 419) | def test_ovis1_6(): function test_ovis1_6_llama3 (line 428) | def test_ovis1_6_llama3(): function test_ovis2 (line 439) | def test_ovis2(): function test_ovis2_5 (line 447) | def test_ovis2_5(): function test_paligemma (line 455) | def test_paligemma(): function test_paligemma2 (line 461) | def test_paligemma2(): function test_pixtral (line 470) | def test_pixtral(): function test_glm_edge_v (line 475) | def test_glm_edge_v(): function test_internvl2_5 (line 480) | def test_internvl2_5(): function test_internvl2_5_mpo (line 487) | def test_internvl2_5_mpo(): function test_megrez_omni (line 496) | def test_megrez_omni(): function test_molmo (line 517) | def test_molmo(): function test_molmoe (line 531) | def test_molmoe(): function test_doc_owl2 (line 541) | def test_doc_owl2(): function test_valley (line 567) | def test_valley(): function test_ui_tars (line 572) | def test_ui_tars(): function test_phi4_vision (line 622) | def test_phi4_vision(): function test_gemma3_vision (line 641) | def test_gemma3_vision(): function test_mistral_2503 (line 650) | def test_mistral_2503(): function test_llama4 (line 660) | def test_llama4(): function test_kimi_vl (line 672) | def test_kimi_vl(): function test_kimi_vl_thinking (line 684) | def test_kimi_vl_thinking(): function test_glm4_1v (line 697) | def test_glm4_1v(): function test_glyph (line 712) | def test_glyph(): function test_gemma3n (line 725) | def test_gemma3n(): function test_keye_vl (line 743) | def test_keye_vl(): function test_keye_vl_1_5 (line 757) | def test_keye_vl_1_5(): function test_dots_ocr (line 770) | def test_dots_ocr(): function test_glm4_5v (line 781) | def test_glm4_5v(): function run_hf (line 794) | def run_hf(model, processor, messages): function test_interns1 (line 803) | def test_interns1(): function test_internvl3_5 (line 828) | def test_internvl3_5(): function test_internvl3_hf (line 844) | def test_internvl3_hf(): function test_internvl3_5_hf (line 867) | def test_internvl3_5_hf(): function test_internvl_gpt_hf (line 890) | def test_internvl_gpt_hf(): function test_minicpmv4_5 (line 913) | def test_minicpmv4_5(): function _run_qwen3_vl_hf (line 923) | def _run_qwen3_vl_hf(messages, model, processor): function test_qwen3_vl (line 938) | def test_qwen3_vl(): function test_sailvl2 (line 964) | def test_sailvl2(): function test_deepseek_ocr (line 979) | def test_deepseek_ocr(): function test_deepseek_ocr_2 (line 995) | def test_deepseek_ocr_2(): function test_llava_onevision1_5 (line 1011) | def test_llava_onevision1_5(): function test_paddle_ocr (line 1022) | def test_paddle_ocr(): function test_ernie_vl (line 1039) | def test_ernie_vl(): function _infer_ernie_vl_thinking_hf (line 1052) | def _infer_ernie_vl_thinking_hf(model, processor, messages): function test_ernie_vl_thinking (line 1075) | def test_ernie_vl_thinking(): function test_mistral_2506 (line 1110) | def test_mistral_2506(): function test_sensenova_si (line 1119) | def test_sensenova_si(): function test_mistral_2512 (line 1127) | def test_mistral_2512(): function test_mistral_2512_thinking (line 1137) | def test_mistral_2512_thinking(): function test_hunyuan_ocr (line 1145) | def test_hunyuan_ocr(): function test_medgemma3_vision (line 1163) | def test_medgemma3_vision(): function test_step3_vl (line 1177) | def test_step3_vl(): function test_paddle_ocr_1_5 (line 1190) | def test_paddle_ocr_1_5(): function test_glm_ocr (line 1208) | def test_glm_ocr(): FILE: tests/test_align/test_vllm_vlm.py function _infer_audio (line 6) | def _infer_audio(model, use_chat_template: bool = True, max_model_len=81... function _infer_image (line 26) | def _infer_image(model, use_chat_template: bool = True, max_model_len=81... function _infer_video (line 46) | def _infer_video(model, use_chat_template: bool = True, max_model_len=81... function test_qwen2_audio (line 66) | def test_qwen2_audio(): function test_qwen2_vl (line 71) | def test_qwen2_vl(): function test_qwen2_5_vl (line 79) | def test_qwen2_5_vl(): function test_deepseek_vl_v2 (line 88) | def test_deepseek_vl_v2(): function test_internvl2 (line 96) | def test_internvl2(): function test_minicpmv_2_5 (line 103) | def test_minicpmv_2_5(): function test_minicpmv_2_6 (line 112) | def test_minicpmv_2_6(): function test_minicpmo_2_6_video (line 121) | def test_minicpmo_2_6_video(): function test_qwen2_5_vl_video (line 129) | def test_qwen2_5_vl_video(): function test_qwen2_5_omni (line 137) | def test_qwen2_5_omni(): function test_ovis2 (line 144) | def test_ovis2(): function test_keye_vl (line 151) | def test_keye_vl(): function test_kimi_vl (line 158) | def test_kimi_vl(): function test_glm4v (line 163) | def test_glm4v(): function test_glm4_1v (line 168) | def test_glm4_1v(): function test_paddleocr_vl (line 173) | def test_paddleocr_vl(): function test_glm4_5_vl (line 178) | def test_glm4_5_vl(): function test_deepseek_ocr (line 183) | def test_deepseek_ocr(): FILE: tests/test_utils.py function delete_credential (line 32) | def delete_credential(): function test_level (line 37) | def test_level(): function require_tf (line 45) | def require_tf(test_case): function require_torch (line 50) | def require_torch(test_case): function set_test_level (line 54) | def set_test_level(level: int): class DummyTorchDataset (line 59) | class DummyTorchDataset: method __init__ (line 61) | def __init__(self, feat, label, num) -> None: method __getitem__ (line 66) | def __getitem__(self, index): method __len__ (line 70) | def __len__(self): function create_dummy_test_dataset (line 74) | def create_dummy_test_dataset(feat, label, num): function download_and_untar (line 78) | def download_and_untar(fpath, furl, dst) -> str: function get_case_model_info (line 96) | def get_case_model_info(): function compare_arguments_nested (line 124) | def compare_arguments_nested(print_content, arg1, arg2, rtol=1.e-3, atol... class DistributedTestCase (line 224) | class DistributedTestCase(unittest.TestCase): method _start (line 254) | def _start(self, dist_start_cmd, func, num_gpus, assert_callback=None,... method start (line 313) | def start(self, func, num_gpus, assert_callback=None, save_all_ranks=F... method clean_tmp (line 331) | def clean_tmp(self, tmp_file_list): FILE: tests/train/test_channel.py function test_channel (line 6) | def test_channel(): FILE: tests/train/test_cls.py function test_llm (line 14) | def test_llm(): function test_bert (line 28) | def test_bert(): function test_mllm (line 44) | def test_mllm(): FILE: tests/train/test_embedding.py function test_embedding (line 13) | def test_embedding(): function test_reranker (line 31) | def test_reranker(): function test_reranker2 (line 58) | def test_reranker2(): FILE: tests/train/test_export_cached_dataset.py function test_export_cached_dataset (line 1) | def test_export_cached_dataset(): function test_sft (line 13) | def test_sft(): FILE: tests/train/test_freeze.py function test_full_vit (line 13) | def test_full_vit(): function test_full_aligner (line 30) | def test_full_aligner(): function test_lora_vit (line 47) | def test_lora_vit(): function test_lora_aligner (line 64) | def test_lora_aligner(): FILE: tests/train/test_gkd.py function test_llm (line 13) | def test_llm(): function test_mllm (line 30) | def test_mllm(): FILE: tests/train/test_grpo.py function test_llm (line 20) | def test_llm(): function test_llm_zero2 (line 38) | def test_llm_zero2(): function test_llm_vllm (line 56) | def test_llm_vllm(): function test_llm_vllm_zero2 (line 75) | def test_llm_vllm_zero2(): function test_mllm_pt (line 94) | def test_mllm_pt(): FILE: tests/train/test_kto.py function test_llm (line 13) | def test_llm(): function test_mllm (line 26) | def test_mllm(): FILE: tests/train/test_liger.py function test_sft (line 12) | def test_sft(): function test_mllm_dpo (line 25) | def test_mllm_dpo(): FILE: tests/train/test_multilabel.py function test_reg_llm (line 14) | def test_reg_llm(): function test_reg_mllm (line 28) | def test_reg_mllm(): FILE: tests/train/test_packing.py function test_llm (line 13) | def test_llm(): function test_streaming (line 29) | def test_streaming(): function test_mllm_streaming (line 46) | def test_mllm_streaming(): FILE: tests/train/test_ppo.py function test_rm (line 13) | def test_rm(): function test_ppo (line 26) | def test_ppo(): FILE: tests/train/test_pt.py function test_llm (line 13) | def test_llm(): function test_mllm (line 22) | def test_mllm(): FILE: tests/train/test_resume_from_checkpoint.py function test_resume_from_checkpoint (line 28) | def test_resume_from_checkpoint(): FILE: tests/train/test_rlhf.py function test_llm (line 13) | def test_llm(): function test_mllm (line 26) | def test_mllm(): function test_mllm_zero3 (line 42) | def test_mllm_zero3(): FILE: tests/train/test_sample.py function test_sampling (line 4) | def test_sampling(): FILE: tests/train/test_sft.py function test_llm_ddp (line 14) | def test_llm_ddp(): function test_unsloth (line 31) | def test_unsloth(): function test_mllm_mp (line 47) | def test_mllm_mp(): function test_llm_streaming (line 65) | def test_llm_streaming(): function test_mllm_streaming (line 74) | def test_mllm_streaming(): function test_mllm_zero3 (line 88) | def test_mllm_zero3(): function test_qwen_vl (line 100) | def test_qwen_vl(): function test_qwen2_audio (line 111) | def test_qwen2_audio(): function test_llm_gptq (line 125) | def test_llm_gptq(): function test_llm_awq (line 137) | def test_llm_awq(): function test_mllm_streaming_zero3 (line 149) | def test_mllm_streaming_zero3(): function test_mllm_streaming_mp_ddp (line 162) | def test_mllm_streaming_mp_ddp(): function test_llm_hqq (line 175) | def test_llm_hqq(): function test_llm_bnb (line 189) | def test_llm_bnb(): function test_moe (line 203) | def test_moe(): function test_resume_from_checkpoint (line 215) | def test_resume_from_checkpoint(): function test_resume_only_model (line 238) | def test_resume_only_model(): function test_llm_transformers_4_33 (line 264) | def test_llm_transformers_4_33(): function test_predict_with_generate (line 275) | def test_predict_with_generate(): function test_predict_with_generate_zero3 (line 294) | def test_predict_with_generate_zero3(): function test_template (line 311) | def test_template(): function test_emu3_gen (line 328) | def test_emu3_gen(): function test_eval_strategy (line 347) | def test_eval_strategy(): function test_epoch (line 361) | def test_epoch(): function test_agent (line 379) | def test_agent(): function test_grounding (line 395) | def test_grounding(): FILE: tests/train/test_train_eval.py function test_train_eval_loop (line 11) | def test_train_eval_loop(): FILE: tests/train/test_vit_lr.py function test_vit_lr (line 6) | def test_vit_lr(): FILE: tests/train/test_vllm_importance_sampling_basic.py class MockAccelerator (line 13) | class MockAccelerator: method __init__ (line 16) | def __init__(self, device='cpu'): method gather_for_metrics (line 19) | def gather_for_metrics(self, tensor): class MockGRPOTrainer (line 24) | class MockGRPOTrainer: method __init__ (line 27) | def __init__(self, mode='token_truncate', threshold=2.0): method _compute_sequence_level_ratios (line 32) | def _compute_sequence_level_ratios(self, is_ratio: torch.Tensor, compl... method _apply_rollout_importance_sampling (line 49) | def _apply_rollout_importance_sampling(self, rollout_log_ratio: torch.... method _compute_is_correction_metrics (line 99) | def _compute_is_correction_metrics( class TestVLLMImportanceSampling (line 163) | class TestVLLMImportanceSampling: method test_token_truncate_basic (line 166) | def test_token_truncate_basic(self): method test_token_mask_basic (line 184) | def test_token_mask_basic(self): method test_sequence_truncate_basic (line 199) | def test_sequence_truncate_basic(self): method test_sequence_mask_basic (line 218) | def test_sequence_mask_basic(self): method test_threshold_sensitivity (line 237) | def test_threshold_sensitivity(self): method test_completion_mask (line 255) | def test_completion_mask(self): method test_edge_cases (line 269) | def test_edge_cases(self): method test_safety_bound (line 291) | def test_safety_bound(self): class TestISCorrectionMetrics (line 309) | class TestISCorrectionMetrics: method test_ess_uniform_weights (line 312) | def test_ess_uniform_weights(self): method test_ess_varied_weights (line 330) | def test_ess_varied_weights(self): method test_clipped_frac_token_truncate (line 345) | def test_clipped_frac_token_truncate(self): method test_clipped_frac_token_mask (line 359) | def test_clipped_frac_token_mask(self): method test_clipped_frac_sequence_level (line 373) | def test_clipped_frac_sequence_level(self): class TestOffpolicyMetrics (line 388) | class TestOffpolicyMetrics: method test_kl_divergence_same_policy (line 391) | def test_kl_divergence_same_policy(self): method test_k3_kl_estimator (line 410) | def test_k3_kl_estimator(self): method test_chi2_divergence (line 431) | def test_chi2_divergence(self): FILE: tests/tuners/test_extra_state_dict.py class TestExtraStateDict (line 14) | class TestExtraStateDict(unittest.TestCase): method setUp (line 16) | def setUp(self): method tearDown (line 22) | def tearDown(self): method test_swift_extra_state_dict (line 26) | def test_swift_extra_state_dict(self): method test_swift_modules_to_save (line 42) | def test_swift_modules_to_save(self): FILE: tests/tuners/test_merged_linear.py class TestMergedLinear (line 10) | class TestMergedLinear(unittest.TestCase): method test_swift_lora_forward (line 12) | def test_swift_lora_forward(self): FILE: tests/tuners/test_neft.py class TestNEFT (line 13) | class TestNEFT(unittest.TestCase): method setUp (line 15) | def setUp(self): method tearDown (line 21) | def tearDown(self): method test_neft (line 25) | def test_neft(self): method test_neft_lora (line 68) | def test_neft_lora(self): FILE: tests/tuners/test_peft.py class TestPeft (line 19) | class TestPeft(unittest.TestCase): method setUp (line 21) | def setUp(self): method tearDown (line 27) | def tearDown(self): method test_peft_lora_injection (line 31) | def test_peft_lora_injection(self): method test_lora_merge (line 48) | def test_lora_merge(self): method test_lora_reload_by_peft (line 106) | def test_lora_reload_by_peft(self): method test_peft_adalora_injection (line 120) | def test_peft_adalora_injection(self): method test_peft_lora_dtype (line 137) | def test_peft_lora_dtype(self): FILE: tests/tuners/test_scetuning.py class TestSCETuning (line 13) | class TestSCETuning(unittest.TestCase): method setUp (line 15) | def setUp(self): method tearDown (line 21) | def tearDown(self): method model_comparison (line 25) | def model_comparison(self, model, model2): method test_scetuning_on_diffusers_v1 (line 33) | def test_scetuning_on_diffusers_v1(self): method test_scetuning_part_mixin (line 64) | def test_scetuning_part_mixin(self): method test_scetuning_on_diffusers_v2 (line 100) | def test_scetuning_on_diffusers_v2(self): FILE: tests/tuners/test_swift_base.py class TestSwift (line 20) | class TestSwift(unittest.TestCase): method setUp (line 22) | def setUp(self): method tearDown (line 28) | def tearDown(self): method test_swift_lora_forward (line 32) | def test_swift_lora_forward(self): method test_swift_adapter_forward (line 73) | def test_swift_adapter_forward(self): method test_swift_prompt_forward (line 93) | def test_swift_prompt_forward(self): method test_swift_restuner_forward (line 110) | def test_swift_restuner_forward(self): method lora_injection_with_dtype (line 133) | def lora_injection_with_dtype(self, dtype=torch.float32): method test_swift_lora_injection (line 185) | def test_swift_lora_injection(self): method test_swift_lora_injection_bf16 (line 188) | def test_swift_lora_injection_bf16(self): method test_save_to_peft_mix (line 191) | def test_save_to_peft_mix(self): method test_save_to_peft_param (line 208) | def test_save_to_peft_param(self): method test_save_to_peft_ok (line 220) | def test_save_to_peft_ok(self): method test_swift_multiple_adapters (line 257) | def test_swift_multiple_adapters(self): method test_part (line 282) | def test_part(self): method test_swift_multiple_adapters_switching (line 382) | def test_swift_multiple_adapters_switching(self): method test_swift_side_bert (line 510) | def test_swift_side_bert(self): FILE: tests/tuners/test_swift_device_map.py class TestSwift (line 13) | class TestSwift(unittest.TestCase): method setUp (line 15) | def setUp(self): method tearDown (line 21) | def tearDown(self): method test_swift_multiple_adapters (line 25) | def test_swift_multiple_adapters(self): FILE: tests/tuners/test_swift_restuning.py class TestSwiftResTuning (line 12) | class TestSwiftResTuning(unittest.TestCase): method setUp (line 14) | def setUp(self): method tearDown (line 20) | def tearDown(self): method set_random_seed (line 24) | def set_random_seed(self, seed=123): method model_comparison (line 35) | def model_comparison(self, model, model2): method test_swift_restuning_vit (line 43) | def test_swift_restuning_vit(self): method test_swift_restuning_diffusers_sd (line 95) | def test_swift_restuning_diffusers_sd(self): FILE: tests/utils/test_async_rewards.py class TestAsyncRewardFunctions (line 14) | class TestAsyncRewardFunctions(unittest.TestCase): method test_start_and_shutdown_event_loop_in_daemon (line 17) | def test_start_and_shutdown_event_loop_in_daemon(self): method test_run_async_function_in_daemon_loop (line 38) | def test_run_async_function_in_daemon_loop(self): method test_async_orm_base_class (line 57) | def test_async_orm_base_class(self): method test_async_reward_is_detected (line 80) | def test_async_reward_is_detected(self): class TestAsyncRewardPerformance (line 103) | class TestAsyncRewardPerformance(unittest.TestCase): method test_parallel_async_execution (line 106) | def test_parallel_async_execution(self): method test_async_reward_function_batch_performance (line 153) | def test_async_reward_function_batch_performance(self): FILE: tests/utils/test_file_utils.py class TestFileUtils (line 9) | class TestFileUtils(unittest.TestCase): method setUp (line 11) | def setUp(self): method tearDown (line 15) | def tearDown(self): method test_copy_files (line 18) | def test_copy_files(self): FILE: tests/utils/test_io_utils.py class TestIOUtils (line 11) | class TestIOUtils(unittest.TestCase): method setUp (line 13) | def setUp(self): method tearDown (line 19) | def tearDown(self): method test_jsonl (line 22) | def test_jsonl(self): method test_jsonl2 (line 32) | def test_jsonl2(self): FILE: tests/utils/test_rewards.py class TestMathAccuracy (line 4) | class TestMathAccuracy(unittest.TestCase): method setUpClass (line 7) | def setUpClass(cls): method setUp (line 16) | def setUp(self): method test_pure_latex_format (line 20) | def test_pure_latex_format(self): method test_latex_in_long_text (line 29) | def test_latex_in_long_text(self): method test_multiple_steps_with_boxed (line 38) | def test_multiple_steps_with_boxed(self): method test_wrong_answer_no_tag (line 53) | def test_wrong_answer_no_tag(self): method test_batch_processing_no_tag (line 62) | def test_batch_processing_no_tag(self): method test_answer_tag_with_plain_number (line 73) | def test_answer_tag_with_plain_number(self): method test_answer_tag_with_latex (line 82) | def test_answer_tag_with_latex(self): method test_long_text_with_answer_tag (line 91) | def test_long_text_with_answer_tag(self): method test_answer_tag_with_complex_expression (line 106) | def test_answer_tag_with_complex_expression(self): method test_solution_with_answer_tag (line 115) | def test_solution_with_answer_tag(self): method test_answer_tag_wrong_answer (line 124) | def test_answer_tag_wrong_answer(self): method test_mixed_batch_with_and_without_tags (line 133) | def test_mixed_batch_with_and_without_tags(self): method test_empty_solution (line 152) | def test_empty_solution(self): method test_malformed_latex (line 161) | def test_malformed_latex(self): method test_answer_tag_with_extra_whitespace (line 170) | def test_answer_tag_with_extra_whitespace(self): method test_multiple_answer_tags (line 179) | def test_multiple_answer_tags(self): method test_real_world_example_from_user (line 188) | def test_real_world_example_from_user(self): method test_equivalent_fractions (line 212) | def test_equivalent_fractions(self): method test_different_forms_same_answer (line 221) | def test_different_forms_same_answer(self): method test_latex_inline_math_delimiters (line 230) | def test_latex_inline_math_delimiters(self): method test_latex_display_math_delimiters (line 240) | def test_latex_display_math_delimiters(self): method test_mixed_latex_delimiters (line 249) | def test_mixed_latex_delimiters(self): FILE: tests/utils/test_split_str_parts_by.py function test_split_str_parts_by (line 4) | def test_split_str_parts_by(): FILE: tests/utils/test_torch_utils.py class TestTorchUtils (line 7) | class TestTorchUtils(unittest.TestCase): method test_find_sub_module (line 9) | def test_find_sub_module(self):