SYMBOL INDEX (12172 symbols across 1438 files) FILE: .github/workflows/scripts/check_doc_i18n.py function compare_dirs (line 5) | def compare_dirs(dir1, dir2): FILE: .github/workflows/scripts/example_checks/check_dispatch_inputs.py function check_inputs (line 5) | def check_inputs(input_list): function main (line 13) | def main(): FILE: .github/workflows/scripts/example_checks/check_example_weekly.py function show_files (line 4) | def show_files(path, all_files): function join (line 19) | def join(input_list, sep=None): function main (line 23) | def main(): FILE: .github/workflows/scripts/example_checks/detect_changed_example.py function main (line 4) | def main(): FILE: .github/workflows/scripts/generate_leaderboard_and_send_to_lark.py class Counter (line 12) | class Counter(dict): method record (line 21) | def record(self, item: str): method to_sorted_list (line 27) | def to_sorted_list(self): function get_utc_time_one_week_ago (line 33) | def get_utc_time_one_week_ago(): function datetime2str (line 42) | def datetime2str(dt): function str2datetime (line 49) | def str2datetime(string): function plot_bar_chart (line 56) | def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str,... function get_organization_repositories (line 69) | def get_organization_repositories(github_token, organization_name) -> Li... function get_issue_pull_request_comments (line 90) | def get_issue_pull_request_comments(github_token: str, org_name: str, re... function get_discussion_comments (line 141) | def get_discussion_comments(github_token: str, org_name: str, repo_name:... function generate_user_engagement_leaderboard_image (line 315) | def generate_user_engagement_leaderboard_image( function generate_contributor_leaderboard_image (line 378) | def generate_contributor_leaderboard_image(github_token, org_name, repo_... function upload_image_to_lark (line 467) | def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str: function generate_lark_tenant_access_token (line 486) | def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str: function send_image_to_lark (line 500) | def send_image_to_lark(image_key: str, webhook_url: str) -> None: function send_message_to_lark (line 512) | def send_message_to_lark(message: str, webhook_url: str): FILE: .github/workflows/scripts/generate_release_draft.py function parse_args (line 14) | def parse_args(): function get_latest_tag_commit (line 21) | def get_latest_tag_commit(headers=None): function get_commit_info (line 29) | def get_commit_info(commit_hash, headers=None): function get_all_commit_info (line 35) | def get_all_commit_info(since, headers=None): function collate_release_info (line 54) | def collate_release_info(commit_info_list): function generate_release_post_markdown (line 78) | def generate_release_post_markdown(current_version, last_version, releas... FILE: .github/workflows/scripts/send_message_to_lark.py function parse_args (line 6) | def parse_args(): function send_message_to_lark (line 13) | def send_message_to_lark(message, webhook_url): FILE: .github/workflows/scripts/update_setup_for_nightly.py function open_setup_file (line 4) | def open_setup_file(): function replace_nightly_package_info (line 10) | def replace_nightly_package_info(file_lines): function write_setup_file (line 22) | def write_setup_file(file_lines): function main (line 27) | def main(): FILE: applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py class SeparatorStyle (line 20) | class SeparatorStyle(Enum): class Conversation (line 25) | class Conversation: method clear (line 33) | def clear(self): method get_prompt (line 36) | def get_prompt(self, length: int = None): method save_prompt (line 51) | def save_prompt(self): method append_message (line 63) | def append_message(self, role, message): method copy (line 66) | def copy(self): method dict (line 76) | def dict(self): FILE: applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py class RandomDataset (line 7) | class RandomDataset(Dataset): method __init__ (line 8) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo... method __len__ (line 16) | def __len__(self): method __getitem__ (line 19) | def __getitem__(self, idx): FILE: applications/Colossal-LLaMA/colossal_llama/dataset/loader.py function load_tokenized_dataset (line 19) | def load_tokenized_dataset( class DataCollatorForSupervisedDataset (line 51) | class DataCollatorForSupervisedDataset(object): method __call__ (line 63) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[... class StatefulDistributedSampler (line 141) | class StatefulDistributedSampler(DistributedSampler): method __init__ (line 146) | def __init__( method __iter__ (line 165) | def __iter__(self) -> Iterator: method __len__ (line 171) | def __len__(self) -> int: method set_start_index (line 174) | def set_start_index(self, start_index: int) -> None: FILE: applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py function supervised_tokenize_pretrain (line 30) | def supervised_tokenize_pretrain( function supervised_tokenize_sft (line 73) | def supervised_tokenize_sft( class ClosedToConstantLengthSplicedDataset (line 188) | class ClosedToConstantLengthSplicedDataset(IterableDataset): method __init__ (line 194) | def __init__( method __len__ (line 226) | def __len__(self) -> int: method __iter__ (line 229) | def __iter__(self) -> Iterable[Dict[str, List[int]]]: FILE: applications/Colossal-LLaMA/colossal_llama/model/init_model.py function main (line 18) | def main(): FILE: applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py function expand_vocab_tokenizer (line 23) | def expand_vocab_tokenizer( function main (line 62) | def main(): FILE: applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py function load_json (line 20) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]: function save_json (line 28) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ... function save_checkpoint (line 36) | def save_checkpoint( function load_checkpoint (line 71) | def load_checkpoint( FILE: applications/Colossal-LLaMA/colossal_llama/utils/froze.py function freeze_non_embeds_parameters (line 7) | def freeze_non_embeds_parameters(model: LlamaForCausalLM) -> None: function unfreeze_parameters (line 16) | def unfreeze_parameters(model: LlamaForCausalLM) -> None: FILE: applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py function unwrap (line 18) | def unwrap(model): function neftune_post_forward_hook (line 25) | def neftune_post_forward_hook(module, input, output): function activate_neftune (line 51) | def activate_neftune(model, neftune_noise_alpha=0.1): function deactivate_neftune (line 65) | def deactivate_neftune(model, neftune_hook_handle): FILE: applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py function get_prompt_template (line 13) | def get_prompt_template( function streaming_chat (line 52) | def streaming_chat( function stream_generate (line 141) | def stream_generate( FILE: applications/Colossal-LLaMA/colossal_llama/utils/utils.py function all_reduce_mean (line 11) | def all_reduce_mean(tensor: torch.Tensor, plugin: Plugin = None) -> torc... function get_model_numel (line 21) | def get_model_numel(model: torch.nn.Module) -> int: function format_numel_str (line 25) | def format_numel_str(numel: int) -> str: FILE: applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py function main (line 26) | def main(): FILE: applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py function main (line 23) | def main(): FILE: applications/Colossal-LLaMA/inference/inference_example.py function load_model (line 12) | def load_model(model_path, device="cuda", **kwargs): function generate (line 26) | def generate(args): FILE: applications/Colossal-LLaMA/inference/stream_chat_example.py function main (line 9) | def main(args): FILE: applications/Colossal-LLaMA/setup.py function fetch_requirements (line 4) | def fetch_requirements(path): function fetch_readme (line 9) | def fetch_readme(): function fetch_version (line 14) | def fetch_version(): FILE: applications/Colossal-LLaMA/train.py function train (line 40) | def train(args) -> None: FILE: applications/ColossalChat/benchmarks/benchmark_ppo.py function get_model_numel (line 39) | def get_model_numel(model: torch.nn.Module, plugin: str, tp: int) -> int: function get_gpt_config (line 46) | def get_gpt_config(model_name: str) -> OPTConfig: function benchmark_train (line 65) | def benchmark_train(args): FILE: applications/ColossalChat/benchmarks/dummy_dataset.py class DummyLLMDataset (line 6) | class DummyLLMDataset(Dataset): method __init__ (line 7) | def __init__(self, keys, seq_len, size=500, gen_fn={}): method _generate_data (line 14) | def _generate_data(self): method __len__ (line 23) | def __len__(self): method __getitem__ (line 26) | def __getitem__(self, idx): FILE: applications/ColossalChat/benchmarks/ray/1mmt_dummy.py function get_free_port (line 23) | def get_free_port(): function get_local_ip (line 29) | def get_local_ip(): function main (line 35) | def main(args): FILE: applications/ColossalChat/benchmarks/ray/mmmt_dummy.py function get_free_port (line 23) | def get_free_port(): function get_local_ip (line 29) | def get_local_ip(): function main (line 35) | def main(args): FILE: applications/ColossalChat/coati/dataset/conversation.py class Conversation (line 15) | class Conversation: method from_config (line 24) | def from_config(cls, tokenizer: PreTrainedTokenizer, config: Dict): method clear (line 35) | def clear(self): method get_conversation_template_keys (line 39) | def get_conversation_template_keys(cls): method __str__ (line 42) | def __str__(self): method get_prompt (line 49) | def get_prompt(self, length: int = None, add_generation_prompt=False) ... method save_prompt (line 75) | def save_prompt(self): method append_message (line 78) | def append_message(self, role: str, message: str): method copy (line 92) | def copy(self): function setup_conversation_template (line 96) | def setup_conversation_template( FILE: applications/ColossalChat/coati/dataset/loader.py function load_tokenized_dataset (line 24) | def load_tokenized_dataset( class DataCollatorForSupervisedDataset (line 58) | class DataCollatorForSupervisedDataset(object): method __call__ (line 69) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[... class DataCollatorForPromptDataset (line 146) | class DataCollatorForPromptDataset(DataCollatorForSupervisedDataset): method __call__ (line 147) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[... class DataCollatorForPreferenceDataset (line 170) | class DataCollatorForPreferenceDataset(object): method __call__ (line 180) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[... class DataCollatorForKTODataset (line 241) | class DataCollatorForKTODataset(object): method __call__ (line 255) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[... class StatefulDistributedSampler (line 325) | class StatefulDistributedSampler(DistributedSampler): method __init__ (line 326) | def __init__( method __iter__ (line 338) | def __iter__(self) -> Iterator: method __len__ (line 344) | def __len__(self) -> int: method set_start_index (line 347) | def set_start_index(self, start_index: int) -> None: function apply_chat_template_and_mask (line 351) | def apply_chat_template_and_mask( class RawConversationDataset (line 420) | class RawConversationDataset(Dataset): method __init__ (line 426) | def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, ma... method __len__ (line 436) | def __len__(self) -> int: method __getitem__ (line 439) | def __getitem__(self, index: int): function collate_fn_grpo (line 447) | def collate_fn_grpo(batch): FILE: applications/ColossalChat/coati/dataset/tokenization_utils.py function tokenize_sft (line 26) | def tokenize_sft( function tokenize_prompt (line 133) | def tokenize_prompt( function apply_rlhf_data_format (line 203) | def apply_rlhf_data_format(template: Conversation, tokenizer: Any): function tokenize_rlhf (line 226) | def tokenize_rlhf( function tokenize_kto (line 342) | def tokenize_kto( FILE: applications/ColossalChat/coati/dataset/utils.py function is_rank_0 (line 11) | def is_rank_0() -> bool: function _make_r_io_base (line 15) | def _make_r_io_base(f, mode: str): function jload (line 21) | def jload(f, mode="r"): function read_string_by_schema (line 29) | def read_string_by_schema(data: Dict[str, Any], schema: str) -> str: function pad_to_max_len (line 46) | def pad_to_max_len( function chuncate_sequence (line 71) | def chuncate_sequence(sequence: List[torch.Tensor], max_length: int, dty... function find_first_occurrence_subsequence (line 82) | def find_first_occurrence_subsequence(seq: torch.Tensor, subseq: torch.T... function tokenize_and_concatenate (line 91) | def tokenize_and_concatenate( function split_templated_prompt_into_chunks (line 137) | def split_templated_prompt_into_chunks(messages: List[Dict[str, str]], p... FILE: applications/ColossalChat/coati/distributed/comm.py function ray_broadcast_object (line 11) | def ray_broadcast_object(obj: Any, src: int = 0, device=None, group_name... function ray_broadcast_tensor_dict (line 36) | def ray_broadcast_tensor_dict( class SharedVariableActor (line 79) | class SharedVariableActor: method __init__ (line 80) | def __init__(self, number_of_readers: int = 0, buffer_size_limit: int ... method pickup_rollout_task (line 90) | def pickup_rollout_task(self, num_tasks: int): method append_data (line 108) | def append_data(self, data): method get_data (line 113) | def get_data(self, data_uid: int): method acquire_process_lock (line 134) | def acquire_process_lock(self, key: str): method release_process_lock (line 145) | def release_process_lock(self, key: str): method set_signal (line 150) | def set_signal(self, key: str, signal: str): method get_signal (line 153) | def get_signal(self): FILE: applications/ColossalChat/coati/distributed/consumer.py class BaseConsumer (line 24) | class BaseConsumer: method __init__ (line 25) | def __init__( method setup (line 69) | def setup(self) -> None: method state_dict (line 108) | def state_dict(self) -> Dict[str, torch.Tensor]: method step (line 111) | def step(self, step_idx: int, **kwargs) -> Optional[float]: method prepare_mini_batch (line 114) | def prepare_mini_batch(self, effective_group_to_raw_group_mapping: Dic... method calculate_effective_group_to_raw_group_mapping (line 138) | def calculate_effective_group_to_raw_group_mapping(self, step): method loop (line 149) | def loop(self) -> None: method __del__ (line 358) | def __del__(self): class SimpleConsumer (line 364) | class SimpleConsumer(BaseConsumer): method __init__ (line 365) | def __init__( method setup (line 405) | def setup(self): method step (line 409) | def step(self, step_idx: int, pbar: Any, **kwargs) -> Optional[float]: method state_dict (line 430) | def state_dict(self): FILE: applications/ColossalChat/coati/distributed/grpo_consumer.py class GRPOConsumer (line 19) | class GRPOConsumer(BaseConsumer): method __init__ (line 20) | def __init__( method setup (line 143) | def setup(self): method step (line 174) | def step(self, step_idx: int, pbar: Any, **kwargs) -> Optional[float]: method state_dict (line 607) | def state_dict(self): FILE: applications/ColossalChat/coati/distributed/inference_backend.py class BaseInferenceBackend (line 22) | class BaseInferenceBackend: method __init__ (line 23) | def __init__(self, model_config: Dict[str, Any], generate_config: Dict... method generate (line 26) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens... method load_state_dict (line 42) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: class TransformersInferenceBackend (line 46) | class TransformersInferenceBackend(BaseInferenceBackend): method __init__ (line 56) | def __init__( method generate (line 74) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens... method load_state_dict (line 125) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: class SGLangInferenceBackend (line 129) | class SGLangInferenceBackend(BaseInferenceBackend): method __init__ (line 130) | def __init__( method generate (line 152) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens... method load_state_dict (line 179) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: class VLLMInferenceBackend (line 186) | class VLLMInferenceBackend(BaseInferenceBackend): method __init__ (line 195) | def __init__( method generate (line 219) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens... method load_state_dict (line 283) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: FILE: applications/ColossalChat/coati/distributed/launch.py function get_jsonl_size_fast (line 21) | def get_jsonl_size_fast(path: str) -> int: function get_dp_size_fast (line 28) | def get_dp_size_fast(n_procs: int, plugin_config: Dict[str, Any]) -> int: function launch_distributed (line 36) | def launch_distributed( FILE: applications/ColossalChat/coati/distributed/launch_zero_bubble.py function get_jsonl_size_fast (line 16) | def get_jsonl_size_fast(path: str) -> int: function get_dp_size_fast (line 23) | def get_dp_size_fast(n_procs: int, plugin_config: Dict[str, Any]) -> int: function launch_distributed (line 31) | def launch_distributed( FILE: applications/ColossalChat/coati/distributed/loss.py class PolicyLoss (line 8) | class PolicyLoss(nn.Module): method __init__ (line 13) | def __init__( method forward (line 29) | def forward( FILE: applications/ColossalChat/coati/distributed/producer.py class BaseProducer (line 34) | class BaseProducer: method __init__ (line 35) | def __init__( method setup (line 198) | def setup(self) -> None: method rollout (line 212) | def rollout(self, input_ids: torch.Tensor, attention_mask: torch.Tenso... method load_state_dict (line 215) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: method loop (line 218) | def loop(self) -> None: method __del__ (line 417) | def __del__(self): class SimpleProducer (line 422) | class SimpleProducer(BaseProducer): method __init__ (line 423) | def __init__( method rollout (line 483) | def rollout(self, input_ids, attention_mask, **kwargs): method __del__ (line 506) | def __del__(self): method load_state_dict (line 512) | def load_state_dict(self, state_dict): FILE: applications/ColossalChat/coati/distributed/profiling_utils.py class CustomProfiler (line 5) | class CustomProfiler: method __init__ (line 6) | def __init__(self, name, disabled=True): method _log (line 13) | def _log(self, message): method log (line 20) | def log(self, message): method enter (line 27) | def enter(self, event_name): method exit (line 30) | def exit(self, event_name): method close (line 33) | def close(self): FILE: applications/ColossalChat/coati/distributed/reward/code_reward/testing_util.py function truncatefn (line 43) | def truncatefn(s, length=300): class CODE_TYPE (line 51) | class CODE_TYPE(Enum): class Capturing (line 59) | class Capturing(list): method __enter__ (line 60) | def __enter__(self): method __exit__ (line 67) | def __exit__(self, *args): function only_int_check (line 73) | def only_int_check(val): function string_int_check (line 77) | def string_int_check(val): function combined_int_check (line 81) | def combined_int_check(val): function clean_traceback (line 85) | def clean_traceback(error_traceback): function run_test (line 92) | def run_test(in_outs, test=None, debug=False, timeout=15, run_all_tests=... function custom_compare_ (line 551) | def custom_compare_(output, ground_truth): function stripped_string_compare (line 566) | def stripped_string_compare(s1, s2): function call_method (line 572) | def call_method(method, inputs): function reliability_guard (line 598) | def reliability_guard(maximum_memory_bytes=None): FILE: applications/ColossalChat/coati/distributed/reward/code_reward/utils.py function _temp_run (line 27) | def _temp_run(sample, generation, debug, result, metadata_list, timeout): function check_correctness (line 39) | def check_correctness(in_outs: Optional[dict], generation, timeout=10, d... function check_correctness_code_api (line 61) | def check_correctness_code_api( FILE: applications/ColossalChat/coati/distributed/reward/reward_fn.py function verify_math_representation (line 36) | def verify_math_representation(completion, gt_answer): function verify_model_answer (line 76) | def verify_model_answer(decoded_final_answer, gt_answer, ans_acc, acc_sc... function math_reward_fn (line 99) | def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs): function boxed_math_reward_fn (line 160) | def boxed_math_reward_fn(input_ids, gt_answer, response_idx, **kwargs): function code_reward_fn (line 225) | def code_reward_fn(input_ids, test_cases, response_idx, **kwargs): FILE: applications/ColossalChat/coati/distributed/reward/reward_utils.py function validate_response_structure (line 20) | def validate_response_structure(processed_str: str, tags: Dict = None) -... function extract_solution (line 58) | def extract_solution(solution_str: str) -> Tuple[Optional[str], str]: function extract_boxed_solution (line 79) | def extract_boxed_solution(text: str) -> Optional[str]: FILE: applications/ColossalChat/coati/distributed/reward/verifiable_reward.py class VerifiableReward (line 11) | class VerifiableReward: method __init__ (line 12) | def __init__(self, reward_fns: List[callable], **kwargs: List[Dict[str... method __call__ (line 16) | def __call__( FILE: applications/ColossalChat/coati/distributed/utils.py function unbind_batch (line 11) | def unbind_batch(batch: Dict[str, torch.Tensor]) -> List[Dict[str, torch... function bind_batch (line 25) | def bind_batch(batches: List[Dict[str, torch.Tensor]]) -> Dict[str, torc... function pre_send (line 32) | def pre_send(batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: function post_recv (line 41) | def post_recv(batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: function update_by_default (line 50) | def update_by_default(data: Dict[str, Any], default: Dict[str, Any]) -> ... function log_probs_from_logits (line 58) | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) ->... function memory_efficient_logprob (line 74) | def memory_efficient_logprob( function entropy_from_logits (line 113) | def entropy_from_logits(logits: torch.Tensor) -> torch.Tensor: function masked_mean (line 123) | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) ... function masked_sum (line 143) | def masked_sum(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -... function safe_append_to_jsonl_file (line 160) | def safe_append_to_jsonl_file(file_path, data): FILE: applications/ColossalChat/coati/distributed/zero_bubble/consumer.py class BaseConsumer (line 21) | class BaseConsumer: method __init__ (line 22) | def __init__( method setup (line 69) | def setup(self) -> None: method get_ddp_config (line 94) | def get_ddp_config(self) -> Dict[str, Any]: method init_collective_group (line 110) | def init_collective_group( method state_dict (line 123) | def state_dict(self) -> Dict[str, torch.Tensor]: method step (line 126) | def step(self, **kwargs) -> Optional[float]: method prepare_mini_batch (line 129) | def prepare_mini_batch(self, effective_group_to_raw_group_mapping: Dic... method calculate_effective_group_to_raw_group_mapping (line 153) | def calculate_effective_group_to_raw_group_mapping(self): method loop (line 160) | def loop(self) -> None: method __del__ (line 345) | def __del__(self): FILE: applications/ColossalChat/coati/distributed/zero_bubble/distributor.py class Distributor (line 13) | class Distributor: method __init__ (line 14) | def __init__( method init_collective_group (line 31) | def init_collective_group( method loop (line 44) | def loop(self): method get_weight_version (line 123) | def get_weight_version(self): FILE: applications/ColossalChat/coati/distributed/zero_bubble/grpo_consumer.py class GRPOConsumer (line 19) | class GRPOConsumer(BaseConsumer): method __init__ (line 20) | def __init__( method setup (line 134) | def setup(self): method step (line 164) | def step(self, pbar: Any, **kwargs) -> Optional[float]: method state_dict (line 531) | def state_dict(self): FILE: applications/ColossalChat/coati/distributed/zero_bubble/producer.py class BaseProducer (line 33) | class BaseProducer: method __init__ (line 34) | def __init__( method init_collective_group (line 193) | def init_collective_group( method rollout (line 206) | def rollout(self, input_ids: torch.Tensor, attention_mask: torch.Tenso... method load_state_dict (line 209) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None: method loop (line 212) | def loop(self) -> None: method __del__ (line 441) | def __del__(self): class SimpleProducer (line 446) | class SimpleProducer(BaseProducer): method __init__ (line 447) | def __init__( method rollout (line 510) | def rollout(self, input_ids, attention_mask, **kwargs): method __del__ (line 533) | def __del__(self): method load_state_dict (line 539) | def load_state_dict(self, state_dict): FILE: applications/ColossalChat/coati/experience_buffer/base.py class ExperienceBuffer (line 7) | class ExperienceBuffer(ABC): method __init__ (line 15) | def __init__(self, sample_batch_size: int, limit: int = 0) -> None: method append (line 22) | def append(self, experience: Experience) -> None: method clear (line 26) | def clear(self) -> None: method sample (line 30) | def sample(self) -> Experience: method __len__ (line 34) | def __len__(self) -> int: method __getitem__ (line 38) | def __getitem__(self, idx: int) -> Any: method collate_fn (line 42) | def collate_fn(self, batch: Any) -> Experience: FILE: applications/ColossalChat/coati/experience_buffer/naive.py class NaiveExperienceBuffer (line 15) | class NaiveExperienceBuffer(ExperienceBuffer): method __init__ (line 24) | def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload... method append (line 34) | def append(self, experience: Experience) -> None: method clear (line 49) | def clear(self) -> None: method sample (line 53) | def sample(self) -> Experience: method __len__ (line 69) | def __len__(self) -> int: method __getitem__ (line 72) | def __getitem__(self, idx: int) -> BufferItem: method collate_fn (line 75) | def collate_fn(self, batch) -> Experience: FILE: applications/ColossalChat/coati/experience_buffer/utils.py class BufferItem (line 10) | class BufferItem: function split_experience_batch (line 35) | def split_experience_batch(experience: Experience) -> List[BufferItem]: function _zero_pad_sequences (line 53) | def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = "left... function make_experience_batch (line 64) | def make_experience_batch(items: List[BufferItem]) -> Experience: FILE: applications/ColossalChat/coati/experience_maker/base.py class Experience (line 11) | class Experience: method to_device (line 38) | def to_device(self, device: torch.device) -> None: method pin_memory (line 50) | def pin_memory(self): class ExperienceMaker (line 64) | class ExperienceMaker(ABC): method __init__ (line 69) | def __init__( method make_experience (line 79) | def make_experience(self, input_ids: torch.Tensor, attention_mask: tor... FILE: applications/ColossalChat/coati/experience_maker/naive.py function is_rank_0 (line 24) | def is_rank_0() -> bool: class NaiveExperienceMaker (line 28) | class NaiveExperienceMaker(ExperienceMaker): method __init__ (line 33) | def __init__( method calculate_advantage (line 64) | def calculate_advantage(self, value: torch.Tensor, reward: torch.Tenso... method make_experience (line 87) | def make_experience( FILE: applications/ColossalChat/coati/models/base.py class BaseModel (line 12) | class BaseModel(nn.Module): method __init__ (line 22) | def __init__(self, pretrained: str = None, config: Optional[Pretrained... method resize_token_embeddings (line 46) | def resize_token_embeddings(self, *args, **kwargs): FILE: applications/ColossalChat/coati/models/critic.py class Critic (line 13) | class Critic(BaseModel): method __init__ (line 22) | def __init__(self, pretrained: str = None, config: Optional[Pretrained... method forward (line 27) | def forward(self, input_ids: torch.LongTensor, attention_mask: Optiona... method get_input_embeddings (line 36) | def get_input_embeddings(self): method get_output_embeddings (line 39) | def get_output_embeddings(self): FILE: applications/ColossalChat/coati/models/generation.py function _prepare_logits_processor (line 19) | def _prepare_logits_processor( function _is_sequence_finished (line 44) | def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool: function update_model_kwargs_fn (line 61) | def update_model_kwargs_fn(outputs: dict, new_mask, **model_kwargs) -> d... function prepare_inputs_fn (line 92) | def prepare_inputs_fn(input_ids: torch.Tensor, **model_kwargs) -> dict: function _sample (line 97) | def _sample( function generate (line 200) | def generate( function _sample_streaming (line 262) | def _sample_streaming( function generate_streaming (line 378) | def generate_streaming( FILE: applications/ColossalChat/coati/models/lora.py class LoraManager (line 22) | class LoraManager: class LoraConfig (line 30) | class LoraConfig: method from_file (line 40) | def from_file(cls, config_file: str): class LoraBase (line 48) | class LoraBase(lora.LoRALayer, nn.Module): method __init__ (line 49) | def __init__( method reset_parameters (line 68) | def reset_parameters(self): method train (line 103) | def train(self, mode: bool = True): class LoraLinear (line 124) | class LoraLinear(LoraBase): method __init__ (line 127) | def __init__( method forward (line 160) | def forward(self, x: torch.Tensor): class LoraEmbedding (line 169) | class LoraEmbedding(LoraBase): method __init__ (line 172) | def __init__( method _embed (line 218) | def _embed(self, x: torch.Tensor, weight) -> torch.Tensor: method forward (line 229) | def forward(self, x: torch.Tensor): method train (line 239) | def train(self, mode: bool = True): function _lora_linear_wrapper (line 260) | def _lora_linear_wrapper(linear: nn.Linear, lora_config: LoraConfig) -> ... function _convert_to_lora_recursively (line 287) | def _convert_to_lora_recursively(module: nn.Module, parent_name: str, lo... function convert_to_lora_module (line 337) | def convert_to_lora_module(module: nn.Module, lora_config: LoraConfig) -... FILE: applications/ColossalChat/coati/models/loss.py class GPTLMLoss (line 14) | class GPTLMLoss(nn.Module): method __init__ (line 19) | def __init__(self): method forward (line 24) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... class PolicyLoss (line 31) | class PolicyLoss(nn.Module): method __init__ (line 36) | def __init__(self, clip_eps: float = 0.2, skip_threshold: float = 20.0... method forward (line 41) | def forward( class ValueLoss (line 70) | class ValueLoss(nn.Module): method __init__ (line 75) | def __init__(self, clip_eps: float = 0.2) -> None: method forward (line 79) | def forward( class DpoLoss (line 97) | class DpoLoss(nn.Module): method __init__ (line 106) | def __init__(self, beta: float = 0.1, gamma: float = 0.0): method forward (line 118) | def forward( class LogSigLoss (line 174) | class LogSigLoss(nn.Module): method forward (line 180) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te... class LogExpLoss (line 184) | class LogExpLoss(nn.Module): method forward (line 190) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te... class OddsRatioLoss (line 195) | class OddsRatioLoss(nn.Module): method forward (line 201) | def forward( class KTOLoss (line 219) | class KTOLoss(nn.Module): method __init__ (line 220) | def __init__(self, beta: float = 0.1, desirable_weight: float = 1.0, u... method forward (line 232) | def forward( FILE: applications/ColossalChat/coati/models/reward_model.py class RewardModel (line 13) | class RewardModel(BaseModel): method __init__ (line 23) | def __init__(self, pretrained: str = None, config: Optional[Pretrained... method forward (line 28) | def forward( method get_input_embeddings (line 43) | def get_input_embeddings(self): method get_output_embeddings (line 46) | def get_output_embeddings(self): FILE: applications/ColossalChat/coati/models/rlvr_reward_model.py class RLVRRewardModel (line 10) | class RLVRRewardModel: method __init__ (line 19) | def __init__(self, reward_fn_list: List[Callable], **kwargs) -> None: method __call__ (line 23) | def __call__( method to (line 46) | def to(self, device): method eval (line 49) | def eval(self): FILE: applications/ColossalChat/coati/models/utils.py function get_model_numel (line 9) | def get_model_numel(model: torch.nn.Module) -> int: function compute_reward (line 13) | def compute_reward( function _log_probs_from_logits (line 41) | def _log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -... function calc_action_log_probs (line 57) | def calc_action_log_probs(logits: torch.Tensor, sequences: torch.LongTen... function masked_mean (line 72) | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) ... function calc_masked_log_probs (line 92) | def calc_masked_log_probs( function load_json (line 115) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]: function save_json (line 123) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ... function disable_dropout (line 131) | def disable_dropout(model: torch.nn.Module): function repad_to_left (line 147) | def repad_to_left(tensor, tokenizer): FILE: applications/ColossalChat/coati/quant/llama_gptq/loader.py function load_quant (line 8) | def load_quant(model: nn.Module, checkpoint: str, wbits: int, groupsize:... FILE: applications/ColossalChat/coati/quant/llama_gptq/model_utils.py function find_layers (line 6) | def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=""): FILE: applications/ColossalChat/coati/quant/llama_gptq/quant.py function quantize (line 10) | def quantize(x, scale, zero, maxq): class Quantizer (line 15) | class Quantizer(nn.Module): method __init__ (line 16) | def __init__(self, shape=1): method configure (line 22) | def configure(self, bits, perchannel=False, sym=True, mse=False, norm=... method find_params (line 31) | def find_params(self, x, weight=False): method quantize (line 110) | def quantize(self, x): method enabled (line 115) | def enabled(self): method ready (line 118) | def ready(self): class QuantLinear (line 130) | class QuantLinear(nn.Module): method __init__ (line 131) | def __init__(self, bits, groupsize, infeatures, outfeatures): method pack (line 150) | def pack(self, linear, scales, zeros): method forward (line 239) | def forward(self, x): function make_quant (line 274) | def make_quant(module, names, bits, groupsize, name=""): FILE: applications/ColossalChat/coati/quant/utils.py function _noop (line 6) | def _noop(*args, **kwargs): function low_resource_init (line 11) | def low_resource_init(): FILE: applications/ColossalChat/coati/ray/callbacks/base.py class TrainerCallback (line 6) | class TrainerCallback(ABC): method on_fit_start (line 11) | def on_fit_start(self) -> None: method on_fit_end (line 14) | def on_fit_end(self) -> None: method on_episode_start (line 17) | def on_episode_start(self, episode: int) -> None: method on_episode_end (line 20) | def on_episode_end(self, episode: int) -> None: method on_epoch_start (line 23) | def on_epoch_start(self, epoch: int) -> None: method on_epoch_end (line 26) | def on_epoch_end(self, epoch: int) -> None: method on_batch_start (line 29) | def on_batch_start(self) -> None: method on_batch_end (line 32) | def on_batch_end(self, metrics: dict, experience: Experience) -> None: method on_update_start (line 35) | def on_update_start(self) -> None: method on_update_end (line 38) | def on_update_end(self) -> None: class MakerCallback (line 42) | class MakerCallback(ABC): method on_loop_start (line 43) | def on_loop_start(self) -> None: method on_loop_end (line 46) | def on_loop_end(self) -> None: method on_make_experience_start (line 49) | def on_make_experience_start(self) -> None: method on_make_experience_end (line 52) | def on_make_experience_end(self, experience: Experience) -> None: method on_send_start (line 55) | def on_send_start(self) -> None: method on_send_end (line 58) | def on_send_end(self) -> None: method on_batch_start (line 61) | def on_batch_start(self) -> None: method on_batch_end (line 64) | def on_batch_end(self) -> None: FILE: applications/ColossalChat/coati/ray/callbacks/performance_evaluator.py function get_world_size (line 11) | def get_world_size() -> int: function print_rank_0 (line 17) | def print_rank_0(*args, **kwargs) -> None: function all_reduce_mean (line 23) | def all_reduce_mean(x: float, world_size: int) -> float: class Timer (line 32) | class Timer: method __init__ (line 33) | def __init__(self) -> None: method start (line 37) | def start(self) -> None: method end (line 40) | def end(self) -> None: method reset (line 43) | def reset(self) -> None: class ExperienceMakerPerformanceEvaluator (line 47) | class ExperienceMakerPerformanceEvaluator(MakerCallback): method __init__ (line 48) | def __init__( method on_make_experience_start (line 68) | def on_make_experience_start(self) -> None: method on_make_experience_end (line 71) | def on_make_experience_end(self, experience: Experience) -> None: method on_send_start (line 92) | def on_send_start(self) -> None: method on_send_end (line 95) | def on_send_end(self) -> None: method on_batch_start (line 98) | def on_batch_start(self) -> None: method on_batch_end (line 101) | def on_batch_end(self) -> None: method on_loop_end (line 104) | def on_loop_end(self) -> None: class TrainerPerformanceEvaluator (line 127) | class TrainerPerformanceEvaluator(TrainerCallback): method __init__ (line 128) | def __init__( method on_episode_start (line 153) | def on_episode_start(self, episodes: int) -> None: method on_episode_end (line 159) | def on_episode_end(self, episodes: int) -> None: method on_batch_start (line 164) | def on_batch_start(self) -> None: method on_batch_end (line 169) | def on_batch_end(self, metrics: dict, experience: Experience) -> None: method on_update_start (line 183) | def on_update_start(self) -> None: method on_update_end (line 188) | def on_update_end(self) -> None: method on_fit_end (line 193) | def on_fit_end(self) -> None: FILE: applications/ColossalChat/coati/ray/detached_replay_buffer.py class DetachedReplayBuffer (line 11) | class DetachedReplayBuffer: method __init__ (line 24) | def __init__(self, sample_batch_size: int, limit: int = 0) -> None: method append (line 31) | def append(self, experience: Experience) -> None: method extend (line 39) | def extend(self, items: List[BufferItem]) -> None: method clear (line 50) | def clear(self) -> None: method sample (line 58) | def sample(self, worker_rank=0, to_device="cpu") -> Experience: method _sample_and_erase (line 64) | def _sample_and_erase(self) -> Experience: method get_length (line 68) | def get_length(self) -> int: FILE: applications/ColossalChat/coati/ray/detached_trainer_base.py class DetachedTrainer (line 17) | class DetachedTrainer(ABC): method __init__ (line 33) | def __init__( method update_target_holder_list (line 51) | def update_target_holder_list(self): method _update_remote_makers (line 59) | def _update_remote_makers(self, fully_update: bool = False, **kwargs): method sync_models_to_remote_makers (line 62) | def sync_models_to_remote_makers(self, **kwargs): method training_step (line 66) | def training_step(self, experience: Experience) -> Dict[str, Any]: method _learn (line 69) | def _learn(self, update_steps: int, train_epochs: int) -> None: method _learn_epoch (line 86) | def _learn_epoch(self, pbar: tqdm, data: List[Experience]) -> None: method fit (line 105) | def fit(self, total_steps: int, update_steps: int, train_epochs: int =... method buffer_get_length (line 117) | def buffer_get_length(self): method buffer_append (line 124) | def buffer_append(self, experience: Experience): method buffer_extend (line 131) | def buffer_extend(self, items: List[BufferItem]): method _buffer_sample (line 138) | def _buffer_sample(self): method _on_fit_start (line 141) | def _on_fit_start(self) -> None: method _on_fit_end (line 145) | def _on_fit_end(self) -> None: method _on_episode_start (line 149) | def _on_episode_start(self, episode: int) -> None: method _on_episode_end (line 153) | def _on_episode_end(self, episode: int) -> None: method _on_epoch_start (line 157) | def _on_epoch_start(self, epoch: int) -> None: method _on_epoch_end (line 161) | def _on_epoch_end(self, epoch: int) -> None: method _on_batch_start (line 165) | def _on_batch_start(self) -> None: method _on_batch_end (line 169) | def _on_batch_end(self, metrics: dict, experience: Experience) -> None: method _on_update_start (line 173) | def _on_update_start(self) -> None: method _on_update_end (line 177) | def _on_update_end(self) -> None: FILE: applications/ColossalChat/coati/ray/detached_trainer_ppo.py class DetachedPPOTrainer (line 22) | class DetachedPPOTrainer(DetachedTrainer): method __init__ (line 43) | def __init__( method _update_remote_makers (line 104) | def _update_remote_makers(self, fully_update: bool = False, **config): method training_step (line 142) | def training_step(self, experience: Experience) -> Dict[str, float]: method strategy_save_actor (line 167) | def strategy_save_actor(self, path: str, only_rank0: bool = False) -> ... method strategy_save_critic (line 170) | def strategy_save_critic(self, path: str, only_rank0: bool = False) ->... method strategy_save_actor_optim (line 173) | def strategy_save_actor_optim(self, path: str, only_rank0: bool = Fals... method strategy_save_critic_optim (line 176) | def strategy_save_critic_optim(self, path: str, only_rank0: bool = Fal... method _get_model_state_dict_shard (line 179) | def _get_model_state_dict_shard(self, model: torch.nn.Module, fully_up... method _get_model_lora_config_dict (line 187) | def _get_model_lora_config_dict(self, model: torch.nn.Module): FILE: applications/ColossalChat/coati/ray/experience_maker_holder.py class ExperienceMakerHolder (line 22) | class ExperienceMakerHolder: method __init__ (line 31) | def __init__( method _get_ready (line 93) | def _get_ready(self): method _fully_initialized (line 97) | def _fully_initialized(self): method _init_target_trainer_list (line 100) | def _init_target_trainer_list(self): method _make_experience (line 108) | def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -... method _send_items (line 117) | def _send_items(self, experience: Experience) -> None: method _inference_step (line 128) | def _inference_step(self, batch) -> None: method workingloop (line 141) | def workingloop(self, dataloader_fn: Callable[[], Iterable], num_epoch... method update_experience_maker (line 171) | def update_experience_maker( method _on_make_experience_start (line 231) | def _on_make_experience_start(self) -> None: method _on_make_experience_end (line 235) | def _on_make_experience_end(self, experience: Experience) -> None: method _on_loop_start (line 239) | def _on_loop_start(self) -> None: method _on_loop_end (line 243) | def _on_loop_end(self) -> None: method _on_send_start (line 247) | def _on_send_start(self) -> None: method _on_send_end (line 251) | def _on_send_end(self) -> None: method _on_batch_start (line 255) | def _on_batch_start(self) -> None: method _on_batch_end (line 259) | def _on_batch_end(self) -> None: function _set_default_generate_kwargs (line 264) | def _set_default_generate_kwargs(generate_kwargs: dict, actor: Actor) ->... FILE: applications/ColossalChat/coati/ray/lora_constructor.py class LoRAConfig (line 10) | class LoRAConfig: class LoRAConstructor (line 17) | class LoRAConstructor: method __init__ (line 39) | def __init__(self): method register_lora_config (line 42) | def register_lora_config(self, lora_config_dict: Dict[str, Any]): method reconstruct_increase (line 45) | def reconstruct_increase(self, state_dict_lora: Dict[str, Any], lora_c... method _compute (line 72) | def _compute(self, lora_A, lora_B, config=LoRAConfig()): method load_state_dict_increase (line 82) | def load_state_dict_increase(self, model: nn.Module, state_dict_increa... method filter_state_dict_lora (line 90) | def filter_state_dict_lora(state_dict: Dict[str, Any], keep_non_lora=F... method extract_lora_config (line 107) | def extract_lora_config(model: nn.Module) -> Dict[str, LoRAConfig]: FILE: applications/ColossalChat/coati/ray/utils.py function is_rank_0 (line 16) | def is_rank_0() -> bool: function get_rank (line 20) | def get_rank() -> int: function get_world_size (line 24) | def get_world_size() -> int: function get_actor_from_args (line 28) | def get_actor_from_args(model: str, pretrained: str = None, config=None,... function get_critic_from_args (line 42) | def get_critic_from_args(model: str, pretrained: str = None, config=None... function get_reward_model_from_args (line 56) | def get_reward_model_from_args(model: str, pretrained: str = None, confi... function get_strategy_from_args (line 70) | def get_strategy_from_args(strategy: str): function get_tokenizer_from_args (line 88) | def get_tokenizer_from_args(model: str, **kwargs): function set_dist_env (line 105) | def set_dist_env(env_info: Dict[str, str]): function get_model_numel (line 113) | def get_model_numel(model: nn.Module) -> int: function get_receivers_per_sender (line 118) | def get_receivers_per_sender(sender_idx: int, num_senders: int, num_rece... function state_dict_to (line 133) | def state_dict_to( FILE: applications/ColossalChat/coati/trainer/base.py class SLTrainer (line 24) | class SLTrainer(ABC): method __init__ (line 35) | def __init__( method _train (line 53) | def _train(self, epoch): method _eval (line 57) | def _eval(self, epoch): method _before_fit (line 61) | def _before_fit(self): method fit (line 64) | def fit(self, *args, **kwargs): class OLTrainer (line 71) | class OLTrainer(ABC): method __init__ (line 83) | def __init__( method _fit_ctx (line 102) | def _fit_ctx(self) -> None: method _episode_ctx (line 112) | def _episode_ctx(self, episode: int) -> None: method _on_make_experience_start (line 121) | def _on_make_experience_start(self) -> None: method _on_make_experience_end (line 125) | def _on_make_experience_end(self, experience: Experience) -> None: method _on_learn_epoch_start (line 129) | def _on_learn_epoch_start(self, epoch: int) -> None: method _on_learn_epoch_end (line 133) | def _on_learn_epoch_end(self, epoch: int) -> None: method _on_learn_batch_start (line 137) | def _on_learn_batch_start(self) -> None: method _on_learn_batch_end (line 141) | def _on_learn_batch_end(self, experience: Experience) -> None: method _make_experience (line 146) | def _make_experience(self, collect_step: int): method _learn (line 153) | def _learn(self, update_step: int): method _setup_update_phrase_dataload (line 161) | def _setup_update_phrase_dataload(self): method _save_checkpoint (line 168) | def _save_checkpoint(self, episode: int = 0): method _collect_phase (line 174) | def _collect_phase(self, collect_step: int): method _update_phase (line 180) | def _update_phase(self, update_step: int): method _before_fit (line 185) | def _before_fit(self, *args, **kwargs): method fit (line 188) | def fit( FILE: applications/ColossalChat/coati/trainer/callbacks/base.py class Callback (line 6) | class Callback(ABC): method on_fit_start (line 11) | def on_fit_start(self) -> None: method on_fit_end (line 14) | def on_fit_end(self) -> None: method on_episode_start (line 17) | def on_episode_start(self, episode: int) -> None: method on_episode_end (line 20) | def on_episode_end(self, episode: int) -> None: method on_make_experience_start (line 23) | def on_make_experience_start(self) -> None: method on_make_experience_end (line 26) | def on_make_experience_end(self, experience: Experience) -> None: method on_learn_epoch_start (line 29) | def on_learn_epoch_start(self, epoch: int) -> None: method on_learn_epoch_end (line 32) | def on_learn_epoch_end(self, epoch: int) -> None: method on_learn_batch_start (line 35) | def on_learn_batch_start(self) -> None: method on_learn_batch_end (line 38) | def on_learn_batch_end(self, experience: Experience) -> None: FILE: applications/ColossalChat/coati/trainer/callbacks/performance_evaluator.py function get_world_size (line 11) | def get_world_size() -> int: function save_eval_result_rank_0 (line 17) | def save_eval_result_rank_0(s: str, save_path: str, **kwargs) -> None: function divide (line 24) | def divide(x: float, y: float) -> float: function all_reduce_mean (line 33) | def all_reduce_mean(x: float, world_size: int) -> float: class Timer (line 42) | class Timer: method __init__ (line 43) | def __init__(self) -> None: method start (line 47) | def start(self) -> None: method end (line 50) | def end(self) -> None: method reset (line 55) | def reset(self) -> None: class PerformanceEvaluator (line 59) | class PerformanceEvaluator(Callback): method __init__ (line 71) | def __init__( method on_episode_start (line 102) | def on_episode_start(self, episode: int) -> None: method on_episode_end (line 108) | def on_episode_end(self, episode: int) -> None: method on_make_experience_start (line 113) | def on_make_experience_start(self) -> None: method on_make_experience_end (line 118) | def on_make_experience_end(self, experience: Experience) -> None: method on_learn_batch_start (line 141) | def on_learn_batch_start(self) -> None: method on_learn_batch_end (line 146) | def on_learn_batch_end(self, experience: Experience) -> None: method on_fit_end (line 160) | def on_fit_end(self) -> None: FILE: applications/ColossalChat/coati/trainer/dpo.py class DPOTrainer (line 29) | class DPOTrainer(SLTrainer): method __init__ (line 49) | def __init__( method _before_fit (line 86) | def _before_fit( method _train (line 123) | def _train(self, epoch: int): method _eval (line 406) | def _eval(self, epoch: int): FILE: applications/ColossalChat/coati/trainer/grpo.py function _set_default_generate_kwargs (line 33) | def _set_default_generate_kwargs(actor: PreTrainedModel) -> Dict: class GRPOTrainer (line 53) | class GRPOTrainer(OLTrainer): method __init__ (line 78) | def __init__( method _before_fit (line 164) | def _before_fit( method _setup_update_phrase_dataload (line 195) | def _setup_update_phrase_dataload(self): method _make_experience (line 210) | def _make_experience(self, collect_step: int) -> Experience: method _training_step (line 228) | def _training_step(self, experience: Experience): method _learn (line 331) | def _learn(self, update_step: int): method _save_checkpoint (line 361) | def _save_checkpoint(self, num_train_step: int = 0): FILE: applications/ColossalChat/coati/trainer/kto.py class KTOTrainer (line 28) | class KTOTrainer(SLTrainer): method __init__ (line 50) | def __init__( method _before_fit (line 89) | def _before_fit( method _train (line 119) | def _train(self, epoch: int): method _eval (line 265) | def _eval(self, epoch: int): FILE: applications/ColossalChat/coati/trainer/orpo.py class ORPOTrainer (line 27) | class ORPOTrainer(SLTrainer): method __init__ (line 46) | def __init__( method _before_fit (line 79) | def _before_fit( method _train (line 109) | def _train(self, epoch: int): method _eval (line 240) | def _eval(self, epoch: int): FILE: applications/ColossalChat/coati/trainer/ppo.py function _set_default_generate_kwargs (line 33) | def _set_default_generate_kwargs(actor: PreTrainedModel) -> Dict: class PPOTrainer (line 54) | class PPOTrainer(OLTrainer): method __init__ (line 81) | def __init__( method _before_fit (line 155) | def _before_fit( method _setup_update_phrase_dataload (line 186) | def _setup_update_phrase_dataload(self): method _make_experience (line 201) | def _make_experience(self, collect_step: int) -> Experience: method _training_step (line 217) | def _training_step(self, experience: Experience): method _learn (line 340) | def _learn(self, update_step: int): method _save_checkpoint (line 371) | def _save_checkpoint(self, episode: int = 0): FILE: applications/ColossalChat/coati/trainer/rm.py class RewardModelTrainer (line 26) | class RewardModelTrainer(SLTrainer): method __init__ (line 46) | def __init__( method _before_fit (line 77) | def _before_fit( method _train (line 107) | def _train(self, epoch): method _eval (line 199) | def _eval(self, epoch): FILE: applications/ColossalChat/coati/trainer/sft.py class SFTTrainer (line 25) | class SFTTrainer(SLTrainer): method __init__ (line 38) | def __init__( method _before_fit (line 65) | def _before_fit( method _train (line 98) | def _train(self, epoch: int): method _eval (line 181) | def _eval(self, epoch: int): FILE: applications/ColossalChat/coati/trainer/utils.py class AnnealingScheduler (line 15) | class AnnealingScheduler: method __init__ (line 16) | def __init__(self, start, end, warmup_steps=100, annealing_step=2000): method get_temperature (line 23) | def get_temperature(self): method step_forward (line 32) | def step_forward(self): class CycledDataLoader (line 36) | class CycledDataLoader: method __init__ (line 52) | def __init__( method next (line 61) | def next(self): function is_rank_0 (line 81) | def is_rank_0() -> bool: function to_device (line 91) | def to_device(x: Any, device: torch.device) -> Any: function all_reduce_mean (line 111) | def all_reduce_mean(tensor: torch.Tensor, plugin: Plugin = None) -> torc... function all_reduce_sum (line 131) | def all_reduce_sum(tensor: torch.Tensor, plugin: Plugin = None) -> torch... function all_gather_tensors (line 149) | def all_gather_tensors(local_tensor_list: torch.Tensor, plugin: Plugin =... FILE: applications/ColossalChat/coati/utils/accumulative_meter.py class AccumulativeMeanVariable (line 6) | class AccumulativeMeanVariable: method __init__ (line 11) | def __init__(self): method add (line 15) | def add(self, value, count_update=1): method get (line 26) | def get(self): method reset (line 35) | def reset(self): class AccumulativeMeanMeter (line 43) | class AccumulativeMeanMeter: method __init__ (line 56) | def __init__(self): method add (line 59) | def add(self, name, value, count_update=1): method get (line 64) | def get(self, name): method reset (line 67) | def reset(self): FILE: applications/ColossalChat/coati/utils/ckpt_io.py function load_json (line 20) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]: function save_json (line 28) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ... function save_checkpoint (line 36) | def save_checkpoint( function load_checkpoint (line 72) | def load_checkpoint( FILE: applications/ColossalChat/coati/utils/reward_score/competition.py function math_competition_reward_fn (line 6) | def math_competition_reward_fn(input_ids, attention_mask, **kwargs): FILE: applications/ColossalChat/coati/utils/reward_score/gsm8k.py function gsm8k_reward_fn (line 6) | def gsm8k_reward_fn(input_ids, attention_mask, **kwargs): FILE: applications/ColossalChat/coati/utils/reward_score/utils.py function validate_response_structure (line 20) | def validate_response_structure(processed_str: str, tags: Dict = None) -... function extract_solution (line 58) | def extract_solution(solution_str: str) -> Tuple[Optional[str], str]: FILE: applications/ColossalChat/examples/community/peft/easy_dataset.py function _tokenize_fn (line 13) | def _tokenize_fn(strings: Sequence[str], tokenizer: AutoTokenizer, max_l... function preprocess (line 37) | def preprocess(sources: Sequence[str], targets: Sequence[str], tokenizer... class EasySupervisedDataset (line 50) | class EasySupervisedDataset(Dataset): method __init__ (line 51) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt... method __len__ (line 71) | def __len__(self): method __getitem__ (line 74) | def __getitem__(self, i) -> Dict[str, torch.Tensor]: method __repr__ (line 77) | def __repr__(self): method __str__ (line 80) | def __str__(self): class EasyPromptsDataset (line 84) | class EasyPromptsDataset(Dataset): method __init__ (line 85) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt... method __len__ (line 100) | def __len__(self): method __getitem__ (line 103) | def __getitem__(self, idx): method __repr__ (line 106) | def __repr__(self): method __str__ (line 109) | def __str__(self): class EasyRewardDataset (line 113) | class EasyRewardDataset(Dataset): method __init__ (line 114) | def __init__(self, train_file: str, tokenizer: AutoTokenizer, special_... method __len__ (line 146) | def __len__(self): method __getitem__ (line 150) | def __getitem__(self, idx): method __repr__ (line 159) | def __repr__(self): method __str__ (line 162) | def __str__(self): class EasySFTDataset (line 172) | class EasySFTDataset(Dataset): method __init__ (line 173) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt... method __len__ (line 227) | def __len__(self): method __getitem__ (line 231) | def __getitem__(self, idx): method __repr__ (line 235) | def __repr__(self): method __str__ (line 239) | def __str__(self): FILE: applications/ColossalChat/examples/community/peft/easy_models.py class Actor (line 13) | class Actor(Module): method __init__ (line 21) | def __init__(self, model: nn.Module) -> None: method generate (line 26) | def generate( method forward (line 48) | def forward( method get_base_model (line 57) | def get_base_model(self): class BLOOMActor (line 61) | class BLOOMActor(Actor): method __init__ (line 73) | def __init__( method print_trainable_parameters (line 92) | def print_trainable_parameters(self): FILE: applications/ColossalChat/examples/community/peft/train_peft_prompts.py function main (line 22) | def main(args): FILE: applications/ColossalChat/examples/community/peft/train_peft_sft.py function train (line 22) | def train(args): FILE: applications/ColossalChat/examples/community/ray/ray_job_script.py function main (line 6) | def main(api_server_endpoint="http://127.0.0.1:8265"): FILE: applications/ColossalChat/examples/community/ray/train_prompts_on_ray.py class ExperienceCompositionRefs (line 28) | class ExperienceCompositionRefs: method __init__ (line 29) | def __init__( class ExperienceMaker (line 44) | class ExperienceMaker: method __init__ (line 45) | def __init__(self, kl_coef) -> None: method make_experience (line 49) | def make_experience(self, experiment_computation_refs: ExperienceCompo... class DistributedTorchRayActor (line 65) | class DistributedTorchRayActor: method __init__ (line 66) | def __init__(self, world_size, rank, local_rank, master_addr, master_p... method _get_current_node_ip (line 83) | def _get_current_node_ip(): method _get_free_port (line 87) | def _get_free_port(): method get_master_addr_port (line 92) | def get_master_addr_port(self): class BasePPORole (line 96) | class BasePPORole(DistributedTorchRayActor): method add_experience_maker (line 97) | def add_experience_maker(self, kl_coef: float = 0.1): method make_experience (line 100) | def make_experience(self, experience_computation_ref: ExperienceCompos... method _init_strategy (line 103) | def _init_strategy(self, strategy: str): method _init_optimizer (line 114) | def _init_optimizer(self): method _prepare_model_with_strategy (line 120) | def _prepare_model_with_strategy(self, has_optimizer: bool): method _load_model_from_pretrained (line 127) | def _load_model_from_pretrained(self, model_class: Type[LoRAModule], p... method init_model_from_pretrained (line 130) | def init_model_from_pretrained( method eval (line 137) | def eval(self): class TrainablePPORole (line 141) | class TrainablePPORole(BasePPORole): method _load_model_from_pretrained (line 142) | def _load_model_from_pretrained(self, model_class, pretrain): method _train (line 146) | def _train(self): method _training_step (line 149) | def _training_step(self, experience: Experience): method learn_on_experiences (line 152) | def learn_on_experiences(self, experience_refs): class RayPPOActor (line 163) | class RayPPOActor(TrainablePPORole): method set_loss_function (line 164) | def set_loss_function(self, eps_clip: float): method load_tokenizer_from_pretrained (line 167) | def load_tokenizer_from_pretrained(self, model_type: str, pretrained): method setup_generate_kwargs (line 186) | def setup_generate_kwargs(self, generate_kwargs: dict): method load_csv_prompt_file_from_url_to_sampler (line 193) | def load_csv_prompt_file_from_url_to_sampler(self, prompt_url): method _generate (line 199) | def _generate(self, input_ids, **generate_kwargs): method sample_prompts_and_make_sequence (line 202) | def sample_prompts_and_make_sequence(self, experience_batch_size): method calculate_action_log_probs (line 211) | def calculate_action_log_probs(self, sequence_attention_action_mask): method _training_step (line 215) | def _training_step(self, experience): method save_checkpoint (line 226) | def save_checkpoint(self, save_path, should_save_optimizer: bool): method generate_answer (line 238) | def generate_answer(self, prompt, max_length=30, num_return_sequences=5): class RayPPOCritic (line 250) | class RayPPOCritic(TrainablePPORole): method set_loss_function (line 251) | def set_loss_function(self, value_clip: float): method _training_step (line 254) | def _training_step(self, experience): method calculate_value (line 267) | def calculate_value(self, sequence_attention_action_mask): class RayPPORewardModel (line 273) | class RayPPORewardModel(BasePPORole): method _load_model_from_pretrained (line 274) | def _load_model_from_pretrained(self, model_class, pretrain): method calculate_r (line 282) | def calculate_r(self, sequence_attention_action_mask): class RayPPOInitialModel (line 288) | class RayPPOInitialModel(BasePPORole): method _load_model_from_pretrained (line 289) | def _load_model_from_pretrained(self, model_class, pretrain): method calculate_base_action_log_probs (line 294) | def calculate_base_action_log_probs(self, sequence_attention_action_ma... class PPORayActorGroup (line 299) | class PPORayActorGroup: method __init__ (line 305) | def __init__(self, num_nodes, num_gpus_per_node, ray_actor_type: Type[... method _initiate_actors (line 311) | def _initiate_actors(self): method async_init_model_from_pretrained (line 344) | def async_init_model_from_pretrained( class TrainableModelRayActorGroup (line 353) | class TrainableModelRayActorGroup(PPORayActorGroup): method async_learn_on_experiences (line 354) | def async_learn_on_experiences(self, experience_refs): class PPOActorRayActorGroup (line 363) | class PPOActorRayActorGroup(TrainableModelRayActorGroup): method __init__ (line 364) | def __init__(self, num_nodes, num_gpus_per_node) -> None: method async_prepare_for_sequence_generation (line 367) | def async_prepare_for_sequence_generation(self, model: str, pretrain: ... method load_csv_prompt_file_from_url_to_sampler (line 374) | def load_csv_prompt_file_from_url_to_sampler(self, csv_url): method async_sample_prompts_and_make_sequence (line 377) | def async_sample_prompts_and_make_sequence(self, experience_batch_size): method async_calculate_action_log_probs (line 380) | def async_calculate_action_log_probs(self, sequences_attention_mask_ac... method set_loss_function (line 390) | def set_loss_function(self, eps_clip: float = 0.2): method save_checkpoint (line 393) | def save_checkpoint(self, save_path, should_save_optimizer): class PPOCriticRayActorGroup (line 397) | class PPOCriticRayActorGroup(TrainableModelRayActorGroup): method __init__ (line 398) | def __init__(self, num_nodes, num_gpus_per_node) -> None: method async_calculate_value (line 401) | def async_calculate_value(self, sequences_attention_mask_action_mask_r... method set_loss_function (line 411) | def set_loss_function(self, value_clip: float = 0.4): class PPOInitialRayActorGroup (line 415) | class PPOInitialRayActorGroup(PPORayActorGroup): method __init__ (line 416) | def __init__(self, num_nodes, num_gpus_per_node) -> None: method async_calculate_base_action_log_probs (line 419) | def async_calculate_base_action_log_probs(self, sequences_attention_ma... class PPORewardRayActorGroup (line 430) | class PPORewardRayActorGroup(PPORayActorGroup): method __init__ (line 431) | def __init__(self, num_nodes, num_gpus_per_node) -> None: method async_calculate_r (line 434) | def async_calculate_r(self, sequences_attention_mask_action_mask_refs): function main (line 445) | def main(args): FILE: applications/ColossalChat/examples/data_preparation_scripts/prepare_dataset.py function main (line 52) | def main(): FILE: applications/ColossalChat/examples/inference/chatio.py class ChatIO (line 17) | class ChatIO(abc.ABC): method prompt_for_input (line 19) | def prompt_for_input(self, role: str) -> str: method prompt_for_output (line 23) | def prompt_for_output(self, role: str): method stream_output (line 27) | def stream_output(self, output_stream): class SimpleChatIO (line 31) | class SimpleChatIO(ChatIO): method prompt_for_input (line 32) | def prompt_for_input(self, role) -> str: method prompt_for_output (line 35) | def prompt_for_output(self, role: str): method stream_output (line 38) | def stream_output(self, output_stream): class RichChatIO (line 51) | class RichChatIO(ChatIO): method __init__ (line 52) | def __init__(self): method prompt_for_input (line 57) | def prompt_for_input(self, role) -> str: method prompt_for_output (line 68) | def prompt_for_output(self, role: str) -> str: method stream_output (line 71) | def stream_output(self, output_stream): class DummyChatIO (line 107) | class DummyChatIO(ChatIO): method __init__ (line 112) | def __init__(self): method prompt_for_input (line 116) | def prompt_for_input(self, role) -> str: method prompt_for_output (line 127) | def prompt_for_output(self, role: str) -> str: method stream_output (line 130) | def stream_output(self, output_stream): FILE: applications/ColossalChat/examples/inference/inference.py function get_gpu_memory (line 17) | def get_gpu_memory(max_gpus=None): function load_model_and_tokenizer (line 42) | def load_model_and_tokenizer(model_path, tokenizer_path, device="cuda", ... function _set_default_generate_kwargs (line 64) | def _set_default_generate_kwargs(model: PreTrainedModel) -> Dict: function generation_wrapper (line 85) | def generation_wrapper(*args, **kwargs): function main (line 92) | def main(args): FILE: applications/ColossalChat/examples/inference/web_chatbot/locustfile.py class GenerationUser (line 17) | class GenerationUser(HttpUser): method generate (line 19) | def generate(self): FILE: applications/ColossalChat/examples/inference/web_chatbot/server.py class GenerationTaskReq (line 24) | class GenerationTaskReq(BaseModel): function generate_streamingly (line 57) | def generate_streamingly(prompt, max_length, max_new_tokens, top_k, top_... function event_generator (line 92) | async def event_generator(request: Request, generator: Generator): function generate (line 105) | def generate(data: GenerationTaskReq, request: Request): function generate_no_stream (line 116) | def generate_no_stream(data: GenerationTaskReq, request: Request): FILE: applications/ColossalChat/examples/inference/web_chatbot/utils.py function update_model_kwargs_fn (line 12) | def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict: class Dialogue (line 33) | class Dialogue(BaseModel): class ChatPromptProcessor (line 38) | class ChatPromptProcessor: method __init__ (line 41) | def __init__(self, censored_words: List[str] = []): method preprocess_prompt (line 45) | def preprocess_prompt(self, history: List[Dialogue]) -> str: method postprocess_output (line 53) | def postprocess_output(self, output: str) -> str: method has_censored_words (line 56) | def has_censored_words(self, text: str) -> bool: class LockedIterator (line 63) | class LockedIterator: method __init__ (line 64) | def __init__(self, it, lock: Lock) -> None: method __iter__ (line 68) | def __iter__(self): method __next__ (line 71) | def __next__(self): function load_json (line 76) | def load_json(path: str): FILE: applications/ColossalChat/examples/training_scripts/lora_finetune.py function all_reduce_mean (line 39) | def all_reduce_mean(loss: torch.Tensor, plugin: Plugin) -> torch.Tensor: function train (line 46) | def train(args) -> None: FILE: applications/ColossalChat/examples/training_scripts/train_dpo.py function train (line 25) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_grpo.py function train (line 41) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_kto.py function train (line 25) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_orpo.py function train (line 25) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_ppo.py function train (line 50) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_rm.py function train (line 27) | def train(args): FILE: applications/ColossalChat/examples/training_scripts/train_sft.py function train (line 26) | def train(args): FILE: applications/ColossalChat/setup.py function fetch_requirements (line 4) | def fetch_requirements(path): function fetch_readme (line 9) | def fetch_readme(): function fetch_version (line 14) | def fetch_version(): FILE: applications/ColossalChat/start_code_verifier.py class CheckCorrectnessRequest (line 10) | class CheckCorrectnessRequest(BaseModel): class CheckCorrectnessResponse (line 18) | class CheckCorrectnessResponse(BaseModel): function check_correctness_api (line 24) | def check_correctness_api(request: CheckCorrectnessRequest): FILE: applications/ColossalChat/tests/test_lora.py class SimpleNN (line 9) | class SimpleNN(nn.Module): method __init__ (line 10) | def __init__(self, input_size, hidden_size, num_classes): method forward (line 16) | def forward(self, x): function test_overfit (line 23) | def test_overfit(): function test_lora_linear_accuracy (line 68) | def test_lora_linear_accuracy(): function test_lora_embedding_accuracy (line 89) | def test_lora_embedding_accuracy(): FILE: applications/ColossalEval/colossal_eval/dataset/agieval.py function get_prompt (line 55) | def get_prompt(line: Dict, dataset_name: str, logger: DistributedLogger)... function combine_prompt (line 103) | def combine_prompt(prompt_path, dataset_name, load_explanation=True, cha... class AGIEvalDataset (line 180) | class AGIEvalDataset(BaseDataset): method load (line 200) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/dataset/base.py class BaseDataset (line 9) | class BaseDataset: method __init__ (line 18) | def __init__(self, path, logger, *args, **kwargs): method save (line 21) | def save(self, save_path): method load (line 26) | def load(path, logger: DistributedLogger, *args, **kwargs): class DistributedDataset (line 30) | class DistributedDataset(Dataset): method __init__ (line 31) | def __init__(self, data): method __len__ (line 34) | def __len__(self): method __getitem__ (line 37) | def __getitem__(self, idx): FILE: applications/ColossalEval/colossal_eval/dataset/ceval.py function get_few_shot_data (line 78) | def get_few_shot_data(data: List[Dict], subject): class CEvalDataset (line 85) | class CEvalDataset(BaseDataset): method load (line 93) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/dataset/cmmlu.py function get_few_shot_data (line 89) | def get_few_shot_data(data: List[Dict], subject): class CMMLUDataset (line 96) | class CMMLUDataset(BaseDataset): method load (line 104) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/dataset/colossalai.py function get_data_per_category (line 24) | def get_data_per_category(data): class ColossalDataset (line 33) | class ColossalDataset(BaseDataset): method load (line 40) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis... FILE: applications/ColossalEval/colossal_eval/dataset/cvalues.py class CValuesDataset (line 23) | class CValuesDataset(BaseDataset): method load (line 31) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis... FILE: applications/ColossalEval/colossal_eval/dataset/gaokaobench.py function get_all_classes (line 44) | def get_all_classes(instruction: str): class GaoKaoBenchDataset (line 58) | class GaoKaoBenchDataset(BaseDataset): method load (line 72) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis... FILE: applications/ColossalEval/colossal_eval/dataset/gsm.py function get_few_shot_data (line 80) | def get_few_shot_data(): class GSMDataset (line 88) | class GSMDataset(BaseDataset): method load (line 96) | def load( FILE: applications/ColossalEval/colossal_eval/dataset/longbench.py class LongBenchDataset (line 68) | class LongBenchDataset(BaseDataset): method load (line 80) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis... FILE: applications/ColossalEval/colossal_eval/dataset/mmlu.py function get_few_shot_data (line 19) | def get_few_shot_data(data: List[Dict], subject): class MMLUDataset (line 26) | class MMLUDataset(BaseDataset): method load (line 34) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/dataset/mtbench.py class MTBenchDataset (line 23) | class MTBenchDataset(BaseDataset): method __init__ (line 30) | def __init__(self, path, logger: DistributedLogger, *args, **kwargs): method load (line 35) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis... FILE: applications/ColossalEval/colossal_eval/dataset/safetybench_en.py function get_query_str (line 36) | def get_query_str(question, options, choices_templates=CHOICE_TEMP, pad=... function process_test (line 55) | def process_test(sample_list, pad_choices=False): function process_dev (line 83) | def process_dev(sample_dict, pad_choices=False): function get_few_shot_data (line 107) | def get_few_shot_data(data: List[Dict]): function add_few_shot_to_test (line 114) | def add_few_shot_to_test(dataset): class SafetyBenchENDataset (line 125) | class SafetyBenchENDataset(BaseDataset): method load (line 133) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/dataset/safetybench_zh.py function get_query_str (line 36) | def get_query_str(question, options, choices_templates=CHOICE_TEMP, pad=... function process_test (line 55) | def process_test(sample_list, pad_choices=False): function process_dev (line 83) | def process_dev(sample_dict, pad_choices=False): function get_few_shot_data (line 107) | def get_few_shot_data(data: List[Dict]): function add_few_shot_to_test (line 114) | def add_few_shot_to_test(dataset): class SafetyBenchZHDataset (line 125) | class SafetyBenchZHDataset(BaseDataset): method load (line 133) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ... FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py class DatasetEvaluator (line 39) | class DatasetEvaluator(object): method __init__ (line 45) | def __init__(self, config_path: str, save_path: str): method _calculate_label_metrics (line 49) | def _calculate_label_metrics(self, metric: str, category: str): method _calculate_combined_metrics (line 93) | def _calculate_combined_metrics(self, metric: str, category: str): method _calculate_other_metrics (line 148) | def _calculate_other_metrics(self, metric: str, category: str): method _calculate_gpt_metrics (line 174) | def _calculate_gpt_metrics(self, metric: str, category: str): method _calculate_loss_metrics (line 192) | def _calculate_loss_metrics(self, metric: str, category: str): method _evaluate (line 245) | def _evaluate(self): method get_evaluation_results (line 282) | def get_evaluation_results( FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/gpt_judge.py function load_mt_prompts (line 28) | def load_mt_prompts(prompt_file: str): function get_mt_prompt (line 37) | def get_mt_prompt(prompts: Dict[str, str], multiturn: bool, math: bool): function chat_compeletion_openai (line 48) | def chat_compeletion_openai(messages: List[Dict], temperature: float = 0... function get_mtbench_judgements (line 69) | def get_mtbench_judgements(question: Dict[str, Any], prompts: Dict[str, ... function mtbench_single_judge (line 119) | def mtbench_single_judge(data: List[Dict], config_path: str): FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/metrics.py function _fix_fracs (line 205) | def _fix_fracs(string): function _fix_a_slash_b (line 237) | def _fix_a_slash_b(string): function _remove_right_units (line 252) | def _remove_right_units(string): function _fix_sqrt (line 262) | def _fix_sqrt(string): function _strip_string (line 277) | def _strip_string(string): function parse_math_answer (line 347) | def parse_math_answer(raw_string): function math_equivalence (line 418) | def math_equivalence(prediction, reference, **kwargs): function multi_choice_accuracy (line 436) | def multi_choice_accuracy(prediction, reference, **kwargs): function accuracy_by_options (line 460) | def accuracy_by_options(question, prediction, reference): function combined_single_choice_accuracy (line 474) | def combined_single_choice_accuracy(prediction, reference, **kwargs): function single_choice_accuracy (line 478) | def single_choice_accuracy(prediction, reference, **kwargs): function normalize_answer (line 500) | def normalize_answer(s): function normalize_zh_answer (line 519) | def normalize_zh_answer(s): function count_score (line 536) | def count_score(prediction, reference, **kwargs): function retrieval_score (line 546) | def retrieval_score(prediction, reference, **kwargs): function retrieval_zh_score (line 559) | def retrieval_zh_score(prediction, reference, **kwargs): function code_sim_score (line 572) | def code_sim_score(prediction, reference, **kwargs): function classification_score (line 582) | def classification_score(prediction, reference, **kwargs): function rouge_score (line 608) | def rouge_score(prediction, reference, **kwargs): function rouge_zh_score (line 617) | def rouge_zh_score(prediction, reference, **kwargs): function _f1_score (line 624) | def _f1_score(prediction, reference, **kwargs): function f1_score (line 635) | def f1_score(prediction, reference, **kwargs): function f1_zh_score (line 644) | def f1_zh_score(prediction, reference, **kwargs): function extract_answer_hf (line 654) | def extract_answer_hf(completion): function get_match_str (line 664) | def get_match_str(match, idx): function extract_answer (line 676) | def extract_answer(completion): function is_correct (line 697) | def is_correct(completion, answer): function gsm_accuracy (line 704) | def gsm_accuracy(prediction, reference, **kwargs): FILE: applications/ColossalEval/colossal_eval/evaluate/evaluator.py class Evaluator (line 9) | class Evaluator(object): method __init__ (line 15) | def __init__( method battle (line 33) | def battle(self, answers1: List[Dict], answers2: List[Dict]) -> None: method evaluate (line 40) | def evaluate(self, answers: List[Dict], targets: List[Dict], save_path... method save (line 81) | def save(self, path: str, model_name_list: List[str]) -> None: FILE: applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py function get_battle_result (line 32) | def get_battle_result(sys_prompt: str, user_prompt: str, id: int, max_to... function parse_battle_score (line 70) | def parse_battle_score(evaluation: str) -> List[float]: function battle (line 108) | def battle(answer1: List[Dict], answer2: List[Dict], prompt_dict: Dict[s... function save_battle_results (line 164) | def save_battle_results(evaluations: List[Dict], name1: str, name2: str,... function reference_template (line 248) | def reference_template(metric: str, language: str, reference: Dict[str, ... function fill_in_message (line 289) | def fill_in_message(role: str, content: str) -> Dict[str, str]: function multiturn_chat_completion (line 304) | def multiturn_chat_completion(user_messages: List[str], model: str, max_... function get_gpt_evaluation_without_logprobs (line 355) | def get_gpt_evaluation_without_logprobs( function get_gpt_evaluation_with_logprobs (line 432) | def get_gpt_evaluation_with_logprobs( function evaluate (line 496) | def evaluate( function calculate_scores_form_logprobs (line 634) | def calculate_scores_form_logprobs(logprobs: Dict[str, Any]) -> float: function calculate_scores_form_response (line 670) | def calculate_scores_form_response(response: str, evaluation: Dict[str, ... function save_gpt_evaluation_results (line 694) | def save_gpt_evaluation_results( function save_gpt_evaluation_statistics (line 716) | def save_gpt_evaluation_statistics(model_name: str, evaluations: List[Di... function analyze_gpt_evaluation_statistics (line 771) | def analyze_gpt_evaluation_statistics(statistics_path: str, save_path: s... FILE: applications/ColossalEval/colossal_eval/evaluate/utils.py function get_data_per_category (line 1) | def get_data_per_category(data, categories): FILE: applications/ColossalEval/colossal_eval/models/base.py class BaseModel (line 9) | class BaseModel: method __init__ (line 21) | def __init__( method inference (line 41) | def inference(self, data: List[Dict]) -> None: method generate (line 51) | def generate(self, inputs: List[str], max_new_tokens: int) -> List[str]: method get_loss (line 64) | def get_loss(self, batch: List[str], batch_target: List[str]) -> List[... method to (line 77) | def to(self, device): FILE: applications/ColossalEval/colossal_eval/models/chatglm.py class ChatGLMModel (line 13) | class ChatGLMModel(HuggingFaceModel): method _get_truncated_prompts (line 14) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in... method get_loss (line 30) | def get_loss( method _calculate_loss (line 114) | def _calculate_loss(self, input_ids_list: List[torch.LongTensor], labe... class ChatGLM2Model (line 150) | class ChatGLM2Model(ChatGLMModel): method _get_truncated_prompts (line 151) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in... method generate (line 167) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -... method get_loss (line 227) | def get_loss( FILE: applications/ColossalEval/colossal_eval/models/huggingface.py class HuggingFaceModel (line 21) | class HuggingFaceModel(BaseModel): method __init__ (line 39) | def __init__( method _get_choices_indices (line 63) | def _get_choices_indices(self, language: str): method _load_tokenizer (line 84) | def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], to... method _load_model (line 115) | def _load_model( method _calculate_loss (line 150) | def _calculate_loss(self, input_ids_list: List[torch.LongTensor], labe... method _get_truncated_prompts (line 186) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in... method _get_input_ids_and_labels_pretrain (line 212) | def _get_input_ids_and_labels_pretrain(self, batch_prompt: List[str]) ... method _get_input_ids_and_labels (line 253) | def _get_input_ids_and_labels( method inference (line 334) | def inference(self, data_loader: DataLoader, inference_kwargs: Dict[st... method generate (line 447) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -... method get_loss (line 505) | def get_loss( class HuggingFaceCausalLM (line 569) | class HuggingFaceCausalLM(HuggingFaceModel): method _load_model (line 587) | def _load_model( FILE: applications/ColossalEval/colossal_eval/models/vllm.py class vLLMModel (line 18) | class vLLMModel(HuggingFaceModel): method __init__ (line 43) | def __init__( method _load_model (line 90) | def _load_model( method _calculate_loss (line 177) | def _calculate_loss(self, inputs: List[str], labels: List[str]) -> Tup... method inference (line 217) | def inference(self, data_loader: DataLoader, inference_kwargs: Dict[st... method generate (line 330) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -... method get_loss (line 366) | def get_loss( class GetTokenLogitsProcessor (line 469) | class GetTokenLogitsProcessor: method __init__ (line 478) | def __init__( method __call__ (line 485) | def __call__(self, input_ids: torch.Tensor, logits: torch.Tensor) -> t... method get_target_logits (line 497) | def get_target_logits(self) -> torch.Tensor: FILE: applications/ColossalEval/colossal_eval/utils/conversation.py class SeparatorStyle (line 8) | class SeparatorStyle(Enum): class Conversation (line 16) | class Conversation: method clear (line 24) | def clear(self): method get_prompt (line 27) | def get_prompt(self): method get_prompt_with_target (line 63) | def get_prompt_with_target(self, target): method save_prompt (line 90) | def save_prompt(self): method append_message (line 102) | def append_message(self, role, message): method copy (line 105) | def copy(self): method dict (line 115) | def dict(self): function get_few_shot_prefix (line 126) | def get_few_shot_prefix(few_shot_data: List[str], tokenizer: Optional[Au... function get_batch_prompt (line 153) | def get_batch_prompt( FILE: applications/ColossalEval/colossal_eval/utils/utilities.py function is_rank_0 (line 8) | def is_rank_0() -> bool: function _make_w_io_base (line 12) | def _make_w_io_base(f, mode: str): function _make_r_io_base (line 21) | def _make_r_io_base(f, mode: str): function jdump (line 27) | def jdump(obj, f, mode="w", indent=4, default=str): function jload (line 49) | def jload(f, mode="r"): function get_json_list (line 57) | def get_json_list(file_path): FILE: applications/ColossalEval/examples/dataset_evaluation/eval_dataset.py function main (line 9) | def main(args): FILE: applications/ColossalEval/examples/dataset_evaluation/inference.py function rm_and_merge (line 21) | def rm_and_merge( function main (line 87) | def main(args): FILE: applications/ColossalEval/examples/gpt_evaluation/eval.py function main (line 9) | def main(args): FILE: applications/ColossalEval/examples/gpt_evaluation/inference.py function rm_and_merge (line 18) | def rm_and_merge( function main (line 83) | def main(args): FILE: applications/ColossalEval/setup.py function fetch_requirements (line 4) | def fetch_requirements(path): function fetch_readme (line 9) | def fetch_readme(): FILE: applications/ColossalMoE/infer.py function parse_args (line 14) | def parse_args(): function main (line 54) | def main(): FILE: applications/ColossalMoE/setup.py function fetch_requirements (line 4) | def fetch_requirements(path): function fetch_readme (line 9) | def fetch_readme(): function fetch_version (line 14) | def fetch_version(): FILE: applications/ColossalMoE/train.py function get_global_loss (line 21) | def get_global_loss(loss, booster): class RandomDataset (line 28) | class RandomDataset(Dataset): method __init__ (line 29) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo... method __len__ (line 35) | def __len__(self): method __getitem__ (line 38) | def __getitem__(self, idx): function parse_args (line 46) | def parse_args(): function main (line 142) | def main(): FILE: applications/ColossalMoE/utils.py function move_to_cuda (line 13) | def move_to_cuda(batch, device): function load_json (line 17) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]: function save_json (line 25) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ... function save_checkpoint (line 33) | def save_checkpoint( function load_checkpoint (line 63) | def load_checkpoint( FILE: applications/ColossalQA/colossalqa/chain/memory/summary.py class SummarizerMixin (line 24) | class SummarizerMixin(BaseModel): method predict_new_summary (line 36) | def predict_new_summary(self, messages: List[BaseMessage], existing_su... class ConversationSummaryMemory (line 51) | class ConversationSummaryMemory(BaseChatMemory, SummarizerMixin): method from_messages (line 58) | def from_messages( method memory_variables (line 71) | def memory_variables(self) -> List[str]: method load_memory_variables (line 75) | def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, A... method validate_prompt_input_variables (line 84) | def validate_prompt_input_variables(cls, values: Dict) -> Dict: method save_context (line 95) | def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]... method clear (line 100) | def clear(self) -> None: FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py class CustomBaseRetrievalQA (line 29) | class CustomBaseRetrievalQA(BaseRetrievalQA): method from_llm (line 33) | def from_llm( method from_chain_type (line 61) | def from_chain_type( method _call (line 74) | def _call( method _acall (line 133) | async def _acall( class RetrievalQA (line 181) | class RetrievalQA(CustomBaseRetrievalQA): method _get_docs (line 198) | def _get_docs( method _aget_docs (line 207) | async def _aget_docs( method _chain_type (line 217) | def _chain_type(self) -> str: FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py class LoadingCallable (line 25) | class LoadingCallable(Protocol): method __call__ (line 28) | def __call__(self, llm: BaseLanguageModel, **kwargs: Any) -> BaseCombi... function _load_stuff_chain (line 32) | def _load_stuff_chain( function load_qa_chain (line 65) | def load_qa_chain( FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py class CustomStuffDocumentsChain (line 19) | class CustomStuffDocumentsChain(StuffDocumentsChain): method _get_inputs (line 57) | def _get_inputs(self, docs: List[Document], **kwargs: Any) -> dict: FILE: applications/ColossalQA/colossalqa/data_loader/document_loader.py class DocumentLoader (line 23) | class DocumentLoader: method __init__ (line 28) | def __init__(self, files: List, **kwargs) -> None: method load_data (line 52) | def load_data(self, path: str) -> None: method clear (line 130) | def clear(self): FILE: applications/ColossalQA/colossalqa/data_loader/table_dataloader.py class TableLoader (line 18) | class TableLoader: method __init__ (line 23) | def __init__(self, files: str, sql_path: str = "sqlite:///mydatabase.d... method load_data (line 51) | def load_data(self, path): method to_sql (line 99) | def to_sql(self, path, table_name): method get_sql_path (line 107) | def get_sql_path(self): method __del__ (line 110) | def __del__(self): FILE: applications/ColossalQA/colossalqa/local/colossalcloud_llm.py class ColossalCloudLLM (line 32) | class ColossalCloudLLM(LLM): method __init__ (line 43) | def __init__(self, gen_config=None, **kwargs): method _identifying_params (line 61) | def _identifying_params(self) -> Mapping[str, Any]: method _llm_type (line 66) | def _llm_type(self) -> str: method set_auth_config (line 69) | def set_auth_config(self, **kwargs): method _call (line 78) | def _call(self, prompt: str, stop=None, **kwargs: Any) -> str: method text_completion (line 104) | def text_completion(self, prompt, gen_config, auth_config): FILE: applications/ColossalQA/colossalqa/local/llm.py class ColossalAPI (line 28) | class ColossalAPI: method __init__ (line 35) | def __init__(self, model_type: str, model_path: str, ckpt_path: str = ... method get_api (line 57) | def get_api(model_type: str, model_path: str, ckpt_path: str = None): method generate (line 63) | def generate(self, input: str, **kwargs) -> str: class VllmAPI (line 89) | class VllmAPI: method __init__ (line 90) | def __init__(self, host: str = "localhost", port: int = 8077) -> None: method generate (line 96) | def generate(self, input: str, **kwargs): class ColossalLLM (line 101) | class ColossalLLM(LLM): method _llm_type (line 111) | def _llm_type(self) -> str: method _call (line 114) | def _call( method _identifying_params (line 136) | def _identifying_params(self) -> Mapping[str, int]: method get_token_ids (line 140) | def get_token_ids(self, text: str) -> List[int]: class VllmLLM (line 154) | class VllmLLM(LLM): method _llm_type (line 164) | def _llm_type(self) -> str: method _call (line 167) | def _call( method set_host_port (line 187) | def set_host_port(self, host: str = "localhost", port: int = 8077, **k... method _identifying_params (line 194) | def _identifying_params(self) -> Mapping[str, int]: FILE: applications/ColossalQA/colossalqa/local/pangu_llm.py class Pangu (line 31) | class Pangu(LLM): method __init__ (line 41) | def __init__(self, gen_config=None, **kwargs): method _identifying_params (line 49) | def _identifying_params(self) -> Mapping[str, Any]: method _llm_type (line 54) | def _llm_type(self) -> str: method _call (line 57) | def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwarg... method set_auth_config (line 79) | def set_auth_config(self, **kwargs): method get_latest_auth_token (line 92) | def get_latest_auth_token(self, region, username, password, domain_name): method text_completion (line 110) | def text_completion(self, text, gen_config, auth_config): method chat_model (line 131) | def chat_model(self, messages, gen_config, auth_config): FILE: applications/ColossalQA/colossalqa/local/utils.py function post_http_request (line 11) | def post_http_request( function get_response (line 27) | def get_response(response: requests.Response) -> List[str]: FILE: applications/ColossalQA/colossalqa/memory.py class ConversationBufferWithSummary (line 18) | class ConversationBufferWithSummary(ConversationSummaryMemory): method buffer (line 39) | def buffer(self) -> Any: method buffer_as_str (line 44) | def buffer_as_str(self) -> str: method buffer_as_messages (line 50) | def buffer_as_messages(self) -> List[BaseMessage]: method clear (line 54) | def clear(self): method initiate_document_retrieval_chain (line 59) | def initiate_document_retrieval_chain( method memory_variables (line 80) | def memory_variables(self) -> List[str]: method format_dialogue (line 84) | def format_dialogue(self, lang: str = "en") -> str: method get_conversation_length (line 119) | def get_conversation_length(self): method load_memory_variables (line 125) | def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, s... method save_context (line 165) | def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]... FILE: applications/ColossalQA/colossalqa/mylogging.py class ColossalQALogger (line 8) | class ColossalQALogger: method __init__ (line 20) | def __init__(self, name): method get_instance (line 30) | def get_instance(name: str): method info (line 45) | def info(self, message: str, verbose: bool = False) -> None: method warning (line 56) | def warning(self, message: str, verbose: bool = False) -> None: method debug (line 66) | def debug(self, message: str, verbose: bool = False) -> None: method error (line 76) | def error(self, message: str) -> None: function get_logger (line 85) | def get_logger(name: str = None, level=logging.INFO) -> ColossalQALogger: FILE: applications/ColossalQA/colossalqa/retrieval_conversation_en.py class EnglishRetrievalConversation (line 18) | class EnglishRetrievalConversation: method __init__ (line 23) | def __init__(self, retriever: CustomRetriever, model_path: str, model_... method disambiguity (line 65) | def disambiguity(self, input: str): method from_retriever (line 70) | def from_retriever( method run (line 75) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ... FILE: applications/ColossalQA/colossalqa/retrieval_conversation_universal.py class UniversalRetrievalConversation (line 20) | class UniversalRetrievalConversation: method __init__ (line 25) | def __init__( method load_supporting_docs (line 90) | def load_supporting_docs(self, files: List[List[str]] = None, text_spl... method start_test_session (line 117) | def start_test_session(self): method run (line 130) | def run(self, user_input: str, which_language=str): FILE: applications/ColossalQA/colossalqa/retrieval_conversation_zh.py class ChineseRetrievalConversation (line 18) | class ChineseRetrievalConversation: method __init__ (line 23) | def __init__(self, retriever: CustomRetriever, model_path: str, model_... method disambiguity (line 71) | def disambiguity(self, input: str): method from_retriever (line 76) | def from_retriever( method run (line 81) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ... FILE: applications/ColossalQA/colossalqa/retriever.py class CustomRetriever (line 22) | class CustomRetriever(BaseRetriever): method from_documents (line 39) | def from_documents( method add_documents (line 52) | def add_documents( method clear_documents (line 100) | def clear_documents(self): method __del__ (line 108) | def __del__(self): method set_sql_database_chain (line 113) | def set_sql_database_chain(self, db_chains) -> None: method set_rephrase_handler (line 120) | def set_rephrase_handler(self, handler: Callable = None) -> None: method _get_relevant_documents (line 126) | def _get_relevant_documents( FILE: applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py class ChineseTextSplitter (line 11) | class ChineseTextSplitter(RecursiveCharacterTextSplitter): method __init__ (line 12) | def __init__(self, separators: Optional[List[str]] = None, is_separato... method split_text (line 21) | def split_text(self, text: str) -> List[str]: FILE: applications/ColossalQA/colossalqa/text_splitter/utils.py function remove_format (line 4) | def remove_format(text: str) -> str: function get_cleaned_paragraph (line 13) | def get_cleaned_paragraph(s: str) -> str: FILE: applications/ColossalQA/colossalqa/utils.py function drop_table (line 12) | def drop_table(engine: Engine) -> None: function create_empty_sql_database (line 25) | def create_empty_sql_database(database_uri): function destroy_sql_database (line 39) | def destroy_sql_database(sql_engine: Union[Engine, str]) -> None: function detect_lang_naive (line 50) | def detect_lang_naive(s): FILE: applications/ColossalQA/examples/retrieval_conversation_chatgpt.py function disambiguity (line 118) | def disambiguity(input): FILE: applications/ColossalQA/examples/retrieval_conversation_en.py function disambiguity (line 58) | def disambiguity(input): FILE: applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py function disambiguity (line 60) | def disambiguity(input): function metadata_func (line 85) | def metadata_func(data_sample, additional_fields): FILE: applications/ColossalQA/examples/retrieval_conversation_zh.py function disambiguity (line 64) | def disambiguity(input: str): FILE: applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py function metadata_func (line 47) | def metadata_func(data_sample, additional_fields): FILE: applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py class RAG_ChatBot (line 16) | class RAG_ChatBot: method __init__ (line 17) | def __init__( method set_embed_model (line 35) | def set_embed_model(self, **kwargs): method set_text_splitter (line 42) | def set_text_splitter(self, **kwargs): method set_memory (line 46) | def set_memory(self, **kwargs): method set_info_retriever (line 58) | def set_info_retriever(self, **kwargs): method set_rag_chain (line 63) | def set_rag_chain(self, **kwargs): method set_disambig_retriv (line 74) | def set_disambig_retriv(self, **kwargs): method load_doc_from_console (line 84) | def load_doc_from_console(self, json_parse_args: Dict = {}): method load_doc_from_files (line 96) | def load_doc_from_files(self, files, data_name="default_kb", json_pars... method split_docs_and_add_to_mem (line 103) | def split_docs_and_add_to_mem(self, **kwargs): method split_docs (line 110) | def split_docs(self, documents): method clear_docs (line 114) | def clear_docs(self, **kwargs): method reset_config (line 120) | def reset_config(self, rag_config): method run (line 130) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ... method start_test_session (line 142) | def start_test_session(self): FILE: applications/ColossalQA/examples/webui_demo/server.py function parseArgs (line 16) | def parseArgs(): class DocUpdateReq (line 26) | class DocUpdateReq(BaseModel): class GenerationTaskReq (line 31) | class GenerationTaskReq(BaseModel): function update_docs (line 36) | def update_docs(data: DocUpdateReq, request: Request): function generate (line 51) | def generate(data: GenerationTaskReq, request: Request): FILE: applications/ColossalQA/examples/webui_demo/utils.py class DocAction (line 4) | class DocAction(str, Enum): FILE: applications/ColossalQA/examples/webui_demo/webui.py function parseArgs (line 10) | def parseArgs(): function get_response (line 17) | def get_response(data, url): function add_text (line 24) | def add_text(history, text): function add_file (line 29) | def add_file(history, files): function bot (line 39) | def bot(history): function restart (line 50) | def restart(chatbot, txt): FILE: applications/ColossalQA/setup.py function fetch_requirements (line 4) | def fetch_requirements(path): function fetch_readme (line 9) | def fetch_readme(): function fetch_version (line 14) | def fetch_version(): FILE: applications/ColossalQA/tests/test_document_loader.py function test_add_document (line 6) | def test_add_document(): FILE: applications/ColossalQA/tests/test_memory.py function test_memory_long (line 12) | def test_memory_long(): function test_memory_short (line 66) | def test_memory_short(): FILE: applications/ColossalQA/tests/test_retrieval_qa.py function test_en_retrievalQA (line 6) | def test_en_retrievalQA(): function test_zh_retrievalQA (line 27) | def test_zh_retrievalQA(): FILE: applications/ColossalQA/tests/test_text_splitter.py function test_text_splitter (line 4) | def test_text_splitter(): FILE: colossalai/_analyzer/_subclasses/_meta_registration.py function new (line 26) | def new(*args, **kwargs): function new_strided (line 30) | def new_strided(*args, **kwargs): function new_like (line 34) | def new_like(*args, **kwargs): function register_meta (line 38) | def register_meta(op, register_dispatcher=True): function meta_conv (line 59) | def meta_conv( function meta__conv (line 185) | def meta__conv( function meta_conv_backward (line 201) | def meta_conv_backward( function meta_adaptive_avg_pool2d_backward (line 218) | def meta_adaptive_avg_pool2d_backward( function meta_cuda_rnn (line 227) | def meta_cuda_rnn( function meta_cudnn_rnn_backward (line 280) | def meta_cudnn_rnn_backward( function meta_unregistered_ewise (line 313) | def meta_unregistered_ewise(input: torch.Tensor, *args): function meta_bn (line 319) | def meta_bn(input: torch.Tensor, weight, bias, running_mean, running_var... function meta_bn_backward (line 325) | def meta_bn_backward( function meta_cudnn_bn (line 341) | def meta_cudnn_bn(input: torch.Tensor, weight, bias, running_mean, runni... function meta_cudnn_bn_backward (line 355) | def meta_cudnn_bn_backward( function meta_ln (line 370) | def meta_ln(input: torch.Tensor, normalized_shape, weight, bias, eps): function meta_ln_backward (line 376) | def meta_ln_backward( function meta_im2col (line 385) | def meta_im2col(input: torch.Tensor, kernel_size, dilation, padding, str... function meta_roll (line 390) | def meta_roll(input: torch.Tensor, shifts, dims): function meta_local_scalar_dense (line 395) | def meta_local_scalar_dense(self: torch.Tensor): function meta_where_self (line 400) | def meta_where_self(condition: torch.Tensor, self: torch.Tensor, other: ... function meta_embedding_dense_backward (line 408) | def meta_embedding_dense_backward( function meta_native_dropout_default (line 416) | def meta_native_dropout_default(input: torch.Tensor, p: float, train: bo... function meta_native_dropout_backward_default (line 422) | def meta_native_dropout_backward_default(grad: torch.Tensor, mask: torch... function meta_eye (line 428) | def meta_eye(n: int, m: int, out: torch.Tensor): function meta_index_Tensor (line 432) | def meta_index_Tensor(self, indices): FILE: colossalai/_analyzer/_subclasses/flop_tensor.py class Phase (line 22) | class Phase(Enum): function normalize_tuple (line 27) | def normalize_tuple(x): function _format_flops (line 33) | def _format_flops(flop): function flop_count (line 50) | def flop_count(module: Union[torch.nn.Module, Callable] = None, *args, v... function matmul_flop_jit (line 225) | def matmul_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function addmm_flop_jit (line 259) | def addmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function linear_flop_jit (line 276) | def linear_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function bmm_flop_jit (line 290) | def bmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function conv_flop_count (line 304) | def conv_flop_count( function conv_flop_jit (line 329) | def conv_flop_jit(inputs: List[Any], outputs: List[Any]): function transpose_shape (line 340) | def transpose_shape(shape): function conv_backward_flop_jit (line 344) | def conv_backward_flop_jit(inputs: List[Any], outputs: List[Any]): function norm_flop_counter (line 360) | def norm_flop_counter(affine_arg_index: int, input_arg_index: int) -> Ca... function batchnorm_flop_jit (line 386) | def batchnorm_flop_jit(inputs: List[Any], outputs: List[Any], training: ... function ewise_flop_counter (line 397) | def ewise_flop_counter(input_scale: float = 1, output_scale: float = 0) ... function zero_flop_jit (line 419) | def zero_flop_jit(*args): FILE: colossalai/_analyzer/_subclasses/meta_tensor.py function register_storage (line 14) | def register_storage(r, data_ptr_fn=None): function _normalize_tuple (line 23) | def _normalize_tuple(x): function _assert_alias (line 30) | def _assert_alias(func): class MetaTensor (line 34) | class MetaTensor(torch.Tensor): method __new__ (line 50) | def __new__(cls, elem, device=None, data_ptr_fn=None): method __repr__ (line 83) | def __repr__(self): method __torch_dispatch__ (line 90) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None): method to (line 126) | def to(self, *args, **kwargs) -> torch.Tensor: method cpu (line 152) | def cpu(self, *args, **kwargs): method cuda (line 157) | def cuda(self, device=None, non_blocking=False): method data_ptr (line 162) | def data_ptr(self): class MetaTensorMode (line 166) | class MetaTensorMode(object): method __init__ (line 179) | def __init__(self): method __enter__ (line 183) | def __enter__(self): method __exit__ (line 200) | def __exit__(self, exc_type, exc_value, traceback): FILE: colossalai/_analyzer/envs.py class MeshConfig (line 5) | class MeshConfig: FILE: colossalai/_analyzer/fx/codegen.py function _gen_ckpt_fn_def (line 28) | def _gen_ckpt_fn_def(label, free_vars: List[str]) -> str: function _gen_ckpt_output (line 35) | def _gen_ckpt_output(output_vars: List[str]) -> str: function _gen_ckpt_usage (line 42) | def _gen_ckpt_usage(label, input_vars, output_vars, use_reentrant=True): function _end_of_ckpt (line 51) | def _end_of_ckpt(node: Node, ckpt_level: int) -> bool: function _find_input_and_output_nodes (line 60) | def _find_input_and_output_nodes(nodes: List[Node]): function _find_nested_ckpt_regions (line 86) | def _find_nested_ckpt_regions(node_list: List[Node], ckpt_level: int = 0): function emit_ckpt_func (line 134) | def emit_ckpt_func( function emit_code_with_activation_checkpoint (line 210) | def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_no... class ActivationCheckpointCodeGen (line 248) | class ActivationCheckpointCodeGen(CodeGen): method _gen_python_code (line 249) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp... FILE: colossalai/_analyzer/fx/graph_module.py class _WrappedCall (line 27) | class _WrappedCall: method __init__ (line 28) | def __init__(self, cls, cls_call): method _generate_error_message (line 42) | def _generate_error_message(frame_summary: traceback.FrameSummary) -> ... method __call__ (line 65) | def __call__(self, obj, *args, **kwargs): class ColoGraphModule (line 85) | class ColoGraphModule(torch.fx.GraphModule): method __init__ (line 107) | def __init__( method bind (line 112) | def bind(self, ckpt_def, globals): method recompile (line 132) | def recompile(self) -> PythonCode: method to_folder (line 176) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ... FILE: colossalai/_analyzer/fx/node_util.py function intersect (line 11) | def intersect(a, b): function subtract (line 15) | def subtract(a, b): function union (line 19) | def union(a, b): function compute_size_in_bytes (line 23) | def compute_size_in_bytes(elem: Union[torch.Tensor, Dict, List, Tuple, i... class MetaInfo (line 48) | class MetaInfo: method __new__ (line 119) | def __new__(cls, node: Node, **kwargs): method __post_init__ (line 136) | def __post_init__(self): method fwd_time (line 140) | def fwd_time(self, tflops: float = MeshConfig.TFLOPS, bandwidth: float... method bwd_time (line 144) | def bwd_time(self, tflops: float = MeshConfig.TFLOPS, bandwidth: float... method param_size (line 148) | def param_size(self): method buffer_size (line 152) | def buffer_size(self): method output_size (line 156) | def output_size(self): method accumulate_size (line 166) | def accumulate_size(self): method temp_size (line 176) | def temp_size(self): method backward_size (line 186) | def backward_size(self): method __repr__ (line 190) | def __repr__(self): FILE: colossalai/_analyzer/fx/passes/graph_profile.py function _format_flops (line 13) | def _format_flops(flops: float) -> str: function _denormalize_tuple (line 26) | def _denormalize_tuple(t: Tuple[int, ...]) -> Tuple[int, ...]: function _normalize_tuple (line 30) | def _normalize_tuple(x): function _current_device (line 36) | def _current_device(module): class GraphProfiler (line 40) | class GraphProfiler(torch.fx.Interpreter): method __init__ (line 52) | def __init__(self, module: GraphModule, garbage_collect_values: bool =... method run (line 55) | def run(self, *args, initial_env: Optional[Dict[Node, Any]] = None, en... method fetch_initial_env (line 91) | def fetch_initial_env(self, device=None) -> Dict[Node, Any]: method propagate (line 107) | def propagate(self, *args, device=None): method summary (line 123) | def summary(self) -> str: class CommunicationProfiler (line 184) | class CommunicationProfiler(GraphProfiler): method __init__ (line 189) | def __init__(self, module: GraphModule, garbage_collect_values: bool =... class FlopProfiler (line 193) | class FlopProfiler(GraphProfiler): method run_node (line 232) | def run_node(self, n: torch.fx.Node) -> Any: method call_function (line 269) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ... method call_method (line 293) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw... method call_module (line 311) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw... function graph_profile_pass (line 333) | def graph_profile_pass(module: GraphModule, *args, verbose=False) -> Gra... FILE: colossalai/_analyzer/fx/passes/shape_prop.py class sim_env (line 17) | class sim_env(saved_tensors_hooks): method __init__ (line 32) | def __init__(self, module: Optional[torch.nn.Module] = None): method pack_hook (line 38) | def pack_hook(self, tensor: torch.Tensor): method unpack_hook (line 43) | def unpack_hook(self, tensor): function _normalize_tuple (line 47) | def _normalize_tuple(x): function _current_device (line 53) | def _current_device(module): class ShapeProp (line 61) | class ShapeProp(torch.fx.Interpreter): method __init__ (line 97) | def __init__(self, module: torch.fx.GraphModule, garbage_collect_value... method run_node (line 101) | def run_node(self, n: torch.fx.Node) -> Any: method call_function (line 174) | def call_function(self, target: "Target", args: Tuple[Any, ...], kwarg... method call_method (line 203) | def call_method(self, target: "Target", args: Tuple[Any, ...], kwargs:... method propagate (line 235) | def propagate(self, *args, device=None): function shape_prop_pass (line 256) | def shape_prop_pass(module: torch.fx.GraphModule, *args) -> torch.fx.Gra... FILE: colossalai/_analyzer/fx/symbolic_profile.py function register_flop_count_impl (line 7) | def register_flop_count_impl(func): function register_shape_impl (line 15) | def register_shape_impl(func): function symbolic_profile (line 23) | def symbolic_profile(module: GraphModule, *args, verbose=False) -> Graph... FILE: colossalai/_analyzer/fx/tracer/bias_addition.py function linear_impl (line 16) | def linear_impl(input, weight, bias=None): function conv1d_impl (line 24) | def conv1d_impl(input, weight, bias=None, stride=_single(1), padding=_si... function conv2d_impl (line 34) | def conv2d_impl(input, weight, bias=None, stride=_pair(1), padding=_pair... function conv3d_impl (line 44) | def conv3d_impl(input, weight, bias=None, stride=_triple(1), padding=_tr... function conv_transpose1d_impl (line 54) | def conv_transpose1d_impl( function conv_transpose2d_impl (line 87) | def conv_transpose2d_impl( function conv_transpose3d_impl (line 113) | def conv_transpose3d_impl( function addmm_impl (line 147) | def addmm_impl(input, mat1, mat2, beta=1, alpha=1): function addbmm_impl (line 160) | def addbmm_impl(input, batch1, batch2, beta=1, alpha=1): FILE: colossalai/_analyzer/fx/tracer/custom_leaf_module.py function torch_nn_normalize (line 17) | def torch_nn_normalize(self, input: torch.Tensor): FILE: colossalai/_analyzer/fx/tracer/proxy.py class ColoProxy (line 13) | class ColoProxy(Proxy): method __init__ (line 16) | def __init__(self, *args, data=None, **kwargs): method meta_data (line 21) | def meta_data(self): method meta_data (line 25) | def meta_data(self, args): method __torch_function__ (line 30) | def __torch_function__(cls, orig_method, types, args=(), kwargs=None): method from_torch_proxy (line 45) | def from_torch_proxy(cls, proxy: Proxy): method __repr__ (line 48) | def __repr__(self): method __len__ (line 51) | def __len__(self): method __int__ (line 54) | def __int__(self): method __index__ (line 57) | def __index__(self): method __float__ (line 63) | def __float__(self): method __bool__ (line 66) | def __bool__(self): method __getattr__ (line 69) | def __getattr__(self, k): method __setitem__ (line 72) | def __setitem__(self, key, value): method __contains__ (line 77) | def __contains__(self, key): method __isinstancecheck__ (line 85) | def __isinstancecheck__(self, type): class ColoAttribute (line 89) | class ColoAttribute(ColoProxy): method __init__ (line 90) | def __init__(self, root, attr: str, data=None): method node (line 98) | def node(self): method __call__ (line 105) | def __call__(self, *args, **kwargs): method __repr__ (line 108) | def __repr__(self): FILE: colossalai/_analyzer/fx/tracer/symbolic_trace.py function _default_device (line 19) | def _default_device(): function _current_device (line 23) | def _current_device(module: torch.nn.Module): function symbolic_trace (line 30) | def symbolic_trace( FILE: colossalai/_analyzer/fx/tracer/tracer.py function _truncate_suffix (line 19) | def _truncate_suffix(s: str): function register_tracer_impl (line 26) | def register_tracer_impl(func: Callable[..., Any], name: Optional[str] =... function register_leaf_module_impl (line 35) | def register_leaf_module_impl(module: nn.Module): function register_leaf_module (line 43) | def register_leaf_module(module: nn.Module): function register_non_leaf_module (line 47) | def register_non_leaf_module(module: nn.Module): class ColoTracer (line 51) | class ColoTracer(Tracer): method __init__ (line 67) | def __init__(self, trace_act_ckpt: bool = False, bias_addition_split: ... method is_leaf_module (line 82) | def is_leaf_module(self, m: nn.Module, module_qualified_name: str) -> ... method call_module (line 92) | def call_module( method proxy (line 101) | def proxy(self, node: Node) -> "ColoProxy": method create_proxy (line 104) | def create_proxy( method create_node (line 161) | def create_node(self, *args, **kwargs) -> Node: method trace (line 166) | def trace( method _tracer_override (line 236) | def _tracer_override(self): method _torch_factory_override (line 269) | def _torch_factory_override(self): method _post_check (line 306) | def _post_check(self, non_concrete_arg_names: Set[str]): method getattr (line 336) | def getattr(self, attr, attr_val, parameter_proxy_cache): method _module_getattr (line 339) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache): FILE: colossalai/accelerator/api.py function set_accelerator (line 22) | def set_accelerator(accelerator: Union[str, BaseAccelerator]) -> None: function auto_set_accelerator (line 40) | def auto_set_accelerator() -> None: function get_accelerator (line 60) | def get_accelerator() -> BaseAccelerator: FILE: colossalai/accelerator/base_accelerator.py class BaseAccelerator (line 11) | class BaseAccelerator(ABC): method __init__ (line 14) | def __init__(self, name: str, communication_backend: str, is_synchrono... method name (line 24) | def name(self) -> str: method communication_backend (line 31) | def communication_backend(self) -> str: method is_synchronous (line 38) | def is_synchronous(self) -> bool: method __repr__ (line 44) | def __repr__(self) -> str: method get_version (line 52) | def get_version(self) -> str: method get_current_device (line 58) | def get_current_device(self) -> torch.device: method current_device (line 64) | def current_device(self) -> int: method set_device (line 70) | def set_device(self, device: Optional[Union[torch.device, int]] = None... method get_device_name (line 76) | def get_device_name(self, device: Union[torch.device, int]) -> str: method synchronize (line 82) | def synchronize(self, device: Union[torch.device, int] = None): method is_available (line 88) | def is_available(self): method device_count (line 94) | def device_count(self): method set_to_device (line 99) | def set_to_device(self, models: Any) -> Any: method get_device_capability (line 116) | def get_device_capability(self, device=None) -> Tuple[int, int]: method get_device_name (line 122) | def get_device_name(self, device=None) -> str: method get_device_properties (line 128) | def get_device_properties(self, device): method utilization (line 134) | def utilization(self, device=None) -> int: method get_rng_state (line 143) | def get_rng_state(self, device="cuda") -> torch.Tensor: method get_rng_state_all (line 149) | def get_rng_state_all(self) -> List[torch.Tensor]: method set_rng_state (line 155) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "cu... method set_rng_state_all (line 161) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None: method manual_seed (line 167) | def manual_seed(self, seed: int) -> None: method manual_seed_all (line 173) | def manual_seed_all(self, seed: int) -> None: method seed (line 179) | def seed(self) -> None: method seed_all (line 185) | def seed_all(self) -> None: method initial_seed (line 191) | def initial_seed(self) -> int: method empty_cache (line 200) | def empty_cache(self) -> None: method memory_stats (line 206) | def memory_stats(self, device=None) -> Dict[str, Any]: method memory_summary (line 212) | def memory_summary(self, device=None, abbreviated=False) -> str: method memory_snapshot (line 218) | def memory_snapshot(self): method memory_allocated (line 224) | def memory_allocated(self, device=None) -> int: method max_memory_allocated (line 230) | def max_memory_allocated(self, device=None) -> int: method reset_max_memory_allocated (line 236) | def reset_max_memory_allocated(self, device=None) -> None: method reset_max_memory_cached (line 242) | def reset_max_memory_cached(self, device=None) -> None: method memory_reserved (line 248) | def memory_reserved(self, device=None) -> int: method max_memory_reserved (line 254) | def max_memory_reserved(self, device=None) -> int: method set_per_process_memory_fraction (line 260) | def set_per_process_memory_fraction(self, fraction: float, device=None... method reset_peak_memory_stats (line 266) | def reset_peak_memory_stats(self, device=None) -> None: method Stream (line 276) | def Stream(self, device=None, priority=0, **kwargs): method Event (line 282) | def Event(self, enable_timing: bool = False, blocking: bool = False, i... method current_stream (line 288) | def current_stream(self, device=None): method default_stream (line 294) | def default_stream(self, device=None): method set_stream (line 300) | def set_stream(self, stream_): method stream (line 306) | def stream(self, stream_): method autocast (line 315) | def autocast( FILE: colossalai/accelerator/cpu_accelerator.py class CpuAccelerator (line 15) | class CpuAccelerator(BaseAccelerator): method __init__ (line 21) | def __init__(self): method get_version (line 27) | def get_version(self) -> str: method get_current_device (line 33) | def get_current_device(self) -> torch.device: method current_device (line 39) | def current_device(self) -> int: method set_device (line 45) | def set_device(self, device: Optional[Union[torch.device, int]] = None... method get_device_name (line 51) | def get_device_name(self, device: Union[torch.device, int]) -> str: method synchronize (line 57) | def synchronize(self, device: Union[torch.device, int] = None): method is_available (line 63) | def is_available(self): method device_count (line 69) | def device_count(self): method get_device_capability (line 75) | def get_device_capability(self, device=None) -> Tuple[int, int]: method get_device_name (line 81) | def get_device_name(self, device=None) -> str: method get_device_properties (line 87) | def get_device_properties(self, device): method utilization (line 93) | def utilization(self, device=None) -> int: method get_rng_state (line 102) | def get_rng_state(self, device=None) -> torch.Tensor: method get_rng_state_all (line 108) | def get_rng_state_all(self) -> List[torch.Tensor]: method set_rng_state (line 114) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = Non... method set_rng_state_all (line 120) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None: method manual_seed (line 126) | def manual_seed(self, seed: int) -> None: method manual_seed_all (line 132) | def manual_seed_all(self, seed: int) -> None: method seed (line 138) | def seed(self) -> None: method seed_all (line 144) | def seed_all(self) -> None: method initial_seed (line 150) | def initial_seed(self) -> int: method empty_cache (line 160) | def empty_cache(self) -> None: method memory_stats (line 166) | def memory_stats(self, device=None) -> Dict[str, Any]: method memory_summary (line 172) | def memory_summary(self, device=None, abbreviated=False) -> str: method memory_snapshot (line 178) | def memory_snapshot(self): method memory_allocated (line 184) | def memory_allocated(self, device=None) -> int: method max_memory_allocated (line 190) | def max_memory_allocated(self, device=None) -> int: method reset_max_memory_allocated (line 196) | def reset_max_memory_allocated(self, device=None) -> None: method reset_max_memory_cached (line 202) | def reset_max_memory_cached(self, device=None) -> None: method memory_reserved (line 208) | def memory_reserved(self, device=None) -> int: method max_memory_reserved (line 214) | def max_memory_reserved(self, device=None) -> int: method set_per_process_memory_fraction (line 220) | def set_per_process_memory_fraction(self, fraction: float, device=None... method reset_peak_memory_stats (line 228) | def reset_peak_memory_stats(self, device=None) -> None: method Stream (line 238) | def Stream(self, device=None, priority=0, **kwargs): method Event (line 244) | def Event(self, enable_timing: bool = False, blocking: bool = False, i... method current_stream (line 250) | def current_stream(self, device=None): method default_stream (line 256) | def default_stream(self, device=None): method set_stream (line 262) | def set_stream(self, stream_): method stream (line 268) | def stream(self, stream_): method autocast (line 277) | def autocast( FILE: colossalai/accelerator/cuda_accelerator.py class CudaAccelerator (line 13) | class CudaAccelerator(BaseAccelerator): method __init__ (line 18) | def __init__(self): method get_version (line 24) | def get_version(self) -> str: method get_current_device (line 30) | def get_current_device(self) -> torch.device: method current_device (line 36) | def current_device(self) -> int: method set_device (line 42) | def set_device(self, device: Optional[Union[torch.device, int]] = None... method get_device_name (line 52) | def get_device_name(self, device: Union[torch.device, int]) -> str: method synchronize (line 58) | def synchronize(self, device: Union[torch.device, int] = None): method is_available (line 64) | def is_available(self): method device_count (line 70) | def device_count(self): method get_device_capability (line 76) | def get_device_capability(self, device=None) -> Tuple[int, int]: method get_device_name (line 82) | def get_device_name(self, device=None) -> str: method get_device_properties (line 88) | def get_device_properties(self, device): method utilization (line 94) | def utilization(self, device=None) -> int: method get_rng_state (line 103) | def get_rng_state(self, device="cuda") -> torch.Tensor: method get_rng_state_all (line 109) | def get_rng_state_all(self) -> List[torch.Tensor]: method set_rng_state (line 115) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "cu... method set_rng_state_all (line 121) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None: method manual_seed (line 127) | def manual_seed(self, seed: int) -> None: method manual_seed_all (line 133) | def manual_seed_all(self, seed: int) -> None: method seed (line 139) | def seed(self) -> None: method seed_all (line 145) | def seed_all(self) -> None: method initial_seed (line 151) | def initial_seed(self) -> int: method empty_cache (line 161) | def empty_cache(self) -> None: method memory_stats (line 167) | def memory_stats(self, device=None) -> Dict[str, Any]: method memory_summary (line 173) | def memory_summary(self, device=None, abbreviated=False) -> str: method memory_snapshot (line 179) | def memory_snapshot(self): method memory_allocated (line 185) | def memory_allocated(self, device=None) -> int: method max_memory_allocated (line 191) | def max_memory_allocated(self, device=None) -> int: method reset_max_memory_allocated (line 197) | def reset_max_memory_allocated(self, device=None) -> None: method reset_max_memory_cached (line 203) | def reset_max_memory_cached(self, device=None) -> None: method memory_reserved (line 209) | def memory_reserved(self, device=None) -> int: method max_memory_reserved (line 215) | def max_memory_reserved(self, device=None) -> int: method set_per_process_memory_fraction (line 221) | def set_per_process_memory_fraction(self, fraction: float, device=None... method reset_peak_memory_stats (line 227) | def reset_peak_memory_stats(self, device=None) -> None: method Stream (line 237) | def Stream(self, device=None, priority=0, **kwargs): method Event (line 243) | def Event(self, enable_timing: bool = False, blocking: bool = False, i... method current_stream (line 249) | def current_stream(self, device=None): method default_stream (line 255) | def default_stream(self, device=None): method set_stream (line 261) | def set_stream(self, stream_): method stream (line 267) | def stream(self, stream_): method autocast (line 276) | def autocast( FILE: colossalai/accelerator/npu_accelerator.py class NpuAccelerator (line 19) | class NpuAccelerator(BaseAccelerator): method __init__ (line 24) | def __init__(self): method get_version (line 30) | def get_version(self) -> str: method get_current_device (line 36) | def get_current_device(self) -> torch.device: method current_device (line 42) | def current_device(self) -> int: method set_device (line 48) | def set_device(self, device: Optional[Union[torch.device, int]] = None... method get_device_name (line 58) | def get_device_name(self, device: Union[torch.device, int]) -> str: method synchronize (line 64) | def synchronize(self, device: Union[torch.device, int] = None): method is_available (line 70) | def is_available(self): method device_count (line 76) | def device_count(self): method get_device_capability (line 82) | def get_device_capability(self, device=None) -> Tuple[int, int]: method get_device_name (line 88) | def get_device_name(self, device=None) -> str: method get_device_properties (line 94) | def get_device_properties(self, device): method utilization (line 100) | def utilization(self, device=None) -> int: method get_rng_state (line 109) | def get_rng_state(self, device="npu") -> torch.Tensor: method get_rng_state_all (line 115) | def get_rng_state_all(self) -> List[torch.Tensor]: method set_rng_state (line 121) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "np... method set_rng_state_all (line 127) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None: method manual_seed (line 133) | def manual_seed(self, seed: int) -> None: method manual_seed_all (line 139) | def manual_seed_all(self, seed: int) -> None: method seed (line 145) | def seed(self) -> None: method seed_all (line 151) | def seed_all(self) -> None: method initial_seed (line 157) | def initial_seed(self) -> int: method empty_cache (line 167) | def empty_cache(self) -> None: method memory_stats (line 173) | def memory_stats(self, device=None) -> Dict[str, Any]: method memory_summary (line 179) | def memory_summary(self, device=None, abbreviated=False) -> str: method memory_snapshot (line 185) | def memory_snapshot(self): method memory_allocated (line 191) | def memory_allocated(self, device=None) -> int: method max_memory_allocated (line 197) | def max_memory_allocated(self, device=None) -> int: method reset_max_memory_allocated (line 203) | def reset_max_memory_allocated(self, device=None) -> None: method reset_max_memory_cached (line 209) | def reset_max_memory_cached(self, device=None) -> None: method memory_reserved (line 215) | def memory_reserved(self, device=None) -> int: method max_memory_reserved (line 221) | def max_memory_reserved(self, device=None) -> int: method set_per_process_memory_fraction (line 227) | def set_per_process_memory_fraction(self, fraction: float, device=None... method reset_peak_memory_stats (line 233) | def reset_peak_memory_stats(self, device=None) -> None: method Stream (line 243) | def Stream(self, device=None, priority=0, **kwargs): method Event (line 249) | def Event(self, enable_timing: bool = False, blocking: bool = False, i... method current_stream (line 255) | def current_stream(self, device=None): method default_stream (line 261) | def default_stream(self, device=None): method set_stream (line 267) | def set_stream(self, stream_): method stream (line 273) | def stream(self, stream_): method autocast (line 282) | def autocast( FILE: colossalai/amp/naive_amp/grad_scaler/base_grad_scaler.py class BaseGradScaler (line 16) | class BaseGradScaler(ABC): method __init__ (line 24) | def __init__(self, initial_scale: float, verbose: bool): method scale (line 33) | def scale(self) -> Tensor: method inv_scale (line 39) | def inv_scale(self) -> Tensor: method state_dict (line 44) | def state_dict(self) -> Dict: method load_state_dict (line 51) | def load_state_dict(self, state_dict: Dict) -> None: method update (line 61) | def update(self, overflow: bool) -> None: method log (line 68) | def log(self, message, *args, **kwargs): FILE: colossalai/amp/naive_amp/grad_scaler/constant_grad_scaler.py class ConstantGradScaler (line 8) | class ConstantGradScaler(BaseGradScaler): method __init__ (line 16) | def __init__(self, initial_scale: int, verbose: bool): method update (line 20) | def update(self, overflow: bool) -> None: FILE: colossalai/amp/naive_amp/grad_scaler/dynamic_grad_scaler.py class DynamicGradScaler (line 15) | class DynamicGradScaler(BaseGradScaler): method __init__ (line 29) | def __init__( method _sanity_checks (line 65) | def _sanity_checks(self) -> None: method update (line 78) | def update(self, overflow: bool) -> None: method _backoff_scale (line 103) | def _backoff_scale(self) -> None: method _grow_scale (line 110) | def _grow_scale(self) -> None: method state_dict (line 117) | def state_dict(self): method load_state_dict (line 125) | def load_state_dict(self, state_dict): FILE: colossalai/amp/naive_amp/mixed_precision_mixin/base.py class MixedPrecisionMixin (line 7) | class MixedPrecisionMixin(ABC): method pre_backward (line 46) | def pre_backward(self, loss: Tensor, *args, **kwargs) -> Tensor: method pre_backward_by_grad (line 57) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor: method should_skip_step (line 69) | def should_skip_step(self) -> bool: method pre_zero_grad (line 77) | def pre_zero_grad(self) -> None: method get_grad_div_scale (line 81) | def get_grad_div_scale(self) -> float: FILE: colossalai/amp/naive_amp/mixed_precision_mixin/bf16.py class BF16MixedPrecisionMixin (line 7) | class BF16MixedPrecisionMixin(MixedPrecisionMixin): method pre_backward (line 10) | def pre_backward(self, loss: Tensor) -> Tensor: method pre_backward_by_grad (line 13) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor: method should_skip_step (line 16) | def should_skip_step(self) -> bool: method pre_zero_grad (line 19) | def pre_zero_grad(self) -> None: method get_grad_div_scale (line 22) | def get_grad_div_scale(self) -> float: FILE: colossalai/amp/naive_amp/mixed_precision_mixin/fp16.py class OptimState (line 14) | class OptimState(Enum): class FP16MixedPrecisionMixin (line 19) | class FP16MixedPrecisionMixin(MixedPrecisionMixin): method __init__ (line 22) | def __init__( method loss_scale (line 46) | def loss_scale(self) -> float: method check_local_overflow (line 50) | def check_local_overflow(self) -> bool: method check_overflow (line 57) | def check_overflow(self) -> bool: method pre_backward (line 65) | def pre_backward(self, loss: Tensor) -> Tensor: method pre_backward_by_grad (line 70) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor: method should_skip_step (line 74) | def should_skip_step(self) -> bool: method pre_zero_grad (line 81) | def pre_zero_grad(self) -> None: method get_grad_div_scale (line 84) | def get_grad_div_scale(self) -> float: FILE: colossalai/amp/naive_amp/mixed_precision_optimizer.py class NaiveFP16MixedPrecisionMixin (line 13) | class NaiveFP16MixedPrecisionMixin(FP16MixedPrecisionMixin): method __init__ (line 14) | def __init__( method check_local_overflow (line 30) | def check_local_overflow(self) -> bool: class MixedPrecisionOptimizer (line 37) | class MixedPrecisionOptimizer(OptimizerWrapper): method __init__ (line 38) | def __init__( method backward (line 89) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw... method backward_by_grad (line 93) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso... method zero_grad (line 102) | def zero_grad(self, *args, **kwargs): method _unscale_and_clip_grads (line 108) | def _unscale_and_clip_grads(self, total_norm: float) -> None: method _compute_grad_norm (line 140) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]... method step (line 169) | def step(self, *args, **kwargs): method update_master_params (line 208) | def update_master_params(self, model: Module): method get_working_to_master_map (line 217) | def get_working_to_master_map(self) -> Dict[int, torch.Tensor]: method get_master_to_working_map (line 220) | def get_master_to_working_map(self) -> Dict[int, torch.Tensor]: method get_grad_norm (line 223) | def get_grad_norm(self, norm_type=2, **kwargs): FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_base.py function _copy_output (line 18) | def _copy_output(src: Graph, dst: Graph): function _get_param_size (line 25) | def _get_param_size(module: torch.nn.Module): class CheckpointSolverBase (line 30) | class CheckpointSolverBase(ABC): method __init__ (line 31) | def __init__( method solve (line 82) | def solve(self): method get_node_list (line 85) | def get_node_list(self): method _linearize_graph (line 89) | def _linearize_graph(self) -> List[List[Node]]: FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_chen.py class CheckpointSolverChen (line 14) | class CheckpointSolverChen(CheckpointSolverBase): method __init__ (line 15) | def __init__(self, graph: Graph, cnode: List[str] = None, num_grids: i... method solve (line 36) | def solve(self) -> Graph: method run_chen_greedy (line 52) | def run_chen_greedy(self, b: int = 0) -> Tuple[Set, int]: method grid_search (line 73) | def grid_search(self) -> Set: FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.c function PyObject (line 50) | static PyObject* computeTable(PyObject* self, PyObject* args) { type PyModuleDef (line 199) | struct PyModuleDef function PyMODINIT_FUNC (line 209) | PyMODINIT_FUNC PyInit_rotorc(void) { return PyModule_Create(&rotorModule... FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py class CheckpointSolverRotor (line 24) | class CheckpointSolverRotor(CheckpointSolverBase): method __init__ (line 25) | def __init__( method solve (line 66) | def solve(self, force_python: bool = False, verbose: bool = False) -> ... method print_chain (line 104) | def print_chain(self): method print_sequence (line 116) | def print_sequence(self): method _construct_chain (line 120) | def _construct_chain(cls, graph: Graph, node_list: List[List[Node]]) -... method _extract_node_info (line 141) | def _extract_node_info(cls, node: List[Node]) -> Tuple[int, ...]: method _extract_input (line 168) | def _extract_input(graph: Graph) -> Tuple[Tensor, ...]: method _extract_unused_output (line 177) | def _extract_unused_output(node: Node) -> int: method _extract_btmp (line 182) | def _extract_btmp(node: List[Node]) -> int: method _compute_table (line 209) | def _compute_table(chain: Chain, mmax: int) -> Tuple: method _compute_table_c (line 276) | def _compute_table_c(chain: Chain, mmax: int) -> Tuple: method _backtrack (line 308) | def _backtrack( method _annotate_from_sequence (line 361) | def _annotate_from_sequence(sequence: Sequence, node_list: List[List[N... FILE: colossalai/auto_parallel/checkpoint/operation.py class Chain (line 8) | class Chain: method __init__ (line 9) | def __init__( method check_lengths (line 40) | def check_lengths(self): method __repr__ (line 50) | def __repr__(self): method __len__ (line 58) | def __len__(self): method discretize_all (line 61) | def discretize_all(self, unit: int): class Operation (line 70) | class Operation(ABC): method __repr__ (line 73) | def __repr__(self) -> str: method shift (line 76) | def shift(self, value): class Forward (line 83) | class Forward(Operation): method __init__ (line 86) | def __init__(self, index): method cost (line 89) | def cost(self, chain: Chain): class ForwardEnable (line 96) | class ForwardEnable(Forward): class ForwardNograd (line 100) | class ForwardNograd(Forward): class ForwardCheck (line 104) | class ForwardCheck(Forward): class Forwards (line 108) | class Forwards(Operation): method __init__ (line 109) | def __init__(self, start, end): method __repr__ (line 112) | def __repr__(self): method cost (line 115) | def cost(self, chain: Chain): function isForward (line 122) | def isForward(op): class Backward (line 126) | class Backward(Operation): method __init__ (line 129) | def __init__(self, index): method cost (line 132) | def cost(self, chain: Chain): class Loss (line 139) | class Loss(Operation): method __init__ (line 140) | def __init__(self): method __repr__ (line 143) | def __repr__(self): method cost (line 146) | def cost(self, chain): class MemoryAccess (line 150) | class MemoryAccess(Operation): method __init__ (line 153) | def __init__(self, index): method cost (line 156) | def cost(self, chain: Chain): class WriteMemory (line 160) | class WriteMemory(MemoryAccess): class ReadMemory (line 164) | class ReadMemory(MemoryAccess): class DiscardMemory (line 168) | class DiscardMemory(MemoryAccess): class Sequence (line 172) | class Sequence(list): method __init__ (line 173) | def __init__(self): method __repr__ (line 176) | def __repr__(self): method list_operations (line 179) | def list_operations(self): FILE: colossalai/auto_parallel/meta_profiler/meta_registry/activation.py function elementwise_meta_info (line 14) | def elementwise_meta_info(temp_mem_scale: float = 0, buffer_mem_scale: f... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/binary_elementwise_ops.py function binary_elementwise_meta_info (line 16) | def binary_elementwise_meta_info(*args, **kwargs) -> Tuple[TrainCycleIte... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/conv.py function convnd_meta_info (line 20) | def convnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/embedding.py function embedding_meta_info (line 15) | def embedding_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainC... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/linear.py function linear_meta_info (line 17) | def linear_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl... function matmul_meta_info (line 190) | def matmul_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/non_spmd.py function non_spmd_meta_info (line 17) | def non_spmd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCy... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/norm.py function batchnormnd_meta_info (line 17) | def batchnormnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, Trai... function layernorm_meta_info (line 113) | def layernorm_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainC... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py function avgpool_meta_info (line 17) | def avgpool_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCyc... function maxpool_meta_info (line 74) | def maxpool_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCyc... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/tensor.py function tensor_related_metainfo (line 13) | def tensor_related_metainfo(bwd_mem_out_factor: float = 1, bwd_mem_tmp_f... FILE: colossalai/auto_parallel/meta_profiler/meta_registry/where.py function where_meta_info (line 15) | def where_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycle... FILE: colossalai/auto_parallel/meta_profiler/registry.py class Registry (line 4) | class Registry: method __init__ (line 5) | def __init__(self, name): method register (line 9) | def register(self, source): method get (line 21) | def get(self, source): method has (line 26) | def has(self, source): FILE: colossalai/auto_parallel/meta_profiler/shard_metainfo.py class ShardMetaInfo (line 14) | class ShardMetaInfo: method __init__ (line 20) | def __init__(self, strategy: ShardingStrategy = None, target: Callable... method strategy (line 47) | def strategy(self) -> ShardingStrategy: method target (line 51) | def target(self) -> Callable: method strategy (line 55) | def strategy(self, strategy: ShardingStrategy) -> None: method target (line 61) | def target(self, target: Callable) -> None: method compute_sharded_opdata (line 66) | def compute_sharded_opdata(self, operation_data: OperationData, shardi... method compute_shard_metainfo (line 91) | def compute_shard_metainfo(self): FILE: colossalai/auto_parallel/offload/amp_optimizer.py class OptimState (line 17) | class OptimState(Enum): class AMPOptimizer (line 22) | class AMPOptimizer(OptimizerWrapper): method __init__ (line 40) | def __init__( method _set_grad_ptr (line 87) | def _set_grad_ptr(self): method _update_fp16_params (line 97) | def _update_fp16_params(self): method _check_overflow (line 105) | def _check_overflow(self): method _get_combined_scale (line 110) | def _get_combined_scale(self): method loss_scale (line 125) | def loss_scale(self): method zero_grad (line 128) | def zero_grad(self, *args, **kwargs): method step (line 132) | def step(self, *args, **kwargs): method clip_grad_norm (line 155) | def clip_grad_norm(self, model: torch.nn.Module, max_norm: float, norm... method backward (line 158) | def backward(self, loss: torch.Tensor): method __init__optimizer (line 163) | def __init__optimizer(self): FILE: colossalai/auto_parallel/offload/base_offload_module.py class BaseOffloadModule (line 14) | class BaseOffloadModule: method __init__ (line 24) | def __init__(self, model: nn.Module, region_manager: RegionManager, is... method register_grad_hook (line 34) | def register_grad_hook(self): method remove_grad_hook (line 39) | def remove_grad_hook(self): method __call__ (line 43) | def __call__(self, *args, **kwargs): method _pre_forward (line 46) | def _pre_forward(self): method forward (line 51) | def forward(self, *args, **kwargs): method backward (line 58) | def backward(self, loss): method _post_backward (line 62) | def _post_backward(self): method grad_handle (line 72) | def grad_handle(self, p, grad): method _cast_buffers (line 86) | def _cast_buffers(self): method parameters (line 90) | def parameters(self, recurse: bool = True): method named_parameters (line 93) | def named_parameters(self, prefix: str = "", recurse: bool = True): method named_buffers (line 96) | def named_buffers(self, prefix: str = "", recurse: bool = True): method named_children (line 99) | def named_children(self): method named_modules (line 102) | def named_modules( FILE: colossalai/auto_parallel/offload/mem_optimize.py function memory_optimize (line 17) | def memory_optimize( FILE: colossalai/auto_parallel/offload/region.py class Region (line 10) | class Region: method __init__ (line 18) | def __init__(self, r_id: int = 0) -> None: method can_release (line 41) | def can_release(self) -> bool: method has_inf_or_nan (line 48) | def has_inf_or_nan(self) -> bool: method init_param_data (line 54) | def init_param_data(self, pre_alloc_tensor: torch.Tensor = None): method move_param_to_cuda (line 74) | def move_param_to_cuda(self): method move_grad_to_cpu (line 92) | def move_grad_to_cpu(self): method free_cuda_data (line 105) | def free_cuda_data(self): method copy_grad_to_region_slice (line 110) | def copy_grad_to_region_slice(self, param: torch.nn.Parameter, data_sl... method split (line 125) | def split(self, cut_node_idx: int, cut_param_idx: int): method __update_params_ptr (line 143) | def __update_params_ptr(self) -> None: FILE: colossalai/auto_parallel/offload/region_manager.py class RegionManager (line 12) | class RegionManager: method __init__ (line 23) | def __init__(self, graph: Graph, solver_name: str = "asyn", memory_bud... method _build_regions (line 42) | def _build_regions(self): method _pre_process (line 59) | def _pre_process(self): method _post_process (line 99) | def _post_process(self, ts: TrainingSimulator = None): method _early_region_placement (line 104) | def _early_region_placement(self, ts: TrainingSimulator): method _merge_small_regions (line 144) | def _merge_small_regions(self, orig_reg_list: List[Region]) -> List[Re... method _search_block_size (line 173) | def _search_block_size( method _init_region_data (line 217) | def _init_region_data(self): method _process_shared_region (line 241) | def _process_shared_region(self): method _linearize_graph (line 271) | def _linearize_graph(self) -> List[Region]: method _set_node_and_region_info (line 466) | def _set_node_and_region_info(self, node_id: int, cur_n: Node, cur_reg... method get_region (line 502) | def get_region(self, param: torch.nn.Parameter) -> Region: method __update_param_region_map (line 511) | def __update_param_region_map(self, params: List[torch.nn.Parameter], ... FILE: colossalai/auto_parallel/offload/runtime.py class SynPreFwdPostBwdOP (line 10) | class SynPreFwdPostBwdOP(torch.autograd.Function): method forward (line 23) | def forward(ctx, input_, fwd_info, bwd_info): method backward (line 40) | def backward(ctx, grad_output): class AsynPreFwdPostBwdOP (line 50) | class AsynPreFwdPostBwdOP(torch.autograd.Function): method forward (line 63) | def forward(ctx, input_, fwd_info, bwd_info): method backward (line 88) | def backward(ctx, grad_output): function convert_fwd_upload_bwd_offload_to_action (line 114) | def convert_fwd_upload_bwd_offload_to_action(tensor, fwd_info, bwd_info): function convert_fwd_prefetch_bwd_offload_to_action (line 130) | def convert_fwd_prefetch_bwd_offload_to_action(tensor, fwd_info, bwd_info): function replace_node_users (line 146) | def replace_node_users(orig_node: Node, inserted_node: Node, rep_user_no... function runtime_syn_offload_apply_pass (line 166) | def runtime_syn_offload_apply_pass(gm: torch.fx.GraphModule, region_list... function runtime_asyn_offload_apply_pass (line 200) | def runtime_asyn_offload_apply_pass(gm: torch.fx.GraphModule, region_lis... FILE: colossalai/auto_parallel/offload/solver.py function benchmark_func (line 21) | def benchmark_func(func, number=1, repeat=1, warmup=3): class Solver (line 42) | class Solver(ABC): method __init__ (line 53) | def __init__(self, region_list: List[Region], memory_budget: float = -... method _call_solver (line 69) | def _call_solver(self): method _try_to_offload (line 73) | def _try_to_offload(self, *args): method _eval_one_choice (line 77) | def _eval_one_choice(self, *args): method _compute_offload_profit (line 80) | def _compute_offload_profit(self, total_mem_saving: float, peak_mem_sa... method _compare_profit (line 99) | def _compare_profit(self, profit_a: tuple, profit_b: tuple) -> bool: method _update_state (line 116) | def _update_state(self, best_ts: TrainingSimulator): method _update_node_mem_info (line 124) | def _update_node_mem_info(self, fwd_mem_info: Dict[Node, float], bwd_m... method _extract_computing_power (line 140) | def _extract_computing_power(self): method _profile_bandwidth (line 164) | def _profile_bandwidth(self): class SynGreedySolver (line 203) | class SynGreedySolver(Solver): method __init__ (line 204) | def __init__(self, region_list: List[Region], memory_budget: float = -... method _init_state (line 210) | def _init_state(self): method _call_solver (line 219) | def _call_solver(self): method _call_solver_l2l (line 254) | def _call_solver_l2l(self): method _try_to_offload (line 263) | def _try_to_offload(self, offload_region: Region): method _eval_one_choice (line 275) | def _eval_one_choice(self, offload_region: Region): class AsynGreedySolver (line 299) | class AsynGreedySolver(Solver): method __init__ (line 300) | def __init__(self, region_list: List[Region], memory_budget: float = -... method _init_state (line 310) | def _init_state(self): method _call_solver (line 320) | def _call_solver(self): method _try_to_offload (line 383) | def _try_to_offload(self, host_region: Region, offload_region: Region): method _try_convert_to_syn_upload (line 408) | def _try_convert_to_syn_upload(self, host_region: Region, offload_regi... method _repair_strategy (line 429) | def _repair_strategy(self): method _eval_one_choice (line 472) | def _eval_one_choice(self): class SolverFactory (line 490) | class SolverFactory: method create (line 494) | def create(solver_name: str) -> Type[Solver]: method get_solver_names (line 500) | def get_solver_names(): FILE: colossalai/auto_parallel/offload/training_simulator.py class ExecutionPeriod (line 13) | class ExecutionPeriod: class TrainingSimulator (line 18) | class TrainingSimulator(ABC): method __init__ (line 29) | def __init__(self, region_list: List[Region], comp_power: float, link_... method execute (line 47) | def execute(self): method _eval_fwd_mem_per_region (line 51) | def _eval_fwd_mem_per_region(self, region: Region): method _eval_bwd_mem_per_region (line 55) | def _eval_bwd_mem_per_region(self, region: Region): method _get_bandwidth (line 58) | def _get_bandwidth(self, link: str, comm_volumn: float) -> float: method _get_communication_overhead (line 79) | def _get_communication_overhead(self, link: str, comm_volumn: float) -... method _get_computing_overhead (line 82) | def _get_computing_overhead(self, flop: float) -> float: class SynTrainingSimulator (line 86) | class SynTrainingSimulator(TrainingSimulator): method __init__ (line 87) | def __init__(self, region_list: List[Region], comp_power: float, link_... method execute (line 90) | def execute(self): method _eval_fwd_mem_per_region (line 101) | def _eval_fwd_mem_per_region(self, region: Region): method _eval_bwd_mem_per_region (line 119) | def _eval_bwd_mem_per_region(self, region: Region): class AsynTrainingSimulator (line 170) | class AsynTrainingSimulator(TrainingSimulator): method __init__ (line 171) | def __init__(self, region_list: List[Region], comp_power: float, link_... method execute (line 205) | def execute(self): method _insert_h2d_exec (line 234) | def _insert_h2d_exec(self, region: Region, is_fwd: bool = True): method _insert_comp_exec (line 248) | def _insert_comp_exec(self, region: Region, is_fwd: bool = True): method _insert_d2h_exec (line 269) | def _insert_d2h_exec(self, region: Region): method _eval_fwd_cost_per_region (line 280) | def _eval_fwd_cost_per_region(self, region: Region): method _eval_fwd_mem_per_region (line 297) | def _eval_fwd_mem_per_region(self, region: Region): method _eval_bwd_cost_per_region (line 330) | def _eval_bwd_cost_per_region(self, region: Region): method _eval_bwd_mem_per_region (line 361) | def _eval_bwd_mem_per_region(self, region: Region): FILE: colossalai/auto_parallel/offload/util.py class NodeInfo (line 13) | class NodeInfo: class NvDevicePower (line 19) | class NvDevicePower: class GlobalRuntimeInfo (line 37) | class GlobalRuntimeInfo(metaclass=SingletonMeta): method __init__ (line 38) | def __init__(self): function compute_act_peak_mem (line 46) | def compute_act_peak_mem(region_list: List[Region]) -> float: function compute_max_param_mem (line 76) | def compute_max_param_mem(region_list: List[Region]) -> float: function compute_total_param_mem (line 80) | def compute_total_param_mem(region_list: List[Region]) -> float: function requires_upload_p_in_fwd (line 84) | def requires_upload_p_in_fwd(shared_reg: Region): function requires_release_p_in_bwd (line 90) | def requires_release_p_in_bwd(shared_reg: Region): function requires_offload_g_in_bwd (line 96) | def requires_offload_g_in_bwd(region: Region): FILE: colossalai/auto_parallel/passes/comm_metainfo_pass.py function _construct_shard_meta_info (line 17) | def _construct_shard_meta_info( function _runtime_apply_meta_info (line 61) | def _runtime_apply_meta_info(node: Node, origin_spec_dict, sharding_spec... function _runtime_comm_spec_apply_meta_info (line 77) | def _runtime_comm_spec_apply_meta_info(node: Node, comm_actions_dict: Di... function comm_metainfo_pass (line 111) | def comm_metainfo_pass( FILE: colossalai/auto_parallel/passes/meta_info_prop.py function _normalize_tuple (line 16) | def _normalize_tuple(x): class MetaInfoProp (line 23) | class MetaInfoProp: method __init__ (line 24) | def __init__(self, module: GraphModule) -> None: method _set_data_ptr (line 35) | def _set_data_ptr(self, x): method _is_inplace (line 44) | def _is_inplace(self, node: Node): method run (line 54) | def run(self) -> GraphModule: method placeholder_handler (line 63) | def placeholder_handler(self, node: Node) -> None: method get_attr_handler (line 73) | def get_attr_handler(self, node: Node) -> None: method output_handler (line 81) | def output_handler(self, node: Node) -> None: method node_handler (line 94) | def node_handler(self, node: Node) -> None: FILE: colossalai/auto_parallel/passes/runtime_apply_pass.py function runtime_apply (line 15) | def runtime_apply(node: Node, origin_dict: Dict, input_dict: Dict, node_... function runtime_apply_for_iterable_object (line 25) | def runtime_apply_for_iterable_object( function runtime_comm_spec_apply (line 45) | def runtime_comm_spec_apply(tensor: torch.Tensor, comm_actions_dict: Dic... function _preprocess_graph (line 59) | def _preprocess_graph(nodes: List[Node]): function _shape_consistency_apply (line 85) | def _shape_consistency_apply(gm: torch.fx.GraphModule): function _comm_spec_apply (line 151) | def _comm_spec_apply(gm: torch.fx.GraphModule): function _act_annotation_pass (line 225) | def _act_annotation_pass(gm: torch.fx.GraphModule): function runtime_apply_pass (line 252) | def runtime_apply_pass(gm: torch.fx.GraphModule): FILE: colossalai/auto_parallel/passes/runtime_preparation_pass.py function size_processing (line 21) | def size_processing( function solution_annotation_pass (line 52) | def solution_annotation_pass( function size_value_converting_pass (line 131) | def size_value_converting_pass(gm: torch.fx.GraphModule, device_mesh: De... function node_args_converting_pass (line 280) | def node_args_converting_pass(gm: torch.fx.GraphModule, device_mesh: Dev... function module_params_sharding_pass (line 384) | def module_params_sharding_pass(gm: torch.fx.GraphModule, device_mesh: D... function implicit_comm_action_apply (line 496) | def implicit_comm_action_apply(gm: torch.fx.GraphModule): function runtime_preparation_pass (line 502) | def runtime_preparation_pass( FILE: colossalai/auto_parallel/tensor_shard/initialize.py class ModuleWrapper (line 22) | class ModuleWrapper(nn.Module): method __init__ (line 28) | def __init__( method forward (line 48) | def forward(self, *args, **kwargs): function extract_meta_args_from_dataloader (line 58) | def extract_meta_args_from_dataloader(data_loader: torch.utils.data.Data... function extract_alpha_beta_for_device_mesh (line 65) | def extract_alpha_beta_for_device_mesh(alpha_beta_dict: Dict[Tuple[int],... function build_strategy_constructor (line 73) | def build_strategy_constructor( function solve_solution (line 117) | def solve_solution(gm: ColoGraphModule, strategy_constructor: Strategies... function transform_to_sharded_model (line 135) | def transform_to_sharded_model( function initialize_device_mesh (line 160) | def initialize_device_mesh( function initialize_model (line 221) | def initialize_model( function autoparallelize (line 300) | def autoparallelize( FILE: colossalai/auto_parallel/tensor_shard/node_handler/addmm_handler.py class ADDMMFunctionHandler (line 16) | class ADDMMFunctionHandler(NodeHandler): method _infer_op_data_type (line 23) | def _infer_op_data_type(self, tensor: torch.Tensor) -> OperationDataType: method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method get_strategy_generator (line 64) | def get_strategy_generator(self) -> List[StrategyGenerator]: method post_process (line 72) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... FILE: colossalai/auto_parallel/tensor_shard/node_handler/batch_norm_handler.py class BatchNormModuleHandler (line 16) | class BatchNormModuleHandler(MetaInfoModuleHandler): method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/binary_elementwise_handler.py class BinaryElementwiseHandler (line 18) | class BinaryElementwiseHandler(MetaInfoNodeHandler): method get_operation_data_mapping (line 24) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method get_strategy_generator (line 83) | def get_strategy_generator(self) -> List[StrategyGenerator]: method post_process (line 89) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... FILE: colossalai/auto_parallel/tensor_shard/node_handler/bmm_handler.py function _get_data_mapping_for_bmm_op (line 14) | def _get_data_mapping_for_bmm_op(node, input_idx, other_idx, bias_idx=No... class BMMFunctionHandler (line 48) | class BMMFunctionHandler(NodeHandler): method get_operation_data_mapping (line 55) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method get_strategy_generator (line 59) | def get_strategy_generator(self) -> List[StrategyGenerator]: class AddBMMFunctionHandler (line 68) | class AddBMMFunctionHandler(NodeHandler): method get_operation_data_mapping (line 77) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method get_strategy_generator (line 81) | def get_strategy_generator(self) -> List[StrategyGenerator]: method post_process (line 90) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... FILE: colossalai/auto_parallel/tensor_shard/node_handler/conv_handler.py class ConvModuleHandler (line 18) | class ConvModuleHandler(MetaInfoModuleHandler): method get_strategy_generator (line 23) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 29) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 57) | def post_process(self, strategy: ShardingStrategy): class ConvFunctionHandler (line 70) | class ConvFunctionHandler(MetaInfoNodeHandler): method get_strategy_generator (line 75) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 81) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 121) | def post_process(self, strategy: ShardingStrategy): FILE: colossalai/auto_parallel/tensor_shard/node_handler/default_reshape_handler.py class DefaultReshapeHandler (line 16) | class DefaultReshapeHandler(MetaInfoNodeHandler): method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]: method infer_logical_shape (line 27) | def infer_logical_shape(self, data): method get_operation_data_mapping (line 45) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/embedding_handler.py function _convert_logical_sharding_to_physical_sharding_spec_for_embedding (line 18) | def _convert_logical_sharding_to_physical_sharding_spec_for_embedding( class EmbeddingModuleHandler (line 116) | class EmbeddingModuleHandler(ModuleHandler): method get_strategy_generator (line 121) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 127) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 163) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... class EmbeddingFunctionHandler (line 177) | class EmbeddingFunctionHandler(NodeHandler): method get_strategy_generator (line 182) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 188) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 230) | def post_process(self, strategy: ShardingStrategy): FILE: colossalai/auto_parallel/tensor_shard/node_handler/getattr_handler.py class GetattrHandler (line 10) | class GetattrHandler(NodeHandler): method get_strategy_generator (line 15) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 21) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/getitem_handler.py class GetItemHandler (line 15) | class GetItemHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/layer_norm_handler.py class LayerNormModuleHandler (line 14) | class LayerNormModuleHandler(MetaInfoModuleHandler): method get_strategy_generator (line 19) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 25) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/linear_handler.py function _update_sharding_spec_for_transposed_weight_for_linear (line 18) | def _update_sharding_spec_for_transposed_weight_for_linear( function _convert_logical_sharding_to_physical_sharding_spec_for_linear (line 40) | def _convert_logical_sharding_to_physical_sharding_spec_for_linear( class LinearModuleHandler (line 152) | class LinearModuleHandler(MetaInfoModuleHandler): method get_strategy_generator (line 157) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 170) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 205) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... class LinearFunctionHandler (line 224) | class LinearFunctionHandler(MetaInfoNodeHandler): method get_strategy_generator (line 229) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 237) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method post_process (line 285) | def post_process(self, strategy: ShardingStrategy): FILE: colossalai/auto_parallel/tensor_shard/node_handler/matmul_handler.py class MatMulType (line 30) | class MatMulType(Enum): function get_matmul_type (line 47) | def get_matmul_type(input_dim: int, other_dim: int): class BmmTransform (line 70) | class BmmTransform(ABC): method apply (line 77) | def apply(self, shape_mapping: Dict[str, List[int]]): method recover (line 81) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:... class Padder (line 85) | class Padder(BmmTransform): method __init__ (line 90) | def __init__(self) -> None: method apply (line 94) | def apply(self, shape_mapping: Dict[str, List[int]]): method recover (line 113) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:... class Broadcaster (line 159) | class Broadcaster(BmmTransform): method __init__ (line 164) | def __init__(self) -> None: method apply (line 167) | def apply(self, shape_mapping: Dict[str, List[int]]): method recover (line 196) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:... class Viewer (line 236) | class Viewer(BmmTransform): method __init__ (line 241) | def __init__(self) -> None: method apply (line 244) | def apply(self, shape_mapping: Dict[str, List[int]]): method recover (line 262) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:... function _get_bmm_logical_shape (line 305) | def _get_bmm_logical_shape(input_shape, other_shape, transforms): class MatMulHandler (line 331) | class MatMulHandler(MetaInfoNodeHandler): method __init__ (line 338) | def __init__(self, *args, **kwargs) -> None: method get_strategy_generator (line 358) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 373) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method _get_op_data_mapping (line 384) | def _get_op_data_mapping(self, input_logical_shape, other_logical_shap... method _get_logical_shape_for_dot (line 418) | def _get_logical_shape_for_dot(self): method _get_logical_shape_for_mm (line 424) | def _get_logical_shape_for_mm(self): method _get_logical_shape_for_mv (line 437) | def _get_logical_shape_for_mv(self): method _get_logical_shape_for_bmm (line 443) | def _get_logical_shape_for_bmm(self): method post_process (line 448) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... FILE: colossalai/auto_parallel/tensor_shard/node_handler/node_handler.py class NodeHandler (line 24) | class NodeHandler(ABC): method __init__ (line 34) | def __init__( method update_resharding_cost (line 50) | def update_resharding_cost(self, strategy: ShardingStrategy) -> None: method get_target_function (line 143) | def get_target_function(self) -> callable: method register_strategy (line 162) | def register_strategy(self, compute_resharding_cost: bool = True) -> S... method post_process (line 221) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt... method get_strategy_generator (line 227) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 233) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: class MetaInfoNodeHandler (line 255) | class MetaInfoNodeHandler(NodeHandler): method register_strategy (line 263) | def register_strategy(self, compute_resharding_cost: bool = True) -> S... class ModuleHandler (line 291) | class ModuleHandler(NodeHandler): method __init__ (line 292) | def __init__(self, *args, **kwargs) -> None: class MetaInfoModuleHandler (line 310) | class MetaInfoModuleHandler(ModuleHandler): method register_strategy (line 318) | def register_strategy(self, compute_resharding_cost: bool = True) -> S... FILE: colossalai/auto_parallel/tensor_shard/node_handler/normal_pooling_handler.py class NormPoolingHandler (line 19) | class NormPoolingHandler(MetaInfoModuleHandler): method get_strategy_generator (line 24) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/output_handler.py class OutputHandler (line 14) | class OutputHandler(NodeHandler): method __init__ (line 19) | def __init__( method get_strategy_generator (line 25) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 31) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/permute_handler.py class PermuteHandler (line 15) | class PermuteHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/placeholder_handler.py class PlaceholderHandler (line 14) | class PlaceholderHandler(NodeHandler): method __init__ (line 19) | def __init__( method get_strategy_generator (line 25) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 33) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/registry.py class Registry (line 1) | class Registry: method __init__ (line 2) | def __init__(self, name): method register (line 6) | def register(self, source): method get (line 18) | def get(self, source): method has (line 23) | def has(self, source): FILE: colossalai/auto_parallel/tensor_shard/node_handler/softmax_handler.py class SoftmaxHandler (line 15) | class SoftmaxHandler(NodeHandler): method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/split_handler.py class SplitHandler (line 15) | class SplitHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/batch_norm_generator.py class BatchNormStrategyGenerator (line 20) | class BatchNormStrategyGenerator(StrategyGenerator): method validate (line 32) | def validate(self) -> bool: method update_compute_cost (line 46) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 73) | def update_memory_cost(self, strategy: ShardingStrategy): method split_input_channel (line 115) | def split_input_channel(self, mesh_dim_0): method split_input_channel_1d (line 139) | def split_input_channel_1d(self, mesh_dim_0, mesh_dim_1): method non_split (line 163) | def non_split(self): method split_input_batch (line 187) | def split_input_batch(self, mesh_dim_0): method split_input_batch_1d (line 224) | def split_input_batch_1d(self, mesh_dim_0, mesh_dim_1): method split_input_both_dim (line 261) | def split_input_both_dim(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 311) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/binary_elementwise_generator.py class BinaryElementwiseStrategyGenerator (line 20) | class BinaryElementwiseStrategyGenerator(StrategyGenerator): method validate (line 28) | def validate(self) -> bool: method update_compute_cost (line 36) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method update_memory_cost (line 49) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt... method enumerate_all_possible_output (line 67) | def enumerate_all_possible_output(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 111) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/conv_strategy_generator.py class ConvStrategyGenerator (line 18) | class ConvStrategyGenerator(StrategyGenerator): method validate (line 24) | def validate(self) -> bool: method update_compute_cost (line 38) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 78) | def update_memory_cost(self, strategy: ShardingStrategy): method split_input_batch_weight_out_channel (line 111) | def split_input_batch_weight_out_channel(self, mesh_dim_0, mesh_dim_1): method split_input_batch (line 178) | def split_input_batch(self, mesh_dim_0): method split_input_both_dim_weight_in_channel (line 238) | def split_input_both_dim_weight_in_channel(self, mesh_dim_0, mesh_dim_1): method split_input_in_channel_weight_both_channel (line 308) | def split_input_in_channel_weight_both_channel(self, mesh_dim_0, mesh_... method split_input_in_channel_weight_in_channel (line 355) | def split_input_in_channel_weight_in_channel(self, mesh_dim_0): method split_weight_out_channel (line 390) | def split_weight_out_channel(self, mesh_dim_0): method non_split (line 428) | def non_split(self): method split_1d_parallel_on_input_batch (line 447) | def split_1d_parallel_on_input_batch(self, mesh_dim_0, mesh_dim_1): method split_1d_parallel_on_in_channel (line 509) | def split_1d_parallel_on_in_channel(self, mesh_dim_0, mesh_dim_1): method split_1d_parallel_on_out_channel (line 543) | def split_1d_parallel_on_out_channel(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 579) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/embedding_generator.py class EmbeddingStrategyGenerator (line 18) | class EmbeddingStrategyGenerator(StrategyGenerator): method validate (line 24) | def validate(self) -> bool: method update_compute_cost (line 27) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 54) | def update_memory_cost(self, strategy: ShardingStrategy): method non_split (line 83) | def non_split(self): method split_input (line 99) | def split_input(self, mesh_dim_0): method split_input_and_embedding_dim (line 139) | def split_input_and_embedding_dim(self, mesh_dim_0, mesh_dim_1): method split_1d_parallel_on_input (line 193) | def split_1d_parallel_on_input(self, mesh_dim_0, mesh_dim_1): method split_embedding_dim (line 235) | def split_embedding_dim(self, mesh_dim_0): method split_1d_parallel_on_embedding_dim (line 268) | def split_1d_parallel_on_embedding_dim(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 300) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/getattr_generator.py class GetattrGenerator (line 16) | class GetattrGenerator(StrategyGenerator): method validate (line 21) | def validate(self) -> bool: method update_compute_cost (line 24) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 28) | def update_memory_cost(self, strategy: ShardingStrategy): method enumerate_all_possible_output (line 47) | def enumerate_all_possible_output(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 89) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/getitem_generator.py class GetItemStrategyGenerator (line 13) | class GetItemStrategyGenerator(FollowingStrategyGenerator): method validate (line 24) | def validate(self) -> bool: method update_compute_cost (line 27) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 31) | def update_memory_cost(self, strategy: ShardingStrategy): class TensorStrategyGenerator (line 62) | class TensorStrategyGenerator(GetItemStrategyGenerator): method collate_strategies (line 67) | def collate_strategies(self) -> List[ShardingStrategy]: class TensorTupleStrategyGenerator (line 137) | class TensorTupleStrategyGenerator(GetItemStrategyGenerator): method collate_strategies (line 142) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/layer_norm_generator.py class LayerNormGenerator (line 24) | class LayerNormGenerator(StrategyGenerator): method validate (line 30) | def validate(self) -> bool: method update_compute_cost (line 33) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 64) | def update_memory_cost(self, strategy: ShardingStrategy): method _generate_strategy_with_dim_partition (line 100) | def _generate_strategy_with_dim_partition(self, dim_partition): method split_input_batch_single_mesh_dim (line 145) | def split_input_batch_single_mesh_dim(self, mesh_dim_0, batch_dimensio... method split_input_batch_both_mesh_dim (line 153) | def split_input_batch_both_mesh_dim(self, mesh_dim_0, mesh_dim_1, batc... method non_split (line 162) | def non_split(self): method collate_strategies (line 182) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/matmul_strategy_generator.py class MatMulStrategyGenerator (line 18) | class MatMulStrategyGenerator(StrategyGenerator): method update_memory_cost (line 24) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt... class DotProductStrategyGenerator (line 54) | class DotProductStrategyGenerator(MatMulStrategyGenerator): method validate (line 55) | def validate(self) -> bool: method update_compute_cost (line 60) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method no_split (line 70) | def no_split(self): method split_one_dim (line 82) | def split_one_dim(self, mesh_dim): method collate_strategies (line 103) | def collate_strategies(self) -> List[ShardingStrategy]: class MatVecStrategyGenerator (line 118) | class MatVecStrategyGenerator(MatMulStrategyGenerator): method validate (line 119) | def validate(self) -> bool: method update_compute_cost (line 124) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method no_split (line 134) | def no_split(self): method split_input_batch (line 146) | def split_input_batch(self, mesh_dim): method collate_strategies (line 203) | def collate_strategies(self) -> List[ShardingStrategy]: class LinearProjectionStrategyGenerator (line 216) | class LinearProjectionStrategyGenerator(MatMulStrategyGenerator): method __init__ (line 217) | def __init__( method update_compute_cost (line 228) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method dp_strategies (line 246) | def dp_strategies(self) -> List[ShardingStrategy]: method tp_strategies (line 254) | def tp_strategies(self) -> List[ShardingStrategy]: method mix_strategies (line 277) | def mix_strategies(self) -> List[ShardingStrategy]: method collate_strategies (line 293) | def collate_strategies(self) -> List[ShardingStrategy]: method split_lhs_space_rhs_space (line 308) | def split_lhs_space_rhs_space(self, mesh_dim_0, mesh_dim_1): method split_lhs_space_both_contract (line 384) | def split_lhs_space_both_contract(self, mesh_dim_0, mesh_dim_1): method split_rhs_space_both_contract (line 463) | def split_rhs_space_both_contract(self, mesh_dim_0, mesh_dim_1): method recompute_split_both_contract (line 503) | def recompute_split_both_contract(self, mesh_dim): method split_rhs_space_only (line 534) | def split_rhs_space_only(self, mesh_dim): method split_lhs_1st_dim_1d (line 566) | def split_lhs_1st_dim_1d(self, mesh_dim_0, mesh_dim_1): method split_lhs_2nd_dim_1d (line 632) | def split_lhs_2nd_dim_1d(self, mesh_dim_0, mesh_dim_1): method split_rhs_2nd_dim_1d (line 664) | def split_rhs_2nd_dim_1d(self, mesh_dim_0, mesh_dim_1): method non_split (line 697) | def non_split(self): method validate (line 721) | def validate(self) -> bool: class BatchedMatMulStrategyGenerator (line 736) | class BatchedMatMulStrategyGenerator(MatMulStrategyGenerator): method __init__ (line 751) | def __init__(self, *args, **kwargs): method _pop_batch_dim_sharding_for_output (line 755) | def _pop_batch_dim_sharding_for_output(self, dim_partition_dict): method validate (line 767) | def validate(self) -> bool: method update_compute_cost (line 776) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method split_one_batch_dim (line 787) | def split_one_batch_dim(self, mesh_dim): method split_two_batch_dim (line 814) | def split_two_batch_dim(self, mesh_dim_0, mesh_dim_1): method split_batch_dim_lhs_space (line 845) | def split_batch_dim_lhs_space(self, mesh_dim_0, mesh_dim_1): method split_batch_dim_rhs_space (line 887) | def split_batch_dim_rhs_space(self, mesh_dim_0, mesh_dim_1): method split_batch_dim_both_contract (line 928) | def split_batch_dim_both_contract(self, mesh_dim_0, mesh_dim_1): method collate_strategies (line 968) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/normal_pooling_generator.py class NormalPoolStrategyGenerator (line 16) | class NormalPoolStrategyGenerator(StrategyGenerator): method validate (line 23) | def validate(self) -> bool: method update_compute_cost (line 37) | def update_compute_cost(self, strategy: ShardingStrategy) -> TrainCycl... method update_memory_cost (line 65) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt... method _generate_strategy_with_dim_partition (line 89) | def _generate_strategy_with_dim_partition(self, dim_partition): method enumerate_all_possible_batch_dimensions_dim_partition (line 107) | def enumerate_all_possible_batch_dimensions_dim_partition(self, mesh_d... method collate_strategies (line 117) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/output_generator.py class OutputGenerator (line 18) | class OutputGenerator(OutputStrategyGenerator): method __init__ (line 23) | def __init__( method validate (line 33) | def validate(self) -> bool: method update_compute_cost (line 36) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 40) | def update_memory_cost(self, strategy: ShardingStrategy): method replica_strategy (line 53) | def replica_strategy(self) -> List[ShardingStrategy]: method distributed_strategy (line 87) | def distributed_strategy(self, mesh_list: List[List[int]] = None) -> L... method collate_strategies (line 118) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/placeholder_generator.py class PlaceholderGenerator (line 16) | class PlaceholderGenerator(StrategyGenerator): method __init__ (line 21) | def __init__( method validate (line 27) | def validate(self) -> bool: method update_compute_cost (line 30) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 34) | def update_memory_cost(self, strategy: ShardingStrategy): method replica_placeholder (line 52) | def replica_placeholder(self) -> ShardingStrategy: method distributed_placeholder (line 72) | def distributed_placeholder(self, mesh_list) -> ShardingStrategy: method collate_strategies (line 92) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/reshape_generator.py class ReshapeGenerator (line 23) | class ReshapeGenerator(FollowingStrategyGenerator): method validate (line 28) | def validate(self) -> bool: method update_compute_cost (line 31) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 35) | def update_memory_cost(self, strategy: ShardingStrategy): method collate_strategies (line 65) | def collate_strategies(self) -> List[ShardingStrategy]: class ViewGenerator (line 69) | class ViewGenerator(ReshapeGenerator): method collate_strategies (line 74) | def collate_strategies(self) -> List[ShardingStrategy]: class PermuteGenerator (line 155) | class PermuteGenerator(ReshapeGenerator): method collate_strategies (line 160) | def collate_strategies(self) -> List[ShardingStrategy]: class TransposeGenerator (line 195) | class TransposeGenerator(ReshapeGenerator): method collate_strategies (line 200) | def collate_strategies(self) -> List[ShardingStrategy]: class SplitGenerator (line 241) | class SplitGenerator(ReshapeGenerator): method collate_strategies (line 246) | def collate_strategies(self) -> List[ShardingStrategy]: class DefaultReshapeGenerator (line 314) | class DefaultReshapeGenerator(ReshapeGenerator): method collate_strategies (line 320) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/softmax_generator.py class SoftmaxGenerator (line 12) | class SoftmaxGenerator(FollowingStrategyGenerator): method validate (line 17) | def validate(self) -> bool: method update_compute_cost (line 20) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 35) | def update_memory_cost(self, strategy: ShardingStrategy): method collate_strategies (line 65) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/strategy_generator.py class StrategyGenerator (line 23) | class StrategyGenerator(ABC): method __init__ (line 30) | def __init__(self, operation_data_mapping: Dict[str, OperationData], d... method has_bias (line 38) | def has_bias(self): method is_param (line 44) | def is_param(self, op_data_name): method is_buffer (line 48) | def is_buffer(self, op_data_name): method get_sharding_strategy (line 52) | def get_sharding_strategy( method to_sharding_spec_mapping (line 69) | def to_sharding_spec_mapping(self, mapping: Dict[str, Dict[int, List[i... method replace_op_name_with_op_data (line 117) | def replace_op_name_with_op_data(self, mapping: Dict[str, Any]): method get_communication_spec (line 127) | def get_communication_spec( method get_communication_action (line 140) | def get_communication_action( method update_communication_cost (line 163) | def update_communication_cost(self, strategy: ShardingStrategy) -> Sha... method update_compute_cost (line 204) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS... method update_memory_cost (line 210) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt... method _compute_size_in_bytes (line 215) | def _compute_size_in_bytes(self, strategy: ShardingStrategy, key: str): method generate (line 258) | def generate(self) -> List[ShardingStrategy]: method collate_strategies (line 281) | def collate_strategies(self) -> List[ShardingStrategy]: method validate (line 285) | def validate(self) -> bool: class FollowingStrategyGenerator (line 292) | class FollowingStrategyGenerator(StrategyGenerator): method __init__ (line 299) | def __init__( class OutputStrategyGenerator (line 307) | class OutputStrategyGenerator(StrategyGenerator): method __init__ (line 312) | def __init__( FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/sum_generator.py class SumGenerator (line 12) | class SumGenerator(FollowingStrategyGenerator): method validate (line 17) | def validate(self) -> bool: method update_compute_cost (line 20) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 32) | def update_memory_cost(self, strategy: ShardingStrategy): method collate_strategies (line 62) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/tensor_constructor_generator.py class TensorConstructorGenerator (line 10) | class TensorConstructorGenerator(StrategyGenerator): method validate (line 16) | def validate(self) -> bool: method update_compute_cost (line 19) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 23) | def update_memory_cost(self, strategy: ShardingStrategy): method collate_strategies (line 43) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/unary_elementwise_generator.py class UnaryElementwiseGenerator (line 11) | class UnaryElementwiseGenerator(FollowingStrategyGenerator): method validate (line 16) | def validate(self) -> bool: method update_compute_cost (line 19) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 23) | def update_memory_cost(self, strategy: ShardingStrategy): method collate_strategies (line 53) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/where_generator.py class WhereGenerator (line 16) | class WhereGenerator(StrategyGenerator): method validate (line 21) | def validate(self) -> bool: method update_compute_cost (line 24) | def update_compute_cost(self, strategy: ShardingStrategy): method update_memory_cost (line 28) | def update_memory_cost(self, strategy: ShardingStrategy): method _generate_strategy_with_dim_partition (line 57) | def _generate_strategy_with_dim_partition(self, dim_partition): method enumerate_all_possible_output_spec (line 78) | def enumerate_all_possible_output_spec(self, mesh_dim_0, mesh_dim_1, d... method collate_strategies (line 88) | def collate_strategies(self) -> List[ShardingStrategy]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/sum_handler.py class SumHandler (line 15) | class SumHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/tensor_constructor_handler.py class TensorConstructorHandler (line 15) | class TensorConstructorHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/transpose_handler.py class TransposeHandler (line 15) | class TransposeHandler(NodeHandler): method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/unary_elementwise_handler.py class UnaryElementwiseHandler (line 22) | class UnaryElementwiseHandler(MetaInfoNodeHandler): method get_strategy_generator (line 27) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 33) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/view_handler.py class ViewHandler (line 16) | class ViewHandler(NodeHandler): method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: FILE: colossalai/auto_parallel/tensor_shard/node_handler/where_handler.py class WhereHandler (line 16) | class WhereHandler(NodeHandler): method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]: method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]: method convert_physical_operand_to_logical_operand (line 55) | def convert_physical_operand_to_logical_operand(self, physical_operand... method post_process (line 60) | def post_process(self, strategy: ShardingStrategy): FILE: colossalai/auto_parallel/tensor_shard/options.py class SolverPerference (line 7) | class SolverPerference(Enum): class ShardOption (line 17) | class ShardOption(Enum): class DataloaderOption (line 36) | class DataloaderOption(Enum): class SolverOptions (line 46) | class SolverOptions: FILE: colossalai/auto_parallel/tensor_shard/sharding_strategy.py class OperationDataType (line 23) | class OperationDataType(Enum): class OperationData (line 36) | class OperationData: method __post_init__ (line 52) | def __post_init__(self): method __repr__ (line 72) | def __repr__(self) -> str: method __eq__ (line 75) | def __eq__(self, other) -> bool: method __hash__ (line 78) | def __hash__(self) -> int: class TrainCycleItem (line 83) | class TrainCycleItem: class MemoryCost (line 99) | class MemoryCost: class CommType (line 116) | class CommType(Enum): class CommAction (line 134) | class CommAction: class ShardingStrategy (line 152) | class ShardingStrategy: method input_sharding_specs (line 174) | def input_sharding_specs(self) -> Dict[OperationData, ShardingSpec]: method argument_sharding_specs (line 181) | def argument_sharding_specs(self) -> Dict[OperationData, ShardingSpec]: method param_sharding_specs (line 185) | def param_sharding_specs(self) -> Dict[OperationData, ShardingSpec]: method output_sharding_specs (line 189) | def output_sharding_specs(self) -> Dict[OperationData, ShardingSpec]: method _get_sharding_spec (line 192) | def _get_sharding_spec(self, operation_data_type: OperationDataType): method get_op_data_by_name (line 196) | def get_op_data_by_name(self, name: str): method get_sharding_spec_by_name (line 202) | def get_sharding_spec_by_name(self, name: str): method clone (line 208) | def clone(self): class StrategiesVector (line 237) | class StrategiesVector(list): method __init__ (line 246) | def __init__(self, node: Node): method check_merge (line 254) | def check_merge(self): FILE: colossalai/auto_parallel/tensor_shard/solver/cost_graph.py class CostGraph (line 6) | class CostGraph: method __init__ (line 20) | def __init__(self, leaf_strategies, simplify=True, forward_only=False): method _remove_invalid_node (line 32) | def _remove_invalid_node(self, node, attr_name): method _build_cost_graph (line 41) | def _build_cost_graph(self): method get_edge_cost (line 98) | def get_edge_cost(self, src_node, dst_node): method merge_node (line 101) | def merge_node(self, src_node, dst_node): method _reindexing_src (line 190) | def _reindexing_src(self, src): method simplify_graph (line 195) | def simplify_graph(self): FILE: colossalai/auto_parallel/tensor_shard/solver/graph_analysis.py class LiveVariable (line 14) | class LiveVariable: class LiveVariableVector (line 24) | class LiveVariableVector(list): method exists (line 29) | def exists(self, name) -> bool: method get (line 38) | def get(self, name) -> LiveVariable: method copy (line 44) | def copy(self) -> "LiveVariableVector": class LiveStage (line 55) | class LiveStage: class GraphAnalyser (line 66) | class GraphAnalyser: method __init__ (line 67) | def __init__(self, gm: GraphModule): method gm (line 72) | def gm(self) -> GraphModule: method graph (line 79) | def graph(self) -> Graph: method liveness_analysis (line 85) | def liveness_analysis(self) -> List[LiveStage]: method get_alias_set (line 166) | def get_alias_set(self): FILE: colossalai/auto_parallel/tensor_shard/solver/solver.py class Solver (line 29) | class Solver: method __init__ (line 30) | def __init__( method _recover_merged_node_strategy (line 78) | def _recover_merged_node_strategy(self): method _generate_node_index_dict (line 95) | def _generate_node_index_dict(self) -> Dict[Node, int]: method _prepare_data_for_solver (line 101) | def _prepare_data_for_solver(self): method _call_solver_serialized_args (line 211) | def _call_solver_serialized_args( method call_solver_serialized_args (line 497) | def call_solver_serialized_args(self): FILE: colossalai/auto_parallel/tensor_shard/solver/strategies_constructor.py class StrategiesConstructor (line 19) | class StrategiesConstructor: method __init__ (line 29) | def __init__(self, graph: Graph, device_mesh: DeviceMesh, solver_optio... method remove_duplicated_strategy (line 41) | def remove_duplicated_strategy(self, strategies_vector): method generate_alias_set (line 57) | def generate_alias_set(self): method build_strategies_and_cost (line 72) | def build_strategies_and_cost(self): FILE: colossalai/auto_parallel/tensor_shard/utils/broadcast.py class BroadcastType (line 25) | class BroadcastType(Enum): function is_broadcastable (line 31) | def is_broadcastable(shape1: torch.Size, shape2: torch.Size) -> bool: function get_broadcast_shape (line 43) | def get_broadcast_shape(shape1: torch.Size, shape2: torch.Size) -> List[... function get_broadcast_dim_info (line 61) | def get_broadcast_dim_info(logical_shape, physical_shape): function recover_sharding_spec_for_broadcast_shape (line 92) | def recover_sharding_spec_for_broadcast_shape( function comm_actions_for_oprands (line 141) | def comm_actions_for_oprands( FILE: colossalai/auto_parallel/tensor_shard/utils/factory.py function generate_sharding_spec (line 20) | def generate_sharding_spec( function generate_resharding_costs (line 55) | def generate_resharding_costs( function find_repeat_blocks (line 101) | def find_repeat_blocks(node_list: List[torch.fx.Node], root_module, comm... FILE: colossalai/auto_parallel/tensor_shard/utils/misc.py function ignore_sharding_exception (line 12) | def ignore_sharding_exception(func): function check_sharding_spec_validity (line 37) | def check_sharding_spec_validity(sharding_spec: ShardingSpec, tensor: to... function pytree_map (line 79) | def pytree_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[... FILE: colossalai/auto_parallel/tensor_shard/utils/reshape.py class PreviousStatus (line 7) | class PreviousStatus(Enum): function detect_reshape_mapping (line 19) | def detect_reshape_mapping(origin_shape: torch.Size, tgt_shape: torch.Si... function check_keep_sharding_status (line 134) | def check_keep_sharding_status( function infer_output_dim_partition_dict (line 177) | def infer_output_dim_partition_dict( FILE: colossalai/auto_parallel/tensor_shard/utils/sharding.py function transpose_partition_dim (line 19) | def transpose_partition_dim(sharding_spec: ShardingSpec, dim1: int, dim2... function update_partition_dim (line 50) | def update_partition_dim( function enumerate_all_possible_2d_sharding (line 88) | def enumerate_all_possible_2d_sharding(mesh_dim_0, mesh_dim_1, dim_size): function enumerate_all_possible_1d_sharding (line 104) | def enumerate_all_possible_1d_sharding(mesh_dim_0, dim_size): function generate_sharding_size (line 114) | def generate_sharding_size(dim_partition_dict, device_mesh): FILE: colossalai/autochunk/autochunk_codegen.py function _gen_chunk_slice_dim (line 31) | def _gen_chunk_slice_dim(chunk_dim: int, chunk_indice_name: str, shape: ... function _gen_loop_start (line 54) | def _gen_loop_start(chunk_input: List[Node], chunk_output: List[Node], c... function _gen_loop_end (line 100) | def _gen_loop_end( function _replace_name (line 135) | def _replace_name(context: str, name_from: str, name_to: str) -> str: function _replace_reshape_size (line 149) | def _replace_reshape_size(context: str, node_name: str, reshape_size_dic... function _replace_new_tensor_like_shape (line 159) | def _replace_new_tensor_like_shape( function _replace_new_tensor_shape (line 184) | def _replace_new_tensor_shape( function _add_node_slice (line 211) | def _add_node_slice( function emit_code_with_chunk (line 246) | def emit_code_with_chunk( class AutoChunkCodeGen (line 358) | class AutoChunkCodeGen(CodeGen): method __init__ (line 359) | def __init__( method _gen_python_code (line 375) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp... FILE: colossalai/autochunk/estimate_memory.py class EstimateMemory (line 9) | class EstimateMemory(object): method __init__ (line 14) | def __init__(self) -> None: method _get_node_size (line 17) | def _get_node_size(self, x: Node) -> float: method _add_active_node (line 29) | def _add_active_node(self, n: Node, active_nodes: Dict, chunk_ratio: f... method _build_delete_node_dict (line 41) | def _build_delete_node_dict(self, node_mgr: NodeMgr) -> Dict: method _remove_deactive_node (line 62) | def _remove_deactive_node( method _get_tmp_memory (line 81) | def _get_tmp_memory(self, node, not_contiguous_list, delete=False): method _get_chunk_ratio (line 101) | def _get_chunk_ratio(self, node, chunk_node_dim, chunk_size): method _print_compute_op_mem_log (line 111) | def _print_compute_op_mem_log(self, log, nodes, title=None): method _add_active_nodes_from_list (line 124) | def _add_active_nodes_from_list(self, active_nodes: List, nodes: List)... method _get_memory_from_active_nodes (line 131) | def _get_memory_from_active_nodes(self, active_nodes: Dict) -> float: method estimate_chunk_inference_mem (line 139) | def estimate_chunk_inference_mem(self, node_list: List, chunk_infos: D... FILE: colossalai/autochunk/reorder_graph.py class ReorderGraph (line 5) | class ReorderGraph(object): method __init__ (line 10) | def __init__(self, trace_indice: TraceIndice, node_mgr: NodeMgr) -> None: method _get_reorder_map (line 15) | def _get_reorder_map(self, chunk_info): method _reorder_chunk_info (line 36) | def _reorder_chunk_info(self, chunk_info, reorder_map): method _update_all_reorder_map (line 51) | def _update_all_reorder_map(self, reorder_map): method _reorder_self_node_list (line 55) | def _reorder_self_node_list(self, reorder_map): method _reorder_idx_trace (line 61) | def _reorder_idx_trace(self, reorder_map): method reorder_all (line 82) | def reorder_all(self, chunk_info): method reorder_node_list (line 94) | def reorder_node_list(self, node_list): method tmp_reorder (line 100) | def tmp_reorder(self, node_list, chunk_info): FILE: colossalai/autochunk/search_chunk.py class SearchChunk (line 14) | class SearchChunk(object): method __init__ (line 43) | def __init__(self, gm, max_memory=None, print_mem=False, print_progres... method _init_trace (line 61) | def _init_trace(self) -> None: method _find_peak_region (line 74) | def _find_peak_region(self, mem_peak: List) -> int: method _search_max_chunk_region (line 106) | def _search_max_chunk_region(self, active_node: List, peak_region: int... method _find_chunk_info (line 157) | def _find_chunk_info(self, input_trace, output_trace, start_idx, end_i... method _search_possible_chunk_regions (line 196) | def _search_possible_chunk_regions(self, max_chunk_region: Tuple, peak... method _step_search (line 230) | def _step_search( method search_region (line 262) | def search_region(self) -> Dict: FILE: colossalai/autochunk/select_chunk.py class SelectChunk (line 7) | class SelectChunk(object): method __init__ (line 8) | def __init__( method _select_best_chunk_region (line 26) | def _select_best_chunk_region(self, possible_chunk_regions, chunk_info... method _select_fit_memory_chunk_region (line 35) | def _select_fit_memory_chunk_region(self, possible_chunk_regions, chun... method _get_fit_chunk_size (line 84) | def _get_fit_chunk_size(self, chunk_region_dict, chunk_infos): method _chunk_size_binary_search (line 105) | def _chunk_size_binary_search(self, left, right, chunk_region_dict, ch... method _get_compute_node_num (line 125) | def _get_compute_node_num(self, start, end): method _select_min_memory_chunk_region (line 132) | def _select_min_memory_chunk_region(self, possible_chunk_regions, chun... method _is_legal_region (line 178) | def _is_legal_region(self, cur_chunk_info, chunk_infos): FILE: colossalai/autochunk/trace_flow.py class TraceFlow (line 18) | class TraceFlow(object): method __init__ (line 19) | def __init__(self, trace_indice: TraceIndice, node_mgr: NodeMgr) -> None: method check_index_source (line 23) | def check_index_source(self, start_dim, start_node, start_idx, end_dim... method check_index_compute (line 48) | def check_index_compute(self, start_idx, end_dim, end_node, end_idx): method _assign_single_node_flow (line 66) | def _assign_single_node_flow( method _get_all_node_info (line 152) | def _get_all_node_info(self, end_dim, start_idx, end_idx): method _get_input_nodes_dim (line 197) | def _get_input_nodes_dim(self, inputs: List[Node], start_idx: int, end... method _get_prepose_nodes (line 245) | def _get_prepose_nodes(self, all_node_info: Dict, start_idx: int, end_... method _get_non_chunk_inputs (line 316) | def _get_non_chunk_inputs(self, chunk_info, start_idx, end_idx): method flow_search (line 328) | def flow_search(self, start_idx, start_dim, end_idx, end_dim): method _get_other_output_info (line 374) | def _get_other_output_info( method _update_chunk_info (line 407) | def _update_chunk_info(self, chunk_info: Dict, new_all_node_info: Dict... method _reassign_reshape_size (line 433) | def _reassign_reshape_size(self, chunk_info): method check_region_start_end (line 470) | def check_region_start_end( FILE: colossalai/autochunk/trace_indice.py class TraceIndice (line 9) | class TraceIndice(object): method __init__ (line 31) | def __init__(self, node_mgr: NodeMgr) -> None: method _init_indice_trace_list (line 38) | def _init_indice_trace_list(self) -> List: method set_active_nodes (line 52) | def set_active_nodes(self, active_node_list: List) -> None: method _add_indice (line 55) | def _add_indice(self) -> int: method _del_dim (line 65) | def _del_dim(self, idx: int, dim_idx: int) -> None: method _add_dim (line 73) | def _add_dim(self, node_idx: int, dim_idx: int) -> None: method _add_source (line 84) | def _add_source( method _transform_indice (line 114) | def _transform_indice(self, node: Node, node_dim: int) -> int: method _inherit_indice (line 119) | def _inherit_indice( method _inherit_all_indice (line 143) | def _inherit_all_indice(self, node_from: Node, node_to: Node) -> None: method _inherit_more_indice_from_node_with_exclude (line 154) | def _inherit_more_indice_from_node_with_exclude(self, node_from: Node,... method _mark_computation (line 170) | def _mark_computation(self, node: Node, idx: int, dim: int) -> None: method _find_trace_from_node (line 187) | def _find_trace_from_node(self, node: Node) -> Dict: method _find_source_trace_from_node (line 201) | def _find_source_trace_from_node(self, node: Node) -> List: method _find_indice_trace_from_node (line 215) | def _find_indice_trace_from_node(self, node) -> List: method _find_compute_trace_from_node (line 227) | def _find_compute_trace_from_node(self, node: Node) -> List: method _assign_indice_as_input (line 239) | def _assign_indice_as_input(self, node: Node, node_idx: int, input_nod... method _assign_all_indice (line 251) | def _assign_all_indice(self, node: Node, node_idx: int) -> None: method _assign_transpose_indice (line 267) | def _assign_transpose_indice(self, node: Node, node_idx: int) -> None: method _assign_permute_indice (line 284) | def _assign_permute_indice(self, node: Node, node_idx: int) -> None: method _assign_linear_indice (line 301) | def _assign_linear_indice(self, node: Node, node_idx: int) -> None: method _assign_addmm_indice (line 322) | def _assign_addmm_indice(self, node: Node, node_idx: int) -> None: method _assign_baddbmm_indice (line 338) | def _assign_baddbmm_indice(self, node: Node, node_idx: int) -> None: method _assign_matmul_indice (line 360) | def _assign_matmul_indice(self, node: Node, node_idx: int) -> None: method _assign_conv2d_indice (line 380) | def _assign_conv2d_indice(self, node: Node, node_idx: int) -> None: method _assign_interpolate_indice (line 406) | def _assign_interpolate_indice(self, node: Node, node_idx: int) -> None: method _assign_layernorm_indice (line 422) | def _assign_layernorm_indice(self, node, idx): method _assign_groupnorm_indice (line 435) | def _assign_groupnorm_indice(self, node, idx): method _assign_elementwise_indice (line 447) | def _assign_elementwise_indice(self, node, idx): method _assign_no_change_indice (line 464) | def _assign_no_change_indice(self, node, idx): method _assign_einsum_indice (line 470) | def _assign_einsum_indice(self, node, idx): method _assign_softmax_indice (line 506) | def _assign_softmax_indice(self, node, idx): method _assign_split_indice (line 519) | def _assign_split_indice(self, node: Node, node_idx: int) -> None: method _assign_unsqueeze_indice (line 532) | def _assign_unsqueeze_indice(self, node: Node, node_idx: int) -> None: method _assign_cat_indice (line 549) | def _assign_cat_indice(self, node: Node, node_idx: int) -> None: method _assign_sum_indice (line 565) | def _assign_sum_indice(self, node: Node, node_idx: int) -> None: method _assign_flatten_indice (line 581) | def _assign_flatten_indice(self, node: Node, node_idx: int) -> None: method _assign_expand_indice (line 601) | def _assign_expand_indice(self, node: Node, node_idx: int) -> None: method _assign_unbind_indice (line 622) | def _assign_unbind_indice(self, node: Node, node_idx: int) -> None: method _assign_embedding_indice (line 635) | def _assign_embedding_indice(self, node: Node, node_idx: int) -> None: method _assign_getitem_indice (line 647) | def _assign_getitem_indice(self, node: Node, node_idx: int) -> None: method _assign_view_reshape_indice (line 718) | def _assign_view_reshape_indice(self, node: Node, node_idx: int) -> None: method _clear_trace (line 817) | def _clear_trace(self, node_idx: int) -> None: method trace_indice (line 838) | def trace_indice(self) -> None: FILE: colossalai/autochunk/utils.py class NodeMgr (line 12) | class NodeMgr(object): method __init__ (line 13) | def __init__(self, nodes_list: List[Node]) -> None: method _set_node_dict (line 18) | def _set_node_dict(self) -> None: method find_node_idx (line 26) | def find_node_idx(self, node: Node) -> int: method find_node_idx_by_name (line 32) | def find_node_idx_by_name(self, node_name: str) -> int: method get_node_by_idx (line 38) | def get_node_by_idx(self, idx: int) -> Node: method get_node_slice_by_idx (line 44) | def get_node_slice_by_idx(self, start: int, end: int) -> List[Node]: method get_node_list (line 50) | def get_node_list(self) -> List: method update_node_list (line 56) | def update_node_list(self, node_list: List) -> None: function get_logger (line 64) | def get_logger() -> Any: function flat_list (line 68) | def flat_list(inputs: Any) -> List: function find_first_tensor_arg (line 85) | def find_first_tensor_arg(node: Node) -> Node: function is_non_compute_node (line 95) | def is_non_compute_node(node: Node) -> bool: function get_node_shape (line 111) | def get_node_shape(node: Node) -> Any: function is_non_memory_node (line 122) | def is_non_memory_node(node: Node) -> bool: function is_non_compute_node_except_placeholder (line 130) | def is_non_compute_node_except_placeholder(node: Node) -> bool: function is_non_compute_node_except_placeholder_output (line 136) | def is_non_compute_node_except_placeholder_output(node: Node) -> bool: function delete_free_var_from_last_use (line 142) | def delete_free_var_from_last_use(user_to_last_uses: Dict) -> None: function find_chunk_all_input_nodes (line 149) | def find_chunk_all_input_nodes(nodes: List[Node]) -> List: function find_chunk_compute_input_and_output_nodes (line 163) | def find_chunk_compute_input_and_output_nodes(nodes: List[Node]) -> Unio... function get_module_node_name (line 197) | def get_module_node_name(node: Node) -> str: function get_node_name (line 210) | def get_node_name(node: Node) -> str: function find_tensor_node (line 227) | def find_tensor_node(node_list: List[Node]) -> List[Node]: function find_tensor_shape_node (line 238) | def find_tensor_shape_node(node_list: List[Node]) -> List[Node]: FILE: colossalai/booster/accelerator.py class Accelerator (line 16) | class Accelerator: method __init__ (line 24) | def __init__(self, device: str): method bind (line 31) | def bind(self): method configure_model (line 46) | def configure_model(self, model: nn.Module) -> nn.Module: FILE: colossalai/booster/booster.py class Booster (line 33) | class Booster: method __init__ (line 74) | def __init__( method boost (line 126) | def boost( method backward (line 175) | def backward(self, loss: torch.Tensor, optimizer: Optimizer) -> None: method execute_pipeline (line 185) | def execute_pipeline( method no_sync (line 223) | def no_sync(self, model: nn.Module = None, optimizer: OptimizerWrapper... method enable_lora (line 240) | def enable_lora( method load_model (line 291) | def load_model( method save_model (line 315) | def save_model( method load_optimizer (line 352) | def load_optimizer( method save_optimizer (line 372) | def save_optimizer( method save_lr_scheduler (line 400) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str... method load_lr_scheduler (line 409) | def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str... method save_lora_as_pretrained (line 418) | def save_lora_as_pretrained( FILE: colossalai/booster/mixed_precision/__init__.py function mixed_precision_factory (line 28) | def mixed_precision_factory(mixed_precision_type: str) -> MixedPrecision: FILE: colossalai/booster/mixed_precision/bf16.py class BF16MixedPrecision (line 4) | class BF16MixedPrecision(MixedPrecision): FILE: colossalai/booster/mixed_precision/fp16_apex.py class FP16ApexMixedPrecision (line 8) | class FP16ApexMixedPrecision(MixedPrecision): method __init__ (line 26) | def __init__( FILE: colossalai/booster/mixed_precision/fp16_naive.py class FP16NaiveMixedPrecision (line 4) | class FP16NaiveMixedPrecision(MixedPrecision): method __init__ (line 18) | def __init__( FILE: colossalai/booster/mixed_precision/fp16_torch.py class TorchAMPOptimizer (line 16) | class TorchAMPOptimizer(OptimizerWrapper): method __init__ (line 33) | def __init__( method backward (line 49) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw... method step (line 53) | def step(self, *args, **kwargs) -> Optional[float]: method scale_loss (line 58) | def scale_loss(self, loss: Tensor) -> Tensor: method unscale_grad (line 61) | def unscale_grad(self) -> None: method clip_grad_by_value (line 64) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None: method clip_grad_by_norm (line 68) | def clip_grad_by_norm( class TorchAMPModule (line 80) | class TorchAMPModule(ModelWrapper): method __init__ (line 88) | def __init__(self, module: nn.Module): method forward (line 91) | def forward(self, *args, **kwargs): class FP16TorchMixedPrecision (line 96) | class FP16TorchMixedPrecision(MixedPrecision): method __init__ (line 112) | def __init__( method configure (line 127) | def configure( FILE: colossalai/booster/mixed_precision/fp8.py class FP8MixedPrecision (line 4) | class FP8MixedPrecision(MixedPrecision): FILE: colossalai/booster/mixed_precision/mixed_precision_base.py class MixedPrecision (line 10) | class MixedPrecision(ABC): method configure (line 16) | def configure( FILE: colossalai/booster/plugin/dp_plugin_base.py class DPPluginBase (line 12) | class DPPluginBase(Plugin): method __init__ (line 15) | def __init__(self) -> None: method prepare_dataloader (line 23) | def prepare_dataloader( FILE: colossalai/booster/plugin/gemini_plugin.py function get_param_info (line 45) | def get_param_info(optim: Optimizer): class GeminiCheckpointIO (line 63) | class GeminiCheckpointIO(GeneralCheckpointIO): method __init__ (line 64) | def __init__(self) -> None: method save_unsharded_model (line 69) | def save_unsharded_model( method load_unsharded_model (line 98) | def load_unsharded_model( method save_unsharded_optimizer (line 115) | def save_unsharded_optimizer( method load_unsharded_optimizer (line 141) | def load_unsharded_optimizer( method save_sharded_model (line 153) | def save_sharded_model( method load_sharded_model (line 219) | def load_sharded_model( method save_sharded_optimizer (line 242) | def save_sharded_optimizer( method load_sharded_optimizer (line 317) | def load_sharded_optimizer( method save_lr_scheduler (line 361) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): class GeminiPlugin (line 369) | class GeminiPlugin(DPPluginBase): method __init__ (line 442) | def __init__( method __del__ (line 569) | def __del__(self): method support_no_sync (line 573) | def support_no_sync(self) -> bool: method support_lora (line 576) | def support_lora(self) -> bool: method control_precision (line 579) | def control_precision(self) -> bool: method supported_precisions (line 582) | def supported_precisions(self) -> List[str]: method control_device (line 585) | def control_device(self) -> bool: method supported_devices (line 588) | def supported_devices(self) -> List[str]: method prepare_dataloader (line 591) | def prepare_dataloader( method configure (line 655) | def configure( method control_checkpoint_io (line 700) | def control_checkpoint_io(self) -> bool: method get_checkpoint_io (line 703) | def get_checkpoint_io(self) -> CheckpointIO: method no_sync (line 706) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It... method enable_lora (line 709) | def enable_lora( FILE: colossalai/booster/plugin/hybrid_parallel_plugin.py function _convert_floating_point (line 53) | def _convert_floating_point(x, dtype: torch.dtype = torch.float16): class HybridParallelModule (line 59) | class HybridParallelModule(ModelWrapper, AMPModelMixin): method __init__ (line 60) | def __init__( method sync_shared_params (line 131) | def sync_shared_params(self): method no_sync (line 139) | def no_sync(self): method sync_dp_grads (line 161) | def sync_dp_grads(self): method sync_sp_grads (line 185) | def sync_sp_grads(self, grads: Optional[List[torch.Tensor]] = None): method forward (line 217) | def forward(self, *args, **kwargs): method unwrap (line 224) | def unwrap(self, unwrap_peft: bool = True): method _force_wait_all_gather (line 232) | def _force_wait_all_gather(self): method _hook_context (line 236) | def _hook_context(self): function get_param_info (line 240) | def get_param_info(optim: Optimizer): function reinitialize_optimizer (line 269) | def reinitialize_optimizer(optim: Optimizer, model: Module): class HybridParallelNaiveOptimizer (line 278) | class HybridParallelNaiveOptimizer(OptimizerWrapper): method __init__ (line 279) | def __init__( method backward (line 303) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw... method backward_by_grad (line 331) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso... method step (line 357) | def step(self, *args, **kwargs): method _compute_grad_norm (line 380) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]... method _clip_grad_norm (line 458) | def _clip_grad_norm(self, total_norm: float) -> None: method update_master_params (line 477) | def update_master_params(self, model: Module): method get_working_to_master_map (line 480) | def get_working_to_master_map(self): method get_master_to_working_map (line 483) | def get_master_to_working_map(self): method get_grad_norm (line 486) | def get_grad_norm(self, norm_type=2, **kwargs): class HybridParallelAMPOptimizer (line 490) | class HybridParallelAMPOptimizer(MixedPrecisionOptimizer): method __init__ (line 491) | def __init__( method backward (line 532) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw... method backward_by_grad (line 559) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso... method _compute_grad_norm (line 584) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]... class HybridParallelZeroOptimizer (line 666) | class HybridParallelZeroOptimizer(LowLevelZeroOptimizer): method __init__ (line 667) | def __init__( method sync_dp_grads (line 727) | def sync_dp_grads(self): method _sync_sp_grads (line 745) | def _sync_sp_grads(self): method backward (line 792) | def backward(self, loss, inputs=None, retain_graph=False): method backward_by_grad (line 817) | def backward_by_grad(self, tensor, grad, inputs: Tensor = None, retain... method _compute_grad_norm (line 842) | def _compute_grad_norm(self, dp_pg, gradients: List[Tensor], norm_type... class HybridParallelPlugin (line 928) | class HybridParallelPlugin(PipelinePluginBase): method __init__ (line 1000) | def __init__( method __del__ (line 1256) | def __del__(self): method enable_pipeline_parallelism (line 1261) | def enable_pipeline_parallelism(self) -> bool: method supported_devices (line 1264) | def supported_devices(self) -> List[str]: method supported_precisions (line 1267) | def supported_precisions(self) -> List[str]: method control_device (line 1270) | def control_device(self) -> bool: method control_precision (line 1273) | def control_precision(self) -> bool: method support_no_sync (line 1276) | def support_no_sync(self) -> bool: method support_lora (line 1279) | def support_lora(self) -> bool: method control_checkpoint_io (line 1282) | def control_checkpoint_io(self) -> bool: method configure (line 1285) | def configure( method execute_pipeline (line 1387) | def execute_pipeline( method prepare_dataloader (line 1437) | def prepare_dataloader( method get_checkpoint_io (line 1497) | def get_checkpoint_io(self) -> CheckpointIO: method no_sync (line 1502) | def no_sync(self, model: Module, optimizer: OptimizerWrapper) -> Itera... method enable_lora (line 1508) | def enable_lora( FILE: colossalai/booster/plugin/low_level_zero_plugin.py function _convert_floating_point (line 52) | def _convert_floating_point(x, dtype: torch.dtype = torch.float16): class OptimizerParamCheckState (line 61) | class OptimizerParamCheckState(enum.Enum): class LowLevelZeroModel (line 67) | class LowLevelZeroModel(ModelWrapper, AMPModelMixin): method __init__ (line 68) | def __init__( method forward (line 102) | def forward(self, *args, **kwargs): method _force_wait_all_gather (line 109) | def _force_wait_all_gather(self): method _hook_context (line 113) | def _hook_context(self): class LowLevelZeroCheckpointIO (line 117) | class LowLevelZeroCheckpointIO(TorchDDPCheckpointIO): method save_unsharded_optimizer (line 118) | def save_unsharded_optimizer( method load_unsharded_optimizer (line 149) | def load_unsharded_optimizer( method save_sharded_optimizer (line 163) | def save_sharded_optimizer( method load_sharded_optimizer (line 247) | def load_sharded_optimizer( method load_unsharded_model (line 301) | def load_unsharded_model( method load_sharded_model (line 316) | def load_sharded_model( method save_unsharded_model (line 339) | def save_unsharded_model( method save_sharded_model (line 346) | def save_sharded_model( method save_lora_as_pretrained (line 362) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ... class LowLevelZeroPlugin (line 368) | class LowLevelZeroPlugin(DPPluginBase): method __init__ (line 407) | def __init__( method support_no_sync (line 472) | def support_no_sync(self) -> bool: method support_lora (line 475) | def support_lora(self) -> bool: method control_precision (line 478) | def control_precision(self) -> bool: method supported_precisions (line 481) | def supported_precisions(self) -> List[str]: method control_device (line 484) | def control_device(self) -> bool: method supported_devices (line 487) | def supported_devices(self) -> List[str]: method support_lora (line 490) | def support_lora(self) -> bool: method enable_lora (line 493) | def enable_lora( method get_param_group_id (line 515) | def get_param_group_id(self, optimizer: Optimizer, origin_param: Param... method get_param_group_id (line 523) | def get_param_group_id(self, optimizer: Optimizer, origin_param: Param... method add_lora_params_to_optimizer (line 539) | def add_lora_params_to_optimizer(self, model, optimizer): method configure (line 564) | def configure( method control_checkpoint_io (line 624) | def control_checkpoint_io(self) -> bool: method get_checkpoint_io (line 627) | def get_checkpoint_io(self) -> CheckpointIO: method no_sync (line 630) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It... FILE: colossalai/booster/plugin/moe_hybrid_parallel_plugin.py class MoeHybridParallelZeroOptimizer (line 39) | class MoeHybridParallelZeroOptimizer(HybridParallelZeroOptimizer): method __init__ (line 40) | def __init__( class MoeHybridParallelPlugin (line 107) | class MoeHybridParallelPlugin(HybridParallelPlugin): method __init__ (line 177) | def __init__( method get_checkpoint_io (line 412) | def get_checkpoint_io(self) -> MoECheckpointIO: method configure (line 423) | def configure( FILE: colossalai/booster/plugin/plugin_base.py class Plugin (line 15) | class Plugin(ABC): method supported_devices (line 17) | def supported_devices(self) -> List[str]: method supported_precisions (line 21) | def supported_precisions(self) -> List[str]: method control_precision (line 25) | def control_precision(self) -> bool: method control_device (line 29) | def control_device(self) -> bool: method support_no_sync (line 33) | def support_no_sync(self) -> bool: method support_lora (line 37) | def support_lora(self) -> bool: method configure (line 41) | def configure( method control_checkpoint_io (line 53) | def control_checkpoint_io(self) -> bool: method get_checkpoint_io (line 59) | def get_checkpoint_io(self) -> CheckpointIO: method no_sync (line 65) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It... method enable_lora (line 71) | def enable_lora(self, model: nn.Module, pretrained_dir: str, lora_conf... method prepare_dataloader (line 77) | def prepare_dataloader( FILE: colossalai/booster/plugin/pp_plugin_base.py class PipelinePluginBase (line 11) | class PipelinePluginBase(Plugin): method execute_pipeline (line 13) | def execute_pipeline( FILE: colossalai/booster/plugin/torch_ddp_plugin.py class TorchDDPCheckpointIO (line 25) | class TorchDDPCheckpointIO(GeneralCheckpointIO): method __init__ (line 26) | def __init__(self) -> None: method load_unsharded_model (line 31) | def load_unsharded_model( method save_unsharded_model (line 47) | def save_unsharded_model( method load_unsharded_optimizer (line 59) | def load_unsharded_optimizer( method save_unsharded_optimizer (line 70) | def save_unsharded_optimizer( method save_lr_scheduler (line 80) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): method save_sharded_model (line 87) | def save_sharded_model( method load_sharded_model (line 112) | def load_sharded_model( method save_sharded_optimizer (line 136) | def save_sharded_optimizer( method load_sharded_optimizer (line 154) | def load_sharded_optimizer( method save_lora_as_pretrained (line 170) | def save_lora_as_pretrained( class TorchDDPModel (line 197) | class TorchDDPModel(ModelWrapper): method __init__ (line 198) | def __init__(self, module: nn.Module, *args, **kwargs) -> None: method unwrap (line 202) | def unwrap(self, unwrap_peft: bool = True) -> nn.Module: class TorchDDPPlugin (line 209) | class TorchDDPPlugin(DPPluginBase): method __init__ (line 235) | def __init__( method support_no_sync (line 256) | def support_no_sync(self) -> bool: method support_lora (line 259) | def support_lora(self) -> bool: method control_precision (line 262) | def control_precision(self) -> bool: method supported_precisions (line 265) | def supported_precisions(self) -> List[str]: method control_device (line 268) | def control_device(self) -> bool: method supported_devices (line 271) | def supported_devices(self) -> List[str]: method configure (line 274) | def configure( method control_checkpoint_io (line 301) | def control_checkpoint_io(self) -> bool: method get_checkpoint_io (line 304) | def get_checkpoint_io(self) -> CheckpointIO: method no_sync (line 307) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It... method enable_lora (line 311) | def enable_lora( FILE: colossalai/booster/plugin/torch_fsdp_plugin.py class TorchFSDPCheckpointIO (line 40) | class TorchFSDPCheckpointIO(GeneralCheckpointIO): method __init__ (line 41) | def __init__(self) -> None: method load_unsharded_model (line 46) | def load_unsharded_model( method load_unsharded_optimizer (line 54) | def load_unsharded_optimizer( method save_unsharded_model (line 91) | def save_unsharded_model( method save_unsharded_optimizer (line 118) | def save_unsharded_optimizer( method save_sharded_model (line 164) | def save_sharded_model( method load_sharded_model (line 232) | def load_sharded_model( method save_sharded_optimizer (line 264) | def save_sharded_optimizer( method load_sharded_optimizer (line 363) | def load_sharded_optimizer( method save_lr_scheduler (line 427) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): class TorchFSDPModel (line 435) | class TorchFSDPModel(ModelWrapper): method __init__ (line 436) | def __init__(self, module: nn.Module, *args, **kwargs) -> None: class FSDPOptimizerWrapper (line 441) | class FSDPOptimizerWrapper(OptimizerWrapper): method __init__ (line 442) | def __init__(self, optimizer: Optimizer, model: nn.Module): method unwrap_model (line 446) | def unwrap_model(self) -> nn.Module: class TorchFSDPPlugin (line 450) | class TorchFSDPPlugin(DPPluginBase): method __init__ (line 472) | def __init__( method support_no_sync (line 503) | def support_no_sync(self) -> bool: method support_lora (line 506) | def support_lora(self) -> bool: method no_sync (line 509) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It... method control_precision (line 512) | def control_precision(self) -> bool: method supported_precisions (line 515) | def supported_precisions(self) -> List[str]: method control_device (line 518) | def control_device(self) -> bool: method supported_devices (line 521) | def supported_devices(self) -> List[str]: method configure (line 524) | def configure( method control_checkpoint_io (line 560) | def control_checkpoint_io(self) -> bool: method get_checkpoint_io (line 563) | def get_checkpoint_io(self) -> CheckpointIO: method enable_lora (line 566) | def enable_lora( FILE: colossalai/checkpoint_io/checkpoint_io_base.py class CheckpointIO (line 18) | class CheckpointIO(ABC): method __init__ (line 65) | def __init__(self): method _sync_io (line 70) | def _sync_io(self): method _sync_d2h (line 75) | def _sync_d2h(self): method synchronize (line 79) | def synchronize(self): method __del__ (line 83) | def __del__(self): method load_model (line 87) | def load_model( method save_model (line 143) | def save_model( method load_optimizer (line 196) | def load_optimizer( method save_optimizer (line 232) | def save_optimizer( method load_sharded_model (line 268) | def load_sharded_model( method load_unsharded_model (line 284) | def load_unsharded_model( method save_sharded_model (line 300) | def save_sharded_model( method save_unsharded_model (line 323) | def save_unsharded_model( method load_sharded_optimizer (line 341) | def load_sharded_optimizer( method load_unsharded_optimizer (line 361) | def load_unsharded_optimizer( method save_sharded_optimizer (line 375) | def save_sharded_optimizer( method save_unsharded_optimizer (line 396) | def save_unsharded_optimizer( method save_lr_scheduler (line 413) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): method load_lr_scheduler (line 423) | def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): method save_lora_as_pretrained (line 439) | def save_lora_as_pretrained( FILE: colossalai/checkpoint_io/general_checkpoint_io.py class GeneralCheckpointIO (line 37) | class GeneralCheckpointIO(CheckpointIO): method load_unsharded_model (line 42) | def load_unsharded_model( method save_unsharded_model (line 55) | def save_unsharded_model( method load_sharded_optimizer (line 71) | def load_sharded_optimizer( method save_sharded_optimizer (line 104) | def save_sharded_optimizer( method load_unsharded_optimizer (line 174) | def load_unsharded_optimizer( method save_unsharded_optimizer (line 185) | def save_unsharded_optimizer( method save_sharded_model (line 210) | def save_sharded_model( method load_sharded_model (line 269) | def load_sharded_model( method save_lora_as_pretrained (line 311) | def save_lora_as_pretrained( FILE: colossalai/checkpoint_io/hybrid_parallel_checkpoint_io.py class HybridParallelCheckpointIO (line 59) | class HybridParallelCheckpointIO(GeneralCheckpointIO): method __init__ (line 71) | def __init__( method _model_sharder (line 97) | def _model_sharder( method _optimizer_sharder (line 159) | def _optimizer_sharder( method save_sharded_model (line 205) | def save_sharded_model( method load_sharded_model (line 361) | def load_sharded_model( method save_sharded_optimizer (line 469) | def save_sharded_optimizer( method load_sharded_optimizer (line 647) | def load_sharded_optimizer( method load_states_into_optimizer (line 737) | def load_states_into_optimizer(self, optimizer: Optimizer, state_dict:... method save_unsharded_model (line 761) | def save_unsharded_model( method load_unsharded_model (line 824) | def load_unsharded_model( method save_unsharded_optimizer (line 861) | def save_unsharded_optimizer( method load_unsharded_optimizer (line 956) | def load_unsharded_optimizer( method save_lr_scheduler (line 1009) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str): method gather_from_sharded_optimizer_state (line 1017) | def gather_from_sharded_optimizer_state( method shard_from_complete_optimizer_state (line 1082) | def shard_from_complete_optimizer_state( method save_lora_as_pretrained (line 1142) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ... FILE: colossalai/checkpoint_io/index_file.py class CheckpointIndexFile (line 12) | class CheckpointIndexFile: method __init__ (line 23) | def __init__(self, root_path=None) -> None: method from_file (line 31) | def from_file(index_path: Union[str, Path]): method load (line 45) | def load(self, json_path: str): method export (line 65) | def export(self, json_path: str): method append_weight_map (line 81) | def append_weight_map(self, param_name: str, shard_file: str): method append_meta_data (line 91) | def append_meta_data(self, name: str, val: Any): method contains_dtensor (line 101) | def contains_dtensor(self): method get_checkpoint_filenames (line 114) | def get_checkpoint_filenames(self) -> List[str]: method assert_no_dtensor_checkpoint (line 138) | def assert_no_dtensor_checkpoint(self): method get_checkpoint_file (line 143) | def get_checkpoint_file(self, param_name: str) -> str: method get_all_param_names (line 156) | def get_all_param_names(self): method get_param_group_filename (line 162) | def get_param_group_filename(self) -> Union[str, None]: method write_index_file (line 174) | def write_index_file(self, save_index_file): FILE: colossalai/checkpoint_io/moe_checkpoint.py class MoECheckpointIO (line 44) | class MoECheckpointIO(HybridParallelCheckpointIO): method __init__ (line 45) | def __init__( method _model_sharder (line 71) | def _model_sharder( method save_sharded_model (line 116) | def save_sharded_model( method gather_from_sharded_optimizer_state (line 249) | def gather_from_sharded_optimizer_state( method _optimizer_sharder (line 323) | def _optimizer_sharder( method save_sharded_optimizer (line 369) | def save_sharded_optimizer( method load_sharded_optimizer (line 516) | def load_sharded_optimizer( method shard_from_complete_optimizer_state (line 624) | def shard_from_complete_optimizer_state( method pre_save_model (line 686) | def pre_save_model(self, model: nn.Module) -> dict: method save_unsharded_model (line 724) | def save_unsharded_model( method save_unsharded_optimizer (line 747) | def save_unsharded_optimizer( method load_unsharded_optimizer (line 811) | def load_unsharded_optimizer( method save_lora_as_pretrained (line 899) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ... FILE: colossalai/checkpoint_io/utils.py function calculate_tensor_size (line 48) | def calculate_tensor_size(tensor: torch.Tensor) -> float: function is_safetensors_available (line 62) | def is_safetensors_available() -> bool: function is_dtensor_checkpoint (line 75) | def is_dtensor_checkpoint(checkpoint_file_path: str) -> bool: function is_safetensor_checkpoint (line 91) | def is_safetensor_checkpoint(checkpoint_file_path: str) -> bool: function search_tp_partition_dim (line 107) | def search_tp_partition_dim(current_shape: torch.Size, original_shape: t... function search_padding_dim (line 135) | def search_padding_dim(global_shape: torch.Size, original_shape: torch.S... class StateDictSharder (line 149) | class StateDictSharder: method __init__ (line 150) | def __init__(self, size_per_shard: int) -> None: method append_param (line 155) | def append_param(self, name: str, tensor: torch.Tensor) -> Tuple[Optio... method append_optim_state (line 172) | def append_optim_state(self, param_id: int, state: OrderedDict) -> Tup... function gather_distributed_param (line 209) | def gather_distributed_param(param: torch.Tensor, keep_vars: bool = Fals... function save_state_dict_shards (line 229) | def save_state_dict_shards( function async_save_state_dict_shards (line 278) | def async_save_state_dict_shards( function async_move_save_state_dict_shards (line 336) | def async_move_save_state_dict_shards( function shard_model_checkpoint (line 405) | def shard_model_checkpoint( function shard_optimizer_checkpoint (line 432) | def shard_optimizer_checkpoint( function save_state_dict (line 467) | def save_state_dict( function save_param_groups (line 495) | def save_param_groups(state_dict: dict, group_file_path: str) -> None: function clean_folder (line 507) | def clean_folder( function save_config_file (line 543) | def save_config_file(model: nn.Module, checkpoint_path: str, is_master: ... function save_dtensor (line 579) | def save_dtensor(name: str, tensor: torch.Tensor, index_file: "Checkpoin... function get_checkpoint_file_suffix (line 614) | def get_checkpoint_file_suffix(use_safetensors: bool) -> str: function generate_checkpoint_shard_file_name (line 630) | def generate_checkpoint_shard_file_name( function generate_dtensor_file_name (line 653) | def generate_dtensor_file_name(param_name: str, index: int, use_safetens... function load_shard_state_dict (line 674) | def load_shard_state_dict(checkpoint_file: Path, use_safetensors: bool =... function load_state_dict_into_model (line 688) | def load_state_dict_into_model( function load_param_groups_into_optimizer (line 741) | def load_param_groups_into_optimizer(optimizer: Optimizer, param_group_p... function load_states_into_optimizer (line 786) | def load_states_into_optimizer(optimizer: Optimizer, state_dict: dict, i... function sharded_optimizer_loading_epilogue (line 834) | def sharded_optimizer_loading_epilogue(optimizer: Optimizer): function has_index_file (line 849) | def has_index_file(checkpoint_path: str) -> Tuple[bool, Optional[Path]]: function load_state_dict (line 885) | def load_state_dict(checkpoint_file_path: Path): function add_prefix (line 918) | def add_prefix(weights_name: str, prefix: Optional[str] = None) -> str: function get_model_base_filenames (line 927) | def get_model_base_filenames(prefix: str = None, use_safetensors: bool =... function get_optimizer_base_filenames (line 940) | def get_optimizer_base_filenames(prefix: str = None, use_safetensors: bo... function get_shard_filename (line 956) | def get_shard_filename(weights_name: str, idx: int): function _pin_tensor (line 965) | def _pin_tensor(tensor: torch.Tensor, empty: bool = True) -> torch.Tensor: function create_pinned_state_dict (line 971) | def create_pinned_state_dict( function load_optim_or_model_shard (line 991) | def load_optim_or_model_shard(path: str, is_optim: bool, use_safetensors... function load_state_dict_shards (line 1002) | def load_state_dict_shards( function get_lora_state_dict (line 1024) | def get_lora_state_dict( function gather_state_dict_fast (line 1120) | def gather_state_dict_fast( FILE: colossalai/cli/check/__init__.py function check (line 10) | def check(installation): FILE: colossalai/cli/check/check_installation.py function to_click_output (line 10) | def to_click_output(val): function check_installation (line 20) | def check_installation(): function _is_compatible (line 101) | def _is_compatible(versions): function _parse_colossalai_version (line 127) | def _parse_colossalai_version(): function _check_aot_built_cuda_extension_installed (line 151) | def _check_aot_built_cuda_extension_installed(): function _check_torch_version (line 165) | def _check_torch_version(): function _check_cuda_version (line 191) | def _check_cuda_version(): FILE: colossalai/cli/cli.py class Arguments (line 7) | class Arguments: method __init__ (line 8) | def __init__(self, arg_dict): function cli (line 14) | def cli(): FILE: colossalai/cli/launcher/__init__.py function run (line 70) | def run( FILE: colossalai/cli/launcher/hostinfo.py class HostInfo (line 4) | class HostInfo: method __init__ (line 13) | def __init__( method is_host_localhost (line 23) | def is_host_localhost(hostname: str, port: str = None) -> None: method __str__ (line 51) | def __str__(self): method __repr__ (line 54) | def __repr__(self): class HostInfoList (line 58) | class HostInfoList: method __init__ (line 63) | def __init__(self): method append (line 66) | def append(self, hostinfo: HostInfo) -> None: method remove (line 76) | def remove(self, hostname: str) -> None: method get_hostinfo (line 87) | def get_hostinfo(self, hostname: str) -> HostInfo: method has (line 104) | def has(self, hostname: str) -> bool: method __iter__ (line 119) | def __iter__(self): method __len__ (line 122) | def __len__(self): FILE: colossalai/cli/launcher/multinode_runner.py function run_on_host (line 10) | def run_on_host( class MultiNodeRunner (line 66) | class MultiNodeRunner: method __init__ (line 72) | def __init__(self): method connect (line 77) | def connect(self, host_info_list: HostInfoList, workdir: str, env: dic... method send (line 95) | def send(self, hostinfo: HostInfo, cmd: str) -> None: method stop_all (line 108) | def stop_all(self) -> None: method recv_from_all (line 116) | def recv_from_all(self) -> dict: FILE: colossalai/cli/launcher/run.py function fetch_hostfile (line 18) | def fetch_hostfile(hostfile_path: str, ssh_port: int) -> HostInfoList: function parse_device_filter (line 58) | def parse_device_filter(device_pool: HostInfoList, include_str=None, exc... function get_launch_command (line 108) | def get_launch_command( function launch_multi_processes (line 212) | def launch_multi_processes(args: Config) -> None: FILE: colossalai/cluster/device_mesh_manager.py class DeviceMeshInfo (line 12) | class DeviceMeshInfo: method __post_init__ (line 24) | def __post_init__(self): function initialize_device_mesh (line 33) | def initialize_device_mesh(device_mesh_info: DeviceMeshInfo): class DeviceMeshManager (line 58) | class DeviceMeshManager: method __init__ (line 63) | def __init__(self): method create_device_mesh (line 66) | def create_device_mesh(self, name, device_mesh_info: DeviceMeshInfo) -... method get (line 81) | def get(self, name: str) -> DeviceMesh: method destroy (line 96) | def destroy(self, name: str) -> None: method destroy_all (line 111) | def destroy_all(self): FILE: colossalai/cluster/dist_coordinator.py class DistCoordinator (line 11) | class DistCoordinator(metaclass=SingletonMeta): method __init__ (line 40) | def __init__(self): method rank (line 50) | def rank(self) -> int: method world_size (line 54) | def world_size(self) -> int: method local_rank (line 58) | def local_rank(self) -> int: method _assert_local_rank_set (line 61) | def _assert_local_rank_set(self): method is_master (line 69) | def is_master(self, process_group: ProcessGroup = None) -> bool: method is_node_master (line 82) | def is_node_master(self) -> bool: method is_last_process (line 92) | def is_last_process(self, process_group: ProcessGroup = None) -> bool: method print_on_master (line 106) | def print_on_master(self, msg: str, process_group: ProcessGroup = None): method print_on_node_master (line 118) | def print_on_node_master(self, msg: str): method priority_execution (line 130) | def priority_execution(self, executor_rank: int = 0, process_group: Pr... method destroy (line 159) | def destroy(self, process_group: ProcessGroup = None): method block_all (line 168) | def block_all(self, process_group: ProcessGroup = None): method on_master_only (line 177) | def on_master_only(self, process_group: ProcessGroup = None): FILE: colossalai/cluster/process_group_manager.py class ProcessGroupManager (line 7) | class ProcessGroupManager: method __init__ (line 19) | def __init__(self): method create_process_group (line 22) | def create_process_group(self, name: str, ranks: List[int], backend: s... method get (line 41) | def get(self, name: str) -> ProcessGroup: method destroy (line 56) | def destroy(self, name: str) -> None: method destroy_all (line 69) | def destroy_all(self) -> None: FILE: colossalai/cluster/process_group_mesh.py function prod (line 13) | def prod(nums: List[int]) -> int: class ProcessGroupMesh (line 25) | class ProcessGroupMesh: method __init__ (line 40) | def __init__(self, *size: int) -> None: method destroy_mesh_process_groups (line 54) | def destroy_mesh_process_groups(self): method shape (line 76) | def shape(self) -> Tuple[int, ...]: method rank (line 80) | def rank(self) -> int: method size (line 83) | def size(self, dim: Optional[int] = None) -> Union[int, Tuple[int, ...]]: method coordinate (line 97) | def coordinate(self, dim: Optional[int] = None) -> Union[int, Tuple[in... method unravel (line 112) | def unravel(rank: int, shape: Tuple[int, ...]) -> Tuple[int, ...]: method ravel (line 125) | def ravel(coord: Tuple[int, ...], shape: Tuple[int, ...], mode: str = ... method _get_group (line 143) | def _get_group(self, ranks_in_group: List[int], backend: Optional[str]... method get_ranks_in_group (line 161) | def get_ranks_in_group(self, group: ProcessGroup) -> List[int]: method get_coords_along_axis (line 173) | def get_coords_along_axis( method create_group_along_axis (line 210) | def create_group_along_axis( method get_group_along_axis (line 251) | def get_group_along_axis( FILE: colossalai/context/config.py class Config (line 12) | class Config(dict): method __init__ (line 20) | def __init__(self, config: dict = None): method __missing__ (line 25) | def __missing__(self, key): method __getattr__ (line 28) | def __getattr__(self, key): method __setattr__ (line 35) | def __setattr__(self, key, value): method _add_item (line 38) | def _add_item(self, key, value): method update (line 44) | def update(self, config): method from_file (line 51) | def from_file(filename: str): class ConfigException (line 106) | class ConfigException(Exception): FILE: colossalai/context/singleton_meta.py class SingletonMeta (line 4) | class SingletonMeta(type): method __call__ (line 13) | def __call__(cls, *args, **kwargs): FILE: colossalai/device/alpha_beta_profiler.py class AlphaBetaProfiler (line 15) | class AlphaBetaProfiler: method __init__ (line 32) | def __init__( method _init_profiling (line 65) | def _init_profiling(self): method _profile (line 80) | def _profile(self, process_group, pg_handler, nbytes): method profile_latency (line 127) | def profile_latency(self, process_group, pg_handler): method profile_bandwidth (line 152) | def profile_bandwidth(self, process_group, pg_handler, maxbytes=(1 * G... method profile_ab (line 163) | def profile_ab(self): method search_best_logical_mesh (line 212) | def search_best_logical_mesh(self): method extract_alpha_beta_for_device_mesh (line 355) | def extract_alpha_beta_for_device_mesh(self): FILE: colossalai/device/calc_pipeline_strategy.py function get_submesh_choices (line 6) | def get_submesh_choices(num_hosts, num_devices_per_host, mode="new"): function alpa_dp_impl (line 29) | def alpa_dp_impl( function alpa_dp (line 92) | def alpa_dp( FILE: colossalai/device/device_mesh.py class ProcessGroupContainer (line 16) | class ProcessGroupContainer: class DeviceMesh (line 22) | class DeviceMesh: method __init__ (line 43) | def __init__( method shape (line 143) | def shape(self) -> torch.Size: method num_devices (line 150) | def num_devices(self) -> int: method logical_mesh_id (line 157) | def logical_mesh_id(self) -> torch.Tensor: method is_initialized (line 164) | def is_initialized(self) -> bool: method from_process_group (line 171) | def from_process_group(process_group: Union[ProcessGroup, List[Process... method get_process_group (line 230) | def get_process_group(self, axis: int, global_rank: int = None) -> Pro... method get_process_group_for_all_axes (line 246) | def get_process_group_for_all_axes(self, global_rank: int = None) -> D... method get_ranks_in_process_group (line 261) | def get_ranks_in_process_group(self, axis: int, global_rank: int = Non... method __deepcopy__ (line 277) | def __deepcopy__(self, memo) -> "DeviceMesh": method _init_global_to_logical_rank_mapping (line 290) | def _init_global_to_logical_rank_mapping( method init_logical_process_group (line 320) | def init_logical_process_group(self): method _init_ranks_in_the_same_group (line 364) | def _init_ranks_in_the_same_group(self): method global_rank_to_local_rank (line 383) | def global_rank_to_local_rank(self, rank: int, axis: int = None) -> Un... method _collate_global_ranks_in_same_process_group (line 402) | def _collate_global_ranks_in_same_process_group(self, global_rank): method flatten (line 481) | def flatten(self): method all_gather_cost (line 500) | def all_gather_cost(self, num_bytes, mesh_dim): method all_reduce_cost (line 504) | def all_reduce_cost(self, num_bytes, mesh_dim): method reduce_scatter_cost (line 512) | def reduce_scatter_cost(self, num_bytes, mesh_dim): method all_to_all_cost (line 518) | def all_to_all_cost(self, num_bytes, mesh_dim): FILE: colossalai/fx/_compatibility.py function compatibility (line 18) | def compatibility(is_backward_compatible: bool = False) -> Callable: function is_compatible_with_meta (line 44) | def is_compatible_with_meta() -> bool: FILE: colossalai/fx/_meta_regist_12.py function register_meta (line 18) | def register_meta(op, register_dispatcher=True): function meta_conv (line 38) | def meta_conv( function meta_conv_1 (line 165) | def meta_conv_1( function meta_conv_backward (line 182) | def meta_conv_backward( function meta_adaptive_avg_pool2d_backward (line 200) | def meta_adaptive_avg_pool2d_backward( function meta_cuda_rnn (line 211) | def meta_cuda_rnn( function meta_cudnn_rnn_backward (line 265) | def meta_cudnn_rnn_backward( function meta_relu (line 285) | def meta_relu(input: torch.Tensor): function meta_prelu (line 290) | def meta_prelu(input: torch.Tensor, weight: torch.Tensor): function meta_hardswish (line 295) | def meta_hardswish(input: torch.Tensor): function meta_hardtanh (line 300) | def meta_hardtanh(input: torch.Tensor, min, max): function meta_hardswish_backward (line 305) | def meta_hardswish_backward(grad_out: torch.Tensor, input: torch.Tensor): function meta_hardtanh_backward (line 311) | def meta_hardtanh_backward(grad_out: torch.Tensor, input: torch.Tensor, ... function meta_bn (line 319) | def meta_bn(input: torch.Tensor, weight, bias, running_mean, running_var... function meta_bn_backward (line 330) | def meta_bn_backward( function meta_cudnn_bn (line 350) | def meta_cudnn_bn(input: torch.Tensor, weight, bias, running_mean, runni... function meta_cudnn_bn_backward (line 365) | def meta_cudnn_bn_backward( function meta_ln (line 384) | def meta_ln(input: torch.Tensor, normalized_shape, weight, bias, eps): function meta_ln_backward (line 396) | def meta_ln_backward( function meta_gn_backward (line 407) | def meta_gn_backward(dY: torch.Tensor, input: torch.Tensor, mean, rstd, ... function meta_roll (line 417) | def meta_roll(input: torch.Tensor, shifts, dims): function meta_local_scalar_dense (line 423) | def meta_local_scalar_dense(self: torch.Tensor): function meta_where_self (line 429) | def meta_where_self(condition: torch.Tensor, self: torch.Tensor, other: ... function meta_index_Tensor (line 435) | def meta_index_Tensor(self, indices): function meta_embedding_dense_backward (line 530) | def meta_embedding_dense_backward( function meta_native_dropout_default (line 544) | def meta_native_dropout_default(input: torch.Tensor, p: float, train: bo... function meta_native_dropout_backward_default (line 553) | def meta_native_dropout_backward_default(grad: torch.Tensor, mask: torch... FILE: colossalai/fx/_meta_regist_13.py function meta_convolution_backward (line 11) | def meta_convolution_backward( function meta__adaptive_avg_pool2d_backward (line 41) | def meta__adaptive_avg_pool2d_backward(grad_out, self): FILE: colossalai/fx/codegen/activation_checkpoint_codegen.py function _gen_saved_tensors_hooks (line 45) | def _gen_saved_tensors_hooks(): function _gen_save_tensors_hooks_context (line 74) | def _gen_save_tensors_hooks_context(offload_input=True) -> str: function _gen_save_on_cpu_context (line 90) | def _gen_save_on_cpu_context(): function _find_input_and_output_nodes (line 99) | def _find_input_and_output_nodes(nodes: List[Node]): function _find_ckpt_regions (line 125) | def _find_ckpt_regions(nodes: List[Node]): function _find_offload_regions (line 167) | def _find_offload_regions(nodes: List[Node]): function _gen_ckpt_fn_def (line 212) | def _gen_ckpt_fn_def(label, free_vars: List[str]) -> str: function _gen_ckpt_output (line 219) | def _gen_ckpt_output(output_vars: List[str]) -> str: function _gen_ckpt_usage (line 226) | def _gen_ckpt_usage(label, activation_offload, input_vars, output_vars, ... function _end_of_ckpt (line 235) | def _end_of_ckpt(node: Node, check_idx: int) -> bool: function _find_nested_ckpt_regions (line 253) | def _find_nested_ckpt_regions(nodes, check_idx=0): function emit_ckpt_func (line 302) | def emit_ckpt_func( function emit_code_with_nested_activation_checkpoint (line 400) | def emit_code_with_nested_activation_checkpoint(body, ckpt_func, nodes, ... function emit_code_with_activation_checkpoint (line 490) | def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_no... class ActivationCheckpointCodeGen (line 627) | class ActivationCheckpointCodeGen(CodeGen): method _gen_python_code (line 628) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp... function python_code_with_activation_checkpoint (line 866) | def python_code_with_activation_checkpoint(self, root_module: str, names... FILE: colossalai/fx/graph_module.py class ColoGraphModule (line 25) | class ColoGraphModule(GraphModule): method __init__ (line 26) | def __init__( method bind (line 37) | def bind(self, ckpt_def, globals): method recompile (line 57) | def recompile(self) -> PythonCode: method to_folder (line 101) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ... method __init__ (line 183) | def __init__(self, root: Union[torch.nn.Module, Dict[str, Any]], graph... class ColoGraphModule (line 182) | class ColoGraphModule(GraphModule): method __init__ (line 26) | def __init__( method bind (line 37) | def bind(self, ckpt_def, globals): method recompile (line 57) | def recompile(self) -> PythonCode: method to_folder (line 101) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ... method __init__ (line 183) | def __init__(self, root: Union[torch.nn.Module, Dict[str, Any]], graph... FILE: colossalai/fx/passes/adding_split_node_pass.py function pipe_split (line 8) | def pipe_split(): function block_split (line 12) | def block_split(): function construct_blocks (line 17) | def construct_blocks(gm: torch.fx.GraphModule, limit=0.01): function remove_blocks (line 46) | def remove_blocks(gm: torch.fx.GraphModule): function get_compute_costs (line 52) | def get_compute_costs(node_list): function do_dp_split_gpipe_impl (line 64) | def do_dp_split_gpipe_impl(num_nodes, num_stages, num_microbatches, comp... function do_dp_split_gpipe (line 110) | def do_dp_split_gpipe(node_list, compute_costs, num_stages: int, num_mic... function gpipe_dp_split_pass (line 141) | def gpipe_dp_split_pass(gm: torch.fx.GraphModule, pp_size: int, num_micr... function avgcompute_split_pass (line 174) | def avgcompute_split_pass(gm: torch.fx.GraphModule, pp_size: int): function avgnode_split_pass (line 208) | def avgnode_split_pass(gm: torch.fx.GraphModule, pp_size: int): function balanced_split_pass (line 232) | def balanced_split_pass(gm: torch.fx.GraphModule, pp_size: int): function balanced_split_pass_v2 (line 279) | def balanced_split_pass_v2(gm: torch.fx.GraphModule, pp_size: int): function uniform_split_pass (line 313) | def uniform_split_pass(gm: torch.fx.GraphModule, pp_size: int): function split_with_split_nodes_pass (line 342) | def split_with_split_nodes_pass(annotated_gm: torch.fx.GraphModule, merg... FILE: colossalai/fx/passes/concrete_info_prop.py class ConcreteInfoProp (line 14) | class ConcreteInfoProp(torch.fx.Interpreter): method run (line 50) | def run(self, *args, initial_env: Optional[Dict[Node, Any]] = None, en... method run_node (line 72) | def run_node(self, n: Node) -> Any: method placeholder (line 101) | def placeholder(self, target: "Target", args: Tuple[Argument, ...], kw... method get_attr (line 122) | def get_attr(self, target: "Target", args: Tuple[Argument, ...], kwarg... method call_function (line 141) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ... method call_method (line 160) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw... method call_module (line 178) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw... method output (line 200) | def output(self, target: "Target", args: Tuple[Argument, ...], kwargs:... method propagate (line 218) | def propagate(self, *args): method summary (line 231) | def summary(self, unit: str = "MB") -> str: FILE: colossalai/fx/passes/experimental/adding_shape_consistency_pass.py function apply (line 11) | def apply(*args, **kwargs): function solution_annotation_pass (line 16) | def solution_annotation_pass(gm: torch.fx.GraphModule, solution: List[in... function shape_consistency_pass (line 60) | def shape_consistency_pass(gm: torch.fx.GraphModule): FILE: colossalai/fx/passes/meta_info_prop.py class TensorMetadata (line 23) | class TensorMetadata(NamedTuple): function _extract_tensor_metadata (line 37) | def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata: class MetaInfoProp (line 52) | class MetaInfoProp(torch.fx.Interpreter): method run_node (line 89) | def run_node(self, n: Node) -> Any: method placeholder (line 128) | def placeholder(self, target: "Target", args: Tuple[Argument, ...], kw... method get_attr (line 149) | def get_attr(self, target: "Target", args: Tuple[Argument, ...], kwarg... method call_function (line 168) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ... method call_method (line 187) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw... method call_module (line 205) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw... method output (line 227) | def output(self, target: "Target", args: Tuple[Argument, ...], kwargs:... method propagate (line 247) | def propagate(self, *args): method summary (line 260) | def summary(self, unit: str = "MB") -> str: function metainfo_trace (line 330) | def metainfo_trace(gm: torch.fx.GraphModule, *args, verbose: bool = Fals... FILE: colossalai/fx/passes/passes_for_gpt2_test.py function customized_split_pass_for_gpt2 (line 14) | def customized_split_pass_for_gpt2(gm: torch.fx.GraphModule, pp_size: in... function split_with_split_nodes_pass_for_gp2_test (line 47) | def split_with_split_nodes_pass_for_gp2_test(annotated_gm: torch.fx.Grap... function split_module_for_gpt2_test (line 161) | def split_module_for_gpt2_test( FILE: colossalai/fx/passes/shard_1d_pass.py function weight_split (line 28) | def weight_split(weight: torch.nn.parameter.Parameter, dim: int, col_nor... function column_shard_linear_pass (line 46) | def column_shard_linear_pass(gm: torch.fx.GraphModule): function row_shard_linear_pass (line 61) | def row_shard_linear_pass(gm: torch.fx.GraphModule): function transformer_mlp_pass (line 74) | def transformer_mlp_pass(graph_module: torch.fx.GraphModule, process_gro... FILE: colossalai/fx/passes/split_module.py class Partition (line 11) | class Partition: method __init__ (line 16) | def __init__(self, name: str): method __repr__ (line 27) | def __repr__(self) -> str: function split_module (line 40) | def split_module( FILE: colossalai/fx/passes/utils.py function get_comm_size (line 8) | def get_comm_size(prev_partition, next_partition): function get_leaf (line 33) | def get_leaf(graph: Graph): function is_leaf (line 53) | def is_leaf(graph: Graph, node: Node): function get_top (line 57) | def get_top(graph: Graph): function is_top (line 81) | def is_top(graph: Graph, node: Node): function get_all_consumers (line 85) | def get_all_consumers(graph: Graph, node: Node): function assign_bfs_level_to_nodes (line 99) | def assign_bfs_level_to_nodes(graph: Graph): function get_node_module (line 161) | def get_node_module(node) -> torch.nn.Module: FILE: colossalai/fx/profiler/dataflow.py class Phase (line 11) | class Phase(Enum): class GraphInfo (line 19) | class GraphInfo: function is_phase (line 70) | def is_phase(n: Node, phase: Phase) -> bool: function autograd_graph_analysis (line 76) | def autograd_graph_analysis(graph: Graph) -> GraphInfo: FILE: colossalai/fx/profiler/experimental/profiler.py class GraphInfo (line 18) | class GraphInfo: function profile_function (line 76) | def profile_function(target: "Target") -> Callable: function profile_method (line 115) | def profile_method(target: "Target") -> Callable: function profile_module (line 144) | def profile_module(module: torch.nn.Module) -> Callable: FILE: colossalai/fx/profiler/experimental/profiler_function/activation_function.py function torch_nn_func_non_linear_act (line 32) | def torch_nn_func_non_linear_act(input: torch.Tensor, inplace: bool = Fa... FILE: colossalai/fx/profiler/experimental/profiler_function/arithmetic.py function _elementwise_flops_compute (line 13) | def _elementwise_flops_compute(input, other): function torch_add_like_ops (line 53) | def torch_add_like_ops(input: Any, other: Any, *, out: Optional[torch.Te... function torch_elementwise_op (line 58) | def torch_elementwise_op(input: torch.Tensor, *, out: Optional[torch.Ten... function torch_matmul (line 67) | def torch_matmul(input: torch.Tensor, other: torch.Tensor, *, out: Optio... function torch_bmm (line 74) | def torch_bmm(input: torch.Tensor, other: torch.Tensor, *, out: Optional... function torch_var_mean (line 81) | def torch_var_mean( FILE: colossalai/fx/profiler/experimental/profiler_function/embedding.py function torch_nn_functional_embedding (line 9) | def torch_nn_functional_embedding( FILE: colossalai/fx/profiler/experimental/profiler_function/linear.py function torch_nn_linear (line 9) | def torch_nn_linear(input: torch.Tensor, weight: torch.Tensor, bias: tor... FILE: colossalai/fx/profiler/experimental/profiler_function/normalization.py function torch_nn_func_instancenorm (line 9) | def torch_nn_func_instancenorm( function torch_nn_func_groupnorm (line 26) | def torch_nn_func_groupnorm( function torch_nn_func_layernorm (line 40) | def torch_nn_func_layernorm( function torch_nn_func_batchnorm (line 54) | def torch_nn_func_batchnorm( FILE: colossalai/fx/profiler/experimental/profiler_function/pooling.py function torch_nn_func_pooling (line 20) | def torch_nn_func_pooling(input: torch.Tensor, *args, **kwargs) -> Tuple... FILE: colossalai/fx/profiler/experimental/profiler_function/python_ops.py function operator_getitem (line 8) | def operator_getitem(a: Any, b: Any) -> Tuple[int, int]: function python_getattr (line 15) | def python_getattr(a: Any, b: Any) -> Tuple[int, int]: FILE: colossalai/fx/profiler/experimental/profiler_function/torch_ops.py function torch_zero_flops_op (line 32) | def torch_zero_flops_op(*args, **kwargs) -> Tuple[int, int]: function torch_where (line 39) | def torch_where(condition: torch.Tensor, x: Any, y: Any) -> Tuple[int, i... function torch_max (line 48) | def torch_max( FILE: colossalai/fx/profiler/experimental/profiler_module/activation_function.py function torch_nn_non_linear_act (line 32) | def torch_nn_non_linear_act(self: torch.nn.Module, input: torch.Tensor) ... FILE: colossalai/fx/profiler/experimental/profiler_module/attention.py function torch_nn_msa (line 10) | def torch_nn_msa( FILE: colossalai/fx/profiler/experimental/profiler_module/convolution.py function torch_nn_conv1d (line 15) | def torch_nn_conv1d(self: torch.nn.Conv1d, input: torch.Tensor) -> Tuple... function torch_nn_conv2d (line 37) | def torch_nn_conv2d(self: torch.nn.Conv2d, input: torch.Tensor) -> Tuple... function torch_nn_conv3d (line 63) | def torch_nn_conv3d(self: torch.nn.Conv3d, input: torch.Tensor) -> Tuple... function torch_nn_convtranspose1d (line 93) | def torch_nn_convtranspose1d(self: torch.nn.ConvTranspose1d, input: torc... function torch_nn_convtranspose2d (line 121) | def torch_nn_convtranspose2d(self: torch.nn.ConvTranspose2d, input: torc... function torch_nn_convtranspose3d (line 155) | def torch_nn_convtranspose3d(self: torch.nn.ConvTranspose3d, input: torc... FILE: colossalai/fx/profiler/experimental/profiler_module/dropout.py function torch_nn_dropout (line 9) | def torch_nn_dropout(self: torch.nn.Module, input: torch.Tensor) -> Tupl... FILE: colossalai/fx/profiler/experimental/profiler_module/embedding.py function torch_nn_embedding (line 9) | def torch_nn_embedding(self: torch.nn.Embedding, input: torch.Tensor) ->... FILE: colossalai/fx/profiler/experimental/profiler_module/linear.py function torch_nn_linear (line 10) | def torch_nn_linear(self: torch.nn.Linear, input: torch.Tensor) -> Tuple... FILE: colossalai/fx/profiler/experimental/profiler_module/normalization.py function torch_nn_normalize (line 19) | def torch_nn_normalize( FILE: colossalai/fx/profiler/experimental/profiler_module/pooling.py function torch_nn_pooling (line 20) | def torch_nn_pooling(self: torch.nn.Module, input: torch.Tensor) -> Tupl... FILE: colossalai/fx/profiler/experimental/profiler_module/rnn.py function _rnn_flops (line 10) | def _rnn_flops( function torch_nn_rnn (line 44) | def torch_nn_rnn(self: torch.nn.RNNBase, input: torch.Tensor, hx: Option... function torch_nn_rnn (line 66) | def torch_nn_rnn(self: torch.nn.RNNCellBase, input: torch.Tensor, hx: Op... FILE: colossalai/fx/profiler/experimental/profiler_module/torch_op.py function torch_nn_flatten (line 9) | def torch_nn_flatten(self: torch.nn.Flatten, input: torch.Tensor) -> Tup... FILE: colossalai/fx/profiler/experimental/registry.py class ProfilerRegistry (line 1) | class ProfilerRegistry: method __init__ (line 2) | def __init__(self, name): method register (line 6) | def register(self, source): method get (line 13) | def get(self, source): method has (line 18) | def has(self, source): FILE: colossalai/fx/profiler/experimental/shard_utils.py function calculate_fwd_in (line 11) | def calculate_fwd_in(n: Node) -> bool: function calculate_fwd_tmp (line 24) | def calculate_fwd_tmp(n: Node) -> int: function calculate_fwd_out (line 37) | def calculate_fwd_out(n: Node) -> int: FILE: colossalai/fx/profiler/memory_utils.py function activation_size (line 12) | def activation_size(out: Union[torch.Tensor, Dict, List, Tuple, int]) ->... function parameter_size (line 37) | def parameter_size(mod: torch.nn.Module) -> int: function is_inplace (line 52) | def is_inplace(n: Node): FILE: colossalai/fx/profiler/opcount.py function matmul_flop_jit (line 15) | def matmul_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function addmm_flop_jit (line 49) | def addmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function linear_flop_jit (line 66) | def linear_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function bmm_flop_jit (line 80) | def bmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function baddbmm_flop_jit (line 94) | def baddbmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number: function conv_flop_count (line 107) | def conv_flop_count( function conv_flop_jit (line 132) | def conv_flop_jit(inputs: List[Any], outputs: List[Any]): function transpose_shape (line 143) | def transpose_shape(shape): function conv_backward_flop_jit (line 147) | def conv_backward_flop_jit(inputs: List[Any], outputs: List[Any]): function norm_flop_counter (line 163) | def norm_flop_counter(affine_arg_index: int, input_arg_index: int) -> Ca... function batchnorm_flop_jit (line 189) | def batchnorm_flop_jit(inputs: List[Any], outputs: List[Any], training: ... function elementwise_flop_counter (line 200) | def elementwise_flop_counter(input_scale: float = 1, output_scale: float... function zero_flop_jit (line 222) | def zero_flop_jit(*args): FILE: colossalai/fx/profiler/profiler.py function normalize_tuple (line 28) | def normalize_tuple(x): function is_autogradable (line 34) | def is_autogradable(x): function detach_variables (line 38) | def detach_variables(x): function _profile_concrete (line 48) | def _profile_concrete(target: Callable, *args, **kwargs) -> Tuple[Tuple[... function _profile_meta (line 148) | def _profile_meta(target: Callable, *args, **kwargs) -> Tuple[Tuple[Any,... function profile_function (line 289) | def profile_function(target: "Target", device: str = "meta") -> Callable: function profile_method (line 345) | def profile_method(target: "Target", device: str = "meta") -> Callable: function profile_module (line 364) | def profile_module(module: torch.nn.Module, device: str = "meta") -> Cal... FILE: colossalai/fx/profiler/shard_utils.py function calculate_fwd_in (line 14) | def calculate_fwd_in(n: Node) -> int: function calculate_fwd_tmp (line 28) | def calculate_fwd_tmp(n: Node) -> int: function calculate_fwd_out (line 74) | def calculate_fwd_out(n: Node) -> int: function calculate_fwd_time (line 95) | def calculate_fwd_time(n: Node) -> float: function calculate_bwd_time (line 106) | def calculate_bwd_time(n: Node) -> float: FILE: colossalai/fx/profiler/tensor.py function set_data_ptr (line 13) | def set_data_ptr(x): class MetaTensor (line 21) | class MetaTensor(torch.Tensor): method __new__ (line 30) | def __new__(cls, elem, fake_device=None): method __repr__ (line 57) | def __repr__(self): method __torch_dispatch__ (line 63) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None): method to (line 102) | def to(self, *args, **kwargs) -> torch.Tensor: method cpu (line 130) | def cpu(self, *args, **kwargs): method cuda (line 135) | def cuda(self, device=None, non_blocking=False): FILE: colossalai/fx/proxy.py class ColoProxy (line 11) | class ColoProxy(Proxy): method __init__ (line 24) | def __init__(self, *args, **kwargs): method meta_data (line 29) | def meta_data(self): method meta_data (line 33) | def meta_data(self, data: Any): method has_meta_data (line 37) | def has_meta_data(self): method _assert_meta_data_is_tensor (line 40) | def _assert_meta_data_is_tensor(self): method _assert_has_meta_data (line 45) | def _assert_has_meta_data(self): method __len__ (line 48) | def __len__(self): method __int__ (line 52) | def __int__(self): method __float__ (line 56) | def __float__(self): method __bool__ (line 60) | def __bool__(self): method __getattr__ (line 64) | def __getattr__(self, k): method __contains__ (line 67) | def __contains__(self, key): function extract_meta (line 76) | def extract_meta(*args, **kwargs): class ColoAttribute (line 93) | class ColoAttribute(ColoProxy): method __init__ (line 94) | def __init__(self, root, attr: str): method node (line 101) | def node(self): method __call__ (line 113) | def __call__(self, *args, **kwargs): FILE: colossalai/fx/tracer/_meta_trace.py function normalize_tuple (line 6) | def normalize_tuple(x): function is_autogradable (line 12) | def is_autogradable(x): function meta_trace (line 16) | def meta_trace(module: torch.nn.Module, fake_device=None, *args, **kwarg... FILE: colossalai/fx/tracer/_symbolic_trace.py function symbolic_trace (line 12) | def symbolic_trace( FILE: colossalai/fx/tracer/_tracer_utils.py function is_element_in_list (line 11) | def is_element_in_list(elements: Union[List[Any], Any], list_: List[Any]): function extract_meta (line 23) | def extract_meta(*args, **kwargs): function compute_meta_data_for_functions_proxy (line 37) | def compute_meta_data_for_functions_proxy(target, args, kwargs): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addbmm.py class Addbmm (line 9) | class Addbmm(LinearBasedBiasFunc): method extract_kwargs_from_origin_func (line 10) | def extract_kwargs_from_origin_func(self): method create_non_bias_func_proxy (line 18) | def create_non_bias_func_proxy(self, input_proxy, other_proxy): method insert_sum_node (line 33) | def insert_sum_node(self, input_proxy, sum_dims=0): method generate (line 44) | def generate(self): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addmm.py class Addmm (line 9) | class Addmm(LinearBasedBiasFunc): method extract_kwargs_from_origin_func (line 10) | def extract_kwargs_from_origin_func(self): method transpose_other_operand_for_linear (line 18) | def transpose_other_operand_for_linear(self, other_proxy): method generate (line 37) | def generate(self): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/bias_addition_function.py class BiasAdditionFunc (line 8) | class BiasAdditionFunc(ABC): method __init__ (line 14) | def __init__(self, tracer, target, args, kwargs, substitute_func): method extract_kwargs_from_origin_func (line 22) | def extract_kwargs_from_origin_func(self): method generate (line 34) | def generate(self): method create_mul_node (line 53) | def create_mul_node(self, input_proxy, coefficent): class LinearBasedBiasFunc (line 71) | class LinearBasedBiasFunc(BiasAdditionFunc): method create_non_bias_func_proxy (line 77) | def create_non_bias_func_proxy(self, input_proxy, other_proxy): method create_bias_addition_proxy (line 92) | def create_bias_addition_proxy(self, non_bias_func_proxy, bias_proxy): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/linear.py class Linear (line 8) | class Linear(LinearBasedBiasFunc): method extract_kwargs_from_origin_func (line 9) | def extract_kwargs_from_origin_func(self): method generate (line 16) | def generate(self): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/bias_addition_module.py class BiasAdditionModule (line 8) | class BiasAdditionModule(ABC): method __init__ (line 14) | def __init__(self, tracer, target, args, kwargs, substitute_func): method _create_weight_proxy (line 23) | def _create_weight_proxy(self): method _create_bias_proxy (line 35) | def _create_bias_proxy(self): method extract_kwargs_from_mod (line 48) | def extract_kwargs_from_mod(self): method create_non_bias_func_proxy (line 58) | def create_non_bias_func_proxy(self, input_proxy=None): method create_bias_addition_proxy (line 72) | def create_bias_addition_proxy(self, non_bias_func_proxy, bias_proxy): method generate (line 84) | def generate(self): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/conv.py class BiasAdditionConv (line 11) | class BiasAdditionConv(BiasAdditionModule): method extract_kwargs_from_mod (line 12) | def extract_kwargs_from_mod(self): method create_bias_reshape_proxy (line 35) | def create_bias_reshape_proxy(self, dimensions): method generate (line 50) | def generate(self): FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/linear.py class BiasAdditionLinear (line 8) | class BiasAdditionLinear(BiasAdditionModule): method extract_kwargs_from_mod (line 9) | def extract_kwargs_from_mod(self): method generate (line 12) | def generate(self): FILE: colossalai/fx/tracer/experimental.py function _truncate_suffix (line 53) | def _truncate_suffix(s: str): function default_device (line 59) | def default_device(): class ColoProxy (line 64) | class ColoProxy(Proxy): method __init__ (line 65) | def __init__(self, *args, data=None, **kwargs): method meta_data (line 70) | def meta_data(self): method meta_data (line 74) | def meta_data(self, args): method __torch_function__ (line 79) | def __torch_function__(cls, orig_method, types, args=(), kwargs=None): method from_torch_proxy (line 88) | def from_torch_proxy(cls, proxy: Proxy): method __repr__ (line 91) | def __repr__(self): method __len__ (line 94) | def __len__(self): method __int__ (line 97) | def __int__(self): method __index__ (line 100) | def __index__(self): method __float__ (line 106) | def __float__(self): method __bool__ (line 109) | def __bool__(self): method __getattr__ (line 112) | def __getattr__(self, k): method __setitem__ (line 115) | def __setitem__(self, key, value): method __contains__ (line 120) | def __contains__(self, key): method __isinstancecheck__ (line 128) | def __isinstancecheck__(self, type): method shape (line 132) | def shape(self): method ndim (line 136) | def ndim(self): method device (line 140) | def device(self): method dtype (line 146) | def dtype(self): method to (line 151) | def to(self, *args, **kwargs): method cpu (line 154) | def cpu(self, *args, **kwargs): method cuda (line 157) | def cuda(self, *args, **kwargs): class ColoAttribute (line 162) | class ColoAttribute(ColoProxy): method __init__ (line 163) | def __init__(self, root, attr: str, data=None): method node (line 171) | def node(self): method __call__ (line 178) | def __call__(self, *args, **kwargs): method __repr__ (line 181) | def __repr__(self): class ColoTracer (line 186) | class ColoTracer(Tracer): method __init__ (line 187) | def __init__(self, trace_act_ckpt: bool = False, *args, **kwargs): method proxy (line 198) | def proxy(self, node: Node) -> "ColoProxy": method create_proxy (line 201) | def create_proxy( method create_node (line 252) | def create_node(self, *args, **kwargs) -> Node: method trace (line 260) | def trace( method trace_activation_checkpoint (line 306) | def trace_activation_checkpoint(self, enabled: bool): method _post_check (line 334) | def _post_check(self, non_concrete_arg_names: Set[str]): method _module_getattr (line 366) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache): function symbolic_trace (line 402) | def symbolic_trace( class _TorchTensorOverride (line 429) | class _TorchTensorOverride(object): method __init__ (line 430) | def __init__(self, tracer: Tracer): method __enter__ (line 434) | def __enter__(self): method __exit__ (line 463) | def __exit__(self, exc_type, exc_val, exc_tb): function meta_prop_pass (line 468) | def meta_prop_pass( function _meta_data_computing (line 497) | def _meta_data_computing(meta_args, concrete_args, root, kind, target, a... function _meta_data_computing_v0 (line 525) | def _meta_data_computing_v0(meta_args, root, kind, target, args, kwargs): function bias_addition_pass (line 595) | def bias_addition_pass(gm: ColoGraphModule, root_model: torch.nn.Module,... FILE: colossalai/fx/tracer/meta_patch/patched_function/activation_function.py function torch_nn_func_relu (line 7) | def torch_nn_func_relu(input, inplace=False): FILE: colossalai/fx/tracer/meta_patch/patched_function/arithmetic.py function torch_matmul (line 8) | def torch_matmul(input, other, *, out=None): function torch_abs (line 46) | def torch_abs(input, *, out=None): function torch_bmm (line 52) | def torch_bmm(input, mat2, *, out=None): function torch_linear (line 61) | def torch_linear(input, mat2, bias=None, *, out=None): function torch_addbmm (line 72) | def torch_addbmm(input, mat1, mat2, *, beta=1, alpha=1, out=None): function torch_addmm (line 82) | def torch_addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None): function torch_var_mean (line 91) | def torch_var_mean(input, dim, unbiased=True, keepdim=False, *, out=None): FILE: colossalai/fx/tracer/meta_patch/patched_function/convolution.py function _ntuple (line 10) | def _ntuple(n, name="parse"): function _extract_kwargs (line 25) | def _extract_kwargs(kwargs): function torch_nn_functional_conv1d (line 48) | def torch_nn_functional_conv1d(input, weight, **kwargs): function torch_nn_functional_conv2d (line 67) | def torch_nn_functional_conv2d(input, weight, **kwargs): function torch_nn_functional_conv3d (line 88) | def torch_nn_functional_conv3d(input, weight, **kwargs): function torch_nn_functional_convtranspose1d (line 111) | def torch_nn_functional_convtranspose1d(input, weight, **kwargs): function torch_nn_functional_convtranspose2d (line 133) | def torch_nn_functional_convtranspose2d(input, weight, **kwargs): function torch_nn_functional_convtranspose3d (line 159) | def torch_nn_functional_convtranspose3d(input, weight, **kwargs): FILE: colossalai/fx/tracer/meta_patch/patched_function/embedding.py function torch_nn_functional_embedding (line 7) | def torch_nn_functional_embedding( FILE: colossalai/fx/tracer/meta_patch/patched_function/normalization.py function torch_nn_func_layernorm (line 7) | def torch_nn_func_layernorm(input, normalized_shape, weight=None, bias=N... function torch_nn_func_batchnorm (line 12) | def torch_nn_func_batchnorm( FILE: colossalai/fx/tracer/meta_patch/patched_function/python_ops.py function operator_getitem (line 11) | def operator_getitem(a, b): FILE: colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py function torch_arange (line 7) | def torch_arange(*args, **kwargs): function torch_finfo (line 29) | def torch_finfo(*args): function torch_where (line 34) | def torch_where(condition, x, y): function torch_tensor_repeat (line 41) | def torch_tensor_repeat(self, *sizes): function torch_index_select (line 49) | def torch_index_select(input, dim, index, *, out=None): function torch_tensor_index_select (line 56) | def torch_tensor_index_select(self, dim, index): function torch_squeeze (line 61) | def torch_squeeze(input, dim=None): function torch_tensor_squeeze (line 79) | def torch_tensor_squeeze(self, dim=None): function torch_unsqueeze (line 84) | def torch_unsqueeze(input, dim): function torch_tensor_unsqueeze (line 93) | def torch_tensor_unsqueeze(self, dim): function torch_cat (line 98) | def torch_cat(tensors, dim=None, axis=None, *, out=None): function torch_repeat_interleave (line 113) | def torch_repeat_interleave(input, repeats, dim=None, output_size=None): function torch_tensor_repeat_interleave (line 130) | def torch_tensor_repeat_interleave(self, repeats, dim=None, *, output_si... function torch_roll (line 135) | def torch_roll(input, shifts, dims=None): function torch_full (line 140) | def torch_full(size, fill_value, *, out=None, dtype=None, layout=torch.s... function torch_max (line 146) | def torch_max(input, dim=None, keepdim=False, *, out=None): function torch_tensor_cpu (line 169) | def torch_tensor_cpu(input): function torch_tensor_cuda (line 174) | def torch_tensor_cuda(input, *args, **kwargs): FILE: colossalai/fx/tracer/meta_patch/patched_module/activation_function.py function torch_nn_non_linear_act (line 12) | def torch_nn_non_linear_act(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/convolution.py function torch_nn_conv1d (line 9) | def torch_nn_conv1d(self, input): function torch_nn_conv2d (line 25) | def torch_nn_conv2d(self, input): function torch_nn_conv3d (line 45) | def torch_nn_conv3d(self, input): function torch_nn_convtranspose1d (line 69) | def torch_nn_convtranspose1d(self, input): function torch_nn_convtranspose2d (line 89) | def torch_nn_convtranspose2d(self, input): function torch_nn_convtranspose3d (line 117) | def torch_nn_convtranspose3d(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/embedding.py function torch_nn_embedding (line 7) | def torch_nn_embedding(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/linear.py function torch_nn_linear (line 7) | def torch_nn_linear(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/normalization.py function torch_nn_normalize (line 11) | def torch_nn_normalize(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/pooling.py function torch_nn_avgpool1d (line 9) | def torch_nn_avgpool1d(self, input): function torch_nn_avgpool2d (line 32) | def torch_nn_avgpool2d(self, input): function torch_nn_avgpool3d (line 59) | def torch_nn_avgpool3d(self, input): function torch_nn_maxpool1d (line 88) | def torch_nn_maxpool1d(self, input): function torch_nn_maxpool2d (line 112) | def torch_nn_maxpool2d(self, input): function torch_nn_maxpool3d (line 140) | def torch_nn_maxpool3d(self, input): function torch_nn_adapative_pooling_1d (line 171) | def torch_nn_adapative_pooling_1d(self, input): function torch_nn_adapative_pooling_2d (line 183) | def torch_nn_adapative_pooling_2d(self, input): function torch_nn_adapative_pooling_3d (line 195) | def torch_nn_adapative_pooling_3d(self, input): FILE: colossalai/fx/tracer/meta_patch/patched_module/rnn.py function torch_nn_rnn (line 8) | def torch_nn_rnn(self, input, hx): FILE: colossalai/fx/tracer/registry.py class PatchRegistry (line 1) | class PatchRegistry: method __init__ (line 2) | def __init__(self, name): method register (line 6) | def register(self, source): method get (line 13) | def get(self, source): method has (line 18) | def has(self, source): FILE: colossalai/fx/tracer/tracer.py class TracerType (line 35) | class TracerType(enum.Enum): class ColoTracer (line 40) | class ColoTracer(Tracer): method __init__ (line 67) | def __init__(self, trace_act_ckpt: bool = False, *args, **kwargs): method create_proxy (line 83) | def create_proxy(self, kind, target, args, kwargs, name=None, type_exp... method _module_getattr (line 152) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache): method call_module (line 189) | def call_module(self, m, forward, args, kwargs): method proxy (line 202) | def proxy(self, node) -> Proxy: method _configure_tracer_type (line 208) | def _configure_tracer_type(self, tracer_type: TracerType): method _meta_data_computing (line 218) | def _meta_data_computing(self, kind, target, args, kwargs): method trace (line 317) | def trace( method trace_activation_checkpoint (line 448) | def trace_activation_checkpoint(self, enabled: bool): method create_node (line 476) | def create_node(self, *args, **kwargs) -> Node: function wrap_tensor_constructor_method (line 485) | def wrap_tensor_constructor_method(target): function _scope (line 527) | def _scope(method): function _define_reflectable (line 545) | def _define_reflectable(orig_method_name): FILE: colossalai/inference/batch_bucket.py class BatchBucket (line 9) | class BatchBucket: method __init__ (line 23) | def __init__( method is_empty (line 68) | def is_empty(self): method current_batch_size (line 72) | def current_batch_size(self): method __len__ (line 75) | def __len__(self): method available_batch_size (line 79) | def available_batch_size(self): method block_tables (line 83) | def block_tables(self): method seq_lengths (line 87) | def seq_lengths(self): method seqs_ids (line 91) | def seqs_ids(self): method seqs_li (line 95) | def seqs_li(self): method is_compact (line 99) | def is_compact(self): method use_spec_dec (line 108) | def use_spec_dec(self) -> bool: method num_tokens_to_verify (line 112) | def num_tokens_to_verify(self) -> int: method batch_token_ids (line 116) | def batch_token_ids(self) -> List[List[int]]: method streamingllm_update_batch (line 122) | def streamingllm_update_batch(self, start_token_size: int, generated_t... method set_use_spec_dec (line 149) | def set_use_spec_dec(self, num_tokens_to_verify: int = 5) -> None: method reset_use_spec_dec (line 157) | def reset_use_spec_dec(self) -> None: method _make_compact (line 162) | def _make_compact(self) -> None: method add_seq (line 184) | def add_seq( method add_seqs (line 219) | def add_seqs( method pop_seq_update_batch (line 271) | def pop_seq_update_batch( method pop_seqs (line 324) | def pop_seqs( method pop_n_seqs (line 347) | def pop_n_seqs( method pop_finished (line 378) | def pop_finished( method append_batch_tokens (line 407) | def append_batch_tokens(self, tokens: torch.Tensor) -> None: method revoke_batch_tokens (line 422) | def revoke_batch_tokens(self, n_tokens: int, n_seqs: int = 1) -> None: method clear (line 440) | def clear(self, free_block_tables_fn: Optional[Callable[[torch.Tensor]... method merge (line 455) | def merge(self, other: "BatchBucket") -> List[int]: method is_prompts (line 488) | def is_prompts(self) -> bool: method get_1D_inputs_spec_dec (line 495) | def get_1D_inputs_spec_dec(self, n: int) -> torch.Tensor: method get_1D_inputs (line 511) | def get_1D_inputs(self) -> torch.Tensor: method get_block_table_tensor (line 542) | def get_block_table_tensor(self) -> torch.Tensor: method get_sequence_lengths (line 548) | def get_sequence_lengths(self) -> torch.Tensor: method fd_inter_tensor (line 555) | def fd_inter_tensor(self) -> None: method __repr__ (line 559) | def __repr__(self) -> str: FILE: colossalai/inference/config.py class RPC_PARAM (line 35) | class RPC_PARAM(ABC): method to_rpc_param (line 43) | def to_rpc_param(self): method from_rpc_param (line 48) | def from_rpc_param(): class InputMetaData (line 53) | class InputMetaData(RPC_PARAM): method to_rpc_param (line 90) | def to_rpc_param(self) -> Dict[str, any]: method from_rpc_param (line 108) | def from_rpc_param(rpc_dict: Dict[str, any]) -> "InputMetaData": method __repr__ (line 135) | def __repr__(self) -> str: class InferenceConfig (line 151) | class InferenceConfig(RPC_PARAM): method __post_init__ (line 254) | def __post_init__(self): method _verify_config (line 258) | def _verify_config(self) -> None: method to_generation_config (line 317) | def to_generation_config(self, model_config) -> GenerationConfig: method to_model_shard_inference_config (line 334) | def to_model_shard_inference_config(self) -> "ModelShardInferenceConfig": method to_rpc_param (line 345) | def to_rpc_param(self) -> dict: method from_rpc_param (line 363) | def from_rpc_param(rpc_dict: dict) -> "InferenceConfig": method from_dict (line 383) | def from_dict(cls, config_dict: Dict[str, Any]) -> "InferenceConfig": class ModelShardInferenceConfig (line 399) | class ModelShardInferenceConfig: class DiffusionGenerationConfig (line 418) | class DiffusionGenerationConfig: method to_dict (line 449) | def to_dict(self) -> Dict[str, Any]: method from_kwargs (line 459) | def from_kwargs(cls, **kwargs) -> "DiffusionGenerationConfig": FILE: colossalai/inference/core/async_engine.py function _raise_exception_on_finish (line 14) | def _raise_exception_on_finish(task: asyncio.Task, request_tracker: "Tra... class RequstStream (line 29) | class RequstStream: method __init__ (line 39) | def __init__(self, request_id: int) -> None: method set_result (line 43) | def set_result(self, result) -> None: method get_result (line 48) | async def get_result(self): method finished (line 53) | def finished(self) -> bool: class Tracer (line 58) | class Tracer: method __init__ (line 67) | def __init__(self) -> None: method __contains__ (line 73) | def __contains__(self, item): method init_event (line 76) | def init_event(self): method propagate_exception (line 79) | def propagate_exception(self, exc: Exception, request_id: Optional[int... method process_finished_request (line 89) | def process_finished_request(self, finished_request) -> None: method add_request (line 98) | def add_request(self, request_id: int, **engine_add_request_kwargs) ->... method abort_request (line 113) | def abort_request(self, request_id: int, *, verbose: bool = False) -> ... method get_new_requests (line 127) | def get_new_requests(self): method wait_for_new_requests (line 151) | async def wait_for_new_requests(self): class _AsyncInferenceEngine (line 155) | class _AsyncInferenceEngine(InferenceEngine): method async_step (line 161) | async def async_step(self) -> List[str]: method add_single_request (line 206) | def add_single_request(self, request_id: int, prompt: str, prompt_toke... class AsyncInferenceEngine (line 212) | class AsyncInferenceEngine: method __init__ (line 225) | def __init__(self, start_engine_loop: bool = True, **kwargs): method background_loop_status (line 234) | def background_loop_status(self): method start_background_loop (line 237) | def start_background_loop(self): method _init_engine (line 249) | def _init_engine(self, **kwargs): method step (line 252) | async def step(self): method _engine_abort (line 267) | async def _engine_abort(self, request_ids: Iterable[int]): method abort (line 270) | async def abort(self, request_id: int): method _abort (line 278) | def _abort(self, request_id: int): method run_engine_loop (line 281) | async def run_engine_loop(self): method add_request (line 289) | async def add_request( method generate (line 312) | async def generate( FILE: colossalai/inference/core/base_engine.py class BaseEngine (line 13) | class BaseEngine(ABC): method __init__ (line 15) | def __init__(self, model_or_path, inference_config=None, verbose=False... method init_model (line 19) | def init_model(self, model_or_path, model_policy=None, model_shard_inf... method generate (line 25) | def generate(self, request_ids=None, prompts=None, generation_config=N... method add_request (line 31) | def add_request(self, prompts, request_ids=None, **kwargs): method step (line 37) | def step(self): method _verify_args (line 43) | def _verify_args(self): method capture_model (line 49) | def capture_model(self): method _shardformer (line 55) | def _shardformer( FILE: colossalai/inference/core/diffusion_engine.py class DiffusionEngine (line 27) | class DiffusionEngine(BaseEngine): method __init__ (line 28) | def __init__( method _verify_args (line 53) | def _verify_args(self) -> None: method init_model (line 56) | def init_model( method generate (line 128) | def generate( method add_request (line 161) | def add_request( method step (line 189) | def step(self) -> List[PIL.Image.Image]: FILE: colossalai/inference/core/engine.py class InferenceEngine (line 16) | class InferenceEngine: method __init__ (line 28) | def __init__( method _verify_args (line 64) | def _verify_args(self) -> None: method generate (line 69) | def generate( method add_request (line 87) | def add_request( method step (line 107) | def step(self): method __getattr__ (line 111) | def __getattr__(self, name): method __setattr__ (line 126) | def __setattr__(self, name, value): FILE: colossalai/inference/core/llm_engine.py class LLMEngine (line 46) | class LLMEngine(BaseEngine): method __init__ (line 58) | def __init__( method init_model (line 107) | def init_model( method capture_model (line 213) | def capture_model(self, k_cache: List[torch.Tensor], v_cache: List[tor... method _verify_args (line 285) | def _verify_args(self) -> None: method enable_spec_dec (line 301) | def enable_spec_dec( method disable_spec_dec (line 369) | def disable_spec_dec(self) -> None: method clear_spec_dec (line 377) | def clear_spec_dec(self) -> None: method steps_spec_dec (line 388) | def steps_spec_dec(self) -> List[Sequence]: method generate (line 496) | def generate( method has_prompt_template (line 561) | def has_prompt_template(self) -> bool: method format_prompt (line 565) | def format_prompt(self, prompts: Union[List[str], str]) -> Union[List[... method add_request (line 580) | def add_request( method prepare_input (line 671) | def prepare_input(self, batch: BatchBucket) -> Tuple[torch.Tensor, tor... method step (line 719) | def step(self) -> List[str]: FILE: colossalai/inference/core/plugin.py class InferCheckpoint_io (line 21) | class InferCheckpoint_io(GeneralCheckpointIO): method __init__ (line 27) | def __init__( method load_sharded_model (line 35) | def load_sharded_model(self, model: ModelWrapper, checkpoint_index_fil... method save_sharded_model (line 131) | def save_sharded_model( FILE: colossalai/inference/core/request_handler.py class RunningList (line 19) | class RunningList: method __init__ (line 30) | def __init__(self, prefill_ratio: int, prefill: List[Sequence] = None)... method decoding (line 38) | def decoding(self): method prefill (line 42) | def prefill(self): method prefill_seq_num (line 46) | def prefill_seq_num(self): method decoding_seq_num (line 50) | def decoding_seq_num(self): method total_seq_num (line 54) | def total_seq_num(self): method append (line 57) | def append(self, seq: Sequence): method extend (line 63) | def extend(self, seqs: List[Sequence]): method find_seq (line 67) | def find_seq(self, request_id) -> Union[Sequence, None]: method remove (line 75) | def remove(self, seq: Sequence) -> None: method ready_for_prefill (line 83) | def ready_for_prefill(self): method is_empty (line 88) | def is_empty(self): method mark_prefill_running (line 91) | def mark_prefill_running(self) -> None: method move_prefill_to_decoding (line 95) | def move_prefill_to_decoding(self, seq_ids: List[int]) -> None: class NaiveRequestHandler (line 101) | class NaiveRequestHandler: method __init__ (line 102) | def __init__(self) -> None: method _has_waiting (line 106) | def _has_waiting(self) -> bool: method _has_running (line 109) | def _has_running(self) -> bool: method check_unfinished_reqs (line 112) | def check_unfinished_reqs(self): method add_sequence (line 115) | def add_sequence(self, seq: DiffusionSequence): method _find_sequence (line 122) | def _find_sequence(self, request_id: int) -> DiffusionSequence: method schedule (line 132) | def schedule(self): class RequestHandler (line 140) | class RequestHandler(NaiveRequestHandler): method __init__ (line 151) | def __init__(self, inference_config: InferenceConfig, model_config: Pr... method _has_running (line 218) | def _has_running(self) -> bool: method _init_cache (line 221) | def _init_cache(self, model_config): method get_kvcache (line 224) | def get_kvcache(self): method set_spec_dec_mode (line 227) | def set_spec_dec_mode(self, n_spec_tokens: int): method unset_spec_dec_mode (line 231) | def unset_spec_dec_mode(self): method schedule (line 235) | def schedule(self): method allocate_batch_spec_dec (line 293) | def allocate_batch_spec_dec(self, batch: BatchBucket, n: int): method add_sequence (line 300) | def add_sequence(self, req: Sequence): method abort_sequence (line 310) | def abort_sequence(self, request_id: int): method _find_sequence (line 330) | def _find_sequence(self, request_id: int) -> Sequence: method update_seq_finished (line 344) | def update_seq_finished(self, sequence: Sequence, generation_config: G... method update_batch_finished (line 351) | def update_batch_finished(self, batch: BatchBucket, generation_config:... method check_unfinished_reqs (line 360) | def check_unfinished_reqs(self) -> bool: method total_requests_in_batch_bucket (line 363) | def total_requests_in_batch_bucket(self) -> int: method append_next_tokens (line 366) | def append_next_tokens(self, sample_tokens: torch.Tensor): method update (line 380) | def update(self): method streamingllm_free_block_tables (line 398) | def streamingllm_free_block_tables(self, updated_block_ids: List[int]): class RPCRequestHandler (line 405) | class RPCRequestHandler(RequestHandler): method __init__ (line 410) | def __init__(self, inference_config: InferenceConfig, model_config: Pr... method _init_cache (line 451) | def _init_cache(self, model_config): FILE: colossalai/inference/core/rpc_engine.py function run_server (line 27) | def run_server(host, port, event: mp.Event = None): class RPCInferenceEngine (line 36) | class RPCInferenceEngine(InferenceEngine): method __init__ (line 51) | def __init__( method _verify_args (line 125) | def _verify_args(self) -> None: method init_workers (line 134) | def init_workers(self): method async_parallel_wrapper (line 167) | async def async_parallel_wrapper(self, f, *args, **kwargs): method init_worker_env (line 173) | async def init_worker_env(self): method init_model (line 186) | async def init_model(self, model_or_path: Union[nn.Module, str], model... method init_scheduler (line 200) | def init_scheduler(self, inference_config: InferenceConfig, model_conf... method _init_device_cache (line 203) | async def _init_device_cache(self, alloc_shape: Tuple[int, int, int, i... method init_device_cache (line 210) | def init_device_cache(self, alloc_shape: Tuple[Tuple[int, ...], Tuple[... method prepare_input (line 213) | def prepare_input(self, batch: BatchBucket) -> Tuple[List[int], InputM... method step_ (line 257) | async def step_(self, input_token_ids, input_meta_data: InputMetaData): method step (line 273) | def step(self) -> List[str]: method kill_workers (line 286) | def kill_workers(self): method __del__ (line 296) | def __del__(self): FILE: colossalai/inference/executor/rpc_worker.py class rpcWorkerService (line 44) | class rpcWorkerService(rpyc.Service): method exposed_init_dist_env (line 51) | def exposed_init_dist_env(self, rank, world_size, master_address, mast... method exposed_init_model (line 56) | def exposed_init_model( method exposed_init_cache (line 72) | def exposed_init_cache(self, alloc_shape: Tuple[Tuple[int, ...], Tuple... method exposed_execute_model_forward (line 100) | def exposed_execute_model_forward( method _init_output_tensor (line 134) | def _init_output_tensor(self): method _init_fd_tensor (line 142) | def _init_fd_tensor(self): method _init_model (line 169) | def _init_model(self, model_or_path: Union[nn.Module, str], model_poli... method _shardformer (line 261) | def _shardformer( method exposed_compute_only_for_test (line 295) | def exposed_compute_only_for_test(self): FILE: colossalai/inference/flash_decoding_utils.py class FDIntermTensors (line 7) | class FDIntermTensors(metaclass=SingletonMeta): method __init__ (line 12) | def __init__(self): method _reset (line 15) | def _reset(self): method is_initialized (line 23) | def is_initialized(self): method mid_output (line 27) | def mid_output(self): method mid_output_lse (line 32) | def mid_output_lse(self): method exp_sums (line 37) | def exp_sums(self): method max_logits (line 42) | def max_logits(self): method initialize (line 46) | def initialize( FILE: colossalai/inference/graph_runner.py class CUDAGraphRunner (line 10) | class CUDAGraphRunner: method __init__ (line 11) | def __init__(self, model: nn.Module): method capture (line 18) | def capture( method forward (line 66) | def forward( method __call__ (line 99) | def __call__(self, *args, **kwargs): FILE: colossalai/inference/kv_cache/block_cache.py class CacheBlock (line 6) | class CacheBlock: method __init__ (line 9) | def __init__(self, block_id: int, block_size: int, elem_size: int, k_p... method available_space (line 32) | def available_space(self) -> int: method add_ref (line 36) | def add_ref(self) -> None: method remove_ref (line 39) | def remove_ref(self) -> None: method has_ref (line 43) | def has_ref(self) -> bool: method allocate (line 46) | def allocate(self, size: int) -> None: method is_empty (line 50) | def is_empty(self): method clear (line 53) | def clear(self) -> None: method __repr__ (line 57) | def __repr__(self): FILE: colossalai/inference/kv_cache/kvcache_manager.py class KVCacheManager (line 18) | class KVCacheManager: method __init__ (line 48) | def __init__(self, config: InferenceConfig, model_config: PretrainedCo... method total_num_blocks (line 127) | def total_num_blocks(self) -> int: method num_available_blocks (line 132) | def num_available_blocks(self) -> int: method get_head_size (line 136) | def get_head_size(self): method get_kv_cache (line 139) | def get_kv_cache(self): method get_max_blocks_per_sequence (line 143) | def get_max_blocks_per_sequence(self) -> int: method check_allocation (line 150) | def check_allocation(self, seq: Sequence) -> bool: method get_block_kv_ptrs (line 154) | def get_block_kv_ptrs(self, block_id: int, layer_id: int) -> Tuple[Lis... method get_block_table_kv_ptrs (line 159) | def get_block_table_kv_ptrs(self, block_table: torch.Tensor, layer_id:... method allocate_context_from_block_table (line 170) | def allocate_context_from_block_table(self, block_table: torch.Tensor,... method allocate_context_from_block_tables (line 220) | def allocate_context_from_block_tables(self, block_tables: torch.Tenso... method allocate_token_from_block_table (line 295) | def allocate_token_from_block_table(self, block_table: torch.Tensor, c... method allocate_tokens_from_block_tables (line 309) | def allocate_tokens_from_block_tables( method allocate_n_tokens_from_block_tables (line 376) | def allocate_n_tokens_from_block_tables( method allocate_single_block (line 396) | def allocate_single_block(self, block_table: torch.Tensor, block_local... method free_block_table (line 426) | def free_block_table(self, block_table: torch.Tensor) -> None: method free_block_tables (line 441) | def free_block_tables(self, block_tables: torch.Tensor, first_n: int =... method clear_all (line 450) | def clear_all(self) -> None: method streamingllm_free_block_tables (line 457) | def streamingllm_free_block_tables(self, updated_block_ids: List[int]): method get_physical_cache (line 471) | def get_physical_cache(self, layer_id: int, block_idx: int) -> Tuple[t... method _allocate_on_block (line 475) | def _allocate_on_block(self, block: CacheBlock, space_asked: int) -> int: method _init_logical_caches (line 486) | def _init_logical_caches(self): method _init_device_caches (line 508) | def _init_device_caches( class RPCKVCacheManager (line 524) | class RPCKVCacheManager(KVCacheManager): method __init__ (line 525) | def __init__(self, config: InferenceConfig, model_config: PretrainedCo... method get_physical_cache_shape (line 578) | def get_physical_cache_shape(self) -> Tuple[Tuple[int, ...], Tuple[int... method get_kv_cache (line 594) | def get_kv_cache(self): method _init_logical_caches (line 598) | def _init_logical_caches(self): FILE: colossalai/inference/logit_processors.py function register_logits_processor (line 11) | def register_logits_processor(process_type): function apply_no_repeat_ngram_size (line 25) | def apply_no_repeat_ngram_size(logits, ngram_size: int, batch_token_ids:... function apply_repetition_penalty (line 57) | def apply_repetition_penalty(logits, penalty: float, batch_token_ids: Li... function apply_temperature (line 83) | def apply_temperature(logits, temperature: float): function apply_top_k (line 98) | def apply_top_k(logits, top_k: int): function apply_top_p (line 112) | def apply_top_p(logits, top_p: float): function apply_forced_eos_token_id (line 134) | def apply_forced_eos_token_id( function get_logits_processor (line 172) | def get_logits_processor(processor: str, logits, *args, **kwargs): FILE: colossalai/inference/modeling/backends/attention_backend.py class AttentionMetaData (line 12) | class AttentionMetaData: class AttentionBackend (line 30) | class AttentionBackend(ABC): method prefill (line 32) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 36) | def decode(self, attn_metadatas: AttentionMetaData, **kwargs): class CudaAttentionBackend (line 40) | class CudaAttentionBackend(AttentionBackend): method __init__ (line 46) | def __init__(self, use_flash_attn: bool = False): method prefill (line 51) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 89) | def decode(self, attn_metadata: AttentionMetaData, **kwargs): class TritonAttentionBackend (line 110) | class TritonAttentionBackend(AttentionBackend): method prefill (line 115) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 131) | def decode(self, attn_metadata: AttentionMetaData, **kwargs): function get_attention_backend (line 151) | def get_attention_backend( FILE: colossalai/inference/modeling/backends/pre_attention_backend.py class PreAttentionBackend (line 9) | class PreAttentionBackend(ABC): method prefill (line 11) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 15) | def decode(self, attn_metadata: AttentionMetaData, **kwargs): class CudaPreAttentionBackend (line 19) | class CudaPreAttentionBackend(PreAttentionBackend): method __init__ (line 24) | def __init__(self, use_flash_attn: bool): method prefill (line 29) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 57) | def decode(self, attn_metadata: AttentionMetaData, **kwargs): class TritonPreAttentionBackend (line 82) | class TritonPreAttentionBackend(PreAttentionBackend): method prefill (line 87) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs): method decode (line 96) | def decode(self, attn_metadata: AttentionMetaData, **kwargs): function get_pre_attention_backend (line 133) | def get_pre_attention_backend( FILE: colossalai/inference/modeling/layers/attention.py function copy_to_cache (line 9) | def copy_to_cache(source, cache, lengths, block_tables, type: str = "pre... function convert_kvcache (line 43) | def convert_kvcache(cache, lengths, block_tables, pad_id=0): class PagedAttention (line 75) | class PagedAttention: method pad_and_reshape (line 82) | def pad_and_reshape(tensor, seq_lengths, max_seq_len, num_heads, head_... method generate_padding_mask (line 97) | def generate_padding_mask(lengths, max_seq_len): method repeat_kv (line 103) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int = 1) -> torch.Te... method nopad_context_forward (line 120) | def nopad_context_forward( method pad_context_forward (line 188) | def pad_context_forward( method pad_decoding_forward (line 245) | def pad_decoding_forward( method no_pad_decoding_forward (line 301) | def no_pad_decoding_forward( FILE: colossalai/inference/modeling/layers/baichuan_tp_linear.py class BaichuanLMHeadLinear1D_Col (line 12) | class BaichuanLMHeadLinear1D_Col(Linear1D_Col): method from_native_module (line 14) | def from_native_module( method _load_from_state_dict (line 58) | def _load_from_state_dict( FILE: colossalai/inference/modeling/layers/diffusion.py class DiffusionPipe (line 8) | class DiffusionPipe(nn.Module): method __init__ (line 13) | def __init__(self, source_obj) -> None: method _execution_device (line 40) | def _execution_device(self): method device (line 50) | def device(self): method forward (line 53) | def forward(self, *args, **kwargs): FILE: colossalai/inference/modeling/layers/distrifusion.py function PixArtAlphaTransformer2DModel_forward (line 36) | def PixArtAlphaTransformer2DModel_forward( function SD3Transformer2DModel_forward (line 154) | def SD3Transformer2DModel_forward( class DistrifusionPatchEmbed (line 212) | class DistrifusionPatchEmbed(ParallelModule): method __init__ (line 213) | def __init__( method from_native_module (line 225) | def from_native_module(module: PatchEmbed, process_group: Union[Proces... method forward (line 232) | def forward(self, latent): class DistrifusionConv2D (line 268) | class DistrifusionConv2D(ParallelModule): method __init__ (line 270) | def __init__( method from_native_module (line 282) | def from_native_module(module: nn.Conv2d, process_group: Union[Process... method sliced_forward (line 287) | def sliced_forward(self, x: torch.Tensor) -> torch.Tensor: method forward (line 315) | def forward(self, input: torch.Tensor) -> Tuple[torch.Tensor, torch.Te... class DistrifusionFusedAttention (line 321) | class DistrifusionFusedAttention(ParallelModule): method __init__ (line 323) | def __init__( method from_native_module (line 340) | def from_native_module( method _forward (line 350) | def _forward( method forward (line 444) | def forward( class DistriSelfAttention (line 492) | class DistriSelfAttention(ParallelModule): method __init__ (line 493) | def __init__( method from_native_module (line 510) | def from_native_module( method _forward (line 520) | def _forward(self, hidden_states: torch.FloatTensor, scale: float = 1.0): method forward (line 598) | def forward( FILE: colossalai/inference/modeling/models/glide_llama.py function rotate_half (line 28) | def rotate_half(x): function apply_single_rotary_pos_emb (line 35) | def apply_single_rotary_pos_emb(q, cos, sin, position_ids): function glide_llama_causal_lm_forward (line 45) | def glide_llama_causal_lm_forward( function glide_llama_model_forward (line 121) | def glide_llama_model_forward( class GlideLlamaConfig (line 217) | class GlideLlamaConfig(LlamaConfig): method __init__ (line 220) | def __init__( class LlamaCrossAttention (line 231) | class LlamaCrossAttention(nn.Module): method __init__ (line 234) | def __init__(self, config: GlideLlamaConfig): method _shape (line 258) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 261) | def forward( class GlideLlamaDecoderLayer (line 309) | class GlideLlamaDecoderLayer(nn.Module): method __init__ (line 310) | def __init__(self, config: GlideLlamaConfig, layer_idx: Optional[int] ... method from_native_module (line 320) | def from_native_module(module: LlamaDecoderLayer, *args, **kwargs) -> ... method forward (line 329) | def forward( class GlideLlamaForCausalLM (line 411) | class GlideLlamaForCausalLM(LlamaForCausalLM): method __init__ (line 412) | def __init__(self, config: GlideLlamaConfig): FILE: colossalai/inference/modeling/models/nopadding_baichuan.py function baichuan_rmsnorm_forward (line 25) | def baichuan_rmsnorm_forward( class NopadBaichuanAttention (line 54) | class NopadBaichuanAttention(ParallelModule): method __init__ (line 55) | def __init__( method from_native_module (line 97) | def from_native_module( method forward (line 123) | def forward( class NopadBaichuanMLP (line 218) | class NopadBaichuanMLP(NopadLlamaMLP): method from_native_module (line 220) | def from_native_module( FILE: colossalai/inference/modeling/models/nopadding_llama.py function llama_causal_lm_forward (line 35) | def llama_causal_lm_forward( function llama_model_forward (line 68) | def llama_model_forward( function llama_decoder_layer_forward (line 168) | def llama_decoder_layer_forward( function llama_rmsnorm_forward (line 239) | def llama_rmsnorm_forward( class NopadLlamaMLP (line 259) | class NopadLlamaMLP(LlamaMLP, ParallelModule): method __init__ (line 260) | def __init__( method from_native_module (line 295) | def from_native_module( method _load_from_state_dict (line 323) | def _load_from_state_dict( method forward (line 373) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method extra_repr (line 384) | def extra_repr(self) -> str: class NopadLlamaAttention (line 388) | class NopadLlamaAttention(LlamaAttention, ParallelModule): method __init__ (line 389) | def __init__( method from_native_module (line 453) | def from_native_module( method forward (line 489) | def forward( method _load_from_state_dict (line 592) | def _load_from_state_dict( method extra_repr (line 676) | def extra_repr(self) -> str: FILE: colossalai/inference/modeling/models/pixart_alpha.py function pixart_alpha_forward (line 23) | def pixart_alpha_forward( FILE: colossalai/inference/modeling/models/stablediffusion3.py function sd3_forward (line 12) | def sd3_forward( FILE: colossalai/inference/modeling/policy/glide_llama.py class GlideLlamaModelPolicy (line 13) | class GlideLlamaModelPolicy(LlamaForCausalLMPolicy): method module_policy (line 14) | def module_policy(self): method postprocess (line 42) | def postprocess(self): FILE: colossalai/inference/modeling/policy/nopadding_baichuan.py class NoPaddingBaichuanModelInferPolicy (line 19) | class NoPaddingBaichuanModelInferPolicy(LlamaForCausalLMPolicy, RPC_PARAM): method __init__ (line 20) | def __init__(self) -> None: method module_policy (line 23) | def module_policy(self): method postprocess (line 102) | def postprocess(self): method to_rpc_param (line 106) | def to_rpc_param(self) -> str: method from_rpc_param (line 110) | def from_rpc_param() -> "NoPaddingBaichuanModelInferPolicy": FILE: colossalai/inference/modeling/policy/nopadding_llama.py class NoPaddingLlamaModelInferPolicy (line 18) | class NoPaddingLlamaModelInferPolicy(LlamaForCausalLMPolicy, RPC_PARAM): method __init__ (line 19) | def __init__(self) -> None: method module_policy (line 22) | def module_policy(self): method postprocess (line 106) | def postprocess(self): method to_rpc_param (line 110) | def to_rpc_param(self) -> str: method from_rpc_param (line 114) | def from_rpc_param() -> "NoPaddingLlamaModelInferPolicy": FILE: colossalai/inference/modeling/policy/pixart_alpha.py class PixArtAlphaInferPolicy (line 17) | class PixArtAlphaInferPolicy(Policy, RPC_PARAM): method __init__ (line 18) | def __init__(self) -> None: method module_policy (line 21) | def module_policy(self): method preprocess (line 65) | def preprocess(self) -> nn.Module: method postprocess (line 68) | def postprocess(self): method config_sanity_check (line 71) | def config_sanity_check(self): method to_rpc_param (line 74) | def to_rpc_param(self) -> str: method from_rpc_param (line 78) | def from_rpc_param() -> "PixArtAlphaInferPolicy": FILE: colossalai/inference/modeling/policy/stablediffusion3.py class StableDiffusion3InferPolicy (line 17) | class StableDiffusion3InferPolicy(Policy, RPC_PARAM): method __init__ (line 18) | def __init__(self) -> None: method module_policy (line 21) | def module_policy(self): method preprocess (line 64) | def preprocess(self) -> nn.Module: method postprocess (line 67) | def postprocess(self): method config_sanity_check (line 70) | def config_sanity_check(self): method to_rpc_param (line 73) | def to_rpc_param(self) -> str: method from_rpc_param (line 77) | def from_rpc_param() -> "StableDiffusion3InferPolicy": FILE: colossalai/inference/sampler.py function greedy_sample (line 9) | def greedy_sample( function multinomial_sample (line 19) | def multinomial_sample( function beam_search_sample (line 29) | def beam_search_sample( function search_tokens (line 64) | def search_tokens( FILE: colossalai/inference/server/api_server.py function health_check (line 42) | def health_check() -> JSONResponse: function engine_check (line 48) | def engine_check() -> bool: function generate (line 57) | async def generate(request: Request) -> Response: function create_completion (line 99) | async def create_completion(request: Request): function create_chat (line 115) | async def create_chat(request: Request): function get_generation_config (line 130) | def get_generation_config(request): function add_engine_config (line 138) | def add_engine_config(parser): function parse_args (line 167) | def parse_args(): FILE: colossalai/inference/server/chat_service.py class ChatServing (line 14) | class ChatServing: method __init__ (line 15) | def __init__( method create_chat (line 28) | async def create_chat(self, request: Request, generation_config): method chat_completion_stream_generator (line 52) | async def chat_completion_stream_generator(self, request, request_dict... method chat_completion_full_generator (line 88) | async def chat_completion_full_generator( method get_chat_request_role (line 121) | def get_chat_request_role(self, request: Request, request_dict: dict) ... method _load_chat_template (line 128) | def _load_chat_template(self, chat_template): FILE: colossalai/inference/server/completion_service.py class CompletionServing (line 8) | class CompletionServing: method __init__ (line 9) | def __init__(self, engine: AsyncInferenceEngine, served_model: str): method create_completion (line 18) | async def create_completion(self, request, generation_config): FILE: colossalai/inference/server/utils.py class NumericIDGenerator (line 7) | class NumericIDGenerator: method __new__ (line 10) | def __new__(cls): method __call__ (line 16) | def __call__(self): class ChatMessage (line 24) | class ChatMessage(BaseModel): class DeltaMessage (line 29) | class DeltaMessage(BaseModel): class ChatCompletionResponseStreamChoice (line 34) | class ChatCompletionResponseStreamChoice(BaseModel): FILE: colossalai/inference/spec/drafter.py class Drafter (line 13) | class Drafter: method __init__ (line 22) | def __init__( method get_model (line 36) | def get_model(self) -> nn.Module: method trim_kv_cache (line 40) | def trim_kv_cache( method speculate (line 65) | def speculate( FILE: colossalai/inference/spec/struct.py class DrafterOutput (line 8) | class DrafterOutput: method __post_init__ (line 25) | def __post_init__(self): class GlideInput (line 33) | class GlideInput: method glimpse_ready (line 52) | def glimpse_ready(self): FILE: colossalai/inference/struct.py class RequestStatus (line 15) | class RequestStatus(enum.Enum): method is_finished (line 34) | def is_finished(status: "RequestStatus") -> bool: method is_running (line 42) | def is_running(status: "RequestStatus") -> bool: method is_waiting (line 46) | def is_waiting(status: "RequestStatus") -> bool: class DiffusionSequence (line 51) | class DiffusionSequence: class Sequence (line 62) | class Sequence: method __post_init__ (line 91) | def __post_init__(self): method sentence_len (line 96) | def sentence_len(self) -> int: method input_len (line 103) | def input_len(self) -> int: method output_len (line 110) | def output_len(self) -> int: method check_finish (line 116) | def check_finish(self) -> bool: method revoke_finished_status (line 135) | def revoke_finished_status(self) -> None: method __hash__ (line 143) | def __hash__(self): method mark_running (line 146) | def mark_running(self) -> None: method mark_finished (line 155) | def mark_finished(self) -> None: method mark_aborted (line 161) | def mark_aborted(self) -> None: method recycle (line 167) | def recycle(self) -> None: method __repr__ (line 177) | def __repr__(self) -> str: function _pad_to_max (line 190) | def _pad_to_max(x: List[int], max_len: int, pad: int) -> List[int]: FILE: colossalai/inference/utils.py function init_to_get_rotary (line 22) | def init_to_get_rotary(self, base=10000, use_elem=False): function has_index_file (line 67) | def has_index_file(checkpoint_path: str) -> Tuple[bool, Optional[Path]]: function get_model_size (line 103) | def get_model_size(model: nn.Module): function find_available_ports (line 116) | def find_available_ports(num: int): function get_alibi_slopes (line 125) | def get_alibi_slopes(num_heads: int, device: torch.device) -> torch.Tensor: function can_use_flash_attn2 (line 150) | def can_use_flash_attn2(dtype: torch.dtype) -> bool: class ModelType (line 166) | class ModelType(Enum): function get_model_type (line 172) | def get_model_type(model_or_path: Union[nn.Module, str, DiffusionPipelin... FILE: colossalai/initialize.py function launch (line 20) | def launch( function launch_from_slurm (line 78) | def launch_from_slurm( function launch_from_openmpi (line 115) | def launch_from_openmpi( function launch_from_torch (line 154) | def launch_from_torch(backend: str = "nccl", seed: int = 1024, verbose: ... FILE: colossalai/interface/model.py function extract_lora_layers (line 9) | def extract_lora_layers(model: PeftModel, names: Set[str], adapter_name:... class PeftUnwrapMixin (line 49) | class PeftUnwrapMixin: method __init__ (line 50) | def __init__(self, peft_model: PeftModel): method named_parameters (line 61) | def named_parameters(self): method named_buffers (line 67) | def named_buffers(self): method _modules (line 71) | def _modules(self): method _non_persistent_buffers_set (line 75) | def _non_persistent_buffers_set(self): method patch_state_dict (line 78) | def patch_state_dict(self, state_dict: Dict[str, torch.Tensor]): method state_dict (line 86) | def state_dict(self): method load_state_dict (line 94) | def load_state_dict(self, state_dict, strict: bool = True, assign: boo... method __hash__ (line 98) | def __hash__(self): class ModelWrapper (line 102) | class ModelWrapper(nn.Module): method __init__ (line 110) | def __init__(self, module: nn.Module) -> None: method unwrap (line 114) | def unwrap(self, unwrap_peft: bool = True): method forward (line 126) | def forward(self, *args, **kwargs): class AMPModelMixin (line 130) | class AMPModelMixin: method update_master_params (line 133) | def update_master_params(self): FILE: colossalai/interface/optimizer.py class OptimizerWrapper (line 10) | class OptimizerWrapper: method __init__ (line 18) | def __init__(self, optim: Optimizer): method parameters (line 22) | def parameters(self): method param_groups (line 30) | def param_groups(self): method defaults (line 34) | def defaults(self): method add_param_group (line 37) | def add_param_group(self, *args, **kwargs): method step (line 40) | def step(self, *args, **kwargs): method zero_grad (line 46) | def zero_grad(self, *args, **kwargs): method backward (line 52) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw... method backward_by_grad (line 58) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso... method state_dict (line 78) | def state_dict(self): method load_state_dict (line 84) | def load_state_dict(self, *args, **kwargs): method clip_grad_by_value (line 90) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None: method clip_grad_by_norm (line 103) | def clip_grad_by_norm( method scale_loss (line 126) | def scale_loss(self, loss: Tensor): method unscale_grad (line 139) | def unscale_grad(self): method unwrap (line 149) | def unwrap(self): method get_grad_norm (line 155) | def get_grad_norm(self, norm_type: Union[float, int] = 2.0, **kwargs) ... class DistributedOptim (line 168) | class DistributedOptim(Optimizer): method setup_distributed (line 169) | def setup_distributed( FILE: colossalai/interface/pretrained.py function get_pretrained_path (line 11) | def get_pretrained_path(model: Module) -> Optional[str]: function set_pretrained_path (line 15) | def set_pretrained_path(model: Module, path: str) -> None: FILE: colossalai/kernel/jit/bias_dropout_add.py function bias_dropout_add (line 4) | def bias_dropout_add(x, bias, residual, prob, training): function bias_dropout_add_fused_train (line 12) | def bias_dropout_add_fused_train( function bias_dropout_add_fused_inference (line 19) | def bias_dropout_add_fused_inference( FILE: colossalai/kernel/jit/bias_gelu.py function bias_gelu (line 14) | def bias_gelu(bias, y): function bias_gelu_back (line 23) | def bias_gelu_back(g, bias, y): class GeLUFunction (line 31) | class GeLUFunction(torch.autograd.Function): method forward (line 34) | def forward(ctx, input, bias): method backward (line 39) | def backward(ctx, grad_output): FILE: colossalai/kernel/jit/option.py function set_jit_fusion_options (line 11) | def set_jit_fusion_options(): function warmup_jit_fusion (line 39) | def warmup_jit_fusion( FILE: colossalai/kernel/kernel_loader.py class KernelLoader (line 31) | class KernelLoader: method register_extension (line 43) | def register_extension(cls, extension: _Extension): method load (line 53) | def load(self, ext_name: str = None): class CPUAdamLoader (line 86) | class CPUAdamLoader(KernelLoader): class LayerNormLoader (line 90) | class LayerNormLoader(KernelLoader): class MoeLoader (line 94) | class MoeLoader(KernelLoader): class FusedOptimizerLoader (line 98) | class FusedOptimizerLoader(KernelLoader): class InferenceOpsLoader (line 102) | class InferenceOpsLoader(KernelLoader): class ScaledMaskedSoftmaxLoader (line 106) | class ScaledMaskedSoftmaxLoader(KernelLoader): class ScaledUpperTriangleMaskedSoftmaxLoader (line 110) | class ScaledUpperTriangleMaskedSoftmaxLoader(KernelLoader): class FlashAttentionLoader (line 114) | class FlashAttentionLoader(KernelLoader): class FlashAttentionDaoLoader (line 122) | class FlashAttentionDaoLoader(KernelLoader): class FlashAttentionWithCustomMaskLoader (line 126) | class FlashAttentionWithCustomMaskLoader(KernelLoader): class FlashAttentionForFloatAndCustomMaskLoader (line 130) | class FlashAttentionForFloatAndCustomMaskLoader(KernelLoader): FILE: colossalai/kernel/triton/context_attn_unpad.py function _fwd_context_paged_attention_kernel (line 16) | def _fwd_context_paged_attention_kernel( function _fwd_context_paged_attention_kernel_v2 (line 193) | def _fwd_context_paged_attention_kernel_v2( function _alibi_fwd_context_paged_attention_kernel (line 368) | def _alibi_fwd_context_paged_attention_kernel( function context_attention_unpadded (line 552) | def context_attention_unpadded( FILE: colossalai/kernel/triton/flash_decoding.py function _flash_decoding_fwd_kernel (line 11) | def _flash_decoding_fwd_kernel( function _alibi_flash_decoding_fwd_kernel (line 135) | def _alibi_flash_decoding_fwd_kernel( function _flash_decoding_fwd_reduce_kernel (line 258) | def _flash_decoding_fwd_reduce_kernel( function flash_decoding_attention (line 318) | def flash_decoding_attention( FILE: colossalai/kernel/triton/fused_rotary_embedding.py function fused_rotary_emb (line 7) | def fused_rotary_emb( function fused_rotary_embedding (line 120) | def fused_rotary_embedding( FILE: colossalai/kernel/triton/kvcache_copy.py function _copy_to_kcache_seqlen_n_kernel (line 11) | def _copy_to_kcache_seqlen_n_kernel( function _copy_to_kvcache_seqlen1_kernel (line 65) | def _copy_to_kvcache_seqlen1_kernel( function copy_k_to_blocked_cache (line 130) | def copy_k_to_blocked_cache( function copy_kv_to_blocked_cache (line 208) | def copy_kv_to_blocked_cache( FILE: colossalai/kernel/triton/llama_act_combine_kernel.py function _llama_act_combine_forward (line 25) | def _llama_act_combine_forward( function _llama_act_combine_backward (line 54) | def _llama_act_combine_backward( class LlamaActCombine (line 99) | class LlamaActCombine(torch.autograd.Function): method forward (line 112) | def forward(ctx: Any, x_gate: torch.Tensor, x_up: torch.Tensor, activa... method backward (line 158) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, Tensor,... FILE: colossalai/kernel/triton/no_pad_rotary_embedding.py function rotary_embedding_kernel (line 26) | def rotary_embedding_kernel( function fused_rotary_embedding_kernel (line 127) | def fused_rotary_embedding_kernel( function fused_rotary_embedding_kernel_v2 (line 273) | def fused_rotary_embedding_kernel_v2( function decoding_fused_rotary_embedding_kernel (line 379) | def decoding_fused_rotary_embedding_kernel( function rotary_embedding (line 480) | def rotary_embedding( function decoding_fused_rotary_embedding (line 575) | def decoding_fused_rotary_embedding( FILE: colossalai/kernel/triton/qkv_matmul_kernel.py function qkv_gemm_4d_kernel (line 17) | def qkv_gemm_4d_kernel( FILE: colossalai/kernel/triton/rms_layernorm.py function _rmsnorm_kernel (line 15) | def _rmsnorm_kernel( function _rmsnorm_with_residual_kernel (line 51) | def _rmsnorm_with_residual_kernel( function rms_layernorm (line 93) | def rms_layernorm(x, weight, eps, norm_output=None, residual=None): FILE: colossalai/kernel/triton/rotary_cache_copy.py function prefill_cache_kernel (line 7) | def prefill_cache_kernel( function decoding_cache_kernel (line 46) | def decoding_cache_kernel( function get_xine_cache (line 80) | def get_xine_cache(lengths: torch.Tensor, cos_cache: torch.Tensor, sin_c... FILE: colossalai/kernel/triton/softmax.py function softmax_kernel (line 19) | def softmax_kernel(output_ptr, input_ptr, row_stride, n_cols, mask_ptr, ... function softmax (line 50) | def softmax(input: torch.Tensor, mask: torch.Tensor = None, dim=-1) -> t... FILE: colossalai/lazy/construction.py class ConstructorManager (line 48) | class ConstructorManager: method apply (line 54) | def apply(overwrites: Dict[Callable, Callable]): method undo (line 60) | def undo(): method redo (line 67) | def redo(): method disable (line 75) | def disable(): method clear (line 84) | def clear(): FILE: colossalai/lazy/lazy_init.py class _MyTensor (line 79) | class _MyTensor(Tensor): method __new__ (line 86) | def __new__(cls, func, *args, concrete_data=None, **kwargs) -> "_MyTen... method __torch_function__ (line 97) | def __torch_function__(cls, func, types, args=(), kwargs=None): function _data_tolist (line 102) | def _data_tolist(tensor: torch.Tensor) -> list: function _convert_cls (line 107) | def _convert_cls(tensor: "LazyTensor", target: torch.Tensor, requires_gr... class LazyTensor (line 134) | class LazyTensor(torch.Tensor): method __new__ (line 175) | def __new__(cls, func, *args, meta_data=None, concrete_data=None, **kw... method __init__ (line 195) | def __init__(self, func, *args, meta_data=None, concrete_data=None, **... method device (line 204) | def device(self) -> torch.device: method __repr__ (line 207) | def __repr__(self): method materialize (line 210) | def materialize(self) -> torch.Tensor: method clean (line 221) | def clean(self) -> None: method _replace_with_materialized (line 229) | def _replace_with_materialized(x): method _materialize_data (line 234) | def _materialize_data(self) -> torch.Tensor: method _rerun_ops (line 249) | def _rerun_ops(self, target=None) -> torch.Tensor: method __torch_function__ (line 283) | def __torch_function__(cls, func, types, args=(), kwargs=None): method to (line 361) | def to(self, *args, **kwargs) -> torch.Tensor: method cpu (line 384) | def cpu(self, memory_format: torch.memory_format = torch.preserve_form... method cuda (line 387) | def cuda(self, device=None, non_blocking=False, memory_format: torch.m... method clone (line 391) | def clone(self) -> "LazyTensor": method detach (line 400) | def detach(self) -> Tensor: method __deepcopy__ (line 403) | def __deepcopy__(self, memo): method data (line 431) | def data(self): method data (line 435) | def data(self, other: "LazyTensor"): method tolist (line 460) | def tolist(self) -> list: method __hash__ (line 466) | def __hash__(self): method __rpow__ (line 469) | def __rpow__(self, other): class LazyInitContext (line 474) | class LazyInitContext: method __init__ (line 486) | def __init__( method __enter__ (line 496) | def __enter__(self): method __exit__ (line 584) | def __exit__(self, exc_type, exc_val, exc_tb): method materialize (line 591) | def materialize(module: nn.Module, verbose: bool = False) -> nn.Module: function _apply_to_lazy_module (line 605) | def _apply_to_lazy_module( function _is_int_tuple (line 654) | def _is_int_tuple(args) -> bool: function _copy_tensor (line 663) | def _copy_tensor(tensor: Tensor, requires_grad: bool) -> Tensor: FILE: colossalai/lazy/pretrained.py class PretrainedManager (line 11) | class PretrainedManager: method inject (line 15) | def inject() -> None: method recover (line 25) | def recover() -> None: function new_from_pretrained (line 36) | def new_from_pretrained( FILE: colossalai/legacy/amp/__init__.py function convert_to_amp (line 18) | def convert_to_amp(model: nn.Module, optimizer: Optimizer, criterion: _L... FILE: colossalai/legacy/amp/amp_type.py class AMP_TYPE (line 7) | class AMP_TYPE(Enum): FILE: colossalai/legacy/amp/apex_amp/__init__.py function convert_to_apex_amp (line 7) | def convert_to_apex_amp(model: nn.Module, optimizer: Optimizer, amp_conf... FILE: colossalai/legacy/amp/apex_amp/apex_amp.py class ApexAMPOptimizer (line 17) | class ApexAMPOptimizer(OptimizerWrapper): method backward (line 22) | def backward(self, loss: Tensor): method clip_grad_norm (line 31) | def clip_grad_norm(self, model: nn.Module, max_norm: float): FILE: colossalai/legacy/amp/naive_amp/__init__.py function convert_to_naive_amp (line 13) | def convert_to_naive_amp(model: nn.Module, optimizer: Optimizer, amp_con... FILE: colossalai/legacy/amp/naive_amp/_fp16_optimizer.py function load_fused_optim (line 27) | def load_fused_optim(): function _multi_tensor_copy_this_to_that (line 34) | def _multi_tensor_copy_this_to_that(this, that, overflow_buf=None): class FP16Optimizer (line 54) | class FP16Optimizer(Optimizer): method __init__ (line 66) | def __init__( method max_norm (line 171) | def max_norm(self): method grad_scaler (line 176) | def grad_scaler(self): method loss_scale (line 186) | def loss_scale(self): method optimizer (line 195) | def optimizer(self): method defaults (line 204) | def defaults(self): method _check_overflow (line 212) | def _check_overflow(self): method zero_grad (line 233) | def zero_grad(self, set_to_none=True): method _get_fp32_param_groups_to_update (line 244) | def _get_fp32_param_groups_to_update(self): method _unscale_grads (line 247) | def _unscale_grads(self): method _assign_grad_to_fp32_master_param (line 253) | def _assign_grad_to_fp32_master_param(self): method _update_fp16_param_from_fp32_param (line 262) | def _update_fp16_param_from_fp32_param(self): method step (line 273) | def step(self): method backward (line 302) | def backward(self, loss): method state_dict (line 312) | def state_dict(self): method load_state_dict (line 322) | def load_state_dict(self, state_dict): method clip_grad_norm (line 344) | def clip_grad_norm(self, clip_grad): method _get_state (line 358) | def _get_state(self): method _set_state (line 361) | def _set_state(self, value): method _get_param_groups (line 369) | def _get_param_groups(self): method _set_param_groups (line 372) | def _set_param_groups(self, value): FILE: colossalai/legacy/amp/naive_amp/_utils.py function has_inf_or_nan (line 6) | def has_inf_or_nan(tensor): function zero_gard_by_list (line 35) | def zero_gard_by_list(tensor_list: List[Tensor], set_to_none: bool = Tru... FILE: colossalai/legacy/amp/naive_amp/naive_amp.py class NaiveAMPOptimizer (line 21) | class NaiveAMPOptimizer(OptimizerWrapper): method __init__ (line 35) | def __init__(self, optim: Optimizer, *args, **kwargs): method backward (line 39) | def backward(self, loss: Tensor): method step (line 42) | def step(self): method clip_grad_norm (line 45) | def clip_grad_norm(self, model: nn.Module, max_norm: float): class NaiveAMPModel (line 55) | class NaiveAMPModel(nn.Module): method __init__ (line 71) | def __init__( method sync_buffer (line 93) | def sync_buffer(self): method sync_buffer (line 97) | def sync_buffer(self, state: bool): method _convert_to_fp16 (line 100) | def _convert_to_fp16(self, input_: Any): method _convert_to_fp32 (line 105) | def _convert_to_fp32(self, input_: Any): method _reduce_module_buffer (line 110) | def _reduce_module_buffer(self): method eval (line 132) | def eval(self): method forward (line 139) | def forward(self, *args, **kwargs): FILE: colossalai/legacy/amp/torch_amp/__init__.py function convert_to_torch_amp (line 12) | def convert_to_torch_amp( FILE: colossalai/legacy/amp/torch_amp/_grad_scaler.py class _MultiDeviceReplicator (line 20) | class _MultiDeviceReplicator(object): method __init__ (line 25) | def __init__(self, master_tensor: torch.Tensor) -> None: method get (line 30) | def get(self, device) -> torch.Tensor: class OptState (line 43) | class OptState(Enum): function _refresh_per_optimizer_state (line 49) | def _refresh_per_optimizer_state(): class GradScaler (line 53) | class GradScaler(object): method __init__ (line 121) | def __init__(self, init_scale=2.0**16, growth_factor=2.0, backoff_fact... method _check_scale_growth_tracker (line 151) | def _check_scale_growth_tracker(self, funcname) -> Tuple[torch.Tensor,... method _lazy_init_scale_growth_tracker (line 157) | def _lazy_init_scale_growth_tracker(self, dev): method scale (line 162) | def scale(self, outputs): method _unscale_grads_ (line 207) | def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16): method unscale_ (line 253) | def unscale_(self, optimizer): method _maybe_opt_step (line 305) | def _maybe_opt_step(self, optimizer, optimizer_state, *args, **kwargs): method step (line 311) | def step(self, optimizer, *args, **kwargs): method update (line 367) | def update(self, new_scale=None): method _get_scale_async (line 441) | def _get_scale_async(self): method get_scale (line 444) | def get_scale(self): method get_growth_factor (line 456) | def get_growth_factor(self): method set_growth_factor (line 462) | def set_growth_factor(self, new_factor): method get_backoff_factor (line 469) | def get_backoff_factor(self): method set_backoff_factor (line 475) | def set_backoff_factor(self, new_factor): method get_growth_interval (line 482) | def get_growth_interval(self): method set_growth_interval (line 488) | def set_growth_interval(self, new_interval): method _get_growth_tracker (line 495) | def _get_growth_tracker(self): method is_enabled (line 501) | def is_enabled(self): method state_dict (line 507) | def state_dict(self): method load_state_dict (line 535) | def load_state_dict(self, state_dict): method __getstate__ (line 561) | def __getstate__(self): method __setstate__ (line 577) | def __setstate__(self, state): method _check_inf_per_device (line 580) | def _check_inf_per_device(self, optimizer): method _found_inf_per_device (line 592) | def _found_inf_per_device(self, optimizer): FILE: colossalai/legacy/amp/torch_amp/torch_amp.py class TorchAMPOptimizer (line 18) | class TorchAMPOptimizer(OptimizerWrapper): method __init__ (line 34) | def __init__(self, optim: Optimizer, *args, **kwargs): method backward (line 38) | def backward(self, loss: Tensor): method step (line 46) | def step(self): method clip_grad_norm (line 51) | def clip_grad_norm(self, model: nn.Module, max_norm: float): class TorchAMPModel (line 63) | class TorchAMPModel(nn.Module): method __init__ (line 71) | def __init__(self, model: nn.Module) -> None: method forward (line 76) | def forward(self, *args, **kwargs): class TorchAMPLoss (line 83) | class TorchAMPLoss(nn.Module): method __init__ (line 90) | def __init__(self, loss: _Loss): method forward (line 95) | def forward(self, *args, **kwargs): FILE: colossalai/legacy/builder/builder.py function build_from_config (line 9) | def build_from_config(module, config: dict): function build_from_registry (line 26) | def build_from_registry(config, registry: Registry): function build_gradient_handler (line 62) | def build_gradient_handler(config, model, optimizer): FILE: colossalai/legacy/communication/collective.py function all_gather (line 18) | def all_gather(tensor: Tensor, dim: int, parallel_mode: ParallelMode, as... function reduce_scatter (line 53) | def reduce_scatter( function all_reduce (line 94) | def all_reduce( function broadcast (line 130) | def broadcast(tensor: Tensor, src: int, parallel_mode: ParallelMode, asy... function reduce (line 162) | def reduce(tensor: Tensor, dst: int, parallel_mode: ParallelMode, op: Re... function scatter_object_list (line 194) | def scatter_object_list(scatter_object_output_list, scatter_object_input... FILE: colossalai/legacy/communication/p2p.py function _get_tensor_shape (line 20) | def _get_tensor_shape(tensor_shape: TensorShape, chunk_tensor: bool = Fa... function create_recv_buffer_with_shapes (line 43) | def create_recv_buffer_with_shapes(recv_shapes, dtype, scatter_gather_te... function process_object_to_send (line 60) | def process_object_to_send(object_send, scatter_gather_tensors): function filling_ops_queue (line 79) | def filling_ops_queue(obj, comm_op, comm_rank, ops_queue): function _communicate (line 89) | def _communicate( function recv_forward (line 197) | def recv_forward( function recv_backward (line 222) | def recv_backward( function send_forward (line 247) | def send_forward(output_tensor, next_rank=None, scatter_gather_tensors=F... function send_backward (line 258) | def send_backward(input_tensor_grad, prev_rank=None, scatter_gather_tens... function send_forward_recv_backward (line 271) | def send_forward_recv_backward( function send_backward_recv_forward (line 299) | def send_backward_recv_forward( function send_forward_recv_forward (line 332) | def send_forward_recv_forward( function send_backward_recv_backward (line 364) | def send_backward_recv_backward( function send_forward_backward_recv_forward_backward (line 396) | def send_forward_backward_recv_forward_backward( FILE: colossalai/legacy/communication/p2p_v2.py function init_process_group (line 21) | def init_process_group(): function _acquire_pair_group_handle (line 35) | def _acquire_pair_group_handle(first_rank: int, second_rank: int) -> Pro... function _cuda_safe_tensor_to_object (line 53) | def _cuda_safe_tensor_to_object(tensor: torch.Tensor, tensor_size: torch... function _broadcast_object_list (line 78) | def _broadcast_object_list(object_list: List[Any], src: int, dst: int, d... function _send_object (line 159) | def _send_object(object: Any, dst: int) -> None: function _recv_object (line 182) | def _recv_object(src: int) -> Any: function recv_forward (line 207) | def recv_forward(prev_rank: int = None) -> Any: function recv_backward (line 227) | def recv_backward(next_rank: int = None) -> Any: function send_forward (line 247) | def send_forward(output_object: Any, next_rank: int = None) -> None: function send_backward (line 260) | def send_backward(input_object: Any, prev_rank: int = None) -> None: FILE: colossalai/legacy/communication/ring.py function ring_forward (line 11) | def ring_forward(tensor_send_next: torch.Tensor, parallel_mode: Parallel... FILE: colossalai/legacy/communication/utils.py function send_meta_helper (line 13) | def send_meta_helper(obj, next_rank, tensor_kwargs): function send_obj_meta (line 20) | def send_obj_meta(obj, need_meta=True, next_rank=None) -> bool: function recv_meta_helper (line 52) | def recv_meta_helper(prev_rank, tensor_kwargs): function recv_obj_meta (line 60) | def recv_obj_meta(obj_shape, prev_rank=None) -> torch.Size: function split_tensor_into_1d_equal_chunks (line 92) | def split_tensor_into_1d_equal_chunks(tensor: torch.Tensor, new_buffer=F... function gather_split_1d_tensor (line 113) | def gather_split_1d_tensor(tensor: torch.Tensor) -> torch.Tensor: FILE: colossalai/legacy/context/parallel_context.py class ParallelContext (line 24) | class ParallelContext(metaclass=SingletonMeta): method __init__ (line 34) | def __init__(self): method config (line 60) | def config(self): method verbose (line 64) | def verbose(self): method verbose (line 68) | def verbose(self, verbose_: bool): method logger (line 72) | def logger(self): method load_config (line 77) | def load_config(self, config: Union[dict, str]): method detect_num_processes_on_current_node (line 94) | def detect_num_processes_on_current_node(self): method _check_parallel_mode (line 102) | def _check_parallel_mode(parallel_mode: ParallelMode): method get_global_rank (line 107) | def get_global_rank(self): method add_global_rank (line 115) | def add_global_rank(self, parallel_mode: ParallelMode, rank: int): method get_local_rank (line 129) | def get_local_rank(self, parallel_mode: ParallelMode): method _add_local_rank (line 145) | def _add_local_rank(self, parallel_mode: ParallelMode, rank: int): method get_next_global_rank (line 159) | def get_next_global_rank(self, parallel_mode: ParallelMode): method get_prev_global_rank (line 181) | def get_prev_global_rank(self, parallel_mode: ParallelMode): method is_first_rank (line 203) | def is_first_rank(self, parallel_mode: ParallelMode): method is_last_rank (line 221) | def is_last_rank(self, parallel_mode: ParallelMode): method is_pipeline_first_stage (line 240) | def is_pipeline_first_stage(self, ignore_virtual=False): method is_pipeline_last_stage (line 246) | def is_pipeline_last_stage(self, ignore_virtual=False): method get_world_size (line 255) | def get_world_size(self, parallel_mode: ParallelMode): method _add_world_size (line 271) | def _add_world_size(self, parallel_mode: ParallelMode, world_size: int): method get_group (line 285) | def get_group(self, parallel_mode: ParallelMode): method _add_group (line 301) | def _add_group(self, parallel_mode: ParallelMode, group: dist.ProcessG... method get_cpu_group (line 315) | def get_cpu_group(self, parallel_mode: ParallelMode): method _add_cpu_group (line 328) | def _add_cpu_group(self, parallel_mode: ParallelMode, group: dist.Proc... method get_ranks_in_group (line 341) | def get_ranks_in_group(self, parallel_mode: ParallelMode): method _add_ranks_in_group (line 357) | def _add_ranks_in_group(self, parallel_mode: ParallelMode, ranks: list): method init_global_dist (line 371) | def init_global_dist(self, rank: int, world_size: int, backend: str, h... method _register_dist (line 391) | def _register_dist(self, local_rank, world_size, process_group, cpu_gr... method check_sanity (line 398) | def check_sanity(self): method _set_parallel_size_from_config (line 415) | def _set_parallel_size_from_config(self, config: dict, key: str, attr_... method init_parallel_groups (line 427) | def init_parallel_groups(self): method is_initialized (line 502) | def is_initialized(self, parallel_mode: ParallelMode): method destroy (line 514) | def destroy(self): method set_device (line 523) | def set_device(self, device_ordinal: int = None): method set_seed (line 538) | def set_seed(self, seed: int): method set_virtual_pipeline_parallel_size (line 589) | def set_virtual_pipeline_parallel_size(self, size): method set_virtual_pipeline_parallel_rank (line 592) | def set_virtual_pipeline_parallel_rank(self, rank): FILE: colossalai/legacy/context/parallel_mode.py class ParallelMode (line 8) | class ParallelMode(Enum): FILE: colossalai/legacy/context/process_group_initializer/initializer_1d.py class Initializer_1D (line 14) | class Initializer_1D(ProcessGroupInitializer): method __init__ (line 26) | def __init__(self, *args, **kwargs): method init_dist_group (line 30) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_2d.py function _check_summa_env_var (line 12) | def _check_summa_env_var(summa_dim): class Initializer_2D_Row (line 25) | class Initializer_2D_Row(ProcessGroupInitializer): method __init__ (line 39) | def __init__(self, num_group, summa_dim, *args, **kwargs): method init_dist_group (line 44) | def init_dist_group(self): class Initializer_2D_Col (line 73) | class Initializer_2D_Col(ProcessGroupInitializer): method __init__ (line 87) | def __init__(self, num_group, summa_dim, *args, **kwargs): method init_dist_group (line 92) | def init_dist_group(self): class Initializer_2D (line 123) | class Initializer_2D(ProcessGroupInitializer): method __init__ (line 136) | def __init__(self, *args, **kwargs): method init_dist_group (line 147) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_2p5d.py function _check_tesseract_env_var (line 16) | def _check_tesseract_env_var(tesseract_dim: int, tesseract_dep: int): class Initializer_2p5D_ROW (line 36) | class Initializer_2p5D_ROW(ProcessGroupInitializer): method __init__ (line 50) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args): method init_dist_group (line 59) | def init_dist_group(self): class Initializer_2p5D_Col (line 93) | class Initializer_2p5D_Col(ProcessGroupInitializer): method __init__ (line 107) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args): method init_dist_group (line 113) | def init_dist_group(self): class Initializer_2p5D_Dep (line 147) | class Initializer_2p5D_Dep(ProcessGroupInitializer): method __init__ (line 161) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args): method init_dist_group (line 167) | def init_dist_group(self): class Initializer_2p5D_XZ (line 202) | class Initializer_2p5D_XZ(ProcessGroupInitializer): method __init__ (line 216) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args): method init_dist_group (line 222) | def init_dist_group(self): class Initializer_2p5D (line 257) | class Initializer_2p5D(ProcessGroupInitializer): method __init__ (line 271) | def __init__( method init_dist_group (line 297) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_3d.py function _check_depth_env_var (line 15) | def _check_depth_env_var(depth): class Initializer_3D_Input (line 28) | class Initializer_3D_Input(ProcessGroupInitializer): method __init__ (line 42) | def __init__(self, num_group: int, depth: int, *args): method init_dist_group (line 47) | def init_dist_group(self): class Initializer_3D_Weight (line 79) | class Initializer_3D_Weight(ProcessGroupInitializer): method __init__ (line 93) | def __init__(self, num_group: int, depth: int, *args): method init_dist_group (line 98) | def init_dist_group(self): class Initializer_3D_Output (line 130) | class Initializer_3D_Output(ProcessGroupInitializer): method __init__ (line 144) | def __init__(self, num_group: int, depth: int, *args): method init_dist_group (line 149) | def init_dist_group(self): class Initializer_3D_InputxWeight (line 181) | class Initializer_3D_InputxWeight(ProcessGroupInitializer): method __init__ (line 195) | def __init__(self, num_group: int, depth: int, *args): method init_dist_group (line 200) | def init_dist_group(self): class Initializer_3D_OutputxWeight (line 235) | class Initializer_3D_OutputxWeight(ProcessGroupInitializer): method __init__ (line 249) | def __init__(self, num_group: int, depth: int, *args): method init_dist_group (line 254) | def init_dist_group(self): class Initializer_3D (line 290) | class Initializer_3D(ProcessGroupInitializer): method __init__ (line 302) | def __init__(self, *args): method init_dist_group (line 317) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_data.py class Initializer_Data (line 13) | class Initializer_Data(ProcessGroupInitializer): method __init__ (line 25) | def __init__(self, *args, **kwargs): method init_dist_group (line 29) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_model.py class Initializer_Model (line 13) | class Initializer_Model(ProcessGroupInitializer): method __init__ (line 26) | def __init__(self, *args, **kwargs): method init_dist_group (line 31) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_pipeline.py class Initializer_Pipeline (line 13) | class Initializer_Pipeline(ProcessGroupInitializer): method __init__ (line 25) | def __init__(self, *args, **kwargs): method init_dist_group (line 30) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_sequence.py class Initializer_Sequence_DP (line 13) | class Initializer_Sequence_DP(ProcessGroupInitializer): method __init__ (line 28) | def __init__(self, *args, **kwargs): method init_dist_group (line 33) | def init_dist_group(self): class Initializer_Sequence (line 62) | class Initializer_Sequence(ProcessGroupInitializer): method __init__ (line 74) | def __init__(self, *args, **kwargs): method init_dist_group (line 80) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/initializer_tensor.py class Initializer_Tensor (line 13) | class Initializer_Tensor(ProcessGroupInitializer): method __init__ (line 25) | def __init__(self, *args, **kwargs): method init_dist_group (line 29) | def init_dist_group(self): FILE: colossalai/legacy/context/process_group_initializer/process_group_initializer.py class ProcessGroupInitializer (line 9) | class ProcessGroupInitializer(ABC): method __init__ (line 21) | def __init__( method init_dist_group (line 39) | def init_dist_group(self): FILE: colossalai/legacy/context/random/_helper.py function get_seeds (line 16) | def get_seeds(): function get_states (line 25) | def get_states(copy=False): function get_current_mode (line 43) | def get_current_mode(): function add_seed (line 52) | def add_seed(parallel_mode: ParallelMode, seed: int, overwrite: bool = F... function set_mode (line 69) | def set_mode(parallel_mode: ParallelMode): function set_seed_states (line 82) | def set_seed_states(parallel_mode: ParallelMode, state: Tensor): function sync_states (line 95) | def sync_states(): function seed (line 102) | def seed(parallel_mode: ParallelMode): function with_seed (line 123) | def with_seed(func, parallel_mode: ParallelMode): function moe_set_seed (line 162) | def moe_set_seed(seed): function reset_seeds (line 172) | def reset_seeds(): FILE: colossalai/legacy/context/random/seed_manager.py class SeedManager (line 10) | class SeedManager: method __init__ (line 18) | def __init__(self): method current_mode (line 24) | def current_mode(self): method seeds (line 28) | def seeds(self): method seed_states (line 32) | def seed_states(self): method set_state (line 35) | def set_state(self, parallel_mode: ParallelMode, state: Tensor): method set_mode (line 48) | def set_mode(self, parallel_mode: ParallelMode): method add_seed (line 62) | def add_seed(self, parallel_mode: ParallelMode, seed: int, overwrite: ... method reset (line 86) | def reset(self): FILE: colossalai/legacy/engine/_base_engine.py class Engine (line 23) | class Engine: method __init__ (line 62) | def __init__( method ophooks (line 110) | def ophooks(self): method model (line 115) | def model(self): method optimizer (line 120) | def optimizer(self): method criterion (line 125) | def criterion(self): method schedule (line 130) | def schedule(self): method uses_pipeline (line 135) | def uses_pipeline(self): method add_hook (line 139) | def add_hook(self, ophook: Type[BaseOpHook]) -> None: method remove_hook (line 149) | def remove_hook(self, ophook: Type[BaseOpHook]) -> None: method zero_grad (line 154) | def zero_grad(self): method step (line 158) | def step(self): method backward (line 164) | def backward(self, loss: Tensor): method backward_by_grad (line 175) | def backward_by_grad(self, tensor, grad): method __call__ (line 187) | def __call__(self, *args, **kwargs): method _all_reduce_gradients (line 195) | def _all_reduce_gradients(self): method execute_schedule (line 200) | def execute_schedule(self, data_iter: Iterable, **kwargs): method train (line 210) | def train(self): method eval (line 215) | def eval(self): FILE: colossalai/legacy/engine/gradient_accumulation/__init__.py function accumulate_gradient (line 25) | def accumulate_gradient( FILE: colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py class GradAccumOptimizer (line 18) | class GradAccumOptimizer(OptimizerWrapper): method __init__ (line 29) | def __init__(self, optim: Optimizer, accumulate_size: int, model: nn.M... method zero_grad (line 38) | def zero_grad(self, *args, **kwargs) -> None: method step (line 50) | def step(self, *args, **kwargs) -> None: method clip_grad_norm (line 65) | def clip_grad_norm(self, model: nn.Module, max_norm: float) -> None: method backward (line 79) | def backward(self, loss: Tensor) -> None: method backward_by_grad (line 97) | def backward_by_grad(self, tensor: Tensor, grad: Tensor) -> None: class GradAccumDataloader (line 115) | class GradAccumDataloader: method __init__ (line 130) | def __init__(self, dataloader: Iterable, accumulate_size: int) -> None: method __getattr__ (line 135) | def __getattr__(self, __name: str) -> Any: method __len__ (line 138) | def __len__(self) -> int: method __iter__ (line 141) | def __iter__(self) -> Iterable: method __next__ (line 146) | def __next__(self) -> Union[Tensor, Tuple[Tensor]]: class GradAccumLrSchedulerByStep (line 164) | class GradAccumLrSchedulerByStep(_LRScheduler): method __init__ (line 174) | def __init__(self, lr_scheduler: _LRScheduler, accumulate_size: int) -... method compute_effective_steps_per_epoch (line 180) | def compute_effective_steps_per_epoch(dataloader: Iterable, accumulate... method __getattr__ (line 193) | def __getattr__(self, __name: str) -> Any: method step (line 196) | def step(self, *args, **kwargs) -> None: method get_lr (line 211) | def get_lr(self) -> Tensor: method get_last_lr (line 221) | def get_last_lr(self) -> Tensor: method print_lr (line 231) | def print_lr(self, *args, **kwargs) -> None: method state_dict (line 241) | def state_dict(self) -> dict: method load_state_dict (line 250) | def load_state_dict(self, state_dict: dict) -> None: class GradAccumGradientHandler (line 260) | class GradAccumGradientHandler: method __init__ (line 274) | def __init__(self, grad_handler: BaseGradientHandler, accumulate_size:... method handle_gradient (line 282) | def handle_gradient(self) -> None: FILE: colossalai/legacy/engine/gradient_handler/_base_gradient_handler.py class BaseGradientHandler (line 7) | class BaseGradientHandler(ABC): method __init__ (line 16) | def __init__(self, model, optimizer): method handle_gradient (line 21) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/_data_parallel_gradient_handler.py class DataParallelGradientHandler (line 10) | class DataParallelGradientHandler(BaseGradientHandler): method handle_gradient (line 22) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/_moe_gradient_handler.py class MoeGradientHandler (line 12) | class MoeGradientHandler(BaseGradientHandler): method __init__ (line 24) | def __init__(self, model, optimizer=None): method handle_gradient (line 27) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/_pipeline_parallel_gradient_handler.py class PipelineSharedModuleGradientHandler (line 16) | class PipelineSharedModuleGradientHandler(BaseGradientHandler): method handle_gradient (line 28) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/_sequence_parallel_gradient_handler.py class SequenceParallelGradientHandler (line 10) | class SequenceParallelGradientHandler(BaseGradientHandler): method handle_gradient (line 22) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/_zero_gradient_handler.py class ZeROGradientHandler (line 7) | class ZeROGradientHandler(BaseGradientHandler): method handle_gradient (line 18) | def handle_gradient(self): FILE: colossalai/legacy/engine/gradient_handler/utils.py function bucket_allreduce (line 8) | def bucket_allreduce(param_list: Iterable[nn.Parameter], group=None): FILE: colossalai/legacy/engine/schedule/_base_schedule.py class BaseSchedule (line 13) | class BaseSchedule(ABC): method __init__ (line 24) | def __init__(self, data_process_func: Callable = None): method _move_tensor (line 29) | def _move_tensor(element): method _move_to_device (line 35) | def _move_to_device(self, data): method _get_batch_size (line 54) | def _get_batch_size(self, data): method load_batch (line 64) | def load_batch(self, data_iter, to_gpu=True): method pre_processing (line 84) | def pre_processing(self, engine): method forward_backward_step (line 88) | def forward_backward_step( method _call_engine (line 107) | def _call_engine(engine, inputs): method _call_engine_criterion (line 120) | def _call_engine_criterion(engine, outputs, labels): FILE: colossalai/legacy/engine/schedule/_non_pipeline_schedule.py class NonPipelineSchedule (line 14) | class NonPipelineSchedule(BaseSchedule): method __init__ (line 34) | def __init__(self, data_process_func: Callable = None): method forward_backward_step (line 48) | def forward_backward_step( FILE: colossalai/legacy/engine/schedule/_pipeline_schedule.py function get_tensor_shape (line 20) | def get_tensor_shape(): function pack_return_tensors (line 52) | def pack_return_tensors(return_tensors): class PipelineSchedule (line 71) | class PipelineSchedule(BaseSchedule): method __init__ (line 98) | def __init__( method load_batch (line 137) | def load_batch(self, data_iter): method _get_data_slice (line 145) | def _get_data_slice(self, data, offset): method load_micro_batch (line 163) | def load_micro_batch(self): method pre_processing (line 168) | def pre_processing(self, engine): method _call_engine (line 184) | def _call_engine(model, data): method _get_actual_forward_func (line 207) | def _get_actual_forward_func(self, module): method _get_data_label_for_current_step (line 216) | def _get_data_label_for_current_step(self, stage_output, micro_batch_d... method _forward_step (line 242) | def _forward_step(self, engine, input_obj, return_tensors, return_outp... method _backward_step (line 279) | def _backward_step(self, engine, input_obj, output_obj, output_obj_grad): method forward_backward_step (line 321) | def forward_backward_step(self, engine, data_iter, forward_only=False,... class InterleavedPipelineSchedule (line 457) | class InterleavedPipelineSchedule(PipelineSchedule): method __init__ (line 458) | def __init__( method pre_processing (line 495) | def pre_processing(self, engine): method load_batch (line 509) | def load_batch(self, data_iter): method load_micro_batch (line 514) | def load_micro_batch(self, model_chunk_id): method _forward_step (line 519) | def _forward_step( method forward_backward_step (line 560) | def forward_backward_step(self, engine, data_iter, forward_only=False,... FILE: colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py function pack_return_tensors (line 17) | def pack_return_tensors(return_tensors): class PipelineScheduleV2 (line 36) | class PipelineScheduleV2(PipelineSchedule): method forward_backward_step (line 62) | def forward_backward_step( FILE: colossalai/legacy/global_variables.py class TensorParallelEnv (line 4) | class TensorParallelEnv(object): method __new__ (line 7) | def __new__(cls, *args, **kwargs): method __init__ (line 12) | def __init__(self, *args, **kwargs): method load (line 15) | def load( method save (line 43) | def save(self): FILE: colossalai/legacy/inference/async_engine.py class RequestTracker (line 9) | class RequestTracker: method __init__ (line 14) | def __init__(self) -> None: method __contains__ (line 19) | def __contains__(self, item): method init_event (line 22) | def init_event(self): method add_request (line 25) | def add_request(self, request_id: str): method add_stop (line 31) | def add_stop(self): method process_request_output (line 38) | def process_request_output(self, request_output: RequestOutput) -> None: method wait_for_new_requests (line 42) | async def wait_for_new_requests(self): method __aiter__ (line 45) | def __aiter__(self): method __anext__ (line 48) | async def __anext__(self) -> RequestOutput: class Async_Engine (line 56) | class Async_Engine: method __init__ (line 64) | def __init__( method _step (line 75) | def _step(self): method abort_request (line 85) | def abort_request(self, request_id: str): method _has_requests_in_progress (line 88) | def _has_requests_in_progress(self): method run_loop_fwd (line 91) | async def run_loop_fwd(self): method is_running (line 100) | def is_running(self): method start_background_loop (line 103) | def start_background_loop(self): method add_request (line 112) | async def add_request(self, request_id: str, prompt: str, sampling_par... method generate (line 116) | async def generate(self, request_id: str, prompt: str, sampling_params... FILE: colossalai/legacy/inference/async_manager.py class Async_DynamicBatchManager (line 8) | class Async_DynamicBatchManager(DynamicBatchManager): method __init__ (line 9) | def __init__( method _step (line 47) | def _step(self): method _prefill_batch (line 88) | def _prefill_batch(self, batch): method _decode_batch (line 103) | def _decode_batch(self, batch: Batch): method _handle_finish_req (line 114) | def _handle_finish_req(self, batch: Batch, has_new_finished_req): method _output_process (line 124) | def _output_process(self, finished_reqs: List[Req]): function start_dynamic_batching (line 135) | def start_dynamic_batching(args, tp_engine, waiting_req_list): FILE: colossalai/legacy/inference/dynamic_batching/get_tokenizer.py function get_tokenizer (line 12) | def get_tokenizer( FILE: colossalai/legacy/inference/dynamic_batching/infer_batch.py class InferSamplingParams (line 14) | class InferSamplingParams: method __init__ (line 15) | def __init__( class InferBatch (line 37) | class InferBatch: method init_batch (line 60) | def init_batch( method free_self (line 131) | def free_self(self) -> None: method filter (line 148) | def filter(self, request_ids: List[int]) -> "InferBatch": method merge (line 229) | def merge(cls, batch1, batch2) -> "InferBatch": method __len__ (line 298) | def __len__(self): method get_post_sample_tensors (line 301) | def get_post_sample_tensors(self) -> Tuple[torch.Tensor, torch.Tensor,... FILE: colossalai/legacy/inference/dynamic_batching/io_struct.py class Req (line 8) | class Req: method __init__ (line 9) | def __init__(self, request_id, prompt_ids, sample_params: SamplingPara... method to_rpc_obj (line 21) | def to_rpc_obj(self): method stop_sequences_matched (line 29) | def stop_sequences_matched(self): method __repr__ (line 42) | def __repr__(self): class Batch (line 46) | class Batch: method __init__ (line 47) | def __init__(self, batch_id, reqs: List[Req]): method input_tokens (line 52) | def input_tokens(self): method calcu_max_tokens (line 58) | def calcu_max_tokens(self): method calcu_used_tokens (line 64) | def calcu_used_tokens(self): method mark_finished_req (line 70) | def mark_finished_req(self, eos_id, engine_max_output_len): method filter_finished (line 87) | def filter_finished(self) -> List[Req]: method is_clear (line 103) | def is_clear(self): method merge (line 106) | def merge(self, mini_batch): method __repr__ (line 112) | def __repr__(self): method __len__ (line 115) | def __len__(self): class BatchTokenIdOut (line 119) | class BatchTokenIdOut: method __init__ (line 120) | def __init__(self): class BatchStrOut (line 126) | class BatchStrOut: method __init__ (line 127) | def __init__(self): class AbortReq (line 133) | class AbortReq: method __init__ (line 134) | def __init__(self, req_id): class RequestOutput (line 138) | class RequestOutput: method __init__ (line 148) | def __init__( method __repr__ (line 160) | def __repr__(self) -> str: FILE: colossalai/legacy/inference/dynamic_batching/ray_dist_init.py function log_cuda_info (line 23) | def log_cuda_info(scope_name: str): class Worker (line 37) | class Worker: method __init__ (line 38) | def __init__( method setup (line 55) | def setup(self, world_size, rank, port): method add_input (line 87) | def add_input(self, request_id: str, prompt: str, sampling_params: Sam... method abort (line 90) | def abort(self, request_id: str): method step (line 93) | def step(self) -> List[RequestOutput]: method add_req (line 96) | def add_req(self, prompt_ids: List[int], sampling_params: SamplingPara... method is_running (line 99) | def is_running(self): class Driver (line 103) | class Driver: method __init__ (line 104) | def __init__(self, router_config: RooterArgsClass, engine_config: Engi... method add_input (line 138) | def add_input(self, request_id: str, prompt: str, sampling_params: Sam... method abort (line 141) | def abort(self, request_id: str): method step (line 144) | def step(self): method add_req (line 149) | def add_req(self, request_id: str, prompt_ids: List[int], sampling_par... method is_running (line 152) | def is_running(self): FILE: colossalai/legacy/inference/dynamic_batching/ray_init_config.py class EngineArgsClass (line 9) | class EngineArgsClass(BaseModel): class RooterArgsClass (line 19) | class RooterArgsClass(BaseModel): class RayInitConfig (line 30) | class RayInitConfig(BaseModel): method from_yaml_path (line 37) | def from_yaml_path(cls, path: str): FILE: colossalai/legacy/inference/dynamic_batching/req_queue.py class ReqQueue (line 11) | class ReqQueue: method __init__ (line 12) | def __init__(self, max_total_tokens, batch_max_tokens, running_max_req... method append (line 19) | def append(self, req): method _init_cache_list (line 23) | def _init_cache_list(self, current_batch: Batch): method _can_add_new_req (line 33) | def _can_add_new_req(self, req): method generate_new_batch (line 47) | def generate_new_batch(self, current_batch: Batch = None): method __len__ (line 72) | def __len__(self): FILE: colossalai/legacy/inference/dynamic_batching/sampling_params.py class SamplingParams (line 9) | class SamplingParams: method __init__ (line 10) | def __init__( method verify (line 42) | def verify(self): method stop_sentences_to_token_ids (line 57) | def stop_sentences_to_token_ids(self, tokenizer): method to_dict (line 73) | def to_dict(self): FILE: colossalai/legacy/inference/dynamic_batching/stats.py class Stats (line 6) | class Stats: method __init__ (line 7) | def __init__(self, log_status, log_stats_interval) -> None: method count_prompt_tokens (line 16) | def count_prompt_tokens(self, run_batch): method count_output_tokens (line 23) | def count_output_tokens(self, run_batch): method print_stats (line 30) | def print_stats(self): FILE: colossalai/legacy/inference/hybridengine/engine.py class CaiInferEngine (line 22) | class CaiInferEngine: method __init__ (line 60) | def __init__( method inference (line 125) | def inference(self, input_list): method _shardformer (line 145) | def _shardformer(self, model, model_policy, stage_manager, tp_group): method _init_manager (line 160) | def _init_manager(self, model, max_batch_size: int, max_input_len: int... FILE: colossalai/legacy/inference/hybridengine/modeling/_utils.py function copy_kv_to_mem_cache (line 12) | def copy_kv_to_mem_cache(layer_id, key_buffer, value_buffer, context_mem... function init_to_get_rotary (line 26) | def init_to_get_rotary(self, base=10000, use_elem=False): FILE: colossalai/legacy/inference/hybridengine/modeling/llama.py function rotate_half (line 39) | def rotate_half(x): function apply_rotary_pos_emb (line 46) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids): function llama_triton_context_attention (line 58) | def llama_triton_context_attention( function llama_triton_token_attention (line 98) | def llama_triton_token_attention(query_states, attn_output, infer_state,... class LlamaInferenceForwards (line 127) | class LlamaInferenceForwards: method llama_causal_lm_forward (line 134) | def llama_causal_lm_forward( method llama_model_forward (line 196) | def llama_model_forward( method llama_decoder_layer_forward (line 342) | def llama_decoder_layer_forward( method llama_flash_attn_kvcache_forward (line 385) | def llama_flash_attn_kvcache_forward( FILE: colossalai/legacy/inference/hybridengine/polices/llama.py function get_triton_rmsnorm_forward (line 31) | def get_triton_rmsnorm_forward(): class LlamaModelInferPolicy (line 42) | class LlamaModelInferPolicy(LlamaForCausalLMPolicy): method __init__ (line 43) | def __init__(self) -> None: method module_policy (line 46) | def module_policy(self): method postprocess (line 132) | def postprocess(self): method get_held_layers (line 136) | def get_held_layers(self) -> List[Module]: FILE: colossalai/legacy/inference/manager.py class DynamicBatchManager (line 15) | class DynamicBatchManager: method __init__ (line 16) | def __init__( method add_req (line 64) | def add_req(self, request_id: str, prompt_ids: List[int], sampling_par... method add_input (line 77) | def add_input(self, request_id, prompts, sampling_params): method abort (line 89) | def abort(self, request_id): method loop_for_fwd (line 101) | def loop_for_fwd(self): method _step (line 123) | def _step(self): method _init_batch (line 162) | def _init_batch(self, batch: Batch, dtype="fp16"): method _prefill_batch (line 184) | def _prefill_batch(self, batch): method _decode_batch (line 199) | def _decode_batch(self, batch: Batch): method _filter_batch (line 209) | def _filter_batch(self, batch: Batch): method _merge_batch (line 217) | def _merge_batch(self, batch1, batch2): method _remove_batch (line 229) | def _remove_batch(self, batch): method _handle_finish_req (line 237) | def _handle_finish_req(self, batch: Batch, has_new_finished_req): method _filter_running_batch (line 246) | def _filter_running_batch(self): method _add_token_id_to_req (line 250) | def _add_token_id_to_req(self, batch: Batch, req_ans): method _output_process (line 257) | def _output_process(self, finished_reqs: List[Req]): method clean_up (line 265) | def clean_up(self): method generate (line 269) | def generate(self, request_id, prompts, sampling_params): method is_running (line 276) | def is_running(self): function start_dynamic_batching (line 280) | def start_dynamic_batching(args, tp_engine, waiting_req_list): FILE: colossalai/legacy/inference/pipeline/benchmark/benchmark.py function data_gen (line 18) | def data_gen(batch_size: int = 4, seq_len: int = 512): function print_details_info (line 30) | def print_details_info(timestamps, model_config, args, whole_end2end): FILE: colossalai/legacy/inference/pipeline/microbatch_manager.py class Status (line 12) | class Status(Enum): class MicroBatchDescription (line 19) | class MicroBatchDescription: method __init__ (line 30) | def __init__( method update (line 44) | def update(self, *args, **kwargs): method state (line 48) | def state(self): method cur_length (line 63) | def cur_length(self): class HeadMicroBatchDescription (line 70) | class HeadMicroBatchDescription(MicroBatchDescription): method __init__ (line 82) | def __init__( method update (line 96) | def update(self, new_token: torch.Tensor = None): method _update_newtokens (line 102) | def _update_newtokens(self, new_token: torch.Tensor): method _update_attnmask (line 108) | def _update_attnmask(self): method cur_length (line 114) | def cur_length(self): class BodyMicroBatchDescription (line 125) | class BodyMicroBatchDescription(MicroBatchDescription): method __init__ (line 133) | def __init__( method cur_length (line 143) | def cur_length(self): class MicroBatchManager (line 151) | class MicroBatchManager: method __init__ (line 162) | def __init__( method add_description (line 181) | def add_description(self, inputs_dict: Dict[str, torch.Tensor]): method step (line 191) | def step(self, new_token: torch.Tensor = None): method export_new_tokens (line 206) | def export_new_tokens(self): method is_micro_batch_done (line 212) | def is_micro_batch_done(self): method clear (line 220) | def clear(self): method next (line 225) | def next(self): method _remove_description (line 228) | def _remove_description(self): method cur_description (line 232) | def cur_description(self) -> MicroBatchDescription: method cur_infer_state (line 236) | def cur_infer_state(self): method cur_state (line 242) | def cur_state(self): FILE: colossalai/legacy/inference/quant/gptq/cai_gptq/cai_quant_linear.py class CaiQuantLinear (line 29) | class CaiQuantLinear(nn.Module): method __init__ (line 30) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_... method pack (line 73) | def pack(self, linear, scales, zeros, g_idx=None): method init_q4 (line 145) | def init_q4(self): method forward (line 174) | def forward(self, x): function split_column_copy (line 202) | def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, spl... function split_row_copy (line 231) | def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1): class RowCaiQuantLinear (line 258) | class RowCaiQuantLinear(CaiQuantLinear, ParallelModule): method __init__ (line 259) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_... method from_native_module (line 266) | def from_native_module( method forward (line 303) | def forward(self, x): class ColCaiQuantLinear (line 312) | class ColCaiQuantLinear(CaiQuantLinear, ParallelModule): method __init__ (line 313) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_... method from_native_module (line 320) | def from_native_module( FILE: colossalai/legacy/inference/quant/gptq/cai_gptq/gptq_op.py class CaiGPTQLinearOp (line 6) | class CaiGPTQLinearOp(torch.nn.Module): method __init__ (line 7) | def __init__(self, gptq_group_size, gptq_quant_bits): method forward (line 14) | def forward( FILE: colossalai/legacy/inference/quant/smoothquant/models/base_model.py class BaseSmoothForCausalLM (line 30) | class BaseSmoothForCausalLM(nn.Module, PushToHubMixin): method __init__ (line 33) | def __init__(self, model: PreTrainedModel, quantized: bool = False): method quantized (line 44) | def quantized(self): method init_cache_manager (line 47) | def init_cache_manager(self, max_total_token_num=2048): method init_batch_state (line 56) | def init_batch_state(self, max_output_len=256, **kwargs): method quantize (line 97) | def quantize( method forward (line 104) | def forward(self, *args, **kwargs): method generate (line 107) | def generate(self, **kwargs): method prepare_inputs_for_generation (line 117) | def prepare_inputs_for_generation(self, *args, **kwargs): method collect_act_scales (line 121) | def collect_act_scales(self, model, tokenizer, dataset, device, num_sa... method collect_act_dict (line 126) | def collect_act_dict(self, model, tokenizer, dataset, act_dict, device... method get_act_scales (line 135) | def get_act_scales(self, model, tokenizer, dataset, num_samples=512, s... method smooth_ln_fcs (line 168) | def smooth_ln_fcs(self, ln, fcs, act_scales, alpha=0.5): method create_quantized_model (line 190) | def create_quantized_model(model): method save_quantized (line 194) | def save_quantized( method save_pretrained (line 255) | def save_pretrained( method from_pretrained (line 268) | def from_pretrained( method from_quantized (line 363) | def from_quantized( method __getattr__ (line 480) | def __getattr__(self, item): FILE: colossalai/legacy/inference/quant/smoothquant/models/linear.py class W8A8BFP32O32LinearSiLU (line 17) | class W8A8BFP32O32LinearSiLU(torch.nn.Module): method __init__ (line 18) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0): method to (line 39) | def to(self, *args, **kwargs): method forward (line 46) | def forward(self, x): method from_float (line 54) | def from_float(module: torch.nn.Linear, input_scale): class W8A8B8O8Linear (line 66) | class W8A8B8O8Linear(torch.nn.Module): method __init__ (line 68) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0): method to (line 90) | def to(self, *args, **kwargs): method forward (line 97) | def forward(self, x): method from_float (line 105) | def from_float(module: torch.nn.Linear, input_scale, output_scale): class W8A8BFP32OFP32Linear (line 122) | class W8A8BFP32OFP32Linear(torch.nn.Module): method __init__ (line 124) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0): method _apply (line 145) | def _apply(self, fn): method to (line 151) | def to(self, *args, **kwargs): method forward (line 159) | def forward(self, x): method from_float (line 167) | def from_float(module: torch.nn.Linear, input_scale): FILE: colossalai/legacy/inference/quant/smoothquant/models/llama.py class LLamaSmoothquantAttention (line 38) | class LLamaSmoothquantAttention(nn.Module): method __init__ (line 39) | def __init__( method _init_rope (line 74) | def _init_rope(self): method pack (line 82) | def pack( method _shape (line 112) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 116) | def forward( class LlamaLayerNormQ (line 235) | class LlamaLayerNormQ(torch.nn.Module): method __init__ (line 236) | def __init__(self, dim, eps=1e-5): method forward (line 242) | def forward(self, x): method from_float (line 248) | def from_float(module: torch.nn.LayerNorm, output_scale: float): class LlamaSmoothquantMLP (line 255) | class LlamaSmoothquantMLP(nn.Module): method __init__ (line 256) | def __init__(self, intermediate_size, hidden_size): method pack (line 264) | def pack( method forward (line 281) | def forward( class LlamaSmoothquantDecoderLayer (line 295) | class LlamaSmoothquantDecoderLayer(nn.Module): method __init__ (line 296) | def __init__(self, config: LlamaConfig): method pack (line 307) | def pack( method forward (line 348) | def forward( class LlamaApplyRotary (line 401) | class LlamaApplyRotary(nn.Module): method __init__ (line 402) | def __init__(self): method forward (line 405) | def forward(self, x, cos, sin, position_ids): function llama_decoder_layer_forward (line 417) | def llama_decoder_layer_forward( function init_to_get_rotary (line 512) | def init_to_get_rotary(config, base=10000, use_elem=False): function llama_model_forward (line 559) | def llama_model_forward( class SmoothLlamaForCausalLM (line 717) | class SmoothLlamaForCausalLM(BaseSmoothForCausalLM): method __init__ (line 720) | def __init__(self, model: PreTrainedModel, quantized: bool = False): method get_act_dict (line 724) | def get_act_dict( method smooth_fn (line 771) | def smooth_fn(self, scales, alpha=0.5): method create_quantized_model (line 780) | def create_quantized_model(model): method quantized (line 790) | def quantized( FILE: colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py class GenConfigArgs (line 22) | class GenConfigArgs(BaseModel): function log_cuda_info (line 32) | def log_cuda_info(scope_name: str): class Worker (line 46) | class Worker: method __init__ (line 47) | def __init__(self, model_path: str, tp_size: int, max_batch_size: int,... method setup (line 55) | def setup(self, world_size, rank, port): method generate (line 81) | def generate(self, text: Union[str, List[str]]) -> str: class Driver (line 106) | class Driver: method __init__ (line 107) | def __init__(self, config: GenConfigArgs): method batch_generate (line 138) | async def batch_generate(self, requests: List[str]): method __call__ (line 144) | async def __call__(self, request: starlette.requests.Request) -> Any: function app (line 148) | def app(args: GenConfigArgs) -> Application: FILE: colossalai/legacy/inference/serving/ray_serve/send_request.py function send_query (line 6) | def send_query(text): FILE: colossalai/legacy/inference/serving/ray_serve/send_requests.py function send_query (line 6) | def send_query(text): FILE: colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py class ColossalInferenceHandler (line 21) | class ColossalInferenceHandler(BaseHandler, ABC): method __init__ (line 26) | def __init__(self): method initialize (line 35) | def initialize(self, ctx): method preprocess (line 114) | def preprocess(self, requests): method inference (line 156) | def inference(self, input_batch): method postprocess (line 188) | def postprocess(self, inference_output): FILE: colossalai/legacy/inference/tensor_parallel/batch_infer_state.py class BatchInferState (line 12) | class BatchInferState: method total_token_num (line 39) | def total_token_num(self): method set_cache_manager (line 44) | def set_cache_manager(self, manager: MemoryManager): method init_block_loc (line 49) | def init_block_loc( method init_from_batch (line 63) | def init_from_batch( FILE: colossalai/legacy/inference/tensor_parallel/engine.py class TPInferEngine (line 31) | class TPInferEngine: method __init__ (line 52) | def __init__( method _init_manager (line 108) | def _init_manager(self) -> None: method _post_init_gptq_buffer (line 130) | def _post_init_gptq_buffer(self, model: nn.Module) -> None: method _optimize_model (line 178) | def _optimize_model(self, model: nn.Module) -> None: method _prepare_with_shard_config (line 189) | def _prepare_with_shard_config(self, shard_config: Optional[ShardConfi... method _shard_model_by (line 217) | def _shard_model_by(self, shardformer: ShardFormer, model: nn.Module) ... method supported_models (line 234) | def supported_models(self) -> List[str]: method generate (line 237) | def generate(self, input_tokens: Union[BatchEncoding, dict, list, torc... method prepare_batch_state (line 258) | def prepare_batch_state(self, inputs) -> BatchInferState: method _generate_by_set_infer_state (line 329) | def _generate_by_set_infer_state(self, input_tokens, **generate_kwargs... method _generate_by_pass_infer_state (line 369) | def _generate_by_pass_infer_state( method _update_batch_state (line 383) | def _update_batch_state(self, infer_state: Optional[BatchInferState]) ... method forward (line 390) | def forward(self, batch_id, is_prefill): method _prefill_batch (line 461) | def _prefill_batch(self, batch_id): method _decode_batch (line 465) | def _decode_batch(self, batch_id): method add_request (line 477) | def add_request(): FILE: colossalai/legacy/inference/tensor_parallel/kvcache_manager.py class MemoryManager (line 12) | class MemoryManager: method __init__ (line 25) | def __init__( method _init_mem_states (line 40) | def _init_mem_states(self, size, device): method _init_kv_buffers (line 46) | def _init_kv_buffers(self, size, device, dtype, head_num, head_dim, la... method alloc (line 56) | def alloc(self, required_size): method alloc_contiguous (line 69) | def alloc_contiguous(self, required_size): method free (line 96) | def free(self, free_index): method free_all (line 102) | def free_all(self): FILE: colossalai/legacy/inference/tensor_parallel/modeling/_utils.py function copy_kv_to_mem_cache (line 12) | def copy_kv_to_mem_cache(layer_id, key_buffer, value_buffer, context_mem... function init_to_get_rotary (line 26) | def init_to_get_rotary(self, base=10000, use_elem=False): FILE: colossalai/legacy/inference/tensor_parallel/modeling/bloom.py function generate_alibi (line 32) | def generate_alibi(n_head, dtype=torch.float16): class BloomInferenceForwards (line 60) | class BloomInferenceForwards: method bloom_model_forward (line 70) | def bloom_model_forward( method bloom_for_causal_lm_forward (line 266) | def bloom_for_causal_lm_forward( method bloom_for_causal_lm_prepare_inputs_for_generation (line 345) | def bloom_for_causal_lm_prepare_inputs_for_generation( method bloom_block_forward (line 378) | def bloom_block_forward( method bloom_attention_forward (line 436) | def bloom_attention_forward( FILE: colossalai/legacy/inference/tensor_parallel/modeling/chatglm2.py function _init_to_get_rotary (line 34) | def _init_to_get_rotary(self, base=10000): function get_masks (line 68) | def get_masks(self, input_ids, past_length, padding_mask=None): class ChatGLM2InferenceForwards (line 90) | class ChatGLM2InferenceForwards: method chatglm_for_conditional_generation_forward (line 97) | def chatglm_for_conditional_generation_forward( method chatglm_model_forward (line 218) | def chatglm_model_forward( method chatglm_encoder_forward (line 298) | def chatglm_encoder_forward( method chatglm_glmblock_forward (line 344) | def chatglm_glmblock_forward( method chatglm_flash_attn_kvcache_forward (line 387) | def chatglm_flash_attn_kvcache_forward( FILE: colossalai/legacy/inference/tensor_parallel/modeling/llama.py function rotate_half (line 34) | def rotate_half(x): function apply_rotary_pos_emb (line 41) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids): function llama_triton_context_attention (line 53) | def llama_triton_context_attention( function llama_triton_token_attention (line 81) | def llama_triton_token_attention(query_states, attn_output, infer_state,... class LlamaInferenceForwards (line 111) | class LlamaInferenceForwards: method llama_model_forward (line 118) | def llama_model_forward( method llama_decoder_layer_forward (line 262) | def llama_decoder_layer_forward( method llama_flash_attn_kvcache_forward (line 305) | def llama_flash_attn_kvcache_forward( FILE: colossalai/legacy/inference/tensor_parallel/policies/bloom.py function get_triton_layernorm_forward (line 21) | def get_triton_layernorm_forward(): class BloomModelInferPolicy (line 32) | class BloomModelInferPolicy(BloomForCausalLMPolicy): method __init__ (line 33) | def __init__(self) -> None: method module_policy (line 36) | def module_policy(self): FILE: colossalai/legacy/inference/tensor_parallel/policies/chatglm2.py class ChatGLM2InferPolicy (line 24) | class ChatGLM2InferPolicy(ChatGLMModelPolicy): method __init__ (line 25) | def __init__(self) -> None: method module_policy (line 28) | def module_policy(self): method postprocess (line 58) | def postprocess(self): class ChatGLM2ForConditionalGenerationInferPolicy (line 63) | class ChatGLM2ForConditionalGenerationInferPolicy(ChatGLM2InferPolicy): method __init__ (line 64) | def __init__(self) -> None: method module_policy (line 67) | def module_policy(self): method postprocess (line 76) | def postprocess(self): FILE: colossalai/legacy/inference/tensor_parallel/policies/llama.py function get_triton_rmsnorm_forward (line 23) | def get_triton_rmsnorm_forward(): class LlamaModelInferPolicy (line 34) | class LlamaModelInferPolicy(LlamaForCausalLMPolicy): method __init__ (line 35) | def __init__(self) -> None: method module_policy (line 38) | def module_policy(self): method postprocess (line 119) | def postprocess(self): FILE: colossalai/legacy/initialize.py function get_default_parser (line 40) | def get_default_parser(): function launch (line 58) | def launch( function launch_from_slurm (line 126) | def launch_from_slurm( function launch_from_openmpi (line 165) | def launch_from_openmpi( function launch_from_torch (line 206) | def launch_from_torch( function initialize (line 242) | def initialize( FILE: colossalai/legacy/moe/layer/experts.py class MLPExperts (line 18) | class MLPExperts(nn.Module): method __init__ (line 33) | def __init__( method reset_parameters (line 92) | def reset_parameters(self): method forward (line 106) | def forward( FILE: colossalai/legacy/moe/layer/layers.py class SparseMLP (line 17) | class SparseMLP(nn.Module): method __init__ (line 47) | def __init__( method reset_parameters (line 147) | def reset_parameters(self): method forward (line 150) | def forward(self, inputs: torch.Tensor) -> torch.Tensor: method _local_process (line 219) | def _local_process(self, expert_in: torch.Tensor) -> torch.Tensor: method _ep_process (line 224) | def _ep_process( method _tp_process (line 303) | def _tp_process( function apply_load_balance (line 389) | def apply_load_balance(model: nn.Module, optim: Any) -> None: FILE: colossalai/legacy/moe/layer/routers.py class MLPExperts (line 18) | class MLPExperts(nn.Module): method __init__ (line 33) | def __init__( method reset_parameters (line 92) | def reset_parameters(self): method forward (line 106) | def forward( FILE: colossalai/legacy/moe/load_balance.py class LoadBalancer (line 15) | class LoadBalancer: method __init__ (line 16) | def __init__( method _clear_load (line 46) | def _clear_load(self) -> None: method _sync_load (line 49) | def _sync_load(self) -> Tensor: method _get_diff_from_avg (line 58) | def _get_diff_from_avg(data: List, group: int, avg: float) -> float: method _swap_data (line 62) | def _swap_data(data: List, group_i: int, index_i: int, group_j: int, i... method _normalize_data (line 69) | def _normalize_data(data: List) -> List: method _get_swap_loss (line 75) | def _get_swap_loss( method _check_convergence (line 103) | def _check_convergence(data: List, avg: float, tolerance: float): method _beam_search (line 112) | def _beam_search( method _load_to_list (line 182) | def _load_to_list(self, load: Tensor) -> List: method _search_balance (line 194) | def _search_balance( method _swap_expert_single_tensor (line 260) | def _swap_expert_single_tensor( method _swap_expert_param_and_optim (line 278) | def _swap_expert_param_and_optim( method _gather_global_dp_group (line 319) | def _gather_global_dp_group(self, data: Tensor) -> Tensor: method _swap_moe_param (line 325) | def _swap_moe_param(self, swap_list: List, optim: LowLevelZeroOptimize... method update_load (line 417) | def update_load(self, load: Tensor) -> None: method balance_load (line 428) | def balance_load(self, optim: LowLevelZeroOptimizer) -> None: FILE: colossalai/legacy/moe/manager.py class MoEManager (line 11) | class MoEManager(metaclass=SingletonMeta): method __init__ (line 16) | def __init__(self): method parallel_info_dict (line 41) | def parallel_info_dict(self): method is_initialized (line 45) | def is_initialized(self): method setup (line 48) | def setup( method get_info (line 99) | def get_info(self, num_experts: int, use_tp: bool = False) -> Tuple[in... method reset_loss (line 148) | def reset_loss(self): method add_loss (line 151) | def add_loss(self, aux_loss: float = 0.0, z_loss: float = 0.0): method get_loss (line 155) | def get_loss(self): method get_parallel (line 159) | def get_parallel(self): FILE: colossalai/legacy/moe/openmoe/benchmark/benchmark_cai.py function move_to_cuda (line 27) | def move_to_cuda(batch, device): function load_ckpt (line 31) | def load_ckpt(repo_name: str, model: OpenMoeForCausalLM, booster: Booster): class RandomDataset (line 44) | class RandomDataset(Dataset): method __init__ (line 45) | def __init__( method __len__ (line 78) | def __len__(self): method __getitem__ (line 81) | def __getitem__(self, idx): function parse_args (line 89) | def parse_args(): function main (line 145) | def main(): FILE: colossalai/legacy/moe/openmoe/benchmark/benchmark_fsdp.py class RandomDataset (line 20) | class RandomDataset(Dataset): method __init__ (line 21) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo... method __len__ (line 27) | def __len__(self): method __getitem__ (line 30) | def __getitem__(self, idx): function fsdp_main (line 38) | def fsdp_main(rank, world_size, args): FILE: colossalai/legacy/moe/openmoe/benchmark/utils.py function print_model_numel (line 12) | def print_model_numel(logger: DistributedLogger, model: nn.Module) -> None: function get_model_numel (line 29) | def get_model_numel(model: nn.Module) -> None: function divide (line 34) | def divide(x: float, y: float) -> float: function all_reduce_mean (line 43) | def all_reduce_mean(x: float, world_size: int) -> float: class Timer (line 52) | class Timer: method __init__ (line 53) | def __init__(self) -> None: method start (line 57) | def start(self) -> None: method end (line 60) | def end(self) -> None: method reset (line 65) | def reset(self) -> None: class PerformanceEvaluator (line 69) | class PerformanceEvaluator: method __init__ (line 81) | def __init__( method on_step_start (line 98) | def on_step_start(self, step: int) -> None: method on_step_end (line 105) | def on_step_end(self, input_ids: Tensor, **kwargs) -> None: method on_fit_end (line 116) | def on_fit_end(self) -> None: FILE: colossalai/legacy/moe/openmoe/infer.py function parse_args (line 9) | def parse_args(): function inference (line 15) | def inference(args): FILE: colossalai/legacy/moe/openmoe/model/convert_openmoe_ckpt.py function t5x_attention_lookup (line 44) | def t5x_attention_lookup(params, i, prefix, layer_name="attention"): function t5x_mlp_lookup (line 53) | def t5x_mlp_lookup(params, i, prefix, split_mlp_wi=False): function t5x_extra_mlp_lookup (line 66) | def t5x_extra_mlp_lookup(params, i, prefix, split_mlp_wi=False): function t5x_experts_lookup (line 79) | def t5x_experts_lookup(params, i, prefix, split_mlp_wi=False): function t5x_gate_lookup (line 92) | def t5x_gate_lookup(params, i, prefix, split_mlp_wi=False): function t5x_layer_norm_lookup (line 97) | def t5x_layer_norm_lookup(params, i, prefix, layer_name): function convert_t5x_to_pytorch (line 102) | def convert_t5x_to_pytorch(variables: dict, *, num_layers: int, moe_inte... function make_state_dict (line 164) | def make_state_dict(converted_params): function load_t5x_weights_in_t5 (line 172) | def load_t5x_weights_in_t5(model, config, t5x_checkpoint_path): function convert_t5x_checkpoint_to_pytorch (line 182) | def convert_t5x_checkpoint_to_pytorch(t5x_checkpoint_path, config_file, ... FILE: colossalai/legacy/moe/openmoe/model/modeling_openmoe.py function set_openmoe_args (line 65) | def set_openmoe_args( function _make_causal_mask (line 141) | def _make_causal_mask( function _expand_mask (line 159) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function generate_fixed_pos_embedding (line 173) | def generate_fixed_pos_embedding(features, length, min_timescale=1.0, ma... function apply_rotary_embedding (line 197) | def apply_rotary_embedding(q, k, cos, sin, decode=False, rotary_index=No... function rotate_half (line 236) | def rotate_half(x): function SwiGLU (line 243) | def SwiGLU(x): class OpenMoeMLP (line 255) | class OpenMoeMLP(nn.Module): method __init__ (line 256) | def __init__(self, config: LlamaConfig): method forward (line 268) | def forward(self, x): function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class OpenMoeAttention (line 302) | class OpenMoeAttention(nn.Module): method __init__ (line 305) | def __init__(self, config: LlamaConfig): method _shape (line 322) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 325) | def forward( class OpenMoeDecoderLayer (line 442) | class OpenMoeDecoderLayer(nn.Module): method __init__ (line 443) | def __init__(self, config: LlamaConfig, moe: bool): method forward (line 475) | def forward( class OpenMoePreTrainedModel (line 557) | class OpenMoePreTrainedModel(PreTrainedModel): method _init_weights (line 564) | def _init_weights(self, module): method _set_gradient_checkpointing (line 575) | def _set_gradient_checkpointing(self, module, value=False): class OpenMoeModel (line 648) | class OpenMoeModel(OpenMoePreTrainedModel): method __init__ (line 656) | def __init__(self, config: LlamaConfig): method get_input_embeddings (line 674) | def get_input_embeddings(self): method set_input_embeddings (line 677) | def set_input_embeddings(self, value): method _prepare_decoder_attention_mask (line 681) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,... method forward (line 705) | def forward( class OpenMoeForCausalLM (line 833) | class OpenMoeForCausalLM(OpenMoePreTrainedModel): method __init__ (line 836) | def __init__(self, config): method get_input_embeddings (line 846) | def get_input_embeddings(self): method set_input_embeddings (line 849) | def set_input_embeddings(self, value): method get_output_embeddings (line 852) | def get_output_embeddings(self): method set_output_embeddings (line 855) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 858) | def set_decoder(self, decoder): method get_decoder (line 861) | def get_decoder(self): method forward (line 866) | def forward( method prepare_inputs_for_generation (line 989) | def prepare_inputs_for_generation( method _reorder_cache (line 1020) | def _reorder_cache(past_key_values, beam_idx): method _calculate_router_loss (line 1028) | def _calculate_router_loss(self, aux_loss: list = None, z_loss: list =... method _calculate_loss (line 1036) | def _calculate_loss(self, logits: torch.Tensor, targets: torch.Tensor)... class ZLossCrossEntropy (line 1073) | class ZLossCrossEntropy(torch.autograd.Function): method forward (line 1098) | def forward(ctx, logits, targets, z_loss): method backward (line 1115) | def backward(ctx, *grad_outputs): FILE: colossalai/legacy/moe/openmoe/model/openmoe_policy.py class OpenMoePolicy (line 22) | class OpenMoePolicy(Policy): method config_sanity_check (line 23) | def config_sanity_check(self): method preprocess (line 26) | def preprocess(self): method module_policy (line 38) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 86) | def postprocess(self): method set_pipeline_forward (line 89) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 108) | def get_held_layers(self) -> List[Module]: method distribute_layers (line 129) | def distribute_layers(self, num_layers: int, num_stages: int) -> List[... class OpenMoeModelPolicy (line 144) | class OpenMoeModelPolicy(OpenMoePolicy): method __init__ (line 145) | def __init__(self) -> None: method module_policy (line 148) | def module_policy(self): method get_held_layers (line 159) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 164) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class OpenMoeForCausalLMPolicy (line 169) | class OpenMoeForCausalLMPolicy(OpenMoePolicy): method module_policy (line 170) | def module_policy(self): method get_held_layers (line 199) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 207) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class OpenMoePipelineForwards (line 224) | class OpenMoePipelineForwards: method openmoe_model_forward (line 231) | def openmoe_model_forward( method llama_for_causal_lm_forward (line 409) | def llama_for_causal_lm_forward( FILE: colossalai/legacy/moe/openmoe/train.py function move_to_cuda (line 27) | def move_to_cuda(batch, device): function load_ckpt (line 31) | def load_ckpt(repo_name: str, model: OpenMoeForCausalLM, booster: Booster): function tokenize_data (line 44) | def tokenize_data(batch, tokenizer: T5Tokenizer, max_length: int) -> Dict: class RandomDataset (line 59) | class RandomDataset(Dataset): method __init__ (line 60) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo... method __len__ (line 68) | def __len__(self): method __getitem__ (line 71) | def __getitem__(self, idx): function parse_args (line 79) | def parse_args(): function main (line 205) | def main(): FILE: colossalai/legacy/moe/utils.py class ForceFP32Parameter (line 16) | class ForceFP32Parameter(torch.nn.Parameter): method half (line 17) | def half(self, memory_format=None): class NormalNoiseGenerator (line 21) | class NormalNoiseGenerator: method __init__ (line 31) | def __init__(self, num_experts: int): method __call__ (line 37) | def __call__(self, inputs: torch.Tensor): class UniformNoiseGenerator (line 42) | class UniformNoiseGenerator: method __init__ (line 53) | def __init__(self, eps: float = 1e-2): method __call__ (line 59) | def __call__(self, inputs: torch.Tensor): function autocast_softmax (line 64) | def autocast_softmax(logit: torch.Tensor, dim: int): function get_noise_generator (line 68) | def get_noise_generator(noise_type: str, num_experts: int) -> Callable: function get_activation (line 80) | def get_activation(act: str) -> Callable: function SwiGLU (line 93) | def SwiGLU(x): function skip_init (line 106) | def skip_init(): function get_moe_epsize_param_dict (line 136) | def get_moe_epsize_param_dict(model: nn.Module) -> Dict[int, List[nn.Par... function sync_moe_model_param (line 157) | def sync_moe_model_param(model: nn.Module): function set_moe_args (line 178) | def set_moe_args(config: Any, args: dict): function create_ep_hierarchical_group (line 183) | def create_ep_hierarchical_group( FILE: colossalai/legacy/nn/_ops/_utils.py function convert_to_colo_tensor (line 15) | def convert_to_colo_tensor(tensor: Optional[GeneralTensor], pg: ProcessG... function set_parallel_input (line 21) | def set_parallel_input(input_parallel: bool): function get_parallel_input (line 25) | def get_parallel_input(): function vocab_range_from_per_partition_vocab_size (line 29) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, ... function vocab_range_from_global_vocab_size (line 35) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_si... function _reduce (line 40) | def _reduce(input_, pg: ProcessGroup): function _split (line 51) | def _split(input_, pg: ProcessGroup, dim=-1): function _gather (line 71) | def _gather(input_, pg: ProcessGroup, dim=-1): class _ReduceGrad (line 91) | class _ReduceGrad(torch.autograd.Function): method symbolic (line 101) | def symbolic(graph, input_): method forward (line 105) | def forward(ctx, input_, process_group): method backward (line 110) | def backward(ctx, grad_output): class _ReduceInput (line 114) | class _ReduceInput(torch.autograd.Function): method symbolic (line 124) | def symbolic(graph, input_): method forward (line 128) | def forward(ctx, input_, process_group): method backward (line 132) | def backward(ctx, grad_output): class _SplitForwardGatherBackward (line 136) | class _SplitForwardGatherBackward(torch.autograd.Function): method symbolic (line 147) | def symbolic(graph, input_): method forward (line 151) | def forward(ctx, input_, process_group, dim): method backward (line 157) | def backward(ctx, grad_output): class _GatherForwardSplitBackward (line 161) | class _GatherForwardSplitBackward(torch.autograd.Function): method symbolic (line 171) | def symbolic(graph, input_): method forward (line 175) | def forward(ctx, input_, process_group, dim): method backward (line 181) | def backward(ctx, grad_output): function reduce_grad (line 185) | def reduce_grad(input_, process_group): function reduce_input (line 189) | def reduce_input(input_, process_group): function split_forward_gather_backward (line 193) | def split_forward_gather_backward(input_, process_group, dim): function gather_forward_split_backward (line 197) | def gather_forward_split_backward(input_, process_group, dim): function _all_to_all (line 201) | def _all_to_all(x: torch.Tensor, pg: ProcessGroup, scatter_dim: int, gat... class _DualAllToAll (line 219) | class _DualAllToAll(torch.autograd.Function): method forward (line 221) | def forward(ctx, x, pg, scatter_dim, gather_dim): method backward (line 228) | def backward(ctx, grad): function dual_all_to_all (line 232) | def dual_all_to_all(x, pg, scatter_dim: int, gather_dim: int): function _all_to_all_for_tablewise (line 239) | def _all_to_all_for_tablewise( class _DualAllToAllForTablewise (line 266) | class _DualAllToAllForTablewise(torch.autograd.Function): method forward (line 268) | def forward(ctx, x, pg, scatter_strides, gather_strides): method backward (line 275) | def backward(ctx, grad): function dual_all_to_all_tablewise (line 284) | def dual_all_to_all_tablewise(x, pg, scatter_strides, gather_strides): FILE: colossalai/legacy/nn/layer/base_layer.py class ParallelLayer (line 12) | class ParallelLayer(nn.Module): method __init__ (line 15) | def __init__(self): method _load_from_global_state_dict (line 38) | def _load_from_global_state_dict( method _save_to_global_state_dict (line 45) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method _load_from_state_dict (line 48) | def _load_from_state_dict( method _save_to_state_dict (line 62) | def _save_to_state_dict(self, destination, prefix, keep_vars): method use_local_state_dict (line 69) | def use_local_state_dict(cls): FILE: colossalai/legacy/nn/layer/colossalai_layer/_utils.py function partition_batch (line 12) | def partition_batch(input_) -> Tensor: class ColossalaiModule (line 23) | class ColossalaiModule(nn.Module): method __init__ (line 24) | def __init__(self, module: nn.Module, **kwargs): method __getattr__ (line 30) | def __getattr__(self, name: str): method forward (line 39) | def forward(self, *args): FILE: colossalai/legacy/nn/layer/colossalai_layer/dropout.py class Dropout (line 10) | class Dropout(ColossalaiModule): method __init__ (line 18) | def __init__(self, p: float = 0.5, inplace: bool = False) -> None: method forward (line 26) | def forward(self, *args): FILE: colossalai/legacy/nn/layer/colossalai_layer/embedding.py class Embedding (line 40) | class Embedding(ColossalaiModule): method __init__ (line 70) | def __init__( class PatchEmbedding (line 112) | class PatchEmbedding(ColossalaiModule): method __init__ (line 133) | def __init__( FILE: colossalai/legacy/nn/layer/colossalai_layer/linear.py class Linear (line 35) | class Linear(ColossalaiModule): method __init__ (line 67) | def __init__( class Classifier (line 94) | class Classifier(ColossalaiModule): method __init__ (line 112) | def __init__( FILE: colossalai/legacy/nn/layer/colossalai_layer/normalization.py class LayerNorm (line 22) | class LayerNorm(ColossalaiModule): method __init__ (line 36) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=... FILE: colossalai/legacy/nn/layer/parallel_1d/_operation.py class FusedLayerNormAffineFunction1D (line 12) | class FusedLayerNormAffineFunction1D(torch.autograd.Function): method forward (line 27) | def forward(ctx, input, weight, bias, normalized_shape, eps): method backward (line 40) | def backward(ctx, grad_output): class LinearWithAsyncCommunication (line 50) | class LinearWithAsyncCommunication(torch.autograd.Function): method forward (line 56) | def forward(ctx, input_, weight, bias, parallel_mode, async_grad_allre... method backward (line 68) | def backward(ctx, grad_output): function linear_with_async_comm (line 97) | def linear_with_async_comm(input_, weight, bias, parallel_mode, async_gr... FILE: colossalai/legacy/nn/layer/parallel_1d/_utils.py function set_parallel_input (line 13) | def set_parallel_input(input_parallel: bool): function get_parallel_input (line 17) | def get_parallel_input(): function vocab_range_from_per_partition_vocab_size (line 21) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, ... function vocab_range_from_global_vocab_size (line 27) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_si... function _reduce (line 32) | def _reduce(input_, parallel_mode): function _split (line 42) | def _split(input_, parallel_mode, dim=-1): function _gather (line 62) | def _gather(input_, parallel_mode, dim=-1): class _ReduceGrad (line 81) | class _ReduceGrad(torch.autograd.Function): method symbolic (line 91) | def symbolic(graph, input_): method forward (line 95) | def forward(ctx, input_, parallel_mode): method backward (line 100) | def backward(ctx, grad_output): class _ReduceInput (line 104) | class _ReduceInput(torch.autograd.Function): method symbolic (line 114) | def symbolic(graph, input_): method forward (line 118) | def forward(ctx, input_, parallel_mode): method backward (line 122) | def backward(ctx, grad_output): class _SplitForwardGatherBackward (line 126) | class _SplitForwardGatherBackward(torch.autograd.Function): method symbolic (line 137) | def symbolic(graph, input_): method forward (line 141) | def forward(ctx, input_, parallel_mode, dim): method backward (line 147) | def backward(ctx, grad_output): class _GatherForwardSplitBackward (line 151) | class _GatherForwardSplitBackward(torch.autograd.Function): method symbolic (line 161) | def symbolic(graph, input_): method forward (line 165) | def forward(ctx, input_, parallel_mode, dim): method backward (line 171) | def backward(ctx, grad_output): function reduce_grad (line 175) | def reduce_grad(input_, parallel_mode): function reduce_input (line 179) | def reduce_input(input_, parallel_mode): function split_forward_gather_backward (line 183) | def split_forward_gather_backward(input_, parallel_mode, dim): function gather_forward_split_backward (line 187) | def gather_forward_split_backward(input_, parallel_mode, dim): FILE: colossalai/legacy/nn/layer/parallel_1d/layers.py class Linear1D (line 51) | class Linear1D(ColossalaiModule): method __init__ (line 71) | def __init__( class LayerNorm1D (line 108) | class LayerNorm1D(ColossalaiModule): method __init__ (line 150) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=... method _load_from_state_dict (line 163) | def _load_from_state_dict(self, state_dict, prefix, *args): method _save_to_state_dict (line 180) | def _save_to_state_dict(self, destination, prefix, keep_vars): class Classifier1D (line 186) | class Classifier1D(ParallelLayer): method __init__ (line 204) | def __init__( method reset_parameters (line 241) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _set_tensor_parallel_attributes (line 249) | def _set_tensor_parallel_attributes(self): method _load_from_global_state_dict (line 254) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 278) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 295) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelClassifier1D (line 320) | class VocabParallelClassifier1D(ParallelLayer): method __init__ (line 338) | def __init__( method reset_parameters (line 377) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _set_tensor_parallel_attributes (line 384) | def _set_tensor_parallel_attributes(self): method _load_from_global_state_dict (line 391) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 415) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 432) | def forward(self, input_: Tensor) -> Tensor: class Linear1D_Col (line 451) | class Linear1D_Col(ParallelLayer): method __init__ (line 476) | def __init__( method reset_parameters (line 515) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _set_tensor_parallel_attributes (line 521) | def _set_tensor_parallel_attributes(self): method _load_from_global_state_dict (line 527) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 550) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 565) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class Linear1D_Row (line 591) | class Linear1D_Row(ParallelLayer): method __init__ (line 611) | def __init__( method chunk_weight (line 656) | def chunk_weight(self): method reset_parameters (line 659) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _set_tensor_parallel_attributes (line 666) | def _set_tensor_parallel_attributes(self): method _load_from_global_state_dict (line 670) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 693) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 708) | def forward(self, input_: Tensor) -> Tensor: class Embedding1D (line 754) | class Embedding1D(ParallelLayer): method __init__ (line 784) | def __init__( method _set_tensor_parallel_attributes (line 814) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 817) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 823) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 828) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 842) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 854) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelEmbedding1D (line 863) | class VocabParallelEmbedding1D(ParallelLayer): method __init__ (line 893) | def __init__( method _set_tensor_parallel_attributes (line 929) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 932) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 938) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 947) | def _load_from_global_state_dict(self, state_dict, prefix, *args): method _save_to_global_state_dict (line 961) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 973) | def forward(self, input_: Tensor) -> Tensor: class Dropout1D (line 992) | class Dropout1D(ParallelLayer): method __init__ (line 1000) | def __init__(self, p: float = 0.5, inplace: bool = False): method forward (line 1006) | def forward(self, input_: Tensor) -> Tensor: class PatchEmbedding1D (line 1016) | class PatchEmbedding1D(ColossalaiModule): method __init__ (line 1040) | def __init__( method _load_from_state_dict (line 1065) | def _load_from_state_dict(self, state_dict, prefix, *args): method _save_to_state_dict (line 1077) | def _save_to_state_dict(self, destination, prefix, keep_vars): FILE: colossalai/legacy/nn/layer/parallel_2d/_operation.py function matmul_2d (line 14) | def matmul_2d( class _Classifier2D (line 74) | class _Classifier2D(torch.autograd.Function): method forward (line 77) | def forward( method backward (line 129) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function classifier_2d (line 147) | def classifier_2d( class Matmul_AB_2D (line 200) | class Matmul_AB_2D(torch.autograd.Function): method forward (line 224) | def forward( method backward (line 318) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class Matmul_ABT_2D (line 352) | class Matmul_ABT_2D(torch.autograd.Function): method forward (line 377) | def forward( method backward (line 475) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class Matmul_ATB_2D (line 510) | class Matmul_ATB_2D(torch.autograd.Function): method forward (line 534) | def forward( method backward (line 632) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class _Add_Bias_2D (line 667) | class _Add_Bias_2D(torch.autograd.Function): method forward (line 670) | def forward( method backward (line 705) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function add_bias_2d (line 718) | def add_bias_2d( class _Layernorm_2D (line 769) | class _Layernorm_2D(torch.autograd.Function): method forward (line 772) | def forward( method backward (line 792) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function layernorm_2d (line 813) | def layernorm_2d( class _AllGatherTensor2D (line 838) | class _AllGatherTensor2D(torch.autograd.Function): method forward (line 841) | def forward(ctx: Any, inputs: Tensor, dim: int, parallel_mode: Paralle... method backward (line 850) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function all_gather_tensor_2d (line 855) | def all_gather_tensor_2d(tensor: Tensor, dim: int, parallel_mode: Parall... function split_batch_2d (line 870) | def split_batch_2d(input_: Tensor, dim: int = 0) -> Tensor: class _ReduceTensor2D (line 893) | class _ReduceTensor2D(torch.autograd.Function): method forward (line 895) | def forward(ctx, input_, parallel_mode): method backward (line 899) | def backward(ctx, output_grad): function reduce_tensor_2d (line 903) | def reduce_tensor_2d(input_: Tensor, parallel_mode: ParallelMode) -> Ten... class _ReduceScatterTensor2D (line 917) | class _ReduceScatterTensor2D(torch.autograd.Function): method forward (line 919) | def forward(ctx, input_, dim, parallel_mode): method backward (line 925) | def backward(ctx, output_grad): function reduce_scatter_tensor_2d (line 929) | def reduce_scatter_tensor_2d(tensor: Tensor, dim: int, parallel_mode: Pa... class _ReduceByBatch2D (line 948) | class _ReduceByBatch2D(torch.autograd.Function): method symbolic (line 950) | def symbolic(graph, input_, reduce_mean: bool = False): method forward (line 959) | def forward(ctx, input_, reduce_mean: bool = False): method backward (line 970) | def backward(ctx, output_grad): function reduce_by_batch_2d (line 977) | def reduce_by_batch_2d(input_, reduce_mean: bool = False) -> Tensor: FILE: colossalai/legacy/nn/layer/parallel_2d/_utils.py function get_summa_dim_from_env (line 6) | def get_summa_dim_from_env() -> int: function assert_summa_initialization (line 19) | def assert_summa_initialization(): FILE: colossalai/legacy/nn/layer/parallel_2d/layers.py class Linear2D (line 39) | class Linear2D(ParallelLayer): method __init__ (line 58) | def __init__( method _set_tensor_parallel_attributes (line 101) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 106) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 112) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 145) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 173) | def forward(self, x: Tensor) -> Tensor: class LayerNorm2D (line 231) | class LayerNorm2D(ParallelLayer): method __init__ (line 245) | def __init__(self, normalized_shape: int, eps: float = 1e-05, bias=Tru... method _set_tensor_parallel_attributes (line 272) | def _set_tensor_parallel_attributes(self): method _load_from_global_state_dict (line 277) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 309) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 336) | def forward(self, x: Tensor) -> Tensor: class PatchEmbedding2D (line 390) | class PatchEmbedding2D(ParallelLayer): method __init__ (line 411) | def __init__( method _set_tensor_parallel_attribute (line 465) | def _set_tensor_parallel_attribute(self): method reset_parameters (line 471) | def reset_parameters(self, weight_initializer, bias_initializer, posit... method _load_from_global_state_dict (line 479) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 521) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 550) | def forward(self, input_: Tensor) -> Tensor: class Embedding2D (line 575) | class Embedding2D(ParallelLayer): method __init__ (line 605) | def __init__( method _set_tensor_parallel_attributes (line 636) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 639) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 645) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 650) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 677) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 701) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelEmbedding2D (line 711) | class VocabParallelEmbedding2D(ParallelLayer): method __init__ (line 741) | def __init__( method _set_tensor_parallel_attributes (line 778) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 781) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 787) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 796) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 823) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 847) | def forward(self, input_: Tensor) -> Tensor: class Classifier2D (line 862) | class Classifier2D(ParallelLayer): method __init__ (line 880) | def __init__( method _set_tensor_parallel_attributes (line 924) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 928) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 942) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 976) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 1005) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelClassifier2D (line 1026) | class VocabParallelClassifier2D(ParallelLayer): method __init__ (line 1044) | def __init__( method _set_tensor_parallel_attributes (line 1091) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 1097) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 1104) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 1138) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 1168) | def forward(self, x: Tensor) -> Tensor: FILE: colossalai/legacy/nn/layer/parallel_2p5d/_operation.py function get_parallel_group (line 14) | def get_parallel_group(parallel_mode: ParallelMode): function get_global_rank (line 18) | def get_global_rank(): function get_parallel_rank (line 22) | def get_parallel_rank(parallel_mode: ParallelMode): class _Classifier2p5D (line 26) | class _Classifier2p5D(torch.autograd.Function): method forward (line 29) | def forward( method backward (line 81) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function classifier_2p5d (line 100) | def classifier_2p5d( class Matmul_AB_2p5D (line 153) | class Matmul_AB_2p5D(torch.autograd.Function): method forward (line 178) | def forward( method backward (line 278) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class Matmul_ABT_2p5D (line 314) | class Matmul_ABT_2p5D(torch.autograd.Function): method forward (line 339) | def forward( method backward (line 443) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class Matmul_ATB_2p5D (line 479) | class Matmul_ATB_2p5D(torch.autograd.Function): method forward (line 504) | def forward( method backward (line 606) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: class _Add_Bias_2p5D (line 642) | class _Add_Bias_2p5D(torch.autograd.Function): method forward (line 645) | def forward( method backward (line 694) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function add_bias_2p5d (line 805) | def add_bias_2p5d( class _Layernorm2p5D (line 859) | class _Layernorm2p5D(torch.autograd.Function): method forward (line 876) | def forward( method backward (line 889) | def backward(ctx, output_grad): function layernorm_2p5d (line 910) | def layernorm_2p5d( class _AllGatherTensor2p5D (line 929) | class _AllGatherTensor2p5D(torch.autograd.Function): method forward (line 932) | def forward(ctx: Any, inputs: Tensor, dim: int, col_parallel_mode: Par... method backward (line 941) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function all_gather_tensor_2p5d (line 946) | def all_gather_tensor_2p5d(inputs: Tensor, dim: int, col_parallel_mode: ... class SplitFirst (line 961) | class SplitFirst(torch.autograd.Function): method forward (line 976) | def forward(ctx: Any, inputs: Tensor, tesseract_dim: int, col_parallel... method backward (line 987) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]: function split_batch_2p5d (line 996) | def split_batch_2p5d(input_: Tensor, dim: int = 0) -> Tensor: class _ReduceTensor2p5D (line 1021) | class _ReduceTensor2p5D(torch.autograd.Function): method forward (line 1023) | def forward(ctx, input_, parallel_mode): method backward (line 1027) | def backward(ctx, output_grad): function reduce_tensor_2p5d (line 1031) | def reduce_tensor_2p5d(input_: Tensor, parallel_mode: ParallelMode) -> T... class _ReduceScatterTensor2p5D (line 1045) | class _ReduceScatterTensor2p5D(torch.autograd.Function): method forward (line 1047) | def forward(ctx, input_, dim, parallel_mode): method backward (line 1053) | def backward(ctx, output_grad): function reduce_scatter_tensor_2p5d (line 1057) | def reduce_scatter_tensor_2p5d(input_: Tensor, dim: int, parallel_mode: ... class _RreduceByBatch2p5D (line 1078) | class _RreduceByBatch2p5D(torch.autograd.Function): method symbolic (line 1080) | def symbolic(graph, input_, reduce_mean: bool = False): method forward (line 1089) | def forward(ctx, input_, reduce_mean: bool = False): method backward (line 1100) | def backward(ctx, output_grad): function reduce_by_batch_2p5d (line 1107) | def reduce_by_batch_2p5d(input_, reduce_mean: bool = False) -> Tensor: FILE: colossalai/legacy/nn/layer/parallel_2p5d/_utils.py function get_tesseract_dim_dep_from_env (line 6) | def get_tesseract_dim_dep_from_env(): function assert_tesseract_initialization (line 21) | def assert_tesseract_initialization(): FILE: colossalai/legacy/nn/layer/parallel_2p5d/layers.py class Linear2p5D (line 40) | class Linear2p5D(ParallelLayer): method __init__ (line 59) | def __init__( method _set_tensor_parallel_attributes (line 103) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 108) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 114) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 153) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 182) | def forward(self, x: Tensor) -> Tensor: class LayerNorm2p5D (line 243) | class LayerNorm2p5D(ParallelLayer): method __init__ (line 257) | def __init__(self, normalized_shape: int, eps: float = 1e-05, bias=Tru... method _set_tensor_parallel_attribute (line 285) | def _set_tensor_parallel_attribute(self): method _load_from_global_state_dict (line 290) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 322) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 349) | def forward(self, x: Tensor) -> Tensor: class PatchEmbedding2p5D (line 403) | class PatchEmbedding2p5D(ParallelLayer): method __init__ (line 424) | def __init__( method _set_tensor_parallel_attribute (line 478) | def _set_tensor_parallel_attribute(self): method reset_parameters (line 484) | def reset_parameters(self, weight_initializer, bias_initializer, posit... method _load_from_global_state_dict (line 492) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 534) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 563) | def forward(self, input_: Tensor) -> Tensor: class Embedding2p5D (line 588) | class Embedding2p5D(ParallelLayer): method __init__ (line 618) | def __init__( method _set_tensor_parallel_attributes (line 649) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 652) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 658) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 663) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 690) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 714) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelEmbedding2p5D (line 725) | class VocabParallelEmbedding2p5D(ParallelLayer): method __init__ (line 755) | def __init__( method _set_tensor_parallel_attributes (line 792) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 795) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 801) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 806) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 833) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 857) | def forward(self, input_: Tensor) -> Tensor: class Classifier2p5D (line 876) | class Classifier2p5D(ParallelLayer): method __init__ (line 894) | def __init__( method _set_tensor_parallel_attributes (line 939) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 943) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 957) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 991) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 1020) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelClassifier2p5D (line 1041) | class VocabParallelClassifier2p5D(ParallelLayer): method __init__ (line 1059) | def __init__( method _set_tensor_parallel_attributes (line 1107) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 1113) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 1120) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method forward (line 1154) | def forward(self, x: Tensor) -> Tensor: FILE: colossalai/legacy/nn/layer/parallel_3d/_operation.py class _Linear3D (line 18) | class _Linear3D(torch.autograd.Function): method forward (line 21) | def forward( method backward (line 46) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]: function linear_3d (line 64) | def linear_3d( class _Classifier3D (line 94) | class _Classifier3D(torch.autograd.Function): method forward (line 97) | def forward( method backward (line 130) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]: function classifier_3d (line 155) | def classifier_3d( class _VocabParallelClassifier3D (line 189) | class _VocabParallelClassifier3D(torch.autograd.Function): method forward (line 192) | def forward( method backward (line 224) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]: function vocab_parallel_classifier_3d (line 249) | def vocab_parallel_classifier_3d( function norm_forward (line 284) | def norm_forward(x: Tensor, mean: Tensor, sqr_mean: Tensor, weight: Tens... function norm_backward (line 295) | def norm_backward(grad: Tensor, mu: Tensor, sigma: Tensor, weight: Tensor): class _Layernorm3D (line 307) | class _Layernorm3D(torch.autograd.Function): method forward (line 310) | def forward( method backward (line 341) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]: function layernorm_3d (line 359) | def layernorm_3d( function split_tensor_3d (line 400) | def split_tensor_3d(tensor: Tensor, dim: int, parallel_mode: ParallelMod... function split_batch_3d (line 429) | def split_batch_3d( class _ReduceTensor3D (line 461) | class _ReduceTensor3D(torch.autograd.Function): method forward (line 463) | def forward(ctx, input_, parallel_mode): method backward (line 467) | def backward(ctx, output_grad): function reduce_tensor_3d (line 471) | def reduce_tensor_3d(tensor: Tensor, parallel_mode: ParallelMode) -> Ten... class _AllGatherTensor3D (line 485) | class _AllGatherTensor3D(torch.autograd.Function): method forward (line 487) | def forward(ctx, input_, dim, parallel_mode): method backward (line 494) | def backward(ctx, output_grad): function all_gather_tensor_3d (line 499) | def all_gather_tensor_3d(tensor: Tensor, dim: int, parallel_mode: Parall... class _ReduceScatterTensor3D (line 514) | class _ReduceScatterTensor3D(torch.autograd.Function): method forward (line 516) | def forward(ctx, input_, dim, parallel_mode): method backward (line 522) | def backward(ctx, output_grad): function reduce_scatter_tensor_3d (line 527) | def reduce_scatter_tensor_3d(tensor: Tensor, dim: int, parallel_mode: Pa... class _ReduceByBatch3D (line 548) | class _ReduceByBatch3D(torch.autograd.Function): method forward (line 551) | def forward( method backward (line 569) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]: function reduce_by_batch_3d (line 576) | def reduce_by_batch_3d( FILE: colossalai/legacy/nn/layer/parallel_3d/_utils.py function get_depth_from_env (line 18) | def get_depth_from_env() -> int: function get_parallel_mode_from_env (line 31) | def get_parallel_mode_from_env(group): function swap_in_out_group (line 42) | def swap_in_out_group(): function dbg_check_shape (line 50) | def dbg_check_shape(tensor: Tensor, shape: tuple): class AsyncGradientBucket (line 57) | class AsyncGradientBucket(object): method __init__ (line 58) | def __init__(self): method __len__ (line 61) | def __len__(self): method push (line 64) | def push(self, async_op, grad_tensor, param_id): method pop (line 68) | def pop(self, param_id): method synchronize (line 76) | def synchronize(self, params): function push_async_grad (line 89) | def push_async_grad(op, grad, param_id): function pop_async_grad (line 93) | def pop_async_grad(param_id): function _async_grad_hook (line 97) | def _async_grad_hook(grad, param_id): function register_async_grad_hook (line 102) | def register_async_grad_hook(param): function synchronize (line 106) | def synchronize(params=list()): FILE: colossalai/legacy/nn/layer/parallel_3d/layers.py class LayerNorm3D (line 47) | class LayerNorm3D(ParallelLayer): method __init__ (line 61) | def __init__(self, normalized_shape: int, eps: float = 1e-12, bias=Tru... method _set_tensor_parallel_attributes (line 86) | def _set_tensor_parallel_attributes(self) -> None: method reset_parameters (line 91) | def reset_parameters(self) -> None: method _load_from_global_state_dict (line 98) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 131) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 150) | def forward(self, input_: Tensor) -> Tensor: class Linear3D (line 163) | class Linear3D(ParallelLayer): method __init__ (line 180) | def __init__( method _set_tensor_parallel_attributes (line 224) | def _set_tensor_parallel_attributes(self) -> None: method _sync_grad_hook (line 229) | def _sync_grad_hook(self, grad) -> Tensor: method reset_parameters (line 233) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 249) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 290) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 327) | def forward(self, input_: Tensor) -> Tensor: class Classifier3D (line 345) | class Classifier3D(ParallelLayer): method __init__ (line 363) | def __init__( method _set_tensor_parallel_attributes (line 405) | def _set_tensor_parallel_attributes(self) -> None: method reset_parameters (line 409) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 424) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 456) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 477) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelClassifier3D (line 489) | class VocabParallelClassifier3D(ParallelLayer): method __init__ (line 507) | def __init__( method _set_tensor_parallel_attributes (line 556) | def _set_tensor_parallel_attributes(self) -> None: method reset_parameters (line 562) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method _load_from_global_state_dict (line 580) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 622) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 658) | def forward(self, input_: Tensor) -> Tensor: class PatchEmbedding3D (line 670) | class PatchEmbedding3D(ParallelLayer): method __init__ (line 691) | def __init__( method _set_tensor_parallel_attributes (line 744) | def _set_tensor_parallel_attributes(self) -> None: method _sync_grad_hook (line 750) | def _sync_grad_hook(self, grad) -> Tensor: method reset_parameters (line 754) | def reset_parameters(self, weight_initializer, bias_initializer, posit... method _load_from_global_state_dict (line 772) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 812) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 833) | def forward(self, input_: Tensor) -> Tensor: class Embedding3D (line 849) | class Embedding3D(ParallelLayer): method __init__ (line 879) | def __init__( method _set_tensor_parallel_attributes (line 912) | def _set_tensor_parallel_attributes(self) -> None: method _sync_grad_hook (line 915) | def _sync_grad_hook(self, grad) -> Tensor: method reset_parameters (line 919) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 929) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 934) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 959) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 975) | def forward(self, input_: Tensor) -> Tensor: class VocabParallelEmbedding3D (line 985) | class VocabParallelEmbedding3D(ParallelLayer): method __init__ (line 1015) | def __init__( method _set_tensor_parallel_attributes (line 1054) | def _set_tensor_parallel_attributes(self): method reset_parameters (line 1057) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 1063) | def _fill_padding_idx_with_zero(self) -> None: method _load_from_global_state_dict (line 1072) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw... method _save_to_global_state_dict (line 1107) | def _save_to_global_state_dict(self, destination, prefix, keep_vars): method forward (line 1140) | def forward(self, input_: Tensor) -> Tensor: FILE: colossalai/legacy/nn/layer/parallel_sequence/_operation.py class RingQK (line 15) | class RingQK(torch.autograd.Function): method forward (line 22) | def forward(ctx, sub_q, sub_k, batch_size, num_attention_heads, sub_se... method backward (line 55) | def backward(ctx, grad_output): class RingAV (line 92) | class RingAV(torch.autograd.Function): method forward (line 99) | def forward(ctx, attention_score, sub_v, batch_size, num_attention_hea... method backward (line 132) | def backward(ctx, grad_output): FILE: colossalai/legacy/nn/layer/parallel_sequence/_utils.py function _calc_incoming_device_range (line 5) | def _calc_incoming_device_range(i, rank, world_size, sub_seq_length): function _calc_current_device_range (line 12) | def _calc_current_device_range(rank, sub_seq_length): FILE: colossalai/legacy/nn/layer/parallel_sequence/layers.py class TransformerSelfAttentionRing (line 20) | class TransformerSelfAttentionRing(nn.Module): method __init__ (line 34) | def __init__( method forward (line 98) | def forward(self, hidden_states, attention_mask): method __repr__ (line 198) | def __repr__(self): class _Linear (line 208) | class _Linear(nn.Module): method __init__ (line 227) | def __init__(self, input_size, output_size, bias=True, skip_bias_add=F... method forward (line 251) | def forward(self, input_): method __repr__ (line 261) | def __repr__(self): FILE: colossalai/legacy/nn/layer/utils/common.py class CheckpointModule (line 16) | class CheckpointModule(nn.Module): method __init__ (line 17) | def __init__(self, checkpoint: bool = True, offload: bool = False): method _forward (line 23) | def _forward(self, *args, **kwargs): method forward (line 26) | def forward(self, *args, **kwargs): method train (line 32) | def train(self, mode: bool = True): method eval (line 36) | def eval(self): function divide (line 41) | def divide(numerator, denominator): function swish (line 56) | def swish(x: Tensor) -> Tensor: function set_tensor_parallel_attribute_by_size (line 63) | def set_tensor_parallel_attribute_by_size(param, size): function set_tensor_parallel_attribute_by_partition (line 68) | def set_tensor_parallel_attribute_by_partition(param, num_partitions): function get_tensor_parallel_mode (line 73) | def get_tensor_parallel_mode(): function _ntuple (line 80) | def _ntuple(n): FILE: colossalai/legacy/nn/layer/vanilla/layers.py function drop_path (line 18) | def drop_path(x, drop_prob: float = 0.0, training: bool = False): class DropPath (line 41) | class DropPath(nn.Module): method __init__ (line 50) | def __init__(self, drop_prob=None): method forward (line 54) | def forward(self, x): class WrappedDropout (line 58) | class WrappedDropout(nn.Module): method __init__ (line 74) | def __init__(self, p: float = 0.5, inplace: bool = False, mode=None): method nonefunc (line 86) | def nonefunc(self, inputs): method normalfunc (line 89) | def normalfunc(self, inputs): method forward (line 93) | def forward(self, inputs): class WrappedDropPath (line 97) | class WrappedDropPath(nn.Module): method __init__ (line 110) | def __init__(self, p: float = 0.0, mode=None): method nonefunc (line 120) | def nonefunc(self, inputs): method normalfunc (line 123) | def normalfunc(self, inputs): method forward (line 127) | def forward(self, inputs): class VanillaPatchEmbedding (line 132) | class VanillaPatchEmbedding(nn.Module): method __init__ (line 154) | def __init__( method reset_parameters (line 192) | def reset_parameters(self, weight_initializer, bias_initializer, posit... method forward (line 198) | def forward(self, input_: Tensor) -> Tensor: class VanillaClassifier (line 214) | class VanillaClassifier(nn.Module): method __init__ (line 232) | def __init__( method reset_parameters (line 265) | def reset_parameters(self, weight_initializer, bias_initializer): method forward (line 274) | def forward(self, input_: Tensor) -> Tensor: class VanillaLayerNorm (line 279) | class VanillaLayerNorm(nn.Module): method __init__ (line 294) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=... method forward (line 308) | def forward(self, x: Tensor) -> Tensor: class VanillaLinear (line 313) | class VanillaLinear(nn.Module): method __init__ (line 331) | def __init__( method forward (line 356) | def forward(self, input: Tensor) -> Tensor: FILE: colossalai/legacy/nn/layer/wrapper/pipeline_wrapper.py class PipelineSharedModuleWrapper (line 10) | class PipelineSharedModuleWrapper: method __init__ (line 11) | def __init__(self, pipeline_ranks: Union[List[int], Tuple[int]]) -> None: method _init_group (line 18) | def _init_group(self): method register_module (line 34) | def register_module(self, module: nn.Module): method register_parameter (line 43) | def register_parameter(self, param: nn.Parameter): FILE: colossalai/legacy/nn/loss/__init__.py class CrossEntropyLoss (line 27) | class CrossEntropyLoss(_Loss): method __init__ (line 28) | def __init__(self, reduction: bool = True, *args, **kwargs): method forward (line 39) | def forward(self, *args): FILE: colossalai/legacy/nn/loss/loss_1d.py class _VocabParallelCrossEntropy1D (line 11) | class _VocabParallelCrossEntropy1D(torch.autograd.Function): method forward (line 14) | def forward(ctx, vocab_parallel_logits, targets, process_group): method backward (line 62) | def backward(ctx, grad_output): class VocabParallelCrossEntropyLoss1D (line 83) | class VocabParallelCrossEntropyLoss1D(_Loss): method __init__ (line 90) | def __init__(self, reduction=True): method forward (line 94) | def forward(self, logits, targets, process_group=None): FILE: colossalai/legacy/nn/loss/loss_2d.py class CrossEntropyLoss2D (line 16) | class CrossEntropyLoss2D(_Loss): method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs): method forward (line 42) | def forward(self, logits, targets): class _VocabParallelCrossEntropy2D (line 60) | class _VocabParallelCrossEntropy2D(torch.autograd.Function): method forward (line 65) | def forward(ctx, logits, targets): method backward (line 109) | def backward(ctx, output_grad): class VocabParallelCrossEntropyLoss2D (line 131) | class VocabParallelCrossEntropyLoss2D(_Loss): method __init__ (line 138) | def __init__(self, reduction=True): method forward (line 142) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/loss/loss_2p5d.py class CrossEntropyLoss2p5D (line 16) | class CrossEntropyLoss2p5D(_Loss): method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs): method forward (line 42) | def forward(self, logits, targets): class _VocabParallelCrossEntropy2p5D (line 57) | class _VocabParallelCrossEntropy2p5D(torch.autograd.Function): method forward (line 62) | def forward(ctx, logits, targets): method backward (line 103) | def backward(ctx, output_grad): class VocabParallelCrossEntropyLoss2p5D (line 125) | class VocabParallelCrossEntropyLoss2p5D(_Loss): method __init__ (line 133) | def __init__(self, reduction=True): method forward (line 137) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/loss/loss_3d.py class CrossEntropyLoss3D (line 16) | class CrossEntropyLoss3D(_Loss): method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs): method forward (line 43) | def forward(self, logits, targets): class _VocabParallelCrossEntropy3D (line 59) | class _VocabParallelCrossEntropy3D(torch.autograd.Function): method forward (line 65) | def forward(ctx, logits, targets, output_parallel_mode): method backward (line 102) | def backward(ctx, output_grad): class VocabParallelCrossEntropyLoss3D (line 121) | class VocabParallelCrossEntropyLoss3D(_Loss): method __init__ (line 128) | def __init__(self, reduction=True): method forward (line 135) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/metric/__init__.py class Accuracy (line 17) | class Accuracy(nn.Module): method __init__ (line 18) | def __init__(self): method forward (line 26) | def forward(self, *args): FILE: colossalai/legacy/nn/metric/_utils.py function calc_acc (line 4) | def calc_acc(logits, targets): FILE: colossalai/legacy/nn/metric/accuracy_2d.py class Accuracy2D (line 9) | class Accuracy2D(nn.Module): method __init__ (line 12) | def __init__(self): method forward (line 15) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/metric/accuracy_2p5d.py class Accuracy2p5D (line 9) | class Accuracy2p5D(nn.Module): method __init__ (line 12) | def __init__(self): method forward (line 15) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/metric/accuracy_3d.py class Accuracy3D (line 11) | class Accuracy3D(nn.Module): method __init__ (line 14) | def __init__(self): method forward (line 19) | def forward(self, logits, targets): FILE: colossalai/legacy/nn/parallel/data_parallel.py function free_storage (line 14) | def free_storage(data: torch.Tensor) -> None: function _cast_float (line 23) | def _cast_float(args, dtype: torch.dtype): class ColoDDP (line 33) | class ColoDDP(torch.nn.Module): method __init__ (line 52) | def __init__( method parameters (line 76) | def parameters(self, recurse: bool = True): method named_parameters (line 79) | def named_parameters(self, prefix: str = "", recurse: bool = True): method named_buffers (line 82) | def named_buffers(self, prefix: str = "", recurse: bool = True): method named_children (line 85) | def named_children(self): method named_modules (line 88) | def named_modules( method forward (line 93) | def forward(self, *args, **kwargs): method backward (line 97) | def backward(self, loss: torch.Tensor): method grad_handle (line 110) | def grad_handle(self, p, grad): method _save_grad (line 133) | def _save_grad(p, grad): method zero_grad (line 139) | def zero_grad(self, set_to_none: bool = False) -> None: method set_params_to_ignore (line 153) | def set_params_to_ignore(params_to_ignore: Iterable[torch.Tensor]) -> ... method state_dict (line 171) | def state_dict(self, destination=None, prefix="", keep_vars=False): method load_state_dict (line 174) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"... FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/base_embedding.py class BaseEmbeddingBag (line 6) | class BaseEmbeddingBag(abc.ABC, nn.Module): method __init__ (line 7) | def __init__( FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/cache_mgr.py class EvictionStrategy (line 14) | class EvictionStrategy(Enum): function _wait_for_data (line 20) | def _wait_for_data(t, stream: Optional[torch.cuda.streams.Stream]) -> None: class CachedParamMgr (line 37) | class CachedParamMgr(torch.nn.Module): method __init__ (line 54) | def __init__( method _reset_comm_stats (line 100) | def _reset_comm_stats(self): method timer (line 111) | def timer(self, name): method _find_evict_gpu_idxs (line 120) | def _find_evict_gpu_idxs(self, evict_num: int) -> torch.Tensor: method _init_weight (line 142) | def _init_weight(self, weight): method cpu_weight_data (line 188) | def cpu_weight_data(self, row_idx: int) -> torch.Tensor: method cuda_available_row_num (line 204) | def cuda_available_row_num(self): method reorder (line 208) | def reorder(self, ids_freq_mapping: Optional[List[int]] = None, warmup... method flush (line 273) | def flush(self): method print_comm_stats (line 295) | def print_comm_stats(self): method _id_to_cached_cuda_id (line 315) | def _id_to_cached_cuda_id(self, ids: torch.Tensor) -> torch.Tensor: method prepare_ids (line 329) | def prepare_ids(self, ids: torch.Tensor) -> torch.Tensor: method _row_in_cuda (line 382) | def _row_in_cuda(self, row_id: int) -> bool: method _prepare_rows_on_cuda (line 386) | def _prepare_rows_on_cuda(self, cpu_row_idxs: torch.Tensor) -> None: method _find_free_cuda_row (line 529) | def _find_free_cuda_row(self) -> int: method _evict (line 535) | def _evict(self) -> int: method _admit (line 576) | def _admit(self, row_id: int): FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/cached_embedding.py class CachedEmbeddingBag (line 11) | class CachedEmbeddingBag(BaseEmbeddingBag): method __init__ (line 39) | def __init__( method set_cache_mgr_async_copy (line 81) | def set_cache_mgr_async_copy(self, flag): method _weight_alloc (line 84) | def _weight_alloc(self, dtype, device): method _preprocess (line 92) | def _preprocess( method forward (line 116) | def forward(self, input, offsets=None, per_sample_weights=None, shape_... method weight (line 139) | def weight(self): method named_parameters (line 142) | def named_parameters(self, prefix: str = "", recurse: bool = True) -> ... method parameters (line 145) | def parameters(self, recurse: bool = True) -> Iterator[Parameter]: method set_cache_op (line 148) | def set_cache_op(self, cache_op: bool = True): method num_hits_history (line 154) | def num_hits_history(self): method num_miss_history (line 158) | def num_miss_history(self): method num_write_back_history (line 162) | def num_write_back_history(self): method swap_in_bandwidth (line 166) | def swap_in_bandwidth(self): method swap_out_bandwidth (line 178) | def swap_out_bandwidth(self): FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/copyer.py class LimitBuffIndexCopyer (line 5) | class LimitBuffIndexCopyer(object): method __init__ (line 13) | def __init__(self, size: int) -> None: method index_copy (line 17) | def index_copy(self, dim: int, src_index: LongTensor, tgt_index: LongT... FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/embedding_config.py class TablewiseEmbeddingBagConfig (line 4) | class TablewiseEmbeddingBagConfig: method __init__ (line 13) | def __init__( FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding.py function get_partition (line 14) | def get_partition(embedding_dim, rank, world_size) -> Tuple[int, int, bo... class ParallelCachedEmbeddingBag (line 33) | class ParallelCachedEmbeddingBag(CachedEmbeddingBag): method __init__ (line 34) | def __init__( method _weight_alloc (line 85) | def _weight_alloc(self, dtype, device): method forward (line 98) | def forward( method set_cache_op (line 130) | def set_cache_op(self, cache_op: bool = True): method from_pretrained (line 134) | def from_pretrained( method print_comm_stats_ (line 170) | def print_comm_stats_(self): method element_size (line 173) | def element_size(self): FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise.py class ParallelCachedEmbeddingBagTablewise (line 15) | class ParallelCachedEmbeddingBagTablewise(CachedEmbeddingBag): method __init__ (line 21) | def __init__( method forward (line 104) | def forward( method split_along_rank (line 146) | def split_along_rank( method set_cache_op (line 222) | def set_cache_op(self, cache_op: bool = True): method print_comm_stats_ (line 225) | def print_comm_stats_(self): method element_size (line 228) | def element_size(self): FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise_split_cache.py class ParallelCachedEmbeddingBagTablewiseSpiltCache (line 17) | class ParallelCachedEmbeddingBagTablewiseSpiltCache(abc.ABC, nn.Module): method __init__ (line 22) | def __init__( method forward (line 88) | def forward(self, indices: torch.Tensor, offsets: torch.Tensor = None,... method element_size (line 135) | def element_size(self): method print_comm_stats_ (line 140) | def print_comm_stats_(self): FILE: colossalai/legacy/nn/parallel/layers/colo_module.py class ColoModule (line 7) | class ColoModule(object): method __init__ (line 8) | def __init__(self): method _register_shard_params (line 12) | def _register_shard_params(self, params: List[str]): method _register_allowed_patterns (line 15) | def _register_allowed_patterns( method _set_default (line 25) | def _set_default(self, compute_pattern: ComputePattern, target_mode): method has_compute_pattern (line 28) | def has_compute_pattern(self, compute_pattern: ComputePattern): method get_dist_specs (line 31) | def get_dist_specs(self, compute_pattern: ComputePattern): method has_compute_pattern_with_mode (line 35) | def has_compute_pattern_with_mode(self, compute_pattern: ComputePatter... method get_dist_specs_with_mode (line 38) | def get_dist_specs_with_mode(self, compute_pattern: ComputePattern, mo... method get_param_names (line 42) | def get_param_names(self): method register (line 45) | def register(self, compute_pattern, pg): FILE: colossalai/legacy/nn/parallel/layers/embedding.py class ColoEmbedding (line 6) | class ColoEmbedding(ColoModule): method __init__ (line 7) | def __init__(self): method register (line 11) | def register(self, compute_pattern, pg: ProcessGroup): method _set_TP1D (line 16) | def _set_TP1D(self, pg: ProcessGroup): FILE: colossalai/legacy/nn/parallel/layers/linear.py class ColoLinear (line 6) | class ColoLinear(ColoModule): method __init__ (line 7) | def __init__(self): method register (line 11) | def register(self, compute_pattern, pg: ProcessGroup): method _set_TP1D (line 16) | def _set_TP1D(self, pg): FILE: colossalai/legacy/nn/parallel/layers/module_utils.py function register_colo_module (line 13) | def register_colo_module(module_type: type, colo_module: ColoModule): function is_colo_module (line 18) | def is_colo_module(module: torch.nn.Module): function get_colo_module (line 26) | def get_colo_module(module: torch.nn.Module): function check_colo_module (line 36) | def check_colo_module(module: torch.nn.Module, pg: ProcessGroup, recursi... function init_colo_module (line 88) | def init_colo_module( FILE: colossalai/legacy/nn/parallel/reducer.py class Bucket (line 15) | class Bucket: method __init__ (line 16) | def __init__(self, size: int, dtype: torch.dtype, device: torch.device... method flush (line 22) | def flush(self) -> None: method alloc (line 38) | def alloc(self) -> None: method free (line 42) | def free(self) -> None: method append (line 46) | def append(self, tensor: Tensor, callback_fn: Callable): method avail_size (line 58) | def avail_size(self) -> int: class Reducer (line 62) | class Reducer: method __init__ (line 63) | def __init__(self, bucket_size_mb: int = 25): method all_reduce_async (line 68) | def all_reduce_async( method flush (line 89) | def flush(self) -> None: method free (line 94) | def free(self) -> None: method _get_bucket_size (line 99) | def _get_bucket_size(self, element_size: int) -> int: method _get_bucket (line 106) | def _get_bucket(self, tensor: Tensor, group: ProcessGroup) -> Bucket: FILE: colossalai/legacy/pipeline/layer_spec.py class LayerSpec (line 6) | class LayerSpec: method __init__ (line 9) | def __init__(self, typename, *module_args, **module_kwargs): method __repr__ (line 19) | def __repr__(self): method param_count (line 23) | def param_count(self): method build (line 26) | def build(self): method set_children (line 44) | def set_children(self, children): method count_params (line 47) | def count_params(self): method reset_param_count (line 54) | def reset_param_count(self): FILE: colossalai/legacy/pipeline/middleware/adaptor/fx.py function partition_name_to_id (line 7) | def partition_name_to_id(partition_name, is_input=False, is_output=False): function find_input_in_partition (line 28) | def find_input_in_partition(node, partitions, input_partitions=None): function find_output_in_partition (line 57) | def find_output_in_partition(node, partitions, output_partitions=None): function get_topology (line 94) | def get_topology(gm: GraphModule): FILE: colossalai/legacy/pipeline/middleware/topo.py class ValPosition (line 8) | class ValPosition: method __str__ (line 12) | def __str__(self) -> str: method __repr__ (line 16) | def __repr__(self) -> str: class PartitionInputVal (line 20) | class PartitionInputVal(object): method __init__ (line 21) | def __init__(self, partition_id, offset) -> None: method get (line 26) | def get(self): method __str__ (line 29) | def __str__(self) -> str: method __repr__ (line 34) | def __repr__(self) -> str: class PartitionOutputVal (line 38) | class PartitionOutputVal(object): method __init__ (line 39) | def __init__(self) -> None: method add (line 43) | def add(self, partition_id, offset): method get (line 47) | def get(self): method __str__ (line 50) | def __str__(self) -> str: method __repr__ (line 58) | def __repr__(self) -> str: class Partition (line 62) | class Partition(object): method __init__ (line 63) | def __init__(self) -> None: method add_input_val (line 67) | def add_input_val(self, input_val: PartitionInputVal): method add_output_val (line 70) | def add_output_val(self, output_val: PartitionOutputVal): method get_input_vals (line 73) | def get_input_vals(self): method get_output_vals (line 76) | def get_output_vals(self): method get_output_offsets (line 80) | def get_output_offsets(self, dst_partition_id): method get_input_partition_ids (line 91) | def get_input_partition_ids(self): method get_output_partition_ids (line 100) | def get_output_partition_ids(self): method __str__ (line 109) | def __str__(self) -> str: method __repr__ (line 123) | def __repr__(self) -> str: class Topo (line 139) | class Topo(object): method __init__ (line 140) | def __init__(self, input_partition_id=None, output_partition_id=None) ... method set_input_partition_id (line 145) | def set_input_partition_id(self, partition_id: int): method set_output_partition_id (line 148) | def set_output_partition_id(self, partition_id: int): method get_input_partition_id (line 151) | def get_input_partition_id(self): method get_output_partition_id (line 154) | def get_output_partition_id(self): method set_partitions (line 157) | def set_partitions(self, partition_id: int, partition: Partition): method get_mid_partitions (line 160) | def get_mid_partitions(self): method get_mid_partition_ids (line 168) | def get_mid_partition_ids(self): method get_input_partition (line 171) | def get_input_partition(self): method get_output_partition (line 176) | def get_output_partition(self): method get_partition_by_id (line 181) | def get_partition_by_id(self, partition_id): method __str__ (line 184) | def __str__(self) -> str: method __repr__ (line 209) | def __repr__(self) -> str: FILE: colossalai/legacy/pipeline/pipelinable.py class PipelinableContext (line 20) | class PipelinableContext(InsertPostInitMethodToModuleSubClasses): method __init__ (line 25) | def __init__(self, policy: str = "balanced"): method policy (line 35) | def policy(self): method policy (line 39) | def policy(self, policy: str): method layers_count (line 43) | def layers_count(self): method funcs_count (line 47) | def funcs_count(self): method _pre_context_exec (line 50) | def _pre_context_exec(self): method _post_context_exec (line 58) | def _post_context_exec(self): method _post_init_method (line 67) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs): method to_layer_list (line 122) | def to_layer_list(self, exec_seq=None): method partition (line 187) | def partition(self, num_chunks, pipeline_size, rank): class PipelinableModel (line 237) | class PipelinableModel(torch.nn.Module): method __init__ (line 238) | def __init__(self, module_list, front_func_dict, behind_func_dict): method forward (line 244) | def forward(self, *input_tensor, **kwargs): FILE: colossalai/legacy/pipeline/pipeline_process_group.py class PipelineProcessGroup (line 10) | class PipelineProcessGroup: method __init__ (line 13) | def __init__(self) -> None: method set_global_info (line 16) | def set_global_info( method _initialize_process_group (line 56) | def _initialize_process_group(self): method _initialize_pp_process_group (line 66) | def _initialize_pp_process_group(self) -> None: method _initialize_tp_dp_process_group (line 78) | def _initialize_tp_dp_process_group(self) -> None: method get_global_rank (line 85) | def get_global_rank(self): method get_world_size (line 88) | def get_world_size(self): method get_dp_degree (line 91) | def get_dp_degree(self) -> int: method get_tp_degree (line 94) | def get_tp_degree(self) -> int: method get_local_device_mesh_size (line 97) | def get_local_device_mesh_size(self) -> int: method get_device_mesh_num (line 100) | def get_device_mesh_num(self) -> int: method get_stage_num (line 103) | def get_stage_num(self) -> int: method is_first_stage (line 106) | def is_first_stage(self) -> bool: method is_last_stage (line 109) | def is_last_stage(self) -> bool: method check_pp_rank_valid (line 112) | def check_pp_rank_valid(self, pp_rank: int) -> bool: method get_local_pp_rank (line 115) | def get_local_pp_rank(self) -> int: method get_prev_pp_rank (line 118) | def get_prev_pp_rank(self) -> int: method get_next_pp_rank (line 124) | def get_next_pp_rank(self) -> int: method get_local_stage_global_ranks (line 130) | def get_local_stage_global_ranks(self) -> List[int]: method local_dp_rank (line 133) | def local_dp_rank(self) -> int: method local_tp_rank (line 136) | def local_tp_rank(self) -> int: method get_pp_global_ranks (line 139) | def get_pp_global_ranks(self) -> int: method get_dp_global_ranks (line 142) | def get_dp_global_ranks(self): method get_tp_global_ranks (line 145) | def get_tp_global_ranks(self): method get_chimera_all_reduce_group (line 148) | def get_chimera_all_reduce_group(self, pp_rank: int): FILE: colossalai/legacy/pipeline/rpc/_pipeline_base.py class Phase (line 20) | class Phase(Enum): class UniqueKey (line 27) | class UniqueKey: method __init__ (line 32) | def __init__(self, microbatch_id, phase) -> None: method __eq__ (line 36) | def __eq__(self, __o: object) -> bool: method __hash__ (line 39) | def __hash__(self) -> int: method __repr__ (line 42) | def __repr__(self) -> str: class WorkItem (line 46) | class WorkItem: method __init__ (line 71) | def __init__( class BackwardCache (line 78) | class BackwardCache: method __init__ (line 85) | def __init__( class WorkerBase (line 96) | class WorkerBase(ABC): method __init__ (line 97) | def __init__( method _get_future_by_device (line 153) | def _get_future_by_device(self): method _initialize_outstanding_range (line 156) | def _initialize_outstanding_range(self): method _initialize_context_container (line 164) | def _initialize_context_container(self): method _initialize_lock (line 170) | def _initialize_lock(self): method _initialize_partition (line 177) | def _initialize_partition(self): method _get_output_all (line 185) | def _get_output_all(self, key: UniqueKey, ref_use=False, rank=None): method sync_global_worker_rrefs (line 221) | def sync_global_worker_rrefs(self, pp_rank_to_worker_rref: Dict[int, P... method get_output_by_key (line 233) | def get_output_by_key(self, key: UniqueKey, ref_use=False, rank=None, ... method get_numels (line 241) | def get_numels(self) -> int: method get_parameters (line 245) | def get_parameters(self) -> List[torch.Tensor]: method get_parameter_gradients (line 248) | def get_parameter_gradients(self) -> List[torch.Tensor]: method get_partition (line 251) | def get_partition(self): method get_partition_state_dict (line 256) | def get_partition_state_dict(self): method _make_args_kwargs (line 261) | def _make_args_kwargs(self, microbatch, merge=False): method set_input (line 286) | def set_input(self, microbatch_id: int, microbatch: Tuple[Any], forwar... method set_labels (line 349) | def set_labels(self, microbatch_id: int, microlabels: Any): method _begin_backward (line 355) | def _begin_backward(self, microbatch_id: int): method _subscribe_producer (line 378) | def _subscribe_producer(self, microbatch_id: int, forward_only: bool): method subscribe_producer (line 451) | def subscribe_producer(self, microbatch_id: int, forward_only: bool): method _subscribe_consumer (line 463) | def _subscribe_consumer(self, microbatch_id: int): method subscribe_consumer (line 503) | def subscribe_consumer(self, microbatch_id: int): method get_producer_stage_ids (line 515) | def get_producer_stage_ids(self): method get_consumer_stage_ids (line 535) | def get_consumer_stage_ids(self): method _get_producer_consumer (line 553) | def _get_producer_consumer(self) -> None: method pp_rank_to_partition_id (line 562) | def pp_rank_to_partition_id(self, pp_rank: int, topo: Topo): method partition_id_to_pp_rank (line 566) | def partition_id_to_pp_rank(self, partition_id: int, topo: Topo): method get_topo (line 572) | def get_topo(self): method use_middleware (line 580) | def use_middleware(self): method _get_input_offsets_by_index (line 584) | def _get_input_offsets_by_index(self, target_index): method _get_output_offsets_by_index (line 625) | def _get_output_offsets_by_index(self, target_index): method _get_real_args_kwargs_fwd (line 653) | def _get_real_args_kwargs_fwd(self, args_or_kwargs): method _get_real_args_kwargs_bwd (line 715) | def _get_real_args_kwargs_bwd(self, args_or_kwargs): method _get_work_item_key (line 766) | def _get_work_item_key(self) -> UniqueKey: method is_first_stage (line 771) | def is_first_stage(self): method is_last_stage (line 774) | def is_last_stage(self): method need_model_input (line 777) | def need_model_input(self): method is_model_output (line 788) | def is_model_output(self): method is_model_input (line 791) | def is_model_input(self): method _default_data_process_func (line 794) | def _default_data_process_func(self, args_kwargs): method _consume_work_item_by_phase (line 804) | def _consume_work_item_by_phase(self, work_item: WorkItem): method _get_store_len (line 982) | def _get_store_len(self): method _get_parameter_grad_sum (line 985) | def _get_parameter_grad_sum(self): method _is_first_step (line 992) | def _is_first_step(self, work_item: WorkItem) -> bool: method _is_last_step (line 995) | def _is_last_step(self, work_item: WorkItem) -> bool: method _hook_before_step (line 1004) | def _hook_before_step(self): method _wait_for_reset (line 1008) | def _wait_for_reset(self): method _work_loop (line 1014) | def _work_loop(self): method reset_context (line 1043) | def reset_context(self): method initialize_optimizer (line 1058) | def initialize_optimizer(self, optimizer_class: type, **kwargs): method step (line 1061) | def step(self): class PipelineEngineBase (line 1067) | class PipelineEngineBase(ABC, nn.Module): method __init__ (line 1068) | def __init__( method _check_argument (line 1102) | def _check_argument(self) -> None: method _get_actual_stage_num (line 1118) | def _get_actual_stage_num(self) -> int: method _create_pp_rank_to_rpc_worker_id (line 1121) | def _create_pp_rank_to_rpc_worker_id(self) -> None: method _create_pp_rank_to_module_partition_id (line 1133) | def _create_pp_rank_to_module_partition_id(self) -> None: method _init_worker (line 1142) | def _init_worker(self) -> None: method remote_numels (line 1191) | def remote_numels(self) -> Dict[int, int]: method remote_parameters (line 1200) | def remote_parameters(self) -> Dict[int, List[torch.Tensor]]: method remote_grad (line 1210) | def remote_grad(self) -> Dict[int, List[torch.Tensor]]: method get_input_pp_ranks (line 1220) | def get_input_pp_ranks(self) -> List[int]: method get_output_pp_ranks (line 1223) | def get_output_pp_ranks(self) -> List[int]: method _consume_constraint (line 1226) | def _consume_constraint( method _create_ret_future (line 1246) | def _create_ret_future(self, output_pp_ranks: List[int]) -> Dict[int, ... method _set_input (line 1250) | def _set_input(self, input_pp_ranks: List[int], microbatch_id: int, mi... method _set_labels (line 1256) | def _set_labels(self, output_pp_ranks: List[int], microbatch_id: int, ... method _subscribe_forward (line 1263) | def _subscribe_forward(self, microbatch_id: int, output_pp_ranks: List... method _ensure_backward (line 1269) | def _ensure_backward(self, forward_only: bool, input_pp_ranks: List[in... method _collect_forward_result (line 1283) | def _collect_forward_result(self, output_pp_ranks: List[int], ret_futu... method _reset_worker (line 1298) | def _reset_worker(self): method forward_backward (line 1309) | def forward_backward(self, batch: torch.Tensor, labels: torch.Tensor =... method initialize_optimizer (line 1365) | def initialize_optimizer(self, optimizer_class: type, **kwargs): method step (line 1371) | def step(self): FILE: colossalai/legacy/pipeline/rpc/_pipeline_schedule.py class FillDrainWorker (line 16) | class FillDrainWorker(WorkerBase): method _get_work_item_key (line 17) | def _get_work_item_key(self) -> UniqueKey: class FillDrainPipelineEngine (line 33) | class FillDrainPipelineEngine(PipelineEngineBase): method __init__ (line 34) | def __init__( class OneFOneBWorker (line 67) | class OneFOneBWorker(WorkerBase): method _get_work_item_key (line 68) | def _get_work_item_key(self) -> UniqueKey: class OneFOneBPipelineEngine (line 101) | class OneFOneBPipelineEngine(PipelineEngineBase): method __init__ (line 102) | def __init__( class ChimeraWorker (line 136) | class ChimeraWorker(WorkerBase): method _get_producer_consumer (line 137) | def _get_producer_consumer(self) -> None: method _get_work_item_key (line 157) | def _get_work_item_key(self) -> UniqueKey: method _initialize_partition (line 185) | def _initialize_partition(self): method _get_lock_gradient (line 212) | def _get_lock_gradient(self): method is_first_stage (line 218) | def is_first_stage(self): method is_last_stage (line 221) | def is_last_stage(self): method _is_last_step (line 224) | def _is_last_step(self, work_item: WorkItem) -> bool: method _get_step_order (line 236) | def _get_step_order(self) -> List[int]: method _hook_before_step (line 245) | def _hook_before_step(self): class ChimeraPipelineEngine (line 264) | class ChimeraPipelineEngine(PipelineEngineBase): method __init__ (line 265) | def __init__( method _consume_constraint (line 294) | def _consume_constraint( method _create_pp_rank_to_rpc_worker_id (line 299) | def _create_pp_rank_to_rpc_worker_id(self) -> None: method _create_pp_rank_to_module_partition_id (line 306) | def _create_pp_rank_to_module_partition_id(self) -> None: method _create_ret_future (line 313) | def _create_ret_future(self, output_pp_ranks: List[int]) -> Dict[int, ... method _set_input (line 321) | def _set_input(self, input_pp_ranks: List[int], microbatch_id: int, mi... method _set_labels (line 329) | def _set_labels(self, output_pp_ranks: List[int], microbatch_id: int, ... method _subscribe_forward (line 337) | def _subscribe_forward(self, microbatch_id: int, output_pp_ranks: List... method _ensure_backward (line 344) | def _ensure_backward(self, forward_only: bool, input_pp_ranks: List[in... method _collect_forward_result (line 360) | def _collect_forward_result(self, output_pp_ranks: List[int], ret_futu... FILE: colossalai/legacy/pipeline/rpc/utils.py function pyobj_map (line 16) | def pyobj_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[T... function pytree_map (line 29) | def pytree_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[... function tensor_shape_list (line 53) | def tensor_shape_list(obj): function get_batch_lengths (line 57) | def get_batch_lengths(batch): function split_batch (line 63) | def split_batch(batch: Any, start, stop, device: str): function type_detail (line 71) | def type_detail(obj): function pytree_filter (line 75) | def pytree_filter(fn, obj, process_types): function get_real_args_kwargs (line 89) | def get_real_args_kwargs(args_or_kwargs): function run_worker (line 104) | def run_worker(rank, args, master_func): function rpc_run (line 137) | def rpc_run(args, master_func): function parse_args (line 142) | def parse_args(): FILE: colossalai/legacy/pipeline/utils.py function _binary_partition (line 12) | def _binary_partition(weights: List, start: int, end: int): function _heap_addition (line 40) | def _heap_addition(weights: List, intervals: int, add_cnt: int): function _calc_partitions (line 73) | def _calc_partitions(weights, value): function _binary_search (line 90) | def _binary_search(weights, num): function partition_uniform (line 114) | def partition_uniform(num_items, pipeline_parallel_size, num_chunks): function partition_balanced (line 137) | def partition_balanced(weights, pipeline_parallel_size, num_chunks): function build_kwargs_for_module (line 154) | def build_kwargs_for_module(function, input_tensor, kw_dict): function build_kwargs_for_function (line 176) | def build_kwargs_for_function(function, kw_dict): function exec_func_with_kwargs (line 184) | def exec_func_with_kwargs(func, kw_dict, input_tensor, kwargs): function exec_funcs_with_kwargs (line 221) | def exec_funcs_with_kwargs(func_dict, func_key, input_tensor, kwargs): function call_module (line 235) | def call_module(module, args=None, kwargs=None): function customized_partition (line 258) | def customized_partition(exec_seq): FILE: colossalai/legacy/registry/registry.py class Registry (line 8) | class Registry: method __init__ (line 18) | def __init__(self, name: str, third_party_library: List[ModuleType] = ... method name (line 24) | def name(self): method register_module (line 27) | def register_module(self, module_class): method get_module (line 44) | def get_module(self, module_name: str): method has (line 64) | def has(self, module_name: str): FILE: colossalai/legacy/tensor/compute_spec.py class ComputePattern (line 4) | class ComputePattern(Enum): class ComputeSpec (line 11) | class ComputeSpec(object): method __init__ (line 19) | def __init__(self, compute_pattern: ComputePattern) -> None: method __repr__ (line 25) | def __repr__(self): method set_output_replicate (line 28) | def set_output_replicate(self, flag: bool = True): FILE: colossalai/legacy/tensor/const.py class TensorType (line 4) | class TensorType(Enum): FILE: colossalai/legacy/tensor/dist_spec_mgr.py function divide (line 13) | def divide(numerator, denominator): class TransformDistSpec (line 28) | class TransformDistSpec(torch.autograd.Function): method forward (line 30) | def forward(ctx, tensor, old_dist_spec, dist_spec, pg, forward_trans_f... method backward (line 38) | def backward(ctx, grad_outputs): class DistSpecManager (line 49) | class DistSpecManager: method _sanity_check (line 53) | def _sanity_check(old_dist_spec: _DistSpec, dist_spec: _DistSpec) -> N... method _shard_as (line 57) | def _shard_as( method _gather (line 86) | def _gather(tensor: torch.Tensor, old_dist_spec: _DistSpec, pg: Proces... method _all_to_all (line 121) | def _all_to_all( method _r2r (line 149) | def _r2r(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D... method _r2s (line 154) | def _r2s(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D... method _s2r (line 159) | def _s2r(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D... method _s2s (line 164) | def _s2s(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D... method handle_trans_spec (line 175) | def handle_trans_spec( method no_grad (line 201) | def no_grad(): FILE: colossalai/legacy/tensor/distspec.py class DistPlacementPattern (line 7) | class DistPlacementPattern(Enum): class _DistSpec (line 12) | class _DistSpec: method __init__ (line 27) | def __init__(self, dist_placement_pattern: DistPlacementPattern, **met... method __eq__ (line 32) | def __eq__(self, other: "_DistSpec") -> bool: method __repr__ (line 40) | def __repr__(self) -> str: function ReplicaSpec (line 49) | def ReplicaSpec() -> _DistSpec: function ShardSpec (line 60) | def ShardSpec(dims: List[int], num_partitions: List[int]) -> _DistSpec: FILE: colossalai/legacy/tensor/op_wrapper.py function _register_colo_op (line 8) | def _register_colo_op(op, func): function colo_op_impl (line 13) | def colo_op_impl(func): FILE: colossalai/legacy/tensor/process_group.py class PyTorchProcessGroupDict (line 9) | class PyTorchProcessGroupDict(metaclass=SingletonMeta): method __init__ (line 10) | def __init__(self): method log_pg_init (line 17) | def log_pg_init(self, rank_list: List[int], backend: str): method get (line 23) | def get(self, rank_list: List[int], backend: str = "nccl"): class ProcessGroup (line 37) | class ProcessGroup: method __init__ (line 52) | def __init__( method set_cpu_groups (line 121) | def set_cpu_groups(self): method has_cpu_groups (line 139) | def has_cpu_groups(self) -> bool: method __repr__ (line 148) | def __repr__(self): method __eq__ (line 156) | def __eq__(self, obj: "ProcessGroup") -> bool: method rank (line 173) | def rank(self) -> int: method ranks_in_group (line 183) | def ranks_in_group(self) -> List[int]: method world_size (line 193) | def world_size(self) -> int: method tp_rank_list (line 203) | def tp_rank_list(self) -> List[int]: method dp_rank_list (line 213) | def dp_rank_list(self) -> List[int]: method tp_local_rank (line 223) | def tp_local_rank(self) -> int: method dp_local_rank (line 233) | def dp_local_rank(self) -> int: method dp_world_size (line 243) | def dp_world_size(self) -> int: method tp_world_size (line 253) | def tp_world_size(self) -> int: method dp_process_group (line 263) | def dp_process_group(self): method tp_process_group (line 273) | def tp_process_group(self): method cpu_dp_process_group (line 283) | def cpu_dp_process_group(self): method cpu_tp_process_group (line 296) | def cpu_tp_process_group(self): method get_ranks_in_dp (line 309) | def get_ranks_in_dp(self) -> List[int]: method get_ranks_in_tp (line 319) | def get_ranks_in_tp(self): FILE: colossalai/legacy/tensor/tensor_spec.py class ColoTensorSpec (line 11) | class ColoTensorSpec: FILE: colossalai/legacy/trainer/_trainer.py class Trainer (line 14) | class Trainer: method __init__ (line 53) | def __init__( method cur_epoch (line 81) | def cur_epoch(self): method cur_epoch (line 86) | def cur_epoch(self, epoch: int): method cur_step (line 92) | def cur_step(self): method max_epochs (line 97) | def max_epochs(self): method max_steps (line 101) | def max_steps(self): method steps_per_epoch (line 105) | def steps_per_epoch(self): method engine (line 109) | def engine(self): method _set_current_step (line 112) | def _set_current_step(self, epoch: int): method _call_timer (line 120) | def _call_timer(self, action: str, item: str, *args, **kwargs) -> None: method _reset_states (line 133) | def _reset_states(self) -> None: method _call_hooks (line 137) | def _call_hooks(self, func, output=None): method _should_display_progress (line 152) | def _should_display_progress(display_progress: bool): method _train_epoch (line 156) | def _train_epoch( method _eval (line 205) | def _eval( method _exceed_max_step (line 252) | def _exceed_max_step(self): method fit (line 255) | def fit( method evaluate (line 348) | def evaluate( method predict (line 391) | def predict(self, data: Union[Any, List[Any]]): FILE: colossalai/legacy/trainer/hooks/_base_hook.py class BaseHook (line 9) | class BaseHook(ABC): method __init__ (line 17) | def __init__(self, priority: int) -> None: method after_hook_is_attached (line 20) | def after_hook_is_attached(self, trainer): method before_train (line 23) | def before_train(self, trainer): method after_train (line 26) | def after_train(self, trainer): method before_train_iter (line 29) | def before_train_iter(self, trainer): method after_train_iter (line 32) | def after_train_iter(self, trainer, output: Tensor, label: Tensor, los... method before_train_epoch (line 42) | def before_train_epoch(self, trainer): method after_train_epoch (line 45) | def after_train_epoch(self, trainer): method before_test (line 48) | def before_test(self, trainer): method after_test (line 51) | def after_test(self, trainer): method before_test_epoch (line 54) | def before_test_epoch(self, trainer): method after_test_epoch (line 57) | def after_test_epoch(self, trainer): method before_test_iter (line 60) | def before_test_iter(self, trainer): method after_test_iter (line 63) | def after_test_iter(self, trainer, output: Tensor, label: Tensor, loss... method init_runner_states (line 73) | def init_runner_states(self, trainer, key, val): FILE: colossalai/legacy/trainer/hooks/_checkpoint_hook.py class SaveCheckpointHook (line 14) | class SaveCheckpointHook(BaseHook): method __init__ (line 30) | def __init__( method after_hook_is_attached (line 48) | def after_hook_is_attached(self, trainer): method after_train_iter (line 56) | def after_train_iter(self, trainer, output, label, loss): method after_train_epoch (line 69) | def after_train_epoch(self, trainer): FILE: colossalai/legacy/trainer/hooks/_commons_.py function _format_number (line 4) | def _format_number(val, prec=5): FILE: colossalai/legacy/trainer/hooks/_log_hook.py class LogByEpochHook (line 20) | class LogByEpochHook(BaseHook): method __init__ (line 31) | def __init__(self, logger, interval: int = 1, priority: int = 1): method _is_epoch_to_log (line 36) | def _is_epoch_to_log(self, trainer): class LogMetricByStepHook (line 41) | class LogMetricByStepHook(BaseHook): method __init__ (line 50) | def __init__(self, priority: int = 10): method after_train_iter (line 53) | def after_train_iter(self, trainer, *args): method after_test_iter (line 61) | def after_test_iter(self, trainer, *args): class LogMetricByEpochHook (line 71) | class LogMetricByEpochHook(LogByEpochHook): method __init__ (line 82) | def __init__(self, logger, interval: int = 1, priority: int = 10) -> N... method _get_str (line 86) | def _get_str(self, trainer, mode): method after_train_epoch (line 93) | def after_train_epoch(self, trainer): method after_test_epoch (line 101) | def after_test_epoch(self, trainer): class TensorboardHook (line 110) | class TensorboardHook(BaseHook): method __init__ (line 123) | def __init__( method _log_by_iter (line 168) | def _log_by_iter(self, trainer, mode: str): method _log_by_epoch (line 177) | def _log_by_epoch(self, trainer, mode: str): method after_test_iter (line 184) | def after_test_iter(self, trainer, *args): method after_test_epoch (line 187) | def after_test_epoch(self, trainer): method after_train_iter (line 190) | def after_train_iter(self, trainer, *args): method after_train_epoch (line 193) | def after_train_epoch(self, trainer): class LogTimingByEpochHook (line 198) | class LogTimingByEpochHook(LogByEpochHook): method __init__ (line 212) | def __init__( method _get_message (line 231) | def _get_message(self, mode): method after_train_epoch (line 251) | def after_train_epoch(self, trainer): method after_test_epoch (line 257) | def after_test_epoch(self, trainer): class LogMemoryByEpochHook (line 265) | class LogMemoryByEpochHook(LogByEpochHook): method __init__ (line 277) | def __init__( method before_train (line 289) | def before_train(self, trainer): method after_train_epoch (line 294) | def after_train_epoch(self, trainer): method after_test (line 299) | def after_test(self, trainer): FILE: colossalai/legacy/trainer/hooks/_lr_scheduler_hook.py class LRSchedulerHook (line 9) | class LRSchedulerHook(MetricHook): method __init__ (line 23) | def __init__( method after_hook_is_attached (line 35) | def after_hook_is_attached(self, trainer): method after_train_epoch (line 41) | def after_train_epoch(self, trainer): method after_train_iter (line 46) | def after_train_iter(self, trainer, output: Tensor, label: Tensor, los... FILE: colossalai/legacy/trainer/hooks/_metric_hook.py class Metric (line 21) | class Metric(ABC): method __init__ (line 32) | def __init__(self, epoch_only: bool): method epoch_only (line 37) | def epoch_only(self): method reset (line 42) | def reset(self) -> None: method update (line 48) | def update(self, *args, **kwargs) -> None: method get_last_step_value (line 54) | def get_last_step_value(self) -> float: method get_accumulated_value (line 58) | def get_accumulated_value(self): method is_better (line 68) | def is_better(a, b) -> bool: class LossMetric (line 76) | class LossMetric(Metric): method __init__ (line 83) | def __init__(self, epoch_only): method reset (line 89) | def reset(self) -> None: method update (line 95) | def update(self, loss) -> None: method get_accumulated_value (line 108) | def get_accumulated_value(self): method get_last_step_value (line 117) | def get_last_step_value(self) -> float: method is_better (line 122) | def is_better(a, b): class LearningRateMetric (line 126) | class LearningRateMetric(Metric): method __init__ (line 134) | def __init__(self, epoch_only: bool, initial_lr: float = 0.0): method reset (line 138) | def reset(self) -> None: method update (line 141) | def update(self, lr) -> None: method get_last_step_value (line 144) | def get_last_step_value(self) -> float: method get_accumulated_value (line 147) | def get_accumulated_value(self): method is_better (line 151) | def is_better(a, b) -> bool: class AccuracyMetric (line 155) | class AccuracyMetric(Metric): method __init__ (line 164) | def __init__(self, epoch_only: bool, accuracy_func: Callable): method reset (line 172) | def reset(self) -> None: method update (line 178) | def update(self, logits, targets, batch_size) -> None: method get_last_step_value (line 199) | def get_last_step_value(self) -> float: method get_accumulated_value (line 204) | def get_accumulated_value(self): method is_better (line 210) | def is_better(a, b) -> bool: class MetricHook (line 214) | class MetricHook(BaseHook): method __init__ (line 226) | def __init__( method _check_metric_states_initialization (line 233) | def _check_metric_states_initialization(self, trainer): class LossHook (line 239) | class LossHook(MetricHook): method __init__ (line 248) | def __init__(self, priority: int = 0): method after_hook_is_attached (line 251) | def after_hook_is_attached(self, trainer): method before_train_epoch (line 262) | def before_train_epoch(self, trainer): method after_train_iter (line 266) | def after_train_iter(self, trainer, logits, label, loss): method before_test_epoch (line 270) | def before_test_epoch(self, trainer): method after_test_iter (line 274) | def after_test_iter(self, trainer, logits, label, loss): class AccuracyHook (line 280) | class AccuracyHook(MetricHook): method __init__ (line 290) | def __init__(self, accuracy_func: Callable, priority: int = 0): method after_hook_is_attached (line 294) | def after_hook_is_attached(self, trainer): method before_test (line 302) | def before_test(self, trainer): method after_test_iter (line 306) | def after_test_iter(self, trainer, logits, targets, *args): class ThroughputMetric (line 312) | class ThroughputMetric(Metric): method __init__ (line 319) | def __init__(self, epoch_only: bool, ignored_steps: int = 0, tflop_per... method reset (line 330) | def reset(self) -> None: method update (line 337) | def update(self, num_samples, time) -> None: method get_last_step_value (line 345) | def get_last_step_value(self) -> float: method get_last_step_info (line 357) | def get_last_step_info(self) -> str: method get_accumulated_value (line 373) | def get_accumulated_value(self) -> float: method is_better (line 381) | def is_better(a, b) -> bool: class ThroughputHook (line 386) | class ThroughputHook(MetricHook): method __init__ (line 398) | def __init__(self, ignored_steps: int = 0, priority: int = 10, tflop_p... method after_hook_is_attached (line 404) | def after_hook_is_attached(self, trainer): method before_train_epoch (line 418) | def before_train_epoch(self, trainer): method after_train_iter (line 422) | def after_train_iter(self, trainer, *args): method before_test (line 428) | def before_test(self, trainer): method after_test_iter (line 432) | def after_test_iter(self, trainer, *args): FILE: colossalai/legacy/utils/activation_checkpoint.py function copy_to_device (line 13) | def copy_to_device(obj, device): class CheckpointFunction (line 30) | class CheckpointFunction(torch.autograd.Function): method forward (line 32) | def forward(ctx, run_function, activation_offload=False, *args): method backward (line 79) | def backward(ctx, *args): function checkpoint (line 141) | def checkpoint(function, activation_offload, *args, use_reentrant: bool ... function _checkpoint_without_reentrant (line 164) | def _checkpoint_without_reentrant(function, activation_offload=False, *a... FILE: colossalai/legacy/utils/checkpoint/module_checkpoint.py function save_checkpoint (line 12) | def save_checkpoint( function load_checkpoint (line 79) | def load_checkpoint( FILE: colossalai/legacy/utils/checkpoint/utils.py function robust_broadcast (line 9) | def robust_broadcast(tensor): function gather_tensor (line 23) | def gather_tensor(colo_tensor: ColoTensor) -> None: function scatter_tensor (line 41) | def scatter_tensor(colo_tensor: ColoTensor, dist_spec: _DistSpec) -> None: FILE: colossalai/legacy/utils/checkpointing.py function broadcast_state_dict (line 21) | def broadcast_state_dict(state_dict, parallel_mode): function partition_tensor_parallel_state_dict (line 28) | def partition_tensor_parallel_state_dict( function gather_tensor_parallel_state_dict (line 66) | def gather_tensor_parallel_state_dict( function _send_state_dict (line 99) | def _send_state_dict(state_dict, dst, parallel_mode): function _recv_state_dict (line 105) | def _recv_state_dict(src, parallel_mode): function partition_pipeline_parallel_state_dict (line 114) | def partition_pipeline_parallel_state_dict(model, state_dict): function gather_pipeline_parallel_state_dict (line 139) | def gather_pipeline_parallel_state_dict(state_dict): function save_checkpoint (line 161) | def save_checkpoint( function broadcast_model (line 202) | def broadcast_model(model: torch.nn.Module): function load_checkpoint (line 214) | def load_checkpoint( FILE: colossalai/legacy/utils/common.py function print_rank_0 (line 26) | def print_rank_0(msg: str, logger=None): function sync_model_param (line 41) | def sync_model_param(model, parallel_mode): function is_dp_rank_0 (line 58) | def is_dp_rank_0(): function is_tp_rank_0 (line 62) | def is_tp_rank_0(): function is_no_pp_or_last_stage (line 66) | def is_no_pp_or_last_stage(): function is_using_ddp (line 70) | def is_using_ddp(): function is_using_pp (line 74) | def is_using_pp(): function is_using_sequence (line 78) | def is_using_sequence(): class model_branch_context (line 82) | class model_branch_context(object): method __enter__ (line 83) | def __enter__(self): method __exit__ (line 86) | def __exit__(self, *exc_info): function is_model_parallel_parameter (line 90) | def is_model_parallel_parameter(p): function _calc_l2_norm (line 94) | def _calc_l2_norm(grads): function _calc_lp (line 112) | def _calc_lp(grads, norm_type): function _move_norm_to_cuda (line 120) | def _move_norm_to_cuda(norm: Union[float, torch.Tensor]) -> Union[float,... function _get_tensor_norm (line 126) | def _get_tensor_norm(norm: Union[float, torch.Tensor], move_to_cuda) -> ... function _compute_local_lp (line 137) | def _compute_local_lp(params: List[ColoParameter], norm_type: float) -> ... function _compute_buckets_lp (line 153) | def _compute_buckets_lp(params: List[ColoParameter], norm_type: float) -... function _compute_pp_grad_lp (line 179) | def _compute_pp_grad_lp(total_lp: float, norm_type: float) -> float: function _compute_grad_lp (line 190) | def _compute_grad_lp(parameters, norm_type: float = 2.0) -> float: function compute_grad_norm (line 217) | def compute_grad_norm(parameters, norm_type: float = 2.0) -> float: function _clip_grad_norm (line 225) | def _clip_grad_norm(parameters, max_norm: float, total_norm: float) -> N... function clip_grad_norm (line 248) | def clip_grad_norm(parameters, max_norm: float, norm_type: float = 2.0) ... function clip_grad_norm_fp32 (line 254) | def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): function count_zeros_fp32 (line 375) | def count_zeros_fp32(parameters): function copy_tensor_parallel_attributes (line 413) | def copy_tensor_parallel_attributes(src_tensor, dst_tensor): function param_is_not_tensor_parallel_duplicate (line 420) | def param_is_not_tensor_parallel_duplicate(param): function switch_virtual_pipeline_parallel_rank (line 427) | def switch_virtual_pipeline_parallel_rank(rank): FILE: colossalai/legacy/utils/data_sampler/base_sampler.py class BaseSampler (line 7) | class BaseSampler(ABC): method __init__ (line 8) | def __init__(self, dataset, batch_size): method __len__ (line 13) | def __len__(self): method __iter__ (line 17) | def __iter__(self): FILE: colossalai/legacy/utils/data_sampler/data_parallel_sampler.py class DataParallelSampler (line 19) | class DataParallelSampler(Sampler): method __init__ (line 31) | def __init__(self, dataset: Dataset, shuffle: bool = False, seed: int ... method __iter__ (line 56) | def __iter__(self) -> Iterator[T_co]: method __len__ (line 88) | def __len__(self) -> int: method set_epoch (line 91) | def set_epoch(self, epoch: int) -> None: function get_dataloader (line 102) | def get_dataloader( FILE: colossalai/legacy/utils/memory.py function _bytes_to_MB (line 17) | def _bytes_to_MB(val, decimal=2): function _get_cpu_memory_info (line 27) | def _get_cpu_memory_info(): function report_memory_usage (line 60) | def report_memory_usage(message, logger=None, report_cpu=False): function colo_device_memory_capacity (line 100) | def colo_device_memory_capacity(device: torch.device) -> int: function colo_device_memory_used (line 121) | def colo_device_memory_used(device: torch.device) -> int: function colo_set_process_memory_fraction (line 145) | def colo_set_process_memory_fraction(ratio: float) -> None: function colo_set_cpu_memory_capacity (line 162) | def colo_set_cpu_memory_capacity(size: int) -> None: function colo_get_cpu_memory_capacity (line 172) | def colo_get_cpu_memory_capacity() -> int: FILE: colossalai/legacy/utils/profiler/extention.py class ProfilerExtension (line 4) | class ProfilerExtension(ABC): method prepare_trace (line 6) | def prepare_trace(self): method start_trace (line 10) | def start_trace(self): method stop_trace (line 14) | def stop_trace(self): method extend_chrome_trace (line 18) | def extend_chrome_trace(self, trace: dict) -> dict: FILE: colossalai/legacy/utils/profiler/legacy/comm_profiler.py function _get_code_location (line 16) | def _get_code_location(depth: int): class CommEvent (line 40) | class CommEvent(object): method __init__ (line 45) | def __init__(self, count: int = 0, comm_vol: float = 0.0, cuda_time: i... method add (line 50) | def add(self, rhs): class CommProfiler (line 56) | class CommProfiler(BaseProfiler): method __init__ (line 59) | def __init__(self, depth: int = 0, total_count: int = 0, total_comm_vo... method reset (line 72) | def reset(self): method enable (line 83) | def enable(self): method disable (line 90) | def disable(self): method to_tensorboard (line 97) | def to_tensorboard(self, writer): method to_file (line 100) | def to_file(self, filename: Path): method show (line 104) | def show(self): method result_str (line 107) | def result_str(self, sep: str = "\n"): method has_aync_op (line 155) | def has_aync_op(self): method activate_profiler (line 158) | def activate_profiler(self, kn: str, vol: float): method close_profiler (line 163) | def close_profiler(self, group=None): method wait_async_op (line 196) | def wait_async_op(self): class CommHandler (line 203) | class CommHandler(object): method __init__ (line 206) | def __init__(self, profiler: CommProfiler): method wait (line 210) | def wait(self): function async_check (line 214) | def async_check(profiler: CommProfiler): function all_reduce (line 220) | def all_reduce( function reduce_scatter (line 237) | def reduce_scatter( function all_gather (line 262) | def all_gather( function broadcast (line 286) | def broadcast( function reduce (line 301) | def reduce( FILE: colossalai/legacy/utils/profiler/legacy/pcie_profiler.py function _get_size (line 9) | def _get_size(dtype: str): function _get_numel (line 18) | def _get_numel(my_list: List[int]) -> int: function _reduce_location (line 25) | def _reduce_location(locations: List[str]) -> str: class PcieEvent (line 34) | class PcieEvent(object): method __init__ (line 37) | def __init__(self, count: int = 0, pcie_vol: int = 0, cuda_time: int =... method add (line 42) | def add(self, rhs): class PcieProfiler (line 48) | class PcieProfiler(BaseProfiler): method __init__ (line 54) | def __init__(self, dtype: str = "fp32", depth: int = 1): method reset (line 66) | def reset(self): method enable (line 75) | def enable(self): method disable (line 81) | def disable(self): method to_tensorboard (line 106) | def to_tensorboard(self, writer): method to_file (line 109) | def to_file(self, filename: Path): method show (line 113) | def show(self): method result_str (line 116) | def result_str(self, sep: str = "\n"): FILE: colossalai/legacy/utils/profiler/legacy/prof_utils.py function _format_time (line 9) | def _format_time(time_us): function _format_memory (line 21) | def _format_memory(nbytes): function _format_bandwidth (line 36) | def _format_bandwidth(volume: float or int, time_us: int): class BaseProfiler (line 46) | class BaseProfiler(ABC): method __init__ (line 47) | def __init__(self, profiler_name: str, priority: int): method enable (line 52) | def enable(self): method disable (line 56) | def disable(self): method to_tensorboard (line 60) | def to_tensorboard(self, writer): method to_file (line 64) | def to_file(self, filename: Path): method show (line 68) | def show(self): class ProfilerContext (line 72) | class ProfilerContext(object): method __init__ (line 95) | def __init__(self, profilers: List[BaseProfiler] = None, enable: bool ... method __enter__ (line 99) | def __enter__(self): method __exit__ (line 105) | def __exit__(self, exc_type, exc_val, exc_tb): method to_tensorboard (line 110) | def to_tensorboard(self, writer): method to_file (line 120) | def to_file(self, log_dir: Union[str, Path]): method show (line 130) | def show(self): FILE: colossalai/legacy/utils/profiler/profiler.py class profile (line 17) | class profile(torch_profile): method __init__ (line 123) | def __init__( method prepare_trace (line 155) | def prepare_trace(self) -> None: method _start_warmup (line 163) | def _start_warmup(self): method start_trace (line 166) | def start_trace(self): method _start_trace (line 174) | def _start_trace(self): method stop_trace (line 177) | def stop_trace(self): method _stop_trace (line 185) | def _stop_trace(self): method export_chrome_trace (line 188) | def export_chrome_trace(self, path: str): FILE: colossalai/legacy/utils/profiler/stateful_tensor_mem_extention.py class DeviceType (line 15) | class DeviceType(Enum): function get_timestamp_us (line 20) | def get_timestamp_us(): function generic_instant_event (line 24) | def generic_instant_event(name, pid, tid, timestamp, args): class StatefulTensorMemoryEvent (line 28) | class StatefulTensorMemoryEvent: method __init__ (line 31) | def __init__(self, timestamp: int, device_type: DeviceType, bytes_: in... method state_dict (line 39) | def state_dict(self): class StatefulTensorMemoryTracer (line 49) | class StatefulTensorMemoryTracer: method __init__ (line 50) | def __init__(self) -> None: method sample (line 54) | def sample(self): method start_trace (line 62) | def start_trace(self): method stop_trace (line 66) | def stop_trace(self): method state_dict (line 69) | def state_dict(self): class StatefulTensorMemoryTracerHook (line 73) | class StatefulTensorMemoryTracerHook(BaseOpHook): method __init__ (line 74) | def __init__(self, tracer: StatefulTensorMemoryTracer): method pre_fwd_exec (line 79) | def pre_fwd_exec(self, module: torch.nn.Module, *args): method post_fwd_exec (line 83) | def post_fwd_exec(self, module: torch.nn.Module, *args): method pre_bwd_exec (line 87) | def pre_bwd_exec(self, module: torch.nn.Module, input_, output): method post_bwd_exec (line 91) | def post_bwd_exec(self, module: torch.nn.Module, input_): method post_iter (line 95) | def post_iter(self): method enable (line 99) | def enable(self): method disable (line 102) | def disable(self): class StatefulTensorMemoryProfilerExtention (line 106) | class StatefulTensorMemoryProfilerExtention(ProfilerExtension): method __init__ (line 107) | def __init__(self, engine: Engine) -> None: method prepare_trace (line 113) | def prepare_trace(self): method start_trace (line 119) | def start_trace(self): method stop_trace (line 123) | def stop_trace(self): method extend_chrome_trace (line 132) | def extend_chrome_trace(self, trace: dict) -> dict: FILE: colossalai/legacy/zero/__init__.py function convert_to_zero_v2 (line 14) | def convert_to_zero_v2( FILE: colossalai/legacy/zero/gemini/colo_init_context.py function _named_params_with_replica (line 13) | def _named_params_with_replica( function _convert_to_coloparam (line 28) | def _convert_to_coloparam( function ColoModulize (line 61) | def ColoModulize(module): class ColoInitContext (line 69) | class ColoInitContext(InsertPostInitMethodToModuleSubClasses): method __init__ (line 70) | def __init__( method _register_colo_modules (line 92) | def _register_colo_modules(self): method _pre_context_exec (line 98) | def _pre_context_exec(self): method _post_init_method (line 101) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs): function post_process_colo_init_ctx (line 158) | def post_process_colo_init_ctx( FILE: colossalai/legacy/zero/gemini/gemini_context.py class GeminiMemoryManager (line 4) | class GeminiMemoryManager(object): method __init__ (line 5) | def __init__(self, states_cls: EnumMeta): method total_number (line 18) | def total_number(self): method reset (line 21) | def reset(self): method register_new_instance (line 32) | def register_new_instance(self): method delete_instance (line 35) | def delete_instance(self): method print_info (line 38) | def print_info(self): FILE: colossalai/legacy/zero/gemini/ophooks/_shard_grad_ophook.py class ShardGradMemTracerHook (line 9) | class ShardGradMemTracerHook(BaseOpHook): method __init__ (line 14) | def __init__(self): method pre_fwd_exec (line 17) | def pre_fwd_exec(self, module: torch.nn.Module, *args): method post_fwd_exec (line 20) | def post_fwd_exec(self, module: torch.nn.Module, *args): method pre_bwd_exec (line 23) | def pre_bwd_exec(self, module: torch.nn.Module, input, output): method post_bwd_exec (line 28) | def post_bwd_exec(self, module: torch.nn.Module, input): method post_iter (line 31) | def post_iter(self): FILE: colossalai/legacy/zero/gemini/ophooks/_shard_param_ophook.py class ShardParamHook (line 9) | class ShardParamHook(BaseOpHook): method __init__ (line 14) | def __init__(self): method niter (line 17) | def niter(self): method pre_fwd_exec (line 20) | def pre_fwd_exec(self, module: torch.nn.Module, *args): method post_fwd_exec (line 26) | def post_fwd_exec(self, module: torch.nn.Module, *args): method pre_bwd_exec (line 32) | def pre_bwd_exec(self, module: torch.nn.Module, input, output): method post_bwd_exec (line 38) | def post_bwd_exec(self, module: torch.nn.Module, input): method pre_iter (line 44) | def pre_iter(self): method post_iter (line 47) | def post_iter(self): FILE: colossalai/legacy/zero/gemini/ophooks/runtime_mem_tracer_hook.py class TrainingPhase (line 13) | class TrainingPhase(Enum): class GradMemStats (line 18) | class GradMemStats: method __init__ (line 19) | def __init__(self) -> None: method clear (line 23) | def clear(self): class GradMemTracerHook (line 28) | class GradMemTracerHook: method __init__ (line 29) | def __init__(self, grad_stats: GradMemStats): method grad_handle (line 33) | def grad_handle(self, p, grad): method register_grad_hook (line 39) | def register_grad_hook(self, module: torch.nn.Module): method remove_grad_hook (line 45) | def remove_grad_hook(self): class ParamMemTracerHook (line 50) | class ParamMemTracerHook(ColoParamOpHook): method __init__ (line 51) | def __init__(self, memstats: MemStats, gradstats: GradMemStats) -> None: method _free_cuda_params (line 58) | def _free_cuda_params(self, params): method _allocate_params_on_cuda (line 64) | def _allocate_params_on_cuda(self, params: List[torch.nn.Parameter]): method record_model_data_volume (line 85) | def record_model_data_volume(self, params): method pre_op (line 102) | def pre_op(self, params): method post_op (line 116) | def post_op(self, params): method pre_forward (line 119) | def pre_forward(self, params: List[torch.Tensor]) -> None: method post_forward (line 122) | def post_forward(self, params: List[torch.Tensor]) -> None: method pre_backward (line 125) | def pre_backward(self, params: List[torch.Tensor]) -> None: method post_backward (line 128) | def post_backward(self, params: List[torch.Tensor]) -> None: method switch_training_phase (line 132) | def switch_training_phase(self, training_phase: TrainingPhase = Traini... FILE: colossalai/legacy/zero/gemini/ophooks/utils.py class BaseOpHook (line 8) | class BaseOpHook(ABC): method __init__ (line 12) | def __init__(self): method pre_fwd_exec (line 16) | def pre_fwd_exec(self, module: torch.nn.Module, *args): method post_fwd_exec (line 20) | def post_fwd_exec(self, module: torch.nn.Module, *args): method pre_bwd_exec (line 24) | def pre_bwd_exec(self, module: torch.nn.Module, input, output): method post_bwd_exec (line 28) | def post_bwd_exec(self, module: torch.nn.Module, input): method post_iter (line 32) | def post_iter(self): function _apply_to_tensors_only (line 37) | def _apply_to_tensors_only(module, functional, backward_function, outputs): class PreBackwardFunction (line 50) | class PreBackwardFunction(torch.autograd.Function): method forward (line 52) | def forward(ctx, module, pre_backward_function, outputs): method backward (line 60) | def backward(ctx, *args): class PostBackwardFunction (line 65) | class PostBackwardFunction(torch.autograd.Function): method forward (line 67) | def forward(ctx, module, pre_backward_function, output): method backward (line 74) | def backward(ctx, *args): function register_ophooks_recursively (line 85) | def register_ophooks_recursively( FILE: colossalai/legacy/zero/gemini/paramhooks/_param_hookmgr.py class BaseParamHookMgr (line 7) | class BaseParamHookMgr(object): method __init__ (line 8) | def __init__(self, param_list: List[torch.nn.Parameter]) -> None: method register_backward_hooks (line 15) | def register_backward_hooks(self, hook_call: Callable) -> None: method remove_hooks (line 31) | def remove_hooks(self) -> None: FILE: colossalai/legacy/zero/gemini/stateful_tensor.py function sizeof_tensor (line 9) | def sizeof_tensor(tensor: torch.Tensor): class TensorState (line 13) | class TensorState(Enum): class StatefulTensor (line 21) | class StatefulTensor(object): method __init__ (line 32) | def __init__(self, maybe_tensor: Optional[torch.Tensor], state: Option... method data_ptr (line 49) | def data_ptr(self): method set_null (line 54) | def set_null(self) -> None: method is_null (line 60) | def is_null(self) -> bool: method trans_state (line 67) | def trans_state(self, state: TensorState) -> None: method move_to (line 80) | def move_to(self, device: Union[torch.device, int]): method payload_copy (line 97) | def payload_copy(self, tensor) -> None: method payload_reset (line 100) | def payload_reset(self, tensor) -> None: method payload_relay (line 116) | def payload_relay(self, rhs): method payload (line 134) | def payload(self) -> Optional[torch.Tensor]: method payload_size (line 138) | def payload_size(self) -> int: method state (line 142) | def state(self) -> TensorState: method device (line 146) | def device(self) -> torch.device: method dtype (line 150) | def dtype(self) -> torch.dtype: method shape (line 154) | def shape(self): method to (line 157) | def to(self, device: torch.device): method to_ (line 160) | def to_(self, device: torch.device): method __release (line 163) | def __release(self): method __trans_state_update (line 170) | def __trans_state_update(self, from_state: TensorState, to_state: Tens... method __trans_device_update (line 190) | def __trans_device_update(self, from_type: str, to_type: str): method __del__ (line 204) | def __del__(self): FILE: colossalai/legacy/zero/gemini/stateful_tensor_mgr.py class StatefulTensorMgr (line 13) | class StatefulTensorMgr(object): method __init__ (line 21) | def __init__(self, tensor_placement_policy: TensorPlacementPolicy) -> ... method register_stateful_tensor_list (line 33) | def register_stateful_tensor_list(self, tensor_list: List[StatefulTens... method start_iter (line 40) | def start_iter(self): method finish_iter (line 43) | def finish_iter(self): method adjust_layout (line 51) | def adjust_layout(self) -> None: method cpu_gpu_move_volume (line 75) | def cpu_gpu_move_volume(self): method _trans_state (line 78) | def _trans_state(self, trans_state_func, stateful_tensor, state): method _get_layout_info (line 86) | def _get_layout_info(self, compute_idx: int, warmup: bool): FILE: colossalai/legacy/zero/gemini/tensor_placement_policy.py class TensorPlacementPolicy (line 16) | class TensorPlacementPolicy(ABC): method __init__ (line 17) | def __init__(self, device: Optional[torch.device], mem_stats_collector... method evict_tensors (line 22) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *... class CPUTensorPlacementPolicy (line 26) | class CPUTensorPlacementPolicy(TensorPlacementPolicy): method __init__ (line 27) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ... method evict_tensors (line 30) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *... class CUDATensorPlacementPolicy (line 38) | class CUDATensorPlacementPolicy(TensorPlacementPolicy): method __init__ (line 39) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ... method evict_tensors (line 43) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *... class AutoTensorPlacementPolicy (line 47) | class AutoTensorPlacementPolicy(TensorPlacementPolicy): method __init__ (line 48) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ... method evict_tensors (line 55) | def evict_tensors( method _sort_hold_cuda_tensors (line 118) | def _sort_hold_cuda_tensors(hold_cuda_tensors: tuple, compute_idx: int... class TensorPlacementPolicyFactory (line 127) | class TensorPlacementPolicyFactory: method create (line 129) | def create(policy_name: str) -> Type[TensorPlacementPolicy]: FILE: colossalai/legacy/zero/gemini/tensor_utils.py function is_storage_empty (line 8) | def is_storage_empty(tensor: torch.Tensor) -> bool: function free_storage (line 12) | def free_storage(tensor: torch.Tensor) -> None: function alloc_storage (line 17) | def alloc_storage(tensor: torch.Tensor) -> None: function colo_tensor_mem_usage (line 22) | def colo_tensor_mem_usage(tensor: Union[torch.Tensor, StatefulTensor]) -... function colo_model_data_tensor_move (line 41) | def colo_model_data_tensor_move( function colo_model_data_tensor_move_inline (line 75) | def colo_model_data_tensor_move_inline( function colo_model_data_move_to_cpu (line 95) | def colo_model_data_move_to_cpu(t: Union[StatefulTensor, torch.Tensor]) ... function colo_model_tensor_clone (line 110) | def colo_model_tensor_clone(t: Union[StatefulTensor, torch.Tensor], targ... FILE: colossalai/legacy/zero/init_ctx/init_context.py class ZeroContextConfig (line 23) | class ZeroContextConfig: method __post_init__ (line 37) | def __post_init__(self): class ZeroInitContext (line 45) | class ZeroInitContext(InsertPostInitMethodToModuleSubClasses): method __init__ (line 62) | def __init__( method target_device (line 88) | def target_device(self): method is_replicated (line 92) | def is_replicated(self): method shard_param (line 96) | def shard_param(self): method calc_fanin_fanout (line 100) | def calc_fanin_fanout(tensor: torch.Tensor): method _pre_context_exec (line 129) | def _pre_context_exec(self): method _post_context_exec (line 162) | def _post_context_exec(self): method _post_init_method (line 186) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs): class ZeroContextMgr (line 242) | class ZeroContextMgr(metaclass=SingletonMeta): method hijack_context_config (line 246) | def hijack_context_config(self, **kwargs): function no_shard_zero_context (line 256) | def no_shard_zero_context(is_replicated: bool = True) -> AbstractContext... function no_shard_zero_decrator (line 262) | def no_shard_zero_decrator(is_replicated: bool = True): FILE: colossalai/legacy/zero/shard_utils/base_shard_strategy.py class BaseShardStrategy (line 9) | class BaseShardStrategy(ABC): method __init__ (line 10) | def __init__(self) -> None: method shard (line 15) | def shard(self, tensor_list: List[ShardedTensor], process_group: Optio... method gather (line 19) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti... FILE: colossalai/legacy/zero/shard_utils/bucket_tensor_shard_strategy.py class BucketTensorShardStrategy (line 13) | class BucketTensorShardStrategy(TensorShardStrategy): method gather (line 20) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti... FILE: colossalai/legacy/zero/shard_utils/commons.py function get_shard (line 6) | def get_shard(tensor: torch.Tensor, rank: int, world_size: int) -> Tuple... FILE: colossalai/legacy/zero/shard_utils/tensor_shard_strategy.py class TensorShardStrategy (line 13) | class TensorShardStrategy(BaseShardStrategy): method shard (line 18) | def shard(self, tensor_list: List[ShardedTensor], process_group: Optio... method gather (line 22) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti... method _shard_tensor (line 26) | def _shard_tensor(self, t: ShardedTensor, process_group: Optional[dist... method _gather_tensor (line 45) | def _gather_tensor(self, t: ShardedTensor, process_group: Optional[dis... FILE: colossalai/legacy/zero/sharded_model/_utils.py function get_gradient_predivide_factor (line 9) | def get_gradient_predivide_factor(world_size: int) -> float: function free_storage (line 16) | def free_storage(data: torch.Tensor) -> None: function alloc_storage (line 26) | def alloc_storage(data: torch.Tensor, size: torch.Size) -> None: function cast_tensor_to_fp16 (line 34) | def cast_tensor_to_fp16(tensor: torch.Tensor) -> torch.Tensor: function cast_tensor_to_fp32 (line 42) | def cast_tensor_to_fp32(tensor: Union[torch.Tensor, StatefulTensor]) -> ... function cast_tensor_to_bf16 (line 51) | def cast_tensor_to_bf16(tensor: torch.Tensor) -> torch.Tensor: function apply_to_tensors (line 59) | def apply_to_tensors(x: Any, fn: Callable): function cast_float_arguments (line 72) | def cast_float_arguments(fn: Callable, *args: Any, **kwargs: Any) -> Tup... function chunk_and_pad (line 76) | def chunk_and_pad(tensor: torch.Tensor, num_chunks: int) -> List[torch.T... FILE: colossalai/legacy/zero/sharded_model/reduce_scatter.py class Bucket (line 22) | class Bucket: method __init__ (line 23) | def __init__(self, shard_size: int, dtype: torch.dtype, device: torch.... method flush (line 30) | def flush(self) -> None: method alloc (line 53) | def alloc(self) -> None: method free (line 65) | def free(self) -> None: method append (line 71) | def append(self, tensor_list: List[Tensor], callback_fn: Callable): class ReduceScatterBucketer (line 85) | class ReduceScatterBucketer: method __init__ (line 113) | def __init__(self, bucket_size_mb: int = 25): method reduce_scatter_async (line 118) | def reduce_scatter_async( method flush (line 173) | def flush(self) -> None: method free (line 179) | def free(self) -> None: method _get_shard_size (line 185) | def _get_shard_size(self, element_size: int, num_shards: int) -> int: method _get_bucket (line 192) | def _get_bucket(self, tensor: Tensor, group: ProcessGroup) -> Bucket: FILE: colossalai/legacy/zero/sharded_model/sharded_model_v2.py class ShardedModelV2 (line 46) | class ShardedModelV2(nn.Module): method __init__ (line 81) | def __init__( method adjust_stateful_tensor_layout (line 187) | def adjust_stateful_tensor_layout(self) -> None: method use_memory_tracer (line 191) | def use_memory_tracer(self): method cuda_margin_space (line 195) | def cuda_margin_space(self): method cpu_offload (line 199) | def cpu_offload(self): method dump_memory_stats (line 202) | def dump_memory_stats(self, filename: Optional[str] = "dump_mem_stats.... method _pre_forward_operations (line 230) | def _pre_forward_operations(self, *args): method _post_forward_operations (line 241) | def _post_forward_operations(self): method forward (line 246) | def forward(self, *args: Any, **kwargs: Any) -> torch.Tensor: method backward (line 254) | def backward(self, loss): method backward_by_grad (line 260) | def backward_by_grad(self, tensor, grad): method _update_memstats (line 266) | def _update_memstats(self): method _post_backward_operations (line 279) | def _post_backward_operations(self) -> None: method _grad_post_backward_hook (line 322) | def _grad_post_backward_hook(self, param: Parameter, grad: torch.Tenso... method _reduce_scatter_handler (line 358) | def _reduce_scatter_handler(self, param: Parameter, grad: torch.Tensor... method _reduce_scatter_callback (line 377) | def _reduce_scatter_callback(self, param: Parameter, reduced_grad: tor... method _save_grad (line 388) | def _save_grad(self, param: Parameter, grad: torch.Tensor): method parameters (line 423) | def parameters(self, recurse: bool = True) -> Iterator[Parameter]: method named_parameters (line 426) | def named_parameters(self, prefix: str = "", recurse: bool = True) -> ... method state_dict (line 429) | def state_dict(self, destination=None, prefix="", keep_vars=False) -> ... method load_state_dict (line 441) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"... method _colo_state_dict (line 453) | def _colo_state_dict( method _colo_load_from_state_dict (line 481) | def _colo_load_from_state_dict( method __getitem__ (line 583) | def __getitem__(self, idx: int): method __len__ (line 587) | def __len__(self): method __iter__ (line 591) | def __iter__(self): FILE: colossalai/legacy/zero/sharded_model/utils.py function col_model_deepcopy (line 8) | def col_model_deepcopy(sharded_model: ShardedModelV2, other_model: torch... FILE: colossalai/legacy/zero/sharded_model/zero_hook.py class ZeroHook (line 17) | class ZeroHook(BaseOpHook): method __init__ (line 23) | def __init__( method gather_parameters (line 41) | def gather_parameters(self, module: torch.nn.Module): method shard_parameters (line 50) | def shard_parameters(self, module: torch.nn.Module): method adjust_module_data (line 59) | def adjust_module_data(self, module: torch.nn.Module): method pre_fwd_exec (line 74) | def pre_fwd_exec(self, module: torch.nn.Module, *args): method post_fwd_exec (line 81) | def post_fwd_exec(self, module: torch.nn.Module, *args): method pre_bwd_exec (line 92) | def pre_bwd_exec(self, module: torch.nn.Module, input, output): method post_bwd_exec (line 99) | def post_bwd_exec(self, module: torch.nn.Module, input): method pre_iter (line 110) | def pre_iter(self): method post_iter (line 113) | def post_iter(self): FILE: colossalai/legacy/zero/sharded_optim/sharded_optim_v2.py class OptimState (line 25) | class OptimState(Enum): class ShardedOptimizerV2 (line 30) | class ShardedOptimizerV2(OptimizerWrapper): method __init__ (line 76) | def __init__( method loss_scale (line 148) | def loss_scale(self): method get_memory_usage (line 151) | def get_memory_usage(self) -> Tuple[int, int]: method zero_grad (line 178) | def zero_grad(self, *args, **kwargs): method backward (line 181) | def backward(self, loss: Tensor) -> None: method backward_by_grad (line 188) | def backward_by_grad(self, tensor: Tensor, grad: Tensor) -> None: method clip_grad_norm (line 198) | def clip_grad_norm(self, model: nn.Module, max_norm: float): method step (line 204) | def step(self, *args, **kwargs): method _check_overflow (line 240) | def _check_overflow(self): method _unscale_grads (line 252) | def _unscale_grads(self): method _zero_grad (line 260) | def _zero_grad(self, recover_data: bool = False): method sync_grad (line 286) | def sync_grad(self): method _register_master_weight (line 289) | def _register_master_weight(self): method _maybe_move_fp32_shards (line 304) | def _maybe_move_fp32_shards(self): method _prepare_grads (line 325) | def _prepare_grads(self): method _point_param_fp16_to_master_param (line 346) | def _point_param_fp16_to_master_param(self): method _copy_master_model_to_model_fp16 (line 356) | def _copy_master_model_to_model_fp16(self): method _copy_master_param_to_param_fp16 (line 364) | def _copy_master_param_to_param_fp16(self, p): method state_dict (line 394) | def state_dict(self): method load_state_dict (line 400) | def load_state_dict(self, *args, **kwargs): FILE: colossalai/legacy/zero/sharded_param/sharded_param.py function get_empty_tensor (line 13) | def get_empty_tensor(device: torch.device, dtype: torch.dtype): class ShardedParamV2 (line 21) | class ShardedParamV2(object): method __init__ (line 22) | def __init__(self, param: torch.nn.Parameter, set_data_none: bool = Fa... method get_payload_tensors (line 37) | def get_payload_tensors(self) -> List[StatefulTensor]: method set_data_none (line 41) | def set_data_none(self): method set_grad_none (line 44) | def set_grad_none(self): method sharded_data_tensor (line 48) | def sharded_data_tensor(self): method data_payload (line 52) | def data_payload(self): method grad_payload (line 57) | def grad_payload(self): method param_is_sharded (line 62) | def param_is_sharded(self): method data_payload_reset (line 65) | def data_payload_reset(self, tensor: torch.Tensor): method grad_payload_reset (line 70) | def grad_payload_reset(self, tensor: torch.Tensor): method get_memory_usage (line 75) | def get_memory_usage(self) -> Tuple[int, int]: FILE: colossalai/legacy/zero/sharded_param/sharded_tensor.py class ShardedTensor (line 6) | class ShardedTensor(StatefulTensor): method __init__ (line 7) | def __init__(self, tensor: torch.Tensor, state: TensorState = TensorSt... method dtype (line 21) | def dtype(self) -> torch.dtype: method origin_numel (line 26) | def origin_numel(self) -> int: method origin_shape (line 30) | def origin_shape(self) -> int: method is_sharded (line 34) | def is_sharded(self): method is_sharded (line 38) | def is_sharded(self, flag: bool): FILE: colossalai/logging/__init__.py function get_dist_logger (line 9) | def get_dist_logger(name: str = "colossalai") -> DistributedLogger: function disable_existing_loggers (line 22) | def disable_existing_loggers(include: Optional[List[str]] = None, exclud... FILE: colossalai/logging/logger.py class DistributedLogger (line 12) | class DistributedLogger: method get_instance (line 27) | def get_instance(name: str): method __init__ (line 42) | def __init__(self, name): method rank (line 69) | def rank(self): method __get_call_info (line 73) | def __get_call_info(): method _check_valid_logging_level (line 86) | def _check_valid_logging_level(level: str): method set_level (line 89) | def set_level(self, level: str) -> None: method log_to_file (line 98) | def log_to_file(self, path: Union[str, Path], mode: str = "a", level: ... method _log (line 129) | def _log(self, level, message: str, ranks: List[int] = None) -> None: method info (line 136) | def info(self, message: str, ranks: List[int] = None) -> None: method warning (line 147) | def warning(self, message: str, ranks: List[int] = None) -> None: method debug (line 158) | def debug(self, message: str, ranks: List[int] = None) -> None: method error (line 169) | def error(self, message: str, ranks: List[int] = None) -> None: FILE: colossalai/moe/_operation.py function load_moe (line 14) | def load_moe(): class AllGather (line 21) | class AllGather(torch.autograd.Function): method forward (line 23) | def forward( method backward (line 54) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]: class ReduceScatter (line 62) | class ReduceScatter(torch.autograd.Function): method forward (line 64) | def forward( method backward (line 98) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]: class AllToAll (line 107) | class AllToAll(torch.autograd.Function): method forward (line 113) | def forward( method backward (line 141) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]: class HierarchicalAllToAll (line 149) | class HierarchicalAllToAll(torch.autograd.Function): method forward (line 151) | def forward(ctx: Any, inputs: Tensor, groups: Tuple[ProcessGroup, Proc... method backward (line 196) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]: class MoeDispatch (line 204) | class MoeDispatch(torch.autograd.Function): method forward (line 207) | def forward(ctx, tokens, mask, dest_idx, ec): method backward (line 229) | def backward(ctx, output_grad): class MoeCombine (line 239) | class MoeCombine(torch.autograd.Function): method forward (line 242) | def forward(ctx, expert_tokens, logits, mask, dest_idx, ec): method backward (line 270) | def backward(ctx, tokens_grad): function moe_cumsum (line 284) | def moe_cumsum(inputs: Tensor, use_kernel: bool = False): class EPGradScalerIn (line 295) | class EPGradScalerIn(torch.autograd.Function): method forward (line 302) | def forward(ctx: Any, inputs: Tensor, ep_size: int) -> Tensor: method backward (line 307) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None]: class EPGradScalerOut (line 315) | class EPGradScalerOut(torch.autograd.Function): method forward (line 322) | def forward(ctx: Any, inputs: Tensor, ep_size: int) -> Tensor: method backward (line 327) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None]: class DPGradScalerIn (line 335) | class DPGradScalerIn(torch.autograd.Function): method forward (line 342) | def forward(ctx: Any, inputs: Tensor, moe_dp_size: int, activated_expe... method backward (line 349) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None, N... class DPGradScalerOut (line 357) | class DPGradScalerOut(torch.autograd.Function): method forward (line 364) | def forward(ctx: Any, inputs: Tensor, moe_dp_size: int, activated_expe... method backward (line 371) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None, N... function _all_to_all (line 379) | def _all_to_all( class AllToAllUneven (line 409) | class AllToAllUneven(torch.autograd.Function): method forward (line 411) | def forward( method backward (line 433) | def backward(ctx: Any, *grad_outputs): function all_to_all_uneven (line 444) | def all_to_all_uneven( FILE: colossalai/nn/init.py function zeros_ (line 8) | def zeros_(): function ones_ (line 17) | def ones_(): function uniform_ (line 26) | def uniform_(a: float = 0.0, b: float = 1.0): function normal_ (line 41) | def normal_(mean: float = 0.0, std: float = 1.0): function trunc_normal_ (line 58) | def trunc_normal_(mean: float = 0.0, std: float = 1.0, a: float = -2.0, ... function kaiming_uniform_ (line 79) | def kaiming_uniform_(a=0, mode="fan_in", nonlinearity="leaky_relu"): function kaiming_normal_ (line 123) | def kaiming_normal_(a=0, mode="fan_in", nonlinearity="leaky_relu"): function xavier_uniform_ (line 166) | def xavier_uniform_(a: float = math.sqrt(3.0), scale: float = 2.0, gain:... function xavier_normal_ (line 200) | def xavier_normal_(scale: float = 2.0, gain: float = 1.0): function lecun_uniform_ (line 232) | def lecun_uniform_(): function lecun_normal_ (line 244) | def lecun_normal_(): FILE: colossalai/nn/layer/layernorm.py class FusedLayerNormAffineFunction (line 20) | class FusedLayerNormAffineFunction(torch.autograd.Function): method forward (line 23) | def forward(ctx, input, weight, bias, normalized_shape, eps): method backward (line 41) | def backward(ctx, grad_output): class MixedFusedLayerNorm (line 51) | class MixedFusedLayerNorm(torch.nn.Module): method __init__ (line 52) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None): method reset_parameters (line 63) | def reset_parameters(self): method forward (line 67) | def forward(self, input): method __repr__ (line 70) | def __repr__(self): FILE: colossalai/nn/layer/scaled_softmax.py class AttnMaskType (line 20) | class AttnMaskType(enum.Enum): class ScaledUpperTriangMaskedSoftmax (line 26) | class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function): method forward (line 36) | def forward(ctx, inputs, scale): method backward (line 48) | def backward(ctx, output_grads): class ScaledMaskedSoftmax (line 55) | class ScaledMaskedSoftmax(torch.autograd.Function): method forward (line 65) | def forward(ctx, inputs, mask, scale): method backward (line 78) | def backward(ctx, output_grads): class FusedScaleMaskSoftmax (line 85) | class FusedScaleMaskSoftmax(nn.Module): method __init__ (line 99) | def __init__( method forward (line 123) | def forward(self, input, mask): method is_kernel_available (line 132) | def is_kernel_available(self, mask, b, np, sq, sk): method forward_fused_softmax (line 154) | def forward_fused_softmax(self, input, mask): method forward_torch_softmax (line 169) | def forward_torch_softmax(self, input, mask): method get_batch_per_block (line 186) | def get_batch_per_block(self, sq, sk, b, np): FILE: colossalai/nn/layer/utils.py function divide (line 1) | def divide(numerator, denominator): FILE: colossalai/nn/lr_scheduler/cosine.py class CosineAnnealingLR (line 6) | class CosineAnnealingLR(_CosineAnnealingLR): method __init__ (line 45) | def __init__(self, optimizer, total_steps: int, eta_min: int = 0, last... class CosineAnnealingWarmupLR (line 49) | class CosineAnnealingWarmupLR(WarmupScheduler): method __init__ (line 61) | def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0,... class FlatAnnealingLR (line 68) | class FlatAnnealingLR(DelayerScheduler): method __init__ (line 79) | def __init__(self, optimizer, total_steps: int, pct_start: float = 0.7... class FlatAnnealingWarmupLR (line 88) | class FlatAnnealingWarmupLR(WarmupDelayerScheduler): method __init__ (line 102) | def __init__( FILE: colossalai/nn/lr_scheduler/delayed.py class _enable_get_lr_call (line 12) | class _enable_get_lr_call: method __init__ (line 13) | def __init__(self, o): method __enter__ (line 16) | def __enter__(self): method __exit__ (line 20) | def __exit__(self, type, value, traceback): class TwoStageScheduler (line 24) | class TwoStageScheduler(_LRScheduler): method __init__ (line 25) | def __init__(self, optimizer, after_scheduler: _LRScheduler, last_epoc... method state_dict (line 30) | def state_dict(self): method load_state_dict (line 40) | def load_state_dict(self, state_dict): class DelayerScheduler (line 56) | class DelayerScheduler(TwoStageScheduler): method __init__ (line 68) | def __init__(self, optimizer, delay_epochs, after_scheduler, last_epoc... method get_lr (line 74) | def get_lr(self): method step (line 84) | def step(self, epoch=None): class WarmupScheduler (line 96) | class WarmupScheduler(TwoStageScheduler): method __init__ (line 108) | def __init__(self, optimizer, warmup_epochs, after_scheduler, last_epo... method get_lr (line 112) | def get_lr(self): method step (line 121) | def step(self, epoch=None): class WarmupDelayerScheduler (line 133) | class WarmupDelayerScheduler(TwoStageScheduler): method __init__ (line 146) | def __init__(self, optimizer, warmup_epochs, delay_epochs, after_sched... method get_lr (line 155) | def get_lr(self): method step (line 170) | def step(self, epoch=None): FILE: colossalai/nn/lr_scheduler/linear.py class LinearWarmupLR (line 4) | class LinearWarmupLR(_LRScheduler): method __init__ (line 15) | def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0,... method get_lr (line 20) | def get_lr(self): FILE: colossalai/nn/lr_scheduler/multistep.py class MultiStepLR (line 8) | class MultiStepLR(_MultiStepLR): method __init__ (line 23) | def __init__( class MultiStepWarmupLR (line 35) | class MultiStepWarmupLR(WarmupScheduler): method __init__ (line 49) | def __init__( FILE: colossalai/nn/lr_scheduler/onecycle.py class OneCycleLR (line 4) | class OneCycleLR(_OneCycleLR): method __init__ (line 68) | def __init__( FILE: colossalai/nn/lr_scheduler/poly.py class PolynomialLR (line 6) | class PolynomialLR(_LRScheduler): method __init__ (line 18) | def __init__( method get_lr (line 28) | def get_lr(self): method _get_closed_form_lr (line 31) | def _get_closed_form_lr(self): class PolynomialWarmupLR (line 39) | class PolynomialWarmupLR(WarmupScheduler): method __init__ (line 52) | def __init__( FILE: colossalai/nn/lr_scheduler/torch.py class LambdaLR (line 7) | class LambdaLR(_LambdaLR): method __init__ (line 20) | def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch:... class MultiplicativeLR (line 24) | class MultiplicativeLR(_MultiplicativeLR): method __init__ (line 37) | def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch:... class StepLR (line 41) | class StepLR(_StepLR): method __init__ (line 55) | def __init__(self, optimizer, total_steps, step_size: int = 1, gamma: ... class ExponentialLR (line 59) | class ExponentialLR(_ExponentialLR): method __init__ (line 70) | def __init__(self, optimizer, total_steps, gamma: float = 1.0, last_ep... FILE: colossalai/nn/optimizer/__init__.py function cast_to_distributed (line 48) | def cast_to_distributed(optim): FILE: colossalai/nn/optimizer/adafactor.py class Adafactor (line 25) | class Adafactor(Optimizer): method __init__ (line 26) | def __init__( method _get_lr (line 59) | def _get_lr(param_group, param_state): method _get_options (line 70) | def _get_options(param_group, param_shape): method _rms (line 76) | def _rms(tensor): method _approx_sq_grad (line 80) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col): method step (line 86) | def step(self, closure=None): FILE: colossalai/nn/optimizer/came.py class CAME (line 6) | class CAME(torch.optim.Optimizer): method __init__ (line 23) | def __init__( method supports_memory_efficient_fp16 (line 45) | def supports_memory_efficient_fp16(self): method supports_flat_params (line 49) | def supports_flat_params(self): method _get_options (line 52) | def _get_options(self, param_shape): method _rms (line 56) | def _rms(self, tensor): method _approx_sq_grad (line 59) | def _approx_sq_grad(self, exp_avg_sq_row, exp_avg_sq_col): method step (line 64) | def step(self, closure=None): FILE: colossalai/nn/optimizer/cpu_adam.py class CPUAdam (line 11) | class CPUAdam(NVMeOptimizer): method __init__ (line 65) | def __init__( method load_state_dict (line 84) | def load_state_dict(self, state_dict): method torch_adam_update (line 92) | def torch_adam_update( method step (line 127) | def step(self, closure=None, div_scale: float = -1): FILE: colossalai/nn/optimizer/distributed_adafactor.py class DistributedAdaFactor (line 15) | class DistributedAdaFactor(DistributedOptim): method __init__ (line 16) | def __init__( method setup_distributed (line 60) | def setup_distributed( method _get_lr (line 106) | def _get_lr(param_group, param_state): method _get_options (line 117) | def _get_options(param_group, param_shape): method _rms (line 130) | def _rms(tensor, param_is_dtensor, use_zero, tp_size, dp_size, tp_grou... method _approx_sq_grad (line 145) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col): method _approx_sq_grad_row_parallel (line 152) | def _approx_sq_grad_row_parallel(exp_avg_sq_row, exp_avg_sq_col, sq_ro... method _col_parallel_factor (line 158) | def _col_parallel_factor(self, update, grad, state, grad_shape, beta2t): method _row_parallel_factor (line 190) | def _row_parallel_factor(self, update, grad, state, grad_shape, beta2t): method _base_factor (line 233) | def _base_factor(self, update, grad, state, grad_shape, beta2t): method step (line 286) | def step(self, closure=None): FILE: colossalai/nn/optimizer/distributed_came.py class DistributedCAME (line 11) | class DistributedCAME(DistributedOptim): method __init__ (line 28) | def __init__( method supports_memory_efficient_fp16 (line 61) | def supports_memory_efficient_fp16(self): method supports_flat_params (line 65) | def supports_flat_params(self): method setup_distributed (line 68) | def setup_distributed( method _get_options (line 119) | def _get_options(param_shape): method _rms (line 124) | def _rms(tensor, param_is_dtensor, use_zero, tp_size, dp_size, tp_grou... method _approx_sq_grad (line 139) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col): method _approx_sq_grad_row_parallel (line 146) | def _approx_sq_grad_row_parallel(exp_avg_sq_row, exp_avg_sq_col, sq_ro... method _col_parallel_factor (line 151) | def _col_parallel_factor(self, update, grad, state_row, state_col, gra... method _row_parallel_factor (line 183) | def _row_parallel_factor(self, update, grad, state_row, state_col, gra... method _base_factor (line 226) | def _base_factor(self, update, grad, state_row, state_col, grad_shape,... method _base_res_factor (line 280) | def _base_res_factor(self, res, exp_avg, state_row, state_col, grad_sh... method step (line 333) | def step(self, closure=None): FILE: colossalai/nn/optimizer/distributed_galore.py class DistGaloreAwamW (line 21) | class DistGaloreAwamW(DistributedOptim, Optimizer2State): method __init__ (line 49) | def __init__( method setup_distributed (line 94) | def setup_distributed( method step (line 143) | def step(self, closure=None): method to_master_shape (line 268) | def to_master_shape(self, data, padding): method __del__ (line 277) | def __del__(self): FILE: colossalai/nn/optimizer/distributed_lamb.py class DistributedLamb (line 15) | class DistributedLamb(DistributedOptim): method __init__ (line 40) | def __init__( method setup_distributed (line 65) | def setup_distributed( method step (line 104) | def step(self, closure=None): FILE: colossalai/nn/optimizer/fused_adam.py class FusedAdam (line 14) | class FusedAdam(torch.optim.Optimizer): method __init__ (line 54) | def __init__( method zero_grad (line 83) | def zero_grad(self, set_to_none=False): method step (line 91) | def step(self, closure=None, grads=None, output_params=None, scale=Non... FILE: colossalai/nn/optimizer/fused_lamb.py class FusedLAMB (line 7) | class FusedLAMB(torch.optim.Optimizer): method __init__ (line 52) | def __init__( method zero_grad (line 97) | def zero_grad(self): method step (line 105) | def step(self, closure=None): FILE: colossalai/nn/optimizer/fused_sgd.py class FusedSGD (line 8) | class FusedSGD(Optimizer): method __init__ (line 57) | def __init__( method __setstate__ (line 87) | def __setstate__(self, state): method get_momentums (line 92) | def get_momentums(self, params): method step (line 109) | def step(self, closure=None): FILE: colossalai/nn/optimizer/galore.py function get_galore_param_groups (line 11) | def get_galore_param_groups( function make_low_rank_buffer (line 49) | def make_low_rank_buffer(p, grad): class GaLoreProjector (line 61) | class GaLoreProjector: method __init__ (line 62) | def __init__(self, rank, verbose=False, update_proj_gap=200, scale=1.0... method project (line 71) | def project(self, full_rank_grad, iter): method project_back (line 105) | def project_back(self, low_rank_grad): method get_orthogonal_matrix (line 118) | def get_orthogonal_matrix(self, weights, rank, type): class GaLoreAdamW8bit (line 163) | class GaLoreAdamW8bit(Optimizer2State): method __init__ (line 192) | def __init__( method step (line 235) | def step(self, closure=None): method __del__ (line 312) | def __del__(self): FILE: colossalai/nn/optimizer/hybrid_adam.py class HybridAdam (line 11) | class HybridAdam(CPUAdam): method __init__ (line 63) | def __init__( method step (line 93) | def step(self, closure=None, div_scale: float = -1): FILE: colossalai/nn/optimizer/lamb.py class Lamb (line 9) | class Lamb(Optimizer): method __init__ (line 29) | def __init__( method step (line 44) | def step(self, closure=None): FILE: colossalai/nn/optimizer/lars.py class Lars (line 9) | class Lars(Optimizer): method __init__ (line 22) | def __init__( method step (line 40) | def step(self, closure=None): FILE: colossalai/nn/optimizer/nvme_optimizer.py class NVMeOptimizer (line 10) | class NVMeOptimizer(torch.optim.Optimizer): method __init__ (line 24) | def __init__( method _get_numel (line 51) | def _get_numel(self) -> int: method _post_state_init (line 58) | def _post_state_init(self, param: Parameter) -> None: method _setup_prefetch_params (line 70) | def _setup_prefetch_params(self) -> List[Parameter]: method _pre_step (line 83) | def _pre_step(self, *state_keys: str) -> None: method _pre_update (line 94) | def _pre_update(self, param: Parameter, *state_keys: str) -> None: method _post_update (line 104) | def _post_update(self, param: Parameter, *state_keys: str) -> None: method _post_step (line 113) | def _post_step(self) -> None: method step (line 119) | def step(self, closure: Optional[Callable[[], float]] = ...) -> Option... method state_dict (line 148) | def state_dict(self) -> dict: method load_state_dict (line 154) | def load_state_dict(self, state_dict: dict) -> None: method __del__ (line 160) | def __del__(self) -> None: FILE: colossalai/pipeline/p2p.py function _cuda_safe_tensor_to_object (line 22) | def _cuda_safe_tensor_to_object(tensor: torch.Tensor, tensor_size: torch... function check_for_nccl_backend (line 50) | def check_for_nccl_backend(group): function _broadcast_object_list (line 63) | def _broadcast_object_list( function _check_for_nccl_hccl_backend (line 151) | def _check_for_nccl_hccl_backend(group): function _check_device (line 164) | def _check_device(group): function create_send_metadata (line 176) | def create_send_metadata( function _filling_ops_queue (line 201) | def _filling_ops_queue( function _create_recv_buffer (line 218) | def _create_recv_buffer(tensor_metadata: List[TensorMetadata], current_d... function _batch_send_recv_tensor (line 228) | def _batch_send_recv_tensor( function _send_recv_serialization_object (line 273) | def _send_recv_serialization_object( function _communicate (line 364) | def _communicate( function _p2p_comm (line 459) | def _p2p_comm( class PipelineP2PCommunication (line 539) | class PipelineP2PCommunication: method __init__ (line 540) | def __init__(self, stage_manager: PipelineStageManager, overlap_p2p: b... method recv_forward (line 544) | def recv_forward( method recv_backward (line 569) | def recv_backward( method send_forward (line 594) | def send_forward(self, output_object: Any, next_rank: Optional[int] = ... method send_backward (line 616) | def send_backward(self, input_object: Any, prev_rank: Optional[int] = ... method send_forward_recv_forward (line 638) | def send_forward_recv_forward( method send_backward_recv_backward (line 676) | def send_backward_recv_backward( method send_forward_recv_backward (line 716) | def send_forward_recv_backward( method send_backward_recv_forward (line 746) | def send_backward_recv_forward( method p2p_communicate (line 776) | def p2p_communicate( FILE: colossalai/pipeline/schedule/_utils.py function _odict_flatten (line 12) | def _odict_flatten(d: "OrderedDict[Any, Any]") -> Tuple[List[Any], Any]: function _odict_unflatten (line 16) | def _odict_unflatten(values: List[Any], context: Any) -> "OrderedDict[An... function tree_map_hf (line 28) | def tree_map_hf(fn: Any, pytree: Any): function tree_flatten_hf (line 34) | def tree_flatten_hf(pytree: Any) -> Tuple[List[Any], TreeSpec]: function to_device (line 56) | def to_device(x: Any, device: Optional[torch.device] = None) -> Any: function get_batch_size (line 71) | def get_batch_size(batch: Any) -> int: function get_micro_batch (line 90) | def get_micro_batch(batch: Any, start: int, micro_batch_size: int) -> Any: function model_forward (line 110) | def model_forward(model: Module, data: Any, internal_inputs: Optional[di... function retain_grad (line 130) | def retain_grad(x: Any) -> None: function require_grad (line 140) | def require_grad(x: Any) -> None: function detach (line 150) | def detach(x: Any) -> Any: function clone (line 164) | def clone(x: Any) -> Any: function release_tensor_data (line 178) | def release_tensor_data(x: Any) -> Any: function merge_batch (line 192) | def merge_batch(data: List[Any], batch_size_dim=0) -> Any: FILE: colossalai/pipeline/schedule/base.py class PipelineSchedule (line 10) | class PipelineSchedule: method __init__ (line 11) | def __init__(self, stage_manager: PipelineStageManager) -> None: method forward_backward_step (line 14) | def forward_backward_step( FILE: colossalai/pipeline/schedule/generate.py class ActionIntervalBuffer (line 19) | class ActionIntervalBuffer: method __int__ (line 25) | def __int__(self): method clear (line 29) | def clear(self): class GenerateSchedule (line 34) | class GenerateSchedule(PipelineSchedule): method __init__ (line 46) | def __init__(self, stage_manager: PipelineStageManager, mb_manager: Mi... method load_batch (line 60) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic... method load_micro_batch (line 81) | def load_micro_batch(self) -> Any: method _prepare_inputs_for_interval_stage (line 91) | def _prepare_inputs_for_interval_stage(self): method _prepare_inputs_for_new_token (line 101) | def _prepare_inputs_for_new_token(self, new_token: torch.Tensor): method _get_token_id (line 114) | def _get_token_id(self, hidden_state: torch.Tensor) -> torch.Tensor: method _recv_pre_stage (line 119) | def _recv_pre_stage(self) -> Any: method _init_infer_state_action (line 130) | def _init_infer_state_action(self) -> None: method _load_stage_action (line 138) | def _load_stage_action(self, model: Module) -> None: method _gen_token_action (line 153) | def _gen_token_action(self, model: Module): method _head_encoding_action (line 174) | def _head_encoding_action(self, model: Module): method _body_encoding_action (line 186) | def _body_encoding_action(self, model: Module): method _comm_action (line 194) | def _comm_action(self, recv_pre: bool) -> torch.Tensor: method _gen_action (line 203) | def _gen_action(self, model: Module): method _gen_one_stage_action (line 236) | def _gen_one_stage_action(self, model: Module): method generate_step (line 258) | def generate_step(self, model: Module, data_iter: Iterable) -> Union[t... method generate_step_one_stage (line 267) | def generate_step_one_stage(self, model: Module, data_iter: Iterable) ... method generate_step_p2p (line 304) | def generate_step_p2p(self, model: Module, data_iter: Iterable) -> Uni... method generate_step_broadcast (line 348) | def generate_step_broadcast(self, model: Module, data_iter: Iterable) ... FILE: colossalai/pipeline/schedule/interleaved_pp.py function _wait_p2p (line 20) | def _wait_p2p(wait_handles) -> None: class InterleavedSchedule (line 26) | class InterleavedSchedule(PipelineSchedule): method __init__ (line 27) | def __init__( method load_batch (line 62) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic... method load_micro_batch (line 104) | def load_micro_batch(self, model_chunk_id: int) -> Any: method get_model_chunk_id (line 118) | def get_model_chunk_id(self, microbatch_id: int, is_forward: bool) -> ... method recv_forward (line 138) | def recv_forward(self, model_chunk_id: int, prev_rank: int = None) -> ... method recv_backward (line 160) | def recv_backward(self, model_chunk_id: int, next_rank: int = None) ->... method send_forward (line 183) | def send_forward(self, model_chunk_id: int, output_tensor: Any, next_r... method send_backward (line 206) | def send_backward(self, model_chunk_id: int, input_tensor_grad: Any, p... method send_forward_recv_forward (line 231) | def send_forward_recv_forward( method send_backward_recv_backward (line 257) | def send_backward_recv_backward( method forward_step (line 282) | def forward_step( method backward_step (line 327) | def backward_step( method run_forward_only (line 373) | def run_forward_only( method run_forward_backward (line 419) | def run_forward_backward( method forward_backward_step (line 584) | def forward_backward_step( FILE: colossalai/pipeline/schedule/one_f_one_b.py class OneForwardOneBackwardSchedule (line 28) | class OneForwardOneBackwardSchedule(PipelineSchedule): method __init__ (line 29) | def __init__( method load_batch (line 67) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic... method load_micro_batch (line 109) | def load_micro_batch(self) -> Any: method recv_forward (line 120) | def recv_forward(self, prev_rank: int = None) -> Any: method recv_backward (line 139) | def recv_backward(self, next_rank: int = None) -> Any: method send_forward (line 158) | def send_forward(self, output_tensor: Any, next_rank: int = None) -> N... method send_backward (line 175) | def send_backward(self, input_tensor_grad: Any, prev_rank: int = None)... method send_forward_recv_backward (line 191) | def send_forward_recv_backward(self, output_tensor: Any, send_first: O... method send_backward_recv_forward (line 219) | def send_backward_recv_forward(self, input_tensor_grad: Any, send_firs... method forward_step (line 247) | def forward_step( method backward_step (line 282) | def backward_step( method run_forward_only (line 327) | def run_forward_only( method run_forward_backward (line 359) | def run_forward_backward( method forward_backward_step (line 443) | def forward_backward_step( FILE: colossalai/pipeline/schedule/v_schedule.py class ScheduledNode (line 36) | class ScheduledNode: class PipelineGraph (line 46) | class PipelineGraph(object): method __init__ (line 49) | def __init__( method get_id (line 76) | def get_id(self, cat, chunk, stage, micro): method try_v_schedule (line 81) | def try_v_schedule(self, fill_f=True, fill_b=True, approved_bubble=None): method print_details (line 290) | def print_details(self, end_time, print_scaling=1): method get_v_schedule (line 318) | def get_v_schedule(self, only_run_time=False): FILE: colossalai/pipeline/schedule/zero_bubble_pp.py function _wait_p2p (line 34) | def _wait_p2p(wait_handles: List[torch.cuda.Event]) -> None: class ZeroBubbleVPipeScheduler (line 40) | class ZeroBubbleVPipeScheduler(PipelineSchedule): method __init__ (line 54) | def __init__( method _free_buffers (line 117) | def _free_buffers(self): method assert_buffer_empty (line 149) | def assert_buffer_empty(self): method load_batch (line 170) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic... method load_micro_batch (line 205) | def load_micro_batch(self, model_chunk_id: int) -> Any: method get_model_chunk_id (line 219) | def get_model_chunk_id(self, microbatch_id: int, is_forward: bool) -> ... method recv_forward (line 239) | def recv_forward(self, model_chunk_id: int, prev_rank: int = None) -> ... method recv_backward (line 297) | def recv_backward(self, model_chunk_id: int, next_rank: int = None) ->... method send_forward (line 356) | def send_forward(self, model_chunk_id: int, next_rank: int = None) -> ... method send_backward (line 415) | def send_backward(self, model_chunk_id: int, prev_rank: int = None) ->... method forward_step (line 474) | def forward_step( method backward_b_step (line 516) | def backward_b_step( method backward_w_step (line 591) | def backward_w_step( method schedule_f (line 636) | def schedule_f( method schedule_b (line 741) | def schedule_b( method schedule_w (line 809) | def schedule_w( method run_forward_only (line 827) | def run_forward_only( method run_forward_backward (line 871) | def run_forward_backward( method forward_backward_step (line 938) | def forward_backward_step( FILE: colossalai/pipeline/stage_manager.py class PipelineStageManager (line 11) | class PipelineStageManager: method __init__ (line 24) | def __init__( method get_stage_index (line 62) | def get_stage_index( method is_first_stage (line 107) | def is_first_stage(self, ignore_chunk: bool = False) -> bool: method is_last_stage (line 124) | def is_last_stage(self, ignore_chunk: bool = False) -> bool: method num_stages (line 146) | def num_stages(self) -> int: method stage (line 155) | def stage(self) -> int: method get_rank (line 163) | def get_rank(self) -> int: method get_prev_rank (line 171) | def get_prev_rank(self) -> int: method get_next_rank (line 179) | def get_next_rank(self) -> int: method get_p2p_process_group (line 187) | def get_p2p_process_group(self) -> ProcessGroup: method init_process_group_by_stages (line 194) | def init_process_group_by_stages(self, stages: List[int]) -> ProcessGr... method switch_model_chunk_id (line 206) | def switch_model_chunk_id(self, model_chunk_id: int): method distribute_layers (line 212) | def distribute_layers( FILE: colossalai/pipeline/weight_grad_store.py class WeightGradStore (line 4) | class WeightGradStore: method put (line 10) | def put(cls, total_input, grad_output, weight, func): method flush (line 14) | def flush(cls, chunk=0): method pop (line 19) | def pop(cls, chunk=0): FILE: colossalai/quantization/bnb.py function quantize_model (line 30) | def quantize_model( function replace_with_bnb_layers (line 109) | def replace_with_bnb_layers(model, bnb_quantization_config, modules_to_n... function _replace_with_bnb_layers (line 141) | def _replace_with_bnb_layers( function get_keys_to_not_convert (line 208) | def get_keys_to_not_convert(model): function find_tied_parameters (line 261) | def find_tied_parameters(model: nn.Module, **kwargs): class FindTiedParametersResult (line 319) | class FindTiedParametersResult(list): method __init__ (line 325) | def __init__(self, *args, **kwargs): method values (line 328) | def values(self): FILE: colossalai/quantization/bnb_config.py class BnbQuantizationConfig (line 11) | class BnbQuantizationConfig: method __post_init__ (line 66) | def __post_init__(self): FILE: colossalai/quantization/fp8.py class Handle (line 21) | class Handle: method __init__ (line 22) | def __init__(self, handles=[], remain_ops=None) -> None: method wait (line 26) | def wait(self): function process_group_is_intranode (line 33) | def process_group_is_intranode(pg): function cast_to_fp8 (line 51) | def cast_to_fp8( function cast_from_fp8 (line 93) | def cast_from_fp8( function _all_reduce_fp8 (line 120) | def _all_reduce_fp8( function all_reduce_fp8 (line 187) | def all_reduce_fp8( function _all_to_all_single_fp8 (line 195) | def _all_to_all_single_fp8( function all_to_all_single_fp8 (line 258) | def all_to_all_single_fp8( function cast_to_fp8_pipeline (line 285) | def cast_to_fp8_pipeline(inp: Any) -> None: function cast_from_fp8_pipeline (line 327) | def cast_from_fp8_pipeline(inp: Any, del_metadata=True) -> None: function _reduce_scatter_fp8 (line 356) | def _reduce_scatter_fp8( function reduce_scatter_fp8 (line 401) | def reduce_scatter_fp8( function fp8_compress_ddp_grad_comm_hook_async (line 408) | def fp8_compress_ddp_grad_comm_hook_async( function fp8_compress_ddp_grad_comm_hook_sync (line 506) | def fp8_compress_ddp_grad_comm_hook_sync( function fp8_compress_fsdp_grad_comm_hook (line 532) | def fp8_compress_fsdp_grad_comm_hook( function fp8_compress_fsdp_params_comm_hook (line 566) | def fp8_compress_fsdp_params_comm_hook( function split_chunk_by_channel (line 601) | def split_chunk_by_channel( function _all_to_all_fp8 (line 616) | def _all_to_all_fp8(output_list, input_list, group=None, fp8_format="e5m... function all_to_all_fp8 (line 648) | def all_to_all_fp8(output_list, input_list, group=None, fp8_format="e5m2... function _all_gather_fp8 (line 656) | def _all_gather_fp8(output_list, input_, group=None, fp8_format="e5m2", ... function all_gather_fp8 (line 680) | def all_gather_fp8(output_list, input_, group=None, fp8_format="e5m2", a... function all_gather_fp8_lagacy (line 688) | def all_gather_fp8_lagacy( function all_gather_fp8_ring (line 716) | def all_gather_fp8_ring(output_list, input_, group=None, fp8_format="e5m... class _LinearFp8 (line 773) | class _LinearFp8(torch.autograd.Function): method forward (line 775) | def forward( method backward (line 812) | def backward(ctx: Any, out_grad) -> Any: function _linear_fp8 (line 838) | def _linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: Optiona... function linear_fp8 (line 842) | def linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: Optional... FILE: colossalai/quantization/fp8_hook.py class FP8Hook (line 7) | class FP8Hook(ColoParamOpHook): method pre_forward (line 8) | def pre_forward(self, params) -> None: method post_forward (line 11) | def post_forward(self, params) -> None: method pre_backward (line 14) | def pre_backward(self, params) -> None: method post_backward (line 17) | def post_backward(self, params) -> None: method rewrite_op (line 20) | def rewrite_op(self, func): FILE: colossalai/quantization/utils.py function _all_gather_flat_param (line 9) | def _all_gather_flat_param( function register_params_comm_hook (line 56) | def register_params_comm_hook(self, state: object, hook: callable): function patch_fsdp_params_comm_hook (line 102) | def patch_fsdp_params_comm_hook(): FILE: colossalai/shardformer/_utils.py function get_obj_list_element (line 4) | def get_obj_list_element(obj, attr: str): function set_obj_list_element (line 31) | def set_obj_list_element(obj, attr: str, value): function hasattr_ (line 55) | def hasattr_(obj, attr: str): function setattr_ (line 72) | def setattr_(obj, attr: str, value, ignore: bool = False): function getattr_ (line 94) | def getattr_(obj, attr: str, ignore: bool = False): FILE: colossalai/shardformer/examples/convergence_benchmark.py function to_device (line 21) | def to_device(x: Any, device: torch.device) -> Any: function train (line 30) | def train(args): function fit (line 86) | def fit( function evaluate_model (line 124) | def evaluate_model( FILE: colossalai/shardformer/examples/data.py class GLUEDataBuilder (line 8) | class GLUEDataBuilder: method __init__ (line 45) | def __init__( method setup (line 68) | def setup(self): method prepare_data (line 82) | def prepare_data(self): method train_dataloader (line 86) | def train_dataloader(self): method val_dataloader (line 95) | def val_dataloader(self): method test_dataloader (line 106) | def test_dataloader(self): method convert_to_features (line 117) | def convert_to_features(self, example_batch): method native_prepare_dataloader (line 134) | def native_prepare_dataloader(self, dataset, batch_size, shuffle=False... FILE: colossalai/shardformer/examples/performance_benchmark.py function data_gen (line 14) | def data_gen(batch_size, seq_length): function data_gen_for_sequence_classification (line 20) | def data_gen_for_sequence_classification(batch_size, seq_length): function train (line 56) | def train(model, data): function bench_shardformer (line 63) | def bench_shardformer(BATCH, N_CTX, provider, model_func, dtype=torch.fl... FILE: colossalai/shardformer/layer/_operation.py class FusedLayerNormAffineFunction1D (line 38) | class FusedLayerNormAffineFunction1D(torch.autograd.Function): method forward (line 53) | def forward(ctx, input, weight, bias, normalized_shape, eps): method backward (line 66) | def backward(ctx, grad_output): class MatmulWithAsyncCommunication (line 76) | class MatmulWithAsyncCommunication(torch.autograd.Function): method forward (line 82) | def forward(ctx, input_, weight, bias, process_group, async_grad_allre... method backward (line 98) | def backward(ctx, grad_output): class MatmulWithGradAccum (line 170) | class MatmulWithGradAccum(torch.autograd.Function): method forward (line 176) | def forward(ctx, input_, weight, bias, async_grad_allreduce, use_zbv=F... method backward (line 189) | def backward(ctx, grad_output): class LinearWithAsyncCommunication (line 251) | class LinearWithAsyncCommunication(torch.autograd.Function): method forward (line 257) | def forward(ctx, input_, weight, bias, process_group, async_grad_allre... method backward (line 272) | def backward(ctx, grad_output): class LinearWithGradAccum (line 341) | class LinearWithGradAccum(torch.autograd.Function): method forward (line 347) | def forward(ctx, input_, weight, bias, async_grad_allreduce, use_zbv=F... method backward (line 360) | def backward(ctx, grad_output): function _ring_as_gather (line 418) | def _ring_as_gather(func, input_to_gather=None, input_local=None, proces... class _GatherForwardReduceScatterBackward (line 485) | class _GatherForwardReduceScatterBackward(torch.autograd.Function): method forward (line 496) | def forward(ctx, input_, process_group, dim, fp8_communication=False): method backward (line 504) | def backward(ctx, grad_output): class _LinearWithGatherForwardReduceScatterBackward (line 527) | class _LinearWithGatherForwardReduceScatterBackward(torch.autograd.Funct... method forward (line 538) | def forward(ctx, input_, weight, bias, process_group, async_grad_reduc... method backward (line 571) | def backward(ctx, grad_output): function _ring_as_reducescatter (line 646) | def _ring_as_reducescatter( class _LinearWithReduceScatterForwardGatherBackward (line 704) | class _LinearWithReduceScatterForwardGatherBackward(torch.autograd.Funct... method forward (line 715) | def forward(ctx, input_, weight, bias, process_group, dim, ring, use_z... method backward (line 756) | def backward(ctx, grad_output): class _ReduceScatterForwardGatherBackward (line 819) | class _ReduceScatterForwardGatherBackward(torch.autograd.Function): method forward (line 829) | def forward(ctx, input_, process_group, dim, fp8_communication=False): method backward (line 850) | def backward(ctx, grad_output): class _MatmulWithGatherForwardReduceScatterBackward (line 858) | class _MatmulWithGatherForwardReduceScatterBackward(torch.autograd.Funct... method forward (line 870) | def forward( method backward (line 904) | def backward(ctx, grad_output): class _SplitForwardGatherBackward (line 982) | class _SplitForwardGatherBackward(torch.autograd.Function): method forward (line 994) | def forward(ctx, input_, dim, process_group, grad_scale=None, fp8_comm... method backward (line 1002) | def backward(ctx, grad_output): class _ReduceForward (line 1015) | class _ReduceForward(torch.autograd.Function): method forward (line 1026) | def forward(ctx, input_, process_group, grad_scale=None, fp8_communica... method backward (line 1031) | def backward(ctx, grad_output): class _ReduceBackward (line 1037) | class _ReduceBackward(torch.autograd.Function): method forward (line 1047) | def forward(ctx, input_, process_group, fp8_communication=False): method backward (line 1053) | def backward(ctx, grad_output): class _GatherForwardSplitBackward (line 1058) | class _GatherForwardSplitBackward(torch.autograd.Function): method forward (line 1068) | def forward(ctx, input_, dim, process_group, grad_scale=None, fp8_comm... method backward (line 1076) | def backward(ctx, grad_output): class _AllToAll (line 1082) | class _AllToAll(torch.autograd.Function): method forward (line 1093) | def forward(ctx, input_, process_group, scatter_dim, gather_dim, fp8_c... method backward (line 1124) | def backward(ctx, grad_output): class HookParameter (line 1156) | class HookParameter(torch.autograd.Function): method forward (line 1160) | def forward(ctx, input, weight, bias): method backward (line 1166) | def backward(ctx, grad_output): function hook_parameter_in_backward (line 1175) | def hook_parameter_in_backward(input, weight=None, bias=None): function _reduce (line 1179) | def _reduce(input_, process_group, fp8_communication=False, fp8_format="... function _split (line 1191) | def _split(input_, dim=-1, process_group=None): function _gather (line 1211) | def _gather(input_, dim=-1, process_group=None, fp8_communication=False,... function _reduce_scatter (line 1229) | def _reduce_scatter(input_, dim=1, process_group=None): function _all_to_all (line 1253) | def _all_to_all(input_, world_size, group, scatter_dim, gather_dim, fp8_... function _all_to_all_single (line 1263) | def _all_to_all_single( function matmul_with_async_comm (line 1296) | def matmul_with_async_comm( function matmul_with_grad_comm (line 1304) | def matmul_with_grad_comm(input_, weight, bias, async_grad_allreduce, us... function linear_with_async_comm (line 1308) | def linear_with_async_comm( function linear_with_grad_accum (line 1316) | def linear_with_grad_accum(input_, weight, bias, async_grad_allreduce, u... function linear_gather_forward_reducescatter_backward (line 1320) | def linear_gather_forward_reducescatter_backward( function gather_forward_reducescatter_backward (line 1328) | def gather_forward_reducescatter_backward(input_, process_group, dim, fp... function reducescatter_forward_gather_backward (line 1332) | def reducescatter_forward_gather_backward(input_, process_group, dim, fp... function linear_reducescatter_forward_gather_backward (line 1336) | def linear_reducescatter_forward_gather_backward( function matmul_gather_forward_reducescatter_backward (line 1342) | def matmul_gather_forward_reducescatter_backward( function gather_forward_split_backward (line 1358) | def gather_forward_split_backward(input_, dim, process_group, grad_scale... function split_forward_gather_backward (line 1362) | def split_forward_gather_backward(input_, dim, process_group, grad_scale... function reduce_forward (line 1366) | def reduce_forward(input_, process_group, grad_scale=None, fp8_communica... function reduce_backward (line 1370) | def reduce_backward(input_, process_group, fp8_communication=False): function all_to_all_comm (line 1374) | def all_to_all_comm(input_, process_group=None, scatter_dim=2, gather_di... function gather_sp_output (line 1378) | def gather_sp_output(hidden_states, shard_config, sp_dim=1): FILE: colossalai/shardformer/layer/attn.py class AttnMaskType (line 33) | class AttnMaskType(Enum): function invert_mask (line 40) | def invert_mask(mask: torch.Tensor) -> torch.Tensor: function get_pad_info (line 54) | def get_pad_info( class ColoAttention (line 82) | class ColoAttention: method _init_kernels_dispatch (line 87) | def _init_kernels_dispatch(): method _dispatch_kernel (line 114) | def _dispatch_kernel(dtype: torch.dtype, mask_type: Optional[AttnMaskT... method prepare_attn_kwargs (line 139) | def prepare_attn_kwargs( method attention (line 229) | def attention( function _load_varlen_helpers (line 334) | def _load_varlen_helpers(): function _load_flash_attn (line 356) | def _load_flash_attn(): function _rescale_out_lse (line 376) | def _rescale_out_lse(out, block_out, lse, block_lse): class RingAttention (line 406) | class RingAttention(torch.autograd.Function): method get_double_ring_groups (line 445) | def get_double_ring_groups(sp_axis, pg_mesh, inner_ring_size=None): method attention (line 498) | def attention( method forward (line 623) | def forward( method backward (line 942) | def backward(ctx, dout, _): method prepare_varlen_batch (line 1178) | def prepare_varlen_batch( FILE: colossalai/shardformer/layer/dropout.py class DropoutForParallelInput (line 13) | class DropoutForParallelInput(ParallelModule, nn.Dropout): method __init__ (line 25) | def __init__(self, p: float = 0.5, inplace: bool = False, process_grou... method from_native_module (line 34) | def from_native_module( method forward (line 44) | def forward(self, input): class DropoutForReplicatedInput (line 50) | class DropoutForReplicatedInput(ParallelModule, nn.Dropout): method __init__ (line 62) | def __init__(self, p: float = 0.5, inplace: bool = False, process_grou... method from_native_module (line 71) | def from_native_module( method forward (line 81) | def forward(self, input): FILE: colossalai/shardformer/layer/embedding.py class Embedding1D (line 30) | class Embedding1D(ParallelModule): method __init__ (line 60) | def __init__( method from_native_module (line 107) | def from_native_module( method reset_parameters (line 147) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 152) | def _fill_padding_idx_with_zero(self) -> None: method forward (line 157) | def forward(self, input_: Tensor) -> Tensor: class PaddingEmbedding (line 168) | class PaddingEmbedding(PaddingParallelModule): method __init__ (line 169) | def __init__( method reset_parameters (line 202) | def reset_parameters(self) -> None: method _fill_padding_idx_with_zero (line 206) | def _fill_padding_idx_with_zero(self) -> None: method forward (line 211) | def forward(self, input: Tensor) -> Tensor: method from_native_module (line 215) | def from_native_module( class VocabParallelEmbedding1D (line 241) | class VocabParallelEmbedding1D(PaddingParallelModule): method __init__ (line 270) | def __init__( method from_native_module (line 330) | def from_native_module( method reset_parameters (line 362) | def reset_parameters(self, weight_initializer) -> None: method _fill_padding_idx_with_zero (line 368) | def _fill_padding_idx_with_zero(self) -> None: method _select_padding_idx (line 377) | def _select_padding_idx(self, padding_idx: int): method forward (line 386) | def forward(self, input_: Tensor) -> Tensor: FILE: colossalai/shardformer/layer/linear.py class LinearWithGradAccum (line 40) | class LinearWithGradAccum(ParallelModule): method __init__ (line 63) | def __init__( method from_native_module (line 123) | def from_native_module(module: nn.Linear, **kwargs) -> ParallelModule: method reset_parameters (line 146) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 153) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class Linear1D_Col (line 181) | class Linear1D_Col(ParallelModule): method __init__ (line 209) | def __init__( method from_native_module (line 285) | def from_native_module( method reset_parameters (line 324) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 331) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class Linear1D_Row (line 379) | class Linear1D_Row(ParallelModule): method __init__ (line 402) | def __init__( method from_native_module (line 481) | def from_native_module( method chunk_weight (line 521) | def chunk_weight(self): method reset_parameters (line 525) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 542) | def forward(self, input_: Tensor) -> Tensor: class PaddingLMHead (line 598) | class PaddingLMHead(PaddingParallelModule): method __init__ (line 599) | def __init__( method reset_parameters (line 640) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method from_native_module (line 647) | def from_native_module( method forward (line 673) | def forward(self, input: Tensor) -> Tensor: class VocabParallelLMHead1D (line 679) | class VocabParallelLMHead1D(Linear1D_Col, PaddingParallelModule): method __init__ (line 707) | def __init__( method from_native_module (line 762) | def from_native_module( method forward (line 788) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: FILE: colossalai/shardformer/layer/loss.py class DistCrossEntropy (line 25) | class DistCrossEntropy(Function): method forward (line 34) | def forward( method backward (line 130) | def backward(ctx, grad_output): class DistLogProb (line 148) | class DistLogProb(Function): method forward (line 157) | def forward( method backward (line 218) | def backward(ctx, grad_output): function cross_entropy_1d (line 240) | def cross_entropy_1d( function dist_log_prob_1d (line 252) | def dist_log_prob_1d( function dist_cross_entropy (line 262) | def dist_cross_entropy( function dist_log_prob (line 358) | def dist_log_prob( FILE: colossalai/shardformer/layer/normalization.py class FusedLayerNormWithHook (line 38) | class FusedLayerNormWithHook(ApexFusedLayerNorm): method __init__ (line 39) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T... method forward (line 42) | def forward(self, input): class NPUFusedRMSNormWithHook (line 53) | class NPUFusedRMSNormWithHook(nn.Module): method __init__ (line 54) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T... method reset_parameters (line 67) | def reset_parameters(self): method forward (line 71) | def forward(self, input): class CUDAFusedRMSNormWithHook (line 82) | class CUDAFusedRMSNormWithHook(ApexFusedRMSNorm): method __init__ (line 83) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T... method forward (line 86) | def forward(self, input): class FastLayerNormWithHook (line 127) | class FastLayerNormWithHook(FastLayerNorm): method __init__ (line 128) | def __init__(self, hidden_size, eps=0.00001): method forward (line 131) | def forward(self, input): class BaseLayerNorm (line 137) | class BaseLayerNorm(ABC): method from_native_module (line 139) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F... class RMSNorm (line 156) | class RMSNorm(BaseLayerNorm): method __init__ (line 161) | def __init__(self) -> None: method from_native_module (line 168) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F... class LayerNorm (line 192) | class LayerNorm(BaseLayerNorm): method __init__ (line 197) | def __init__(self) -> None: method from_native_module (line 204) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F... class FusedLayerNorm (line 231) | class FusedLayerNorm(BaseLayerNorm): method __init__ (line 236) | def __init__(self) -> None: method from_native_module (line 243) | def from_native_module(module: nn.LayerNorm, sp_partial_derived: bool ... class FusedRMSNorm (line 300) | class FusedRMSNorm(BaseLayerNorm): method __init__ (line 305) | def __init__(self) -> None: method from_native_module (line 312) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F... FILE: colossalai/shardformer/layer/parallel_module.py class ParallelModule (line 28) | class ParallelModule(nn.Module, ABC): method __init__ (line 29) | def __init__(self, **kwargs): method from_native_module (line 33) | def from_native_module( method _save_to_state_dict (line 46) | def _save_to_state_dict(self, destination, prefix, keep_vars): method _load_from_state_dict (line 70) | def _load_from_state_dict( class PaddingParallelModule (line 180) | class PaddingParallelModule(ParallelModule): method __init__ (line 181) | def __init__( method from_native_module (line 204) | def from_native_module( method _save_to_state_dict (line 218) | def _save_to_state_dict(self, destination, prefix, keep_vars): method _load_from_state_dict (line 245) | def _load_from_state_dict( method resize_embedding_weight (line 357) | def resize_embedding_weight(self): method resize_embedding_bias (line 360) | def resize_embedding_bias(self): FILE: colossalai/shardformer/layer/qkv_fused_linear.py function split_fused_qkv_in_gpt2_style (line 55) | def split_fused_qkv_in_gpt2_style( function gather_fused_qkv_in_gpt2_style (line 100) | def gather_fused_qkv_in_gpt2_style( class _SplitForwardGatherBackwardFusedQKV (line 157) | class _SplitForwardGatherBackwardFusedQKV(torch.autograd.Function): method forward (line 159) | def forward(ctx, qkv: torch.Tensor, split_sizes: List[int], process_gr... method backward (line 165) | def backward(ctx, grad_output): function split_forward_gather_backward_fused_qkv (line 172) | def split_forward_gather_backward_fused_qkv(qkv: torch.Tensor, split_siz... class _GatherForwardSplitBackwardFusedQKV (line 176) | class _GatherForwardSplitBackwardFusedQKV(torch.autograd.Function): method forward (line 178) | def forward(ctx, qkv: torch.Tensor, split_sizes: List[int], process_gr... method backward (line 184) | def backward(ctx, grad_output): function gather_forward_split_backward_fused_qkv (line 189) | def gather_forward_split_backward_fused_qkv(qkv: torch.Tensor, split_siz... class GPT2FusedLinearConv1D_Col (line 193) | class GPT2FusedLinearConv1D_Col(ParallelModule): method __init__ (line 222) | def __init__( method from_native_module (line 310) | def from_native_module( method reset_parameters (line 361) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 368) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class GPT2FusedLinearConv1D_Row (line 417) | class GPT2FusedLinearConv1D_Row(ParallelModule): method __init__ (line 439) | def __init__( method from_native_module (line 518) | def from_native_module( method chunk_weight (line 559) | def chunk_weight(self): method reset_parameters (line 562) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 579) | def forward(self, input_: Tensor) -> Tensor: class GPT2FusedLinearConv (line 637) | class GPT2FusedLinearConv(ParallelModule): method __init__ (line 658) | def __init__( method from_native_module (line 720) | def from_native_module( method reset_parameters (line 752) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 759) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class FusedLinear1D_Col (line 788) | class FusedLinear1D_Col(ParallelModule): method __init__ (line 816) | def __init__( method from_native_module (line 905) | def from_native_module( method reset_parameters (line 948) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 955) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: class FusedLinear1D_Row (line 1001) | class FusedLinear1D_Row(ParallelModule): method __init__ (line 1024) | def __init__( method from_native_module (line 1109) | def from_native_module( method reset_parameters (line 1142) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 1159) | def forward(self, input_: Tensor) -> Tensor: class FusedLinear (line 1205) | class FusedLinear(ParallelModule): method __init__ (line 1233) | def __init__( method from_native_module (line 1295) | def from_native_module( method reset_parameters (line 1329) | def reset_parameters(self, weight_initializer, bias_initializer) -> None: method forward (line 1336) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]: FILE: colossalai/shardformer/layer/utils.py function execute_conv1d_w_pass_grad_accum (line 21) | def execute_conv1d_w_pass_grad_accum(_input_, _grad_output_, _weight_mai... function execute_conv1d_w_pass (line 31) | def execute_conv1d_w_pass(_input_, _grad_output_, _weight_main_grad_=Non... function execute_w_pass_grad_accum (line 36) | def execute_w_pass_grad_accum(_input_, _grad_output_, _weight_main_grad_): function execute_w_pass (line 46) | def execute_w_pass(_input_, _grad_output_, _weight_main_grad_=None, wgra... class SeqParallelUtils (line 50) | class SeqParallelUtils: method marked_as_sp_partial_derived_param (line 52) | def marked_as_sp_partial_derived_param(param): method is_sp_partial_derived_param (line 62) | def is_sp_partial_derived_param(param): method allreduce_partial_data_grad (line 75) | def allreduce_partial_data_grad( class Randomizer (line 129) | class Randomizer: method __init__ (line 151) | def __init__(self, seed: int): method _set_device_rng_state (line 169) | def _set_device_rng_state(self, rng_state): method _get_device_rng_state (line 172) | def _get_device_rng_state(self): method _set_cpu_rng_state (line 176) | def _set_cpu_rng_state(self, rng_state): method _get_cpu_rng_state (line 179) | def _get_cpu_rng_state(self): method fork_rng (line 184) | def fork_rng(self, enable_cpu: bool = False): method index (line 210) | def index(): method increment_index (line 237) | def increment_index(): method reset_index (line 244) | def reset_index(): method is_randomizer_index_synchronized (line 251) | def is_randomizer_index_synchronized(process_group: ProcessGroup = None): method synchronize_index (line 272) | def synchronize_index(process_group: ProcessGroup = None): function create_randomizer_with_offset (line 295) | def create_randomizer_with_offset( function split_batch_zigzag (line 331) | def split_batch_zigzag( function split_varlen_zigzag (line 381) | def split_varlen_zigzag( function is_share_sp_tp (line 467) | def is_share_sp_tp(sp_mode: str): class RingComm (line 475) | class RingComm: method __init__ (line 476) | def __init__(self, process_group: dist.ProcessGroup): method send_recv (line 489) | def send_recv( method commit (line 510) | def commit(self): method wait (line 514) | def wait(self): function get_half_index (line 523) | def get_half_index(cu_seqlens, *, front: bool): FILE: colossalai/shardformer/modeling/bert.py class BertPipelineForwards (line 35) | class BertPipelineForwards: method bert_model_forward (line 42) | def bert_model_forward( method bert_for_pretraining_forward (line 286) | def bert_for_pretraining_forward( method bert_lm_head_model_forward (line 363) | def bert_lm_head_model_forward( method bert_for_masked_lm_forward (line 472) | def bert_for_masked_lm_forward( method bert_for_next_sentence_prediction_forward (line 551) | def bert_for_next_sentence_prediction_forward( method bert_for_sequence_classification_forward (line 659) | def bert_for_sequence_classification_forward( method bert_for_token_classification_forward (line 753) | def bert_for_token_classification_forward( method bert_for_multiple_choice_forward (line 828) | def bert_for_multiple_choice_forward( method bert_for_question_answering_forward (line 919) | def bert_for_question_answering_forward( function get_jit_fused_bert_self_output_forward (line 1016) | def get_jit_fused_bert_self_output_forward(): function get_jit_fused_bert_output_forward (line 1028) | def get_jit_fused_bert_output_forward(): function get_bert_sequence_parallel_attention_forward (line 1042) | def get_bert_sequence_parallel_attention_forward(shard_config: ShardConf... function bert_sequence_parallel_forward_fn (line 1123) | def bert_sequence_parallel_forward_fn(shard_config: ShardConfig): function get_jit_fused_bert_intermediate_forward (line 1283) | def get_jit_fused_bert_intermediate_forward(): FILE: colossalai/shardformer/modeling/blip2.py function forward_fn (line 9) | def forward_fn(): function get_blip2_flash_attention_forward (line 64) | def get_blip2_flash_attention_forward(): function get_jit_fused_blip2_QFormer_self_output_forward (line 102) | def get_jit_fused_blip2_QFormer_self_output_forward(): function get_jit_fused_blip2_QFormer_output_forward (line 118) | def get_jit_fused_blip2_QFormer_output_forward(): function get_jit_fused_blip2_mlp_forward (line 134) | def get_jit_fused_blip2_mlp_forward(): FILE: colossalai/shardformer/modeling/bloom.py function build_bloom_alibi_tensor_fn (line 37) | def build_bloom_alibi_tensor_fn(process_group: ProcessGroup) -> torch.Te... class BloomPipelineForwards (line 103) | class BloomPipelineForwards: method bloom_model_forward (line 109) | def bloom_model_forward( method bloom_for_causal_lm_forward (line 320) | def bloom_for_causal_lm_forward( method bloom_for_sequence_classification_forward (line 412) | def bloom_for_sequence_classification_forward( method bloom_for_token_classification_forward (line 538) | def bloom_for_token_classification_forward( method bloom_for_question_answering_forward (line 632) | def bloom_for_question_answering_forward( function get_jit_fused_bloom_attention_forward (line 726) | def get_jit_fused_bloom_attention_forward(): function get_jit_fused_bloom_mlp_forward (line 810) | def get_jit_fused_bloom_mlp_forward(): function get_jit_fused_bloom_gelu_forward (line 832) | def get_jit_fused_bloom_gelu_forward(): function get_bloom_sequence_parallel_attention_forward (line 848) | def get_bloom_sequence_parallel_attention_forward(shard_config: ShardCon... function get_bloom_sequence_parallel_forward_fn (line 933) | def get_bloom_sequence_parallel_forward_fn(shard_config: ShardConfig): function get_lm_forward_with_dist_cross_entropy (line 1095) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/chatglm2.py function get_flash_core_attention_forward (line 23) | def get_flash_core_attention_forward(): function get_jit_fused_glm_block_forward (line 37) | def get_jit_fused_glm_block_forward(): class ChatGLMPipelineForwards (line 87) | class ChatGLMPipelineForwards: method chatglm_model_forward (line 93) | def chatglm_model_forward( method chatglm_for_conditional_generation_forward (line 267) | def chatglm_for_conditional_generation_forward( function get_chatglm_sequence_parallel_forward_fn (line 339) | def get_chatglm_sequence_parallel_forward_fn(shard_config: ShardConfig, ... function get_chatglm_sequence_parallel_attention_forward (line 466) | def get_chatglm_sequence_parallel_attention_forward(shard_config: ShardC... function get_flash_attention_forward_for_chat_glm_model (line 642) | def get_flash_attention_forward_for_chat_glm_model(): FILE: colossalai/shardformer/modeling/chatglm2_6b/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 7) | def __init__( FILE: colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py function default_init (line 78) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 82) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 83) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... class PrefixEncoder (line 90) | class PrefixEncoder(torch.nn.Module): method __init__ (line 97) | def __init__(self, config: ChatGLMConfig): method forward (line 115) | def forward(self, prefix: torch.Tensor): function split_tensor_along_last_dim (line 124) | def split_tensor_along_last_dim( class RotaryEmbedding (line 152) | class RotaryEmbedding(nn.Module): method __init__ (line 153) | def __init__(self, dim, original_impl=False, device=None, dtype=None): method forward_impl (line 160) | def forward_impl( method forward (line 190) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 200) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 220) | class RMSNorm(torch.nn.Module): method __init__ (line 221) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 228) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 235) | class CoreAttention(torch.nn.Module): method __init__ (line 236) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 261) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SelfAttention (line 374) | class SelfAttention(torch.nn.Module): method __init__ (line 381) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 413) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 427) | def forward( function _config_to_kwargs (line 546) | def _config_to_kwargs(args): class MLP (line 553) | class MLP(torch.nn.Module): method __init__ (line 561) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 590) | def forward(self, hidden_states): class GLMBlock (line 599) | class GLMBlock(torch.nn.Module): method __init__ (line 606) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 638) | def forward( class GLMTransformer (line 686) | class GLMTransformer(torch.nn.Module): method __init__ (line 689) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 716) | def _get_layer(self, layer_number): method forward (line 719) | def forward( class ChatGLMPreTrainedModel (line 776) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 788) | def _init_weights(self, module: nn.Module): method get_masks (line 792) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 815) | def get_position_ids(self, input_ids, device): method _set_gradient_checkpointing (line 820) | def _set_gradient_checkpointing(self, module, value=False): class Embedding (line 825) | class Embedding(torch.nn.Module): method __init__ (line 828) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 841) | def forward(self, input_ids): class ChatGLMModel (line 853) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 854) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 898) | def get_input_embeddings(self): method get_prompt (line 901) | def get_prompt(self, batch_size, device, dtype=torch.half): method forward (line 916) | def forward( method quantize (line 996) | def quantize(self, weight_bit_width: int): class ChatGLMForConditionalGeneration (line 1003) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 1004) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 1015) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 1045) | def prepare_inputs_for_generation( method forward (line 1068) | def forward( method _reorder_cache (line 1129) | def _reorder_cache( method process_response (line 1147) | def process_response(self, response): method build_inputs (line 1152) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,... method build_stream_inputs (line 1158) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup... method chat (line 1171) | def chat( method stream_chat (line 1207) | def stream_chat( method stream_generate (line 1265) | def stream_generate( method quantize (line 1372) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs): FILE: colossalai/shardformer/modeling/command.py class CommandPipelineForwards (line 33) | class CommandPipelineForwards: method command_model_forward (line 40) | def command_model_forward( method command_for_causal_lm_forward (line 244) | def command_for_causal_lm_forward( function get_command_flash_attention_forward (line 349) | def get_command_flash_attention_forward(shard_config: ShardConfig, sp_mo... function get_command_flash_attention_model_forward (line 430) | def get_command_flash_attention_model_forward(shard_config: ShardConfig,... function get_lm_forward_with_dist_cross_entropy (line 573) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/deepseek.py class AddAuxiliaryLoss (line 42) | class AddAuxiliaryLoss(torch.autograd.Function): method forward (line 49) | def forward(ctx, x, loss): method backward (line 56) | def backward(ctx, grad_output): class EPDeepseekMoE (line 63) | class EPDeepseekMoE(ParallelModule): method __init__ (line 64) | def __init__(self): method setup_process_groups (line 67) | def setup_process_groups( method from_native_module (line 126) | def from_native_module( method forward (line 142) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class DeepseekMoEGate_Col (line 232) | class DeepseekMoEGate_Col(ParallelModule): method parallel_linear (line 233) | def parallel_linear(self, hidden_states): method forward (line 250) | def forward(self, hidden_states): method from_native_module (line 293) | def from_native_module( class DeepseekPipelineForwards (line 305) | class DeepseekPipelineForwards: method deepseek_model_forward (line 312) | def deepseek_model_forward( method deepseek_for_causal_lm_forward (line 502) | def deepseek_for_causal_lm_forward( function get_deepseek_flash_attention_forward (line 614) | def get_deepseek_flash_attention_forward(shard_config, sp_mode=None, sp_... function get_deepseek_flash_attention_model_forward (line 733) | def get_deepseek_flash_attention_model_forward(shard_config, sp_mode=Non... FILE: colossalai/shardformer/modeling/deepseek_v3.py class EpDeepseekV3MoE (line 26) | class EpDeepseekV3MoE(ParallelModule): method __init__ (line 31) | def __init__(self, config): method setup_process_groups (line 34) | def setup_process_groups( method from_native_module (line 63) | def from_native_module( method forward (line 76) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method moe_forward (line 86) | def moe_forward(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_we... function deepseek_v3_model_forward (line 161) | def deepseek_v3_model_forward( function deepseek_v3_for_causal_lm_forward (line 297) | def deepseek_v3_for_causal_lm_forward( FILE: colossalai/shardformer/modeling/falcon.py function build_falcon_alibi_tensor_fn (line 32) | def build_falcon_alibi_tensor_fn(process_group: ProcessGroup) -> torch.T... function get_tp_falcon_decoder_layer_forward (line 98) | def get_tp_falcon_decoder_layer_forward(): class FalconPipelineForwards (line 182) | class FalconPipelineForwards: method falcon_model_forward (line 188) | def falcon_model_forward( method falcon_for_causal_lm_forward (line 354) | def falcon_for_causal_lm_forward( method falcon_for_sequence_classification_forward (line 453) | def falcon_for_sequence_classification_forward( method falcon_for_token_classification_forward (line 567) | def falcon_for_token_classification_forward( method falcon_for_question_answering_forward (line 649) | def falcon_for_question_answering_forward( function get_lm_forward_with_dist_cross_entropy (line 742) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/gpt2.py function _get_attention_mask (line 34) | def _get_attention_mask( class GPT2PipelineForwards (line 109) | class GPT2PipelineForwards: method gpt2_model_forward (line 116) | def gpt2_model_forward( method gpt2_lmhead_model_forward (line 344) | def gpt2_lmhead_model_forward( method gpt2_double_heads_model_forward (line 433) | def gpt2_double_heads_model_forward( method gpt2_for_question_answering_forward (line 527) | def gpt2_for_question_answering_forward( method gpt2_for_token_classification_forward (line 618) | def gpt2_for_token_classification_forward( method gpt2_for_sequence_classification_forward (line 693) | def gpt2_for_sequence_classification_forward( function get_gpt2_flash_attention_forward (line 811) | def get_gpt2_flash_attention_forward(shard_config: Optional[ShardConfig]... function get_jit_fused_gpt2_mlp_forward (line 889) | def get_jit_fused_gpt2_mlp_forward(): FILE: colossalai/shardformer/modeling/gptj.py function _get_attention_mask (line 30) | def _get_attention_mask( class GPTJPipelineForwards (line 73) | class GPTJPipelineForwards: method gptj_model_forward (line 80) | def gptj_model_forward( method gptj_causallm_model_forward (line 262) | def gptj_causallm_model_forward( method gptj_for_sequence_classification_forward (line 356) | def gptj_for_sequence_classification_forward( method gptj_for_question_answering_forward (line 475) | def gptj_for_question_answering_forward( function get_gptj_flash_attention_forward (line 567) | def get_gptj_flash_attention_forward(): function gptj_model_forward_for_flash_attention (line 649) | def gptj_model_forward_for_flash_attention(shard_config: ShardConfig): function gptj_sequence_parallel_forward_fn (line 825) | def gptj_sequence_parallel_forward_fn(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/jit.py function get_dropout_add_func (line 4) | def get_dropout_add_func(): function get_jit_fused_dropout_add_func (line 13) | def get_jit_fused_dropout_add_func(): function get_jit_fused_gelu_forward_func (line 25) | def get_jit_fused_gelu_forward_func(): FILE: colossalai/shardformer/modeling/llama.py class LlamaPipelineForwards (line 36) | class LlamaPipelineForwards: method llama_model_forward (line 43) | def llama_model_forward( method llama_for_causal_lm_forward (line 260) | def llama_for_causal_lm_forward( method llama_for_sequence_classification_forward (line 373) | def llama_for_sequence_classification_forward( function get_llama_flash_attention_forward (line 489) | def get_llama_flash_attention_forward(shard_config: ShardConfig, sp_mode... FILE: colossalai/shardformer/modeling/mistral.py class MistralForwards (line 23) | class MistralForwards: method mistral_model_forward (line 25) | def mistral_model_forward( method mistral_for_causal_lm_forward (line 187) | def mistral_for_causal_lm_forward( method mistral_for_sequence_classification_forward (line 276) | def mistral_for_sequence_classification_forward( function get_mistral_model_forward_for_flash_attn (line 375) | def get_mistral_model_forward_for_flash_attn(shard_config: ShardConfig): function get_mistral_flash_attention_forward (line 497) | def get_mistral_flash_attention_forward(shard_config: ShardConfig): function get_lm_forward_with_dist_cross_entropy (line 557) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/mixtral.py class EPMixtralSparseMoeBlock (line 54) | class EPMixtralSparseMoeBlock(ParallelModule): method __init__ (line 55) | def __init__(self, *args, **kwargs): method setup_process_groups (line 58) | def setup_process_groups( method from_native_module (line 108) | def from_native_module( method forward (line 123) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class MixtralPipelineForwards (line 211) | class MixtralPipelineForwards: method mixtral_model_forward (line 218) | def mixtral_model_forward( method mixtral_for_causal_lm_forward (line 443) | def mixtral_for_causal_lm_forward( function get_mixtral_flash_attention_forward (line 573) | def get_mixtral_flash_attention_forward(shard_config, sp_mode=None, sp_s... function get_mixtral_flash_attention_model_forward (line 719) | def get_mixtral_flash_attention_model_forward(shard_config, sp_mode=None... FILE: colossalai/shardformer/modeling/opt.py function _get_attention_mask (line 30) | def _get_attention_mask( class OPTPipelineForwards (line 57) | class OPTPipelineForwards: method opt_model_forward (line 64) | def opt_model_forward( method opt_for_causal_lm_forward (line 286) | def opt_for_causal_lm_forward( method opt_for_sequence_classification_forward (line 359) | def opt_for_sequence_classification_forward( method opt_for_question_answering_forward (line 461) | def opt_for_question_answering_forward( function get_opt_flash_attention_forward (line 542) | def get_opt_flash_attention_forward(shard_config: ShardConfig): function get_opt_decoder_forward_for_flash_attention (line 612) | def get_opt_decoder_forward_for_flash_attention(shard_config: ShardConfig): function get_jit_fused_opt_decoder_layer_forward (line 764) | def get_jit_fused_opt_decoder_layer_forward(): function get_lm_forward_with_dist_cross_entropy (line 846) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/qwen2.py class Qwen2PipelineForwards (line 35) | class Qwen2PipelineForwards: method qwen2_model_forward (line 42) | def qwen2_model_forward( method qwen2_for_causal_lm_forward (line 262) | def qwen2_for_causal_lm_forward( method qwen2_for_sequence_classification_forward (line 366) | def qwen2_for_sequence_classification_forward( function get_qwen2_flash_attention_forward (line 481) | def get_qwen2_flash_attention_forward(shard_config: ShardConfig, sp_mode... function get_qwen2_model_forward_for_flash_attn (line 606) | def get_qwen2_model_forward_for_flash_attn(shard_config: ShardConfig, sp... function get_lm_forward_with_dist_cross_entropy (line 761) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/qwen3.py class Qwen3PipelineForwards (line 35) | class Qwen3PipelineForwards: method qwen3_model_forward (line 42) | def qwen3_model_forward( method qwen3_for_causal_lm_forward (line 260) | def qwen3_for_causal_lm_forward( method qwen3_for_sequence_classification_forward (line 364) | def qwen3_for_sequence_classification_forward( function get_qwen3_flash_attention_forward (line 467) | def get_qwen3_flash_attention_forward(shard_config: ShardConfig, sp_mode... function get_qwen3_model_forward_for_flash_attn (line 598) | def get_qwen3_model_forward_for_flash_attn(shard_config: ShardConfig, sp... function get_lm_forward_with_dist_cross_entropy (line 753) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): FILE: colossalai/shardformer/modeling/sam.py function forward_fn (line 6) | def forward_fn(): FILE: colossalai/shardformer/modeling/t5.py class T5PipelineForwards (line 25) | class T5PipelineForwards: method t5_stack_forward (line 32) | def t5_stack_forward( method t5_model_forward (line 264) | def t5_model_forward( method t5_for_conditional_generation_forward (line 403) | def t5_for_conditional_generation_forward( method t5_encoder_model_forward (line 566) | def t5_encoder_model_forward( method t5_for_token_classification_forward (line 609) | def t5_for_token_classification_forward( function get_t5_flash_attention_forward (line 674) | def get_t5_flash_attention_forward(): function get_jit_fused_T5_layer_ff_forward (line 801) | def get_jit_fused_T5_layer_ff_forward(): function get_T5_layer_self_attention_forward (line 813) | def get_T5_layer_self_attention_forward(): function get_T5_layer_cross_attention_forward (line 845) | def get_T5_layer_cross_attention_forward(): FILE: colossalai/shardformer/modeling/vit.py function _encoder_forward (line 11) | def _encoder_forward( function ViTModel_pipeline_forward (line 50) | def ViTModel_pipeline_forward(stage_manager: PipelineStageManager, stage... function ViTForImageClassification_pipeline_forward (line 139) | def ViTForImageClassification_pipeline_forward(stage_manager: PipelineSt... function ViTForMaskedImageModeling_pipeline_forward (line 225) | def ViTForMaskedImageModeling_pipeline_forward(stage_manager: PipelineSt... function get_vit_flash_self_attention_forward (line 336) | def get_vit_flash_self_attention_forward(): function get_jit_fused_vit_output_forward (line 366) | def get_jit_fused_vit_output_forward(): function get_jit_fused_vit_intermediate_forward (line 377) | def get_jit_fused_vit_intermediate_forward(): FILE: colossalai/shardformer/modeling/whisper.py function _get_attention_mask (line 37) | def _get_attention_mask( function get_whisper_flash_attention_forward (line 74) | def get_whisper_flash_attention_forward(): function get_whisper_decoder_forward_for_flash_attention (line 161) | def get_whisper_decoder_forward_for_flash_attention(shard_config: ShardC... function get_jit_fused_whisper_encoder_layer_forward (line 319) | def get_jit_fused_whisper_encoder_layer_forward(): function get_jit_fused_whisper_decoder_layer_forward (line 373) | def get_jit_fused_whisper_decoder_layer_forward(): class WhisperPipelineForwards (line 465) | class WhisperPipelineForwards: method whisper_encoder_forward (line 472) | def whisper_encoder_forward( method whisper_decoder_forward (line 601) | def whisper_decoder_forward( method whisper_model_forward (line 845) | def whisper_model_forward( method whisper_for_conditional_generation_forward (line 992) | def whisper_for_conditional_generation_forward( method whisper_for_audio_classification_forward (line 1113) | def whisper_for_audio_classification_forward( FILE: colossalai/shardformer/policies/auto_policy.py class PolicyLocation (line 12) | class PolicyLocation: function import_policy (line 243) | def import_policy(policy_location: PolicyLocation) -> Policy: function _fullname (line 252) | def _fullname(obj): function get_autopolicy (line 273) | def get_autopolicy(model: nn.Module) -> Policy: FILE: colossalai/shardformer/policies/base_policy.py class SubModuleReplacementDescription (line 21) | class SubModuleReplacementDescription: class ModulePolicyDescription (line 39) | class ModulePolicyDescription: class Policy (line 65) | class Policy(ABC): method __init__ (line 75) | def __init__(self) -> None: method set_model (line 80) | def set_model(self, model: nn.Module) -> None: method set_shard_config (line 88) | def set_shard_config(self, shard_config: ShardConfig) -> None: method pipeline_stage_manager (line 99) | def pipeline_stage_manager(self) -> Optional[PipelineStageManager]: method config_sanity_check (line 105) | def config_sanity_check(self): method preprocess (line 113) | def preprocess(self) -> nn.Module: method module_policy (line 119) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 127) | def postprocess(self) -> nn.Module: method append_or_create_submodule_replacement (line 133) | def append_or_create_submodule_replacement( method append_or_create_method_replacement (line 162) | def append_or_create_method_replacement( method get_held_layers (line 186) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 194) | def get_shared_params(self) -> List[Dict[int, Tensor]]: method tie_weight_check (line 202) | def tie_weight_check(self): FILE: colossalai/shardformer/policies/bert.py class BertPolicy (line 36) | class BertPolicy(Policy): method config_sanity_check (line 37) | def config_sanity_check(self): method preprocess (line 40) | def preprocess(self): method module_policy (line 45) | def module_policy(self): method add_lm_head_policy (line 362) | def add_lm_head_policy(self, base_policy): method add_lm_prediction_policy (line 404) | def add_lm_prediction_policy(self, base_policy): method postprocess (line 418) | def postprocess(self): method set_pipeline_forward (line 421) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 460) | def get_held_layers(self) -> List[Module]: class BertModelPolicy (line 495) | class BertModelPolicy(BertPolicy): method module_policy (line 496) | def module_policy(self): method get_held_layers (line 508) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 513) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForPreTrainingPolicy (line 519) | class BertForPreTrainingPolicy(BertPolicy): method module_policy (line 520) | def module_policy(self): method get_held_layers (line 534) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 543) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertLMHeadModelPolicy (line 558) | class BertLMHeadModelPolicy(BertPolicy): method module_policy (line 559) | def module_policy(self): method get_held_layers (line 573) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 583) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForMaskedLMPolicy (line 598) | class BertForMaskedLMPolicy(BertPolicy): method module_policy (line 599) | def module_policy(self): method get_held_layers (line 613) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 623) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForSequenceClassificationPolicy (line 638) | class BertForSequenceClassificationPolicy(BertPolicy): method module_policy (line 639) | def module_policy(self): method get_held_layers (line 665) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 676) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForTokenClassificationPolicy (line 682) | class BertForTokenClassificationPolicy(BertPolicy): method module_policy (line 683) | def module_policy(self): method get_held_layers (line 709) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 720) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForNextSentencePredictionPolicy (line 726) | class BertForNextSentencePredictionPolicy(BertPolicy): method module_policy (line 727) | def module_policy(self): method get_held_layers (line 740) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 750) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForMultipleChoicePolicy (line 756) | class BertForMultipleChoicePolicy(BertPolicy): method module_policy (line 757) | def module_policy(self): method get_held_layers (line 783) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 794) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BertForQuestionAnsweringPolicy (line 799) | class BertForQuestionAnsweringPolicy(BertPolicy): method module_policy (line 800) | def module_policy(self): method get_held_layers (line 813) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 823) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/blip2.py class BlipPolicy (line 16) | class BlipPolicy(Policy): method config_sanity_check (line 17) | def config_sanity_check(self): method preprocess (line 20) | def preprocess(self): method module_policy (line 27) | def module_policy(self): method postprocess (line 689) | def postprocess(self): class Blip2ModelPolicy (line 694) | class Blip2ModelPolicy(BlipPolicy): method __init__ (line 695) | def __init__(self) -> None: class Blip2ForConditionalGenerationPolicy (line 700) | class Blip2ForConditionalGenerationPolicy(BlipPolicy): method __init__ (line 701) | def __init__(self) -> None: FILE: colossalai/shardformer/policies/bloom.py class BloomPolicy (line 25) | class BloomPolicy(Policy): method __init__ (line 26) | def __init__(self) -> None: method config_sanity_check (line 29) | def config_sanity_check(self): method preprocess (line 32) | def preprocess(self): method module_policy (line 36) | def module_policy(self): method postprocess (line 276) | def postprocess(self): method set_pipeline_forward (line 279) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 301) | def get_held_layers(self) -> List[Module]: class BloomModelPolicy (line 337) | class BloomModelPolicy(BloomPolicy): method module_policy (line 338) | def module_policy(self): method get_held_layers (line 348) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 355) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BloomForCausalLMPolicy (line 360) | class BloomForCausalLMPolicy(BloomPolicy): method module_policy (line 361) | def module_policy(self): method get_held_layers (line 402) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 416) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BloomForSequenceClassificationPolicy (line 430) | class BloomForSequenceClassificationPolicy(BloomPolicy): method module_policy (line 431) | def module_policy(self): method get_held_layers (line 468) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 482) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BloomForTokenClassificationPolicy (line 487) | class BloomForTokenClassificationPolicy(BloomPolicy): method module_policy (line 488) | def module_policy(self): method get_held_layers (line 538) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 554) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class BloomForQuestionAnsweringPolicy (line 559) | class BloomForQuestionAnsweringPolicy(BloomPolicy): method module_policy (line 561) | def module_policy(self): method get_held_layers (line 573) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 587) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/chatglm2.py class ChatGLMPolicy (line 28) | class ChatGLMPolicy(Policy): method config_sanity_check (line 29) | def config_sanity_check(self): method preprocess (line 32) | def preprocess(self): method module_policy (line 41) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 281) | def postprocess(self): method get_held_layers (line 284) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 322) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class ChatGLMModelPolicy (line 346) | class ChatGLMModelPolicy(ChatGLMPolicy): method module_policy (line 347) | def module_policy(self): method get_held_layers (line 360) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 363) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class ChatGLMForConditionalGenerationPolicy (line 368) | class ChatGLMForConditionalGenerationPolicy(ChatGLMModelPolicy): method module_policy (line 369) | def module_policy(self): method get_held_layers (line 380) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 393) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/command.py class CommandPolicy (line 29) | class CommandPolicy(Policy): method config_sanity_check (line 30) | def config_sanity_check(self): method preprocess (line 33) | def preprocess(self): method module_policy (line 38) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 275) | def postprocess(self): method set_pipeline_forward (line 278) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 308) | def get_held_layers(self) -> List[Module]: class CommandModelPolicy (line 345) | class CommandModelPolicy(CommandPolicy): method module_policy (line 346) | def module_policy(self): method get_held_layers (line 357) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 362) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class CommandForCausalLMPolicy (line 367) | class CommandForCausalLMPolicy(CommandPolicy): method module_policy (line 368) | def module_policy(self): method get_held_layers (line 419) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 433) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/deepseek.py class DeepseekPolicy (line 24) | class DeepseekPolicy(Policy): method config_sanity_check (line 25) | def config_sanity_check(self): method preprocess (line 28) | def preprocess(self): method module_policy (line 47) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 294) | def postprocess(self): method set_pipeline_forward (line 297) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 320) | def get_held_layers(self) -> List[Module]: class DeepseekModelPolicy (line 355) | class DeepseekModelPolicy(DeepseekPolicy): method __init__ (line 356) | def __init__(self) -> None: method module_policy (line 359) | def module_policy(self): method get_held_layers (line 370) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 375) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class DeepseekForCausalLMPolicy (line 380) | class DeepseekForCausalLMPolicy(DeepseekPolicy): method module_policy (line 381) | def module_policy(self): method get_held_layers (line 432) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 446) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/deepseek_v3.py class DeepseekV3Policy (line 17) | class DeepseekV3Policy(Policy): method config_sanity_check (line 18) | def config_sanity_check(self): method preprocess (line 24) | def preprocess(self): method module_policy (line 27) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 83) | def postprocess(self): method set_pipeline_forward (line 86) | def set_pipeline_forward(self, model_cls: str, new_forward: Callable, ... method get_held_layers (line 102) | def get_held_layers(self) -> List[nn.Module]: class DeepseekV3ModelPolicy (line 142) | class DeepseekV3ModelPolicy(DeepseekV3Policy): method module_policy (line 143) | def module_policy(self): class DeepseekV3ForCausalLMPolicy (line 150) | class DeepseekV3ForCausalLMPolicy(DeepseekV3Policy): method module_policy (line 151) | def module_policy(self): method get_held_layers (line 157) | def get_held_layers(self): FILE: colossalai/shardformer/policies/falcon.py class FalconPolicy (line 21) | class FalconPolicy(Policy): method __init__ (line 22) | def __init__(self) -> None: method config_sanity_check (line 25) | def config_sanity_check(self): method preprocess (line 28) | def preprocess(self): method module_policy (line 32) | def module_policy(self): method postprocess (line 216) | def postprocess(self): method set_pipeline_forward (line 219) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 240) | def get_held_layers(self) -> List[Module]: class FalconModelPolicy (line 274) | class FalconModelPolicy(FalconPolicy): method __init__ (line 275) | def __init__(self) -> None: method module_policy (line 278) | def module_policy(self): method get_held_layers (line 289) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 296) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class FalconForCausalLMPolicy (line 301) | class FalconForCausalLMPolicy(FalconPolicy): method __init__ (line 302) | def __init__(self) -> None: method module_policy (line 305) | def module_policy(self): method get_held_layers (line 349) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 363) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class FalconForSequenceClassificationPolicy (line 377) | class FalconForSequenceClassificationPolicy(FalconPolicy): method __init__ (line 378) | def __init__(self) -> None: method module_policy (line 381) | def module_policy(self): method get_held_layers (line 416) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 430) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class FalconForTokenClassificationPolicy (line 435) | class FalconForTokenClassificationPolicy(FalconPolicy): method __init__ (line 436) | def __init__(self) -> None: method module_policy (line 439) | def module_policy(self): method get_held_layers (line 487) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 503) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class FalconForQuestionAnsweringPolicy (line 508) | class FalconForQuestionAnsweringPolicy(FalconPolicy): method __init__ (line 509) | def __init__(self) -> None: method module_policy (line 512) | def module_policy(self): method get_held_layers (line 548) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 562) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/gpt2.py class GPT2Policy (line 22) | class GPT2Policy(Policy): method config_sanity_check (line 23) | def config_sanity_check(self): method preprocess (line 26) | def preprocess(self): method module_policy (line 38) | def module_policy(self): method postprocess (line 287) | def postprocess(self): method get_held_layers (line 290) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 325) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class GPT2ModelPolicy (line 361) | class GPT2ModelPolicy(GPT2Policy): method module_policy (line 362) | def module_policy(self): method get_held_layers (line 375) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 378) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPT2LMHeadModelPolicy (line 384) | class GPT2LMHeadModelPolicy(GPT2Policy): method module_policy (line 385) | def module_policy(self): method get_held_layers (line 431) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 446) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPT2DoubleHeadsModelPolicy (line 463) | class GPT2DoubleHeadsModelPolicy(GPT2Policy): method module_policy (line 464) | def module_policy(self): method get_held_layers (line 508) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 531) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPT2ForQuestionAnsweringPolicy (line 548) | class GPT2ForQuestionAnsweringPolicy(GPT2Policy): method module_policy (line 549) | def module_policy(self): method get_held_layers (line 563) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 578) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPT2ForTokenClassificationPolicy (line 584) | class GPT2ForTokenClassificationPolicy(GPT2Policy): method module_policy (line 585) | def module_policy(self): method get_held_layers (line 611) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 629) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPT2ForSequenceClassificationPolicy (line 635) | class GPT2ForSequenceClassificationPolicy(GPT2Policy): method module_policy (line 636) | def module_policy(self): method get_held_layers (line 649) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 665) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/gptj.py class GPTJPolicy (line 27) | class GPTJPolicy(Policy): method config_sanity_check (line 28) | def config_sanity_check(self): method preprocess (line 31) | def preprocess(self): method module_policy (line 36) | def module_policy(self): method postprocess (line 260) | def postprocess(self): method get_held_layers (line 263) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 296) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class GPTJModelPolicy (line 321) | class GPTJModelPolicy(GPTJPolicy): method __init__ (line 322) | def __init__(self) -> None: method module_policy (line 325) | def module_policy(self): method get_held_layers (line 338) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 341) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPTJForCausalLMPolicy (line 347) | class GPTJForCausalLMPolicy(GPTJPolicy): method __init__ (line 348) | def __init__(self) -> None: method module_policy (line 351) | def module_policy(self): method get_held_layers (line 394) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 407) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPTJForSequenceClassificationPolicy (line 424) | class GPTJForSequenceClassificationPolicy(GPTJPolicy): method __init__ (line 425) | def __init__(self) -> None: method module_policy (line 428) | def module_policy(self): method get_held_layers (line 441) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 454) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class GPTJForQuestionAnsweringPolicy (line 460) | class GPTJForQuestionAnsweringPolicy(GPTJPolicy): method __init__ (line 461) | def __init__(self) -> None: method module_policy (line 464) | def module_policy(self): method get_held_layers (line 477) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 490) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/llama.py class LlamaPolicy (line 26) | class LlamaPolicy(Policy): method config_sanity_check (line 27) | def config_sanity_check(self): method preprocess (line 30) | def preprocess(self): method module_policy (line 35) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 300) | def postprocess(self): method set_pipeline_forward (line 303) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 333) | def get_held_layers(self) -> List[Module]: class LlamaModelPolicy (line 369) | class LlamaModelPolicy(LlamaPolicy): method module_policy (line 370) | def module_policy(self): method get_held_layers (line 381) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 386) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class LlamaForCausalLMPolicy (line 391) | class LlamaForCausalLMPolicy(LlamaPolicy): method module_policy (line 392) | def module_policy(self): method get_held_layers (line 442) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 452) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class LlamaForSequenceClassificationPolicy (line 471) | class LlamaForSequenceClassificationPolicy(LlamaPolicy): method module_policy (line 472) | def module_policy(self): method get_held_layers (line 526) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 536) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/mistral.py class MistralPolicy (line 31) | class MistralPolicy(Policy): method config_sanity_check (line 32) | def config_sanity_check(self): method preprocess (line 35) | def preprocess(self): method module_policy (line 40) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 261) | def postprocess(self): method set_pipeline_forward (line 264) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 294) | def get_held_layers(self) -> List[Module]: class MistralModelPolicy (line 329) | class MistralModelPolicy(MistralPolicy): method __init__ (line 330) | def __init__(self) -> None: method module_policy (line 333) | def module_policy(self): method get_held_layers (line 344) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 349) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class MistralForCausalLMPolicy (line 354) | class MistralForCausalLMPolicy(MistralPolicy): method module_policy (line 355) | def module_policy(self): method get_held_layers (line 406) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 420) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class MistralForSequenceClassificationPolicy (line 437) | class MistralForSequenceClassificationPolicy(MistralPolicy): method module_policy (line 438) | def module_policy(self): method get_held_layers (line 468) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 482) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/mixtral.py class MixtralPolicy (line 33) | class MixtralPolicy(Policy): method config_sanity_check (line 34) | def config_sanity_check(self): method preprocess (line 37) | def preprocess(self): method module_policy (line 42) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 284) | def postprocess(self): method set_pipeline_forward (line 287) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 310) | def get_held_layers(self) -> List[Module]: class MixtralModelPolicy (line 348) | class MixtralModelPolicy(MixtralPolicy): method __init__ (line 349) | def __init__(self) -> None: method module_policy (line 352) | def module_policy(self): method get_held_layers (line 363) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 368) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class MixtralForCausalLMPolicy (line 373) | class MixtralForCausalLMPolicy(MixtralPolicy): method module_policy (line 374) | def module_policy(self): method get_held_layers (line 423) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 433) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class MixtralForSequenceClassificationPolicy (line 450) | class MixtralForSequenceClassificationPolicy(MixtralPolicy): method module_policy (line 451) | def module_policy(self): method get_held_layers (line 481) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 489) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/opt.py class OPTPolicy (line 40) | class OPTPolicy(Policy): method __init__ (line 41) | def __init__(self) -> None: method config_sanity_check (line 44) | def config_sanity_check(self): method preprocess (line 47) | def preprocess(self): method module_policy (line 52) | def module_policy(self): method postprocess (line 277) | def postprocess(self): method get_held_layers (line 280) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 318) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class OPTModelPolicy (line 343) | class OPTModelPolicy(OPTPolicy): method module_policy (line 344) | def module_policy(self): method get_held_layers (line 356) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 359) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class OPTForCausalLMPolicy (line 364) | class OPTForCausalLMPolicy(OPTPolicy): method module_policy (line 365) | def module_policy(self): method get_held_layers (line 407) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 420) | def get_shared_params(self) -> List[Dict[int, Tensor]]: method postprocess (line 433) | def postprocess(self): class OPTForSequenceClassificationPolicy (line 447) | class OPTForSequenceClassificationPolicy(OPTPolicy): method module_policy (line 448) | def module_policy(self): method get_held_layers (line 461) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 467) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class OPTForQuestionAnsweringPolicy (line 472) | class OPTForQuestionAnsweringPolicy(OPTPolicy): method module_policy (line 473) | def module_policy(self): method get_held_layers (line 486) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 499) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/qwen2.py class Qwen2Policy (line 37) | class Qwen2Policy(Policy): method __init__ (line 38) | def __init__(self) -> None: method config_sanity_check (line 47) | def config_sanity_check(self): method preprocess (line 50) | def preprocess(self): method module_policy (line 55) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 307) | def postprocess(self): method set_pipeline_forward (line 310) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 343) | def get_held_layers(self) -> List[Module]: class Qwen2ModelPolicy (line 381) | class Qwen2ModelPolicy(Qwen2Policy): method module_policy (line 382) | def module_policy(self): method get_held_layers (line 392) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 397) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class Qwen2ForCausalLMPolicy (line 402) | class Qwen2ForCausalLMPolicy(Qwen2Policy): method module_policy (line 403) | def module_policy(self): method get_held_layers (line 460) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 474) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class Qwen2ForSequenceClassificationPolicy (line 491) | class Qwen2ForSequenceClassificationPolicy(Qwen2Policy): method module_policy (line 492) | def module_policy(self): method get_held_layers (line 540) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 554) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/qwen3.py class Qwen3Policy (line 37) | class Qwen3Policy(Policy): method __init__ (line 38) | def __init__(self) -> None: method config_sanity_check (line 47) | def config_sanity_check(self): method preprocess (line 50) | def preprocess(self): method module_policy (line 55) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 307) | def postprocess(self): method set_pipeline_forward (line 310) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... method get_held_layers (line 343) | def get_held_layers(self) -> List[Module]: class Qwen3ModelPolicy (line 381) | class Qwen3ModelPolicy(Qwen3Policy): method module_policy (line 382) | def module_policy(self): method get_held_layers (line 391) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 396) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class Qwen3ForCausalLMPolicy (line 401) | class Qwen3ForCausalLMPolicy(Qwen3Policy): method module_policy (line 402) | def module_policy(self): method get_held_layers (line 446) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 460) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class Qwen3ForSequenceClassificationPolicy (line 477) | class Qwen3ForSequenceClassificationPolicy(Qwen3Policy): method module_policy (line 478) | def module_policy(self): method get_held_layers (line 525) | def get_held_layers(self) -> List[Module]: method get_shared_params (line 539) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/sam.py class SamPolicy (line 9) | class SamPolicy(Policy): method config_sanity_check (line 10) | def config_sanity_check(self): method preprocess (line 13) | def preprocess(self): method module_policy (line 16) | def module_policy(self): method postprocess (line 504) | def postprocess(self): class SamModelPolicy (line 509) | class SamModelPolicy(SamPolicy): method __init__ (line 510) | def __init__(self) -> None: FILE: colossalai/shardformer/policies/t5.py class T5BasePolicy (line 44) | class T5BasePolicy(Policy): method config_sanity_check (line 45) | def config_sanity_check(self): method preprocess (line 48) | def preprocess(self): method module_policy (line 52) | def module_policy(self): method postprocess (line 445) | def postprocess(self): method distribute_t5_layers (line 448) | def distribute_t5_layers( method get_t5_stage_index (line 484) | def get_t5_stage_index( method get_held_layers (line 501) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 575) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class T5ModelPolicy (line 608) | class T5ModelPolicy(T5BasePolicy): method module_policy (line 609) | def module_policy(self): method get_held_layers (line 643) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 646) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class T5ForConditionalGenerationPolicy (line 659) | class T5ForConditionalGenerationPolicy(T5BasePolicy): method module_policy (line 660) | def module_policy(self): method get_held_layers (line 723) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 736) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class T5EncoderPolicy (line 762) | class T5EncoderPolicy(T5BasePolicy): method module_policy (line 763) | def module_policy(self): method get_held_layers (line 799) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 802) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class T5ForTokenClassificationPolicy (line 806) | class T5ForTokenClassificationPolicy(T5EncoderPolicy): method module_policy (line 807) | def module_policy(self): method get_held_layers (line 833) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 851) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/policies/vit.py class ViTPolicy (line 23) | class ViTPolicy(Policy): method config_sanity_check (line 24) | def config_sanity_check(self): method preprocess (line 27) | def preprocess(self): method module_policy (line 31) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method new_model_class (line 248) | def new_model_class(self): method postprocess (line 251) | def postprocess(self): method get_held_layers (line 254) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 281) | def set_pipeline_forward(self, model_cls: nn.Module, pipeline_forward:... class ViTModelPolicy (line 298) | class ViTModelPolicy(ViTPolicy): method module_policy (line 299) | def module_policy(self): method get_held_layers (line 308) | def get_held_layers(self) -> List[nn.Module]: class ViTForImageClassificationPolicy (line 329) | class ViTForImageClassificationPolicy(ViTPolicy): method module_policy (line 330) | def module_policy(self): method get_held_layers (line 381) | def get_held_layers(self) -> List[nn.Module]: class ViTForMaskedImageModelingPolicy (line 402) | class ViTForMaskedImageModelingPolicy(ViTPolicy): method module_policy (line 403) | def module_policy(self): method get_held_layers (line 417) | def get_held_layers(self) -> List[nn.Module]: FILE: colossalai/shardformer/policies/whisper.py class WhisperPolicy (line 29) | class WhisperPolicy(Policy): method __init__ (line 30) | def __init__(self) -> None: method config_sanity_check (line 33) | def config_sanity_check(self): method preprocess (line 36) | def preprocess(self): method module_policy (line 44) | def module_policy(self): method add_lm_head_policy (line 505) | def add_lm_head_policy(self, base_policy): method postprocess (line 536) | def postprocess(self): method distribute_whisper_layers (line 539) | def distribute_whisper_layers( method get_whisper_stage_index (line 575) | def get_whisper_stage_index( method get_held_layers (line 593) | def get_held_layers(self) -> List[nn.Module]: method set_pipeline_forward (line 681) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call... class WhisperModelPolicy (line 726) | class WhisperModelPolicy(WhisperPolicy): method module_policy (line 727) | def module_policy(self): method get_held_layers (line 741) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 744) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class WhisperForConditionalGenerationPolicy (line 750) | class WhisperForConditionalGenerationPolicy(WhisperPolicy): method module_policy (line 751) | def module_policy(self): method postprocess (line 765) | def postprocess(self): method get_held_layers (line 768) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 781) | def get_shared_params(self) -> List[Dict[int, Tensor]]: class WhisperForAudioClassificationPolicy (line 815) | class WhisperForAudioClassificationPolicy(WhisperPolicy): method module_policy (line 816) | def module_policy(self): method get_held_layers (line 829) | def get_held_layers(self) -> List[nn.Module]: method get_shared_params (line 844) | def get_shared_params(self) -> List[Dict[int, Tensor]]: FILE: colossalai/shardformer/shard/grad_ckpt_config.py class GradientCheckpointConfig (line 6) | class GradientCheckpointConfig: method get_num_ckpt_layers (line 9) | def get_num_ckpt_layers(self, num_layers: int) -> int: class PipelineGradientCheckpointConfig (line 14) | class PipelineGradientCheckpointConfig(GradientCheckpointConfig): method __post_init__ (line 52) | def __post_init__(self): method _enable_gradient_checkpointing_ratio (line 60) | def _enable_gradient_checkpointing_ratio(self) -> bool: method _enable_customized_ckpt_layers_per_stage (line 64) | def _enable_customized_ckpt_layers_per_stage(self) -> bool: method get_num_ckpt_layers (line 67) | def get_num_ckpt_layers( FILE: colossalai/shardformer/shard/shard_config.py class ShardConfig (line 17) | class ShardConfig: method tensor_parallel_size (line 64) | def tensor_parallel_size(self): method sequence_parallel_size (line 68) | def sequence_parallel_size(self): method expert_parallel_size (line 72) | def expert_parallel_size(self): method __post_init__ (line 75) | def __post_init__(self): method _turn_on_all_optimization (line 112) | def _turn_on_all_optimization(self): FILE: colossalai/shardformer/shard/sharder.py class ModelSharder (line 18) | class ModelSharder(object): method __init__ (line 28) | def __init__(self, model: nn.Module, policy: Policy, shard_config: Sha... method shard (line 33) | def shard(self) -> List[Dict[int, Tensor]]: method _preprocess (line 48) | def _preprocess(self) -> None: method _postprocess (line 51) | def _postprocess(self) -> None: method _replace_module (line 54) | def _replace_module(self, include: Optional[Set[nn.Module]] = None) ->... method _recursive_replace_layer (line 77) | def _recursive_replace_layer( method _replace_attr (line 125) | def _replace_attr( method _replace_param (line 140) | def _replace_param( method _replace_method (line 155) | def _replace_method(self, module: nn.Module, method_replacement: Dict[... method _replace_sub_module (line 161) | def _replace_sub_module( method _get_recursive_held_layers (line 209) | def _get_recursive_held_layers(self, held_layers: Optional[List[nn.Mod... method _release_unheld_layers (line 222) | def _release_unheld_layers(self) -> Optional[Set[nn.Module]]: method _materialize (line 232) | def _materialize(self) -> None: FILE: colossalai/shardformer/shard/shardformer.py class ShardFormer (line 14) | class ShardFormer: method __init__ (line 35) | def __init__(self, shard_config: ShardConfig): method optimize (line 43) | def optimize(self, model: nn.Module, policy: Policy = None) -> Tuple[n... FILE: colossalai/shardformer/shard/utils.py function set_tensors_to_none (line 6) | def set_tensors_to_none(model: nn.Module, exclude: Set[nn.Module] = set(... FILE: colossalai/tensor/colo_parameter.py function is_no_hook_op (line 14) | def is_no_hook_op(func) -> bool: function filter_colo_parameters (line 18) | def filter_colo_parameters(*args, **kwargs): function replace_args (line 39) | def replace_args(args, kwargs, new_args): class ColoParameter (line 46) | class ColoParameter(ColoTensor, torch.nn.Parameter): method __new__ (line 49) | def __new__(cls, data: Optional[torch.Tensor] = None, requires_grad: b... method __torch_function__ (line 55) | def __torch_function__(cls, func, types, args=..., kwargs=None): method __deepcopy__ (line 72) | def __deepcopy__(self, memo): method __reduce_ex__ (line 82) | def __reduce_ex__(self, proto): FILE: colossalai/tensor/colo_tensor.py function _get_my_nowrap_functions (line 15) | def _get_my_nowrap_functions() -> Set[Callable]: function _convert (line 25) | def _convert(output): function _convert_output (line 33) | def _convert_output(output, func): class ColoTensor (line 39) | class ColoTensor(torch.Tensor): method __new__ (line 51) | def __new__(cls, data: torch.Tensor) -> "ColoTensor": method __torch_function__ (line 66) | def __torch_function__(cls, func, types, args=(), kwargs=None): method __deepcopy__ (line 94) | def __deepcopy__(self, memo): FILE: colossalai/tensor/comm_spec.py function _all_gather (line 15) | def _all_gather(tensor, comm_spec): function _split (line 33) | def _split(tensor, comm_spec): function _all_to_all (line 47) | def _all_to_all(tensor, comm_spec): function _all_reduce (line 68) | def _all_reduce(tensor, comm_spec, async_op=False): function _mix_gather (line 81) | def _mix_gather(tensor, comm_spec): function _mix_split (line 166) | def _mix_split(tensor, comm_spec): class _ReduceGrad (line 208) | class _ReduceGrad(torch.autograd.Function): method symbolic (line 219) | def symbolic(graph, input_): method forward (line 223) | def forward(ctx, input_, comm_spec): method backward (line 228) | def backward(ctx, grad_output): class _ReduceInput (line 232) | class _ReduceInput(torch.autograd.Function): method symbolic (line 243) | def symbolic(graph, input_): method forward (line 247) | def forward(ctx, input_, comm_spec): method backward (line 251) | def backward(ctx, grad_output): class _SplitForwardGatherBackward (line 255) | class _SplitForwardGatherBackward(torch.autograd.Function): method symbolic (line 266) | def symbolic(graph, input_): method forward (line 270) | def forward(ctx, input_, comm_spec): method backward (line 275) | def backward(ctx, grad_output): class _GatherForwardSplitBackward (line 279) | class _GatherForwardSplitBackward(torch.autograd.Function): method symbolic (line 290) | def symbolic(graph, input_): method forward (line 294) | def forward(ctx, input_, comm_spec): method backward (line 299) | def backward(ctx, grad_output): class _AllToAll (line 303) | class _AllToAll(torch.autograd.Function): method symbolic (line 314) | def symbolic(graph, input_): method forward (line 318) | def forward(ctx, input_, comm_spec): method backward (line 331) | def backward(ctx, grad_outputs): class _MixGatherForwardMixSplitBackward (line 335) | class _MixGatherForwardMixSplitBackward(torch.autograd.Function): method symbolic (line 337) | def symbolic(graph, input_): method forward (line 341) | def forward(ctx, input_, comm_spec): method backward (line 346) | def backward(ctx, grad_output): function reduce_grad (line 350) | def reduce_grad(input_, comm_spec): function reduce_input (line 354) | def reduce_input(input_, comm_spec): function split_forward_gather_backward (line 358) | def split_forward_gather_backward(input_, comm_spec): function gather_forward_split_backward (line 362) | def gather_forward_split_backward(input_, comm_spec): function all_to_all (line 366) | def all_to_all(input_, comm_spec): function mixgather_forward_split_backward (line 370) | def mixgather_forward_split_backward(input_, comm_spec): class CollectiveCommPattern (line 374) | class CollectiveCommPattern(Enum): class CommSpec (line 383) | class CommSpec: method __init__ (line 400) | def __init__( method __repr__ (line 428) | def __repr__(self): method get_comm_cost (line 458) | def get_comm_cost(self): method covert_spec_to_action (line 506) | def covert_spec_to_action(self, tensor): FILE: colossalai/tensor/d_tensor/api.py function get_shard_dim_1d (line 22) | def get_shard_dim_1d(p: torch.Tensor): function clear_layout_converter (line 36) | def clear_layout_converter(): function is_distributed_tensor (line 41) | def is_distributed_tensor(tensor: torch.Tensor) -> bool: function is_sharded (line 54) | def is_sharded(dtensor: torch.Tensor) -> bool: function _hijack_detach_and_clone (line 68) | def _hijack_detach_and_clone(dtensor: torch.Tensor) -> torch.Tensor: function _construct_default_sharding_spec (line 97) | def _construct_default_sharding_spec( function _apply_layout (line 112) | def _apply_layout(tensor, layout): function distribute_tensor (line 125) | def distribute_tensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sha... function init_as_dtensor (line 149) | def init_as_dtensor( function redistribute (line 164) | def redistribute(dtensor: torch.Tensor, device_mesh: DeviceMesh, shardin... function to_global (line 183) | def to_global(dtensor: torch.Tensor) -> torch.Tensor: function shard_rowwise (line 206) | def shard_rowwise( function shard_colwise (line 238) | def shard_colwise(tensor: torch.Tensor, group_or_device_mesh: Union[Proc... function sharded_tensor_to_param (line 266) | def sharded_tensor_to_param(dtensor: torch.Tensor, requires_grad: bool =... function sharded_tensor_to_existing_param (line 277) | def sharded_tensor_to_existing_param(dtensor: torch.Tensor, param: torch... function compute_global_numel (line 285) | def compute_global_numel(dtensor: torch.Tensor) -> int: function get_layout (line 300) | def get_layout(dtensor: torch.Tensor) -> Layout: function get_global_shape (line 315) | def get_global_shape(dtensor: torch.Tensor) -> torch.Size: function get_device_mesh (line 329) | def get_device_mesh(dtensor: torch.Tensor) -> DeviceMesh: function get_sharding_spec (line 343) | def get_sharding_spec(dtensor: torch.Tensor) -> ShardingSpec: function is_customized_distributed_tensor (line 362) | def is_customized_distributed_tensor(tensor: torch.Tensor): function _hijack_detach_and_clone_for_customized_distributed_tensor (line 375) | def _hijack_detach_and_clone_for_customized_distributed_tensor(dtensor: ... function distribute_tensor_with_customization (line 406) | def distribute_tensor_with_customization(tensor: torch.Tensor, shard_fn,... function init_tensor_as_customization_distributed (line 455) | def init_tensor_as_customization_distributed(tensor: torch.Tensor, shard... function to_global_for_customized_distributed_tensor (line 502) | def to_global_for_customized_distributed_tensor(dtensor: torch.Tensor) -... function customized_distributed_tensor_to_param (line 516) | def customized_distributed_tensor_to_param(dtensor: torch.Tensor, requir... function customized_distributed_tensor_to_existing_param (line 531) | def customized_distributed_tensor_to_existing_param(dtensor: torch.Tenso... FILE: colossalai/tensor/d_tensor/comm_spec.py class CollectiveCommPattern (line 14) | class CollectiveCommPattern(Enum): class CommSpec (line 23) | class CommSpec: method __init__ (line 38) | def __init__( method __repr__ (line 52) | def __repr__(self): method covert_spec_to_action (line 78) | def covert_spec_to_action(self, tensor): function _all_gather (line 93) | def _all_gather(tensor: torch.Tensor, comm_spec: CommSpec): function _split (line 107) | def _split(tensor: torch.Tensor, comm_spec: CommSpec): function _all_to_all (line 119) | def _all_to_all(tensor: torch.Tensor, comm_spec: CommSpec): function _all_reduce (line 138) | def _all_reduce(tensor: torch.Tensor, comm_spec: CommSpec, async_op: boo... class _ReduceGrad (line 149) | class _ReduceGrad(torch.autograd.Function): method symbolic (line 160) | def symbolic(graph, input_): method forward (line 164) | def forward(ctx, input_, comm_spec): method backward (line 169) | def backward(ctx, grad_output): class _ReduceInput (line 173) | class _ReduceInput(torch.autograd.Function): method symbolic (line 184) | def symbolic(graph, input_): method forward (line 188) | def forward(ctx, input_, comm_spec): method backward (line 192) | def backward(ctx, grad_output): class _SplitForwardGatherBackward (line 196) | class _SplitForwardGatherBackward(torch.autograd.Function): method symbolic (line 207) | def symbolic(graph, input_): method forward (line 211) | def forward(ctx, input_, comm_spec): method backward (line 216) | def backward(ctx, grad_output): class _GatherForwardSplitBackward (line 220) | class _GatherForwardSplitBackward(torch.autograd.Function): method symbolic (line 231) | def symbolic(graph, input_): method forward (line 235) | def forward(ctx, input_, comm_spec): method backward (line 240) | def backward(ctx, grad_output): class _AllToAll (line 244) | class _AllToAll(torch.autograd.Function): method symbolic (line 255) | def symbolic(graph, input_): method forward (line 259) | def forward(ctx, input_, comm_spec): method backward (line 272) | def backward(ctx, grad_outputs): function reduce_grad (line 276) | def reduce_grad(input_, comm_spec): function reduce_input (line 280) | def reduce_input(input_, comm_spec): function split_forward_gather_backward (line 284) | def split_forward_gather_backward(input_, comm_spec): function gather_forward_split_backward (line 288) | def gather_forward_split_backward(input_, comm_spec): function all_to_all (line 292) | def all_to_all(input_, comm_spec): FILE: colossalai/tensor/d_tensor/layout.py class Layout (line 12) | class Layout: method __init__ (line 21) | def __init__(self, device_mesh: DeviceMesh, sharding_spec: ShardingSpe... method __hash__ (line 27) | def __hash__(self) -> int: method get_sharded_shape_per_device (line 30) | def get_sharded_shape_per_device(self): method _sanity_check (line 41) | def _sanity_check(self): FILE: colossalai/tensor/d_tensor/layout_converter.py class LayoutConverterOptions (line 23) | class LayoutConverterOptions: function set_layout_converting_options (line 31) | def set_layout_converting_options(options: LayoutConverterOptions): class LayoutConverter (line 39) | class LayoutConverter(metaclass=SingletonMeta): method __init__ (line 44) | def __init__(self): method options (line 50) | def options(self): method options (line 54) | def options(self, options_: LayoutConverterOptions): method forward_only (line 59) | def forward_only(self): method forward_only (line 63) | def forward_only(self, value): method all_gather_transform_layouts (line 67) | def all_gather_transform_layouts(self, source_layout: Layout) -> Dict[... method all_to_all_transform_layout (line 149) | def all_to_all_transform_layout(self, source_layout: Layout) -> Dict[L... method shard_transform_layout (line 267) | def shard_transform_layout(self, source_layout: Layout) -> Dict[Layout... method get_all_one_step_transform_spec (line 361) | def get_all_one_step_transform_spec(self, source_layout: Layout) -> Di... method layout_converting (line 382) | def layout_converting( method get_total_comm_cost (line 537) | def get_total_comm_cost(self, source_layout: Layout, target_layout: La... method apply (line 549) | def apply(self, tensor: torch.Tensor, source_layout: Layout, target_la... FILE: colossalai/tensor/d_tensor/misc.py class LayoutException (line 1) | class LayoutException(Exception): class DuplicatedShardingDimensionError (line 5) | class DuplicatedShardingDimensionError(LayoutException): class ShardingNotDivisibleError (line 9) | class ShardingNotDivisibleError(LayoutException): class ShardingOutOfIndexError (line 13) | class ShardingOutOfIndexError(LayoutException): FILE: colossalai/tensor/d_tensor/sharding_spec.py class DimSpec (line 14) | class DimSpec: method __init__ (line 27) | def __init__(self, shard_list): method __eq__ (line 31) | def __eq__(self, other): method __repr__ (line 34) | def __repr__(self): method difference_dict (line 43) | def difference_dict(self): method dim_diff (line 56) | def dim_diff(self, other): method _build_difference_2d_dict (line 78) | def _build_difference_2d_dict(cls): method _convert_str_to_shard_list (line 138) | def _convert_str_to_shard_list(str_spec): class ShardingSpec (line 156) | class ShardingSpec: method __init__ (line 168) | def __init__( method _sanity_check (line 191) | def _sanity_check(self): method __repr__ (line 202) | def __repr__(self): method convert_dict_to_shard_sequence (line 207) | def convert_dict_to_shard_sequence(self): method convert_shard_sequence_to_dict (line 216) | def convert_shard_sequence_to_dict(self): method spec_diff (line 228) | def spec_diff(self, other): FILE: colossalai/tensor/d_tensor/utils.py function get_comm_cost (line 9) | def get_comm_cost(layout: Layout, comm_spec: CommSpec, forward_only: boo... FILE: colossalai/tensor/moe_tensor/api.py function is_moe_tensor (line 10) | def is_moe_tensor(tensor: torch.Tensor) -> bool: function set_moe_tensor_ep_group (line 23) | def set_moe_tensor_ep_group(tensor: torch.Tensor, ep_group: ProcessGroup... function get_moe_info (line 35) | def get_moe_info(ep_size: int, dp_size: int, pp_size: int, ep_inside: bo... function get_ep_group (line 51) | def get_ep_group(tensor: torch.Tensor) -> ProcessGroup: function get_ep_size (line 64) | def get_ep_size(tensor: torch.Tensor) -> int: function get_dp_size (line 78) | def get_dp_size(tensor: torch.Tensor) -> int: function get_dp_group (line 91) | def get_dp_group(tensor: torch.Tensor) -> ProcessGroup: function get_ep_rank (line 104) | def get_ep_rank(tensor: torch.Tensor) -> int: function get_dp_rank (line 117) | def get_dp_rank(tensor: torch.Tensor) -> int: function get_ep_group_ranks (line 130) | def get_ep_group_ranks(tensor: torch.Tensor) -> List[int]: function get_dp_group_ranks (line 143) | def get_dp_group_ranks(tensor: torch.Tensor) -> List[int]: FILE: colossalai/tensor/moe_tensor/moe_info.py class MoeParallelInfo (line 4) | class MoeParallelInfo: method __init__ (line 7) | def __init__(self, ep_inside: bool, ep_size: int, dp_size: int, pp_siz... FILE: colossalai/tensor/padded_tensor/api.py function _hijack_detach_and_clone (line 4) | def _hijack_detach_and_clone(ptensor: torch.Tensor) -> torch.Tensor: function _hijack_back_detach_and_clone (line 37) | def _hijack_back_detach_and_clone(ptensor: torch.Tensor) -> torch.Tensor: function is_padded_tensor (line 56) | def is_padded_tensor(tensor: torch.Tensor) -> bool: function to_padded_tensor (line 69) | def to_padded_tensor( function to_unpadded_tensor (line 101) | def to_unpadded_tensor(ptensor: torch.Tensor): function init_as_padded_tensor (line 118) | def init_as_padded_tensor(tensor: torch.Tensor, current_length: int, ori... FILE: colossalai/tensor/param_op_hook.py class ColoParamOpHook (line 9) | class ColoParamOpHook(ABC): method pre_forward (line 18) | def pre_forward(self, params: List[torch.Tensor]) -> None: method post_forward (line 22) | def post_forward(self, params: List[torch.Tensor]) -> None: method pre_backward (line 26) | def pre_backward(self, params: List[torch.Tensor]) -> None: method post_backward (line 30) | def post_backward(self, params: List[torch.Tensor]) -> None: method rewrite_op (line 33) | def rewrite_op(self, func) -> Any: class ColoParamOpHookManager (line 37) | class ColoParamOpHookManager: method use_hooks (line 47) | def use_hooks(*hooks: ColoParamOpHook): method _trigger_pre_forward (line 65) | def _trigger_pre_forward(params: List[torch.Tensor]) -> None: method _trigger_post_forward (line 70) | def _trigger_post_forward(params: List[torch.Tensor]) -> None: method _trigger_pre_backward (line 75) | def _trigger_pre_backward(params: List[torch.Tensor]) -> None: method _trigger_post_backward (line 80) | def _trigger_post_backward(params: List[torch.Tensor]) -> None: method pre_op (line 85) | def pre_op(params: List[torch.Tensor], *args: Any) -> list: method post_op (line 96) | def post_op(params: List[torch.Tensor], arg: Any) -> Any: method has_hook (line 104) | def has_hook() -> bool: method rewrite_op (line 108) | def rewrite_op(func) -> Any: class PreFwdPostBwd (line 114) | class PreFwdPostBwd(torch.autograd.Function): method forward (line 116) | def forward(ctx, params, *args): method backward (line 121) | def backward(ctx, *grads): class PostFwdPreBwd (line 126) | class PostFwdPreBwd(torch.autograd.Function): method forward (line 128) | def forward(ctx, params, *args): method backward (line 133) | def backward(ctx, *grads): function _is_grad_tensor (line 138) | def _is_grad_tensor(obj) -> bool: function _flatten_grad_args (line 145) | def _flatten_grad_args(args) -> Tuple[list, list, List[bool], TreeSpec]: function _merge_args (line 160) | def _merge_args(grad_args, other_args, grad_flags, spec): FILE: colossalai/tensor/shape_consistency.py class ShapeConsistencyOptions (line 20) | class ShapeConsistencyOptions: function to_global (line 28) | def to_global(distributed_tensor: torch.Tensor, sharding_spec: ShardingS... function set_shape_consistency_options (line 38) | def set_shape_consistency_options(options: ShapeConsistencyOptions): class ShapeConsistencyManager (line 46) | class ShapeConsistencyManager(metaclass=SingletonMeta): method __init__ (line 47) | def __init__(self): method options (line 55) | def options(self): method options (line 59) | def options(self, options_: ShapeConsistencyOptions): method forward_only (line 64) | def forward_only(self): method forward_only (line 68) | def forward_only(self, value): method get_all_all_gather_spec (line 72) | def get_all_all_gather_spec( method get_all_all_to_all_spec (line 146) | def get_all_all_to_all_spec( method get_all_shard_spec (line 257) | def get_all_shard_spec(self, source_spec: ShardingSpec, orig_cost_dict): method get_all_mix_gather_spec (line 340) | def get_all_mix_gather_spec( method get_all_one_step_transform_spec (line 396) | def get_all_one_step_transform_spec(self, source_spec: ShardingSpec, o... method mem_cost (line 418) | def mem_cost(self, comm_action_sequence: List[CommSpec]) -> TrainCycle... method shape_consistency (line 581) | def shape_consistency( method apply (line 689) | def apply(self, tensor_with_sharding_spec: torch.Tensor, target_spec: ... method apply_for_autoparallel_runtime (line 754) | def apply_for_autoparallel_runtime(self, tensor, source_spec, target_s... FILE: colossalai/tensor/sharding_spec.py class _DimSpec (line 18) | class _DimSpec: method __init__ (line 31) | def __init__(self, shard_list): method __eq__ (line 35) | def __eq__(self, other): method __repr__ (line 38) | def __repr__(self): method difference_dict (line 47) | def difference_dict(self): method difference (line 60) | def difference(self, other): method _build_difference_2d_dict (line 82) | def _build_difference_2d_dict(cls): method _convert_str_to_shard_list (line 142) | def _convert_str_to_shard_list(str_spec): class ShardingSpecException (line 160) | class ShardingSpecException(Exception): class ShardingOutOfIndexError (line 164) | class ShardingOutOfIndexError(ShardingSpecException): class DuplicatedShardingDimensionError (line 168) | class DuplicatedShardingDimensionError(ShardingSpecException): class ShardingNotDivisibleError (line 172) | class ShardingNotDivisibleError(ShardingSpecException): class ShardingSpec (line 176) | class ShardingSpec: method __init__ (line 190) | def __init__( method __repr__ (line 215) | def __repr__(self): method _sanity_check (line 221) | def _sanity_check(self): method convert_dict_to_shard_sequence (line 253) | def convert_dict_to_shard_sequence(self): method convert_shard_sequence_to_dict (line 262) | def convert_shard_sequence_to_dict(self): method sharding_sequence_difference (line 274) | def sharding_sequence_difference(self, other): method get_sharded_shape_per_device (line 309) | def get_sharded_shape_per_device(self): FILE: colossalai/tensor/utils.py function all_gather_simulator (line 9) | def all_gather_simulator(target_pair): function all_to_all_simulator (line 29) | def all_to_all_simulator(f_target_pair, b_target_pair): function shard_simulator (line 63) | def shard_simulator(target_pair, legal_sharding_dims): function mix_gather_simulator (line 93) | def mix_gather_simulator(f_target_pair, b_target_pair): function named_params_with_colotensor (line 119) | def named_params_with_colotensor( function _convert_tensor (line 165) | def _convert_tensor(tensor: torch.Tensor) -> ColoTensor: function convert_parameter (line 169) | def convert_parameter(module: torch.nn.Module, param_name: str): function convert_dim_partition_dict (line 196) | def convert_dim_partition_dict(dim_size: int, dim_partition_dict: Dict[i... function merge_same_dim_mesh_list (line 210) | def merge_same_dim_mesh_list(dim_size: int, dim_partition_dict: Dict[int... FILE: colossalai/testing/comparison.py function assert_equal (line 11) | def assert_equal(a: Tensor, b: Tensor): function assert_not_equal (line 15) | def assert_not_equal(a: Tensor, b: Tensor): function assert_close_loose (line 19) | def assert_close_loose(a: Tensor, b: Tensor, rtol: float = 1e-3, atol: f... function assert_equal_in_group (line 28) | def assert_equal_in_group(tensor: Tensor, process_group: ProcessGroup = ... function check_state_dict_equal (line 41) | def check_state_dict_equal( function check_state_dict_equal_pytree (line 84) | def check_state_dict_equal_pytree(d1: OrderedDict, d2: OrderedDict, igno... function assert_hf_output_close (line 99) | def assert_hf_output_close( FILE: colossalai/testing/pytest_wrapper.py function run_on_environment_flag (line 10) | def run_on_environment_flag(name: str): FILE: colossalai/testing/random.py function seed_all (line 7) | def seed_all(seed, cuda_deterministic=False): FILE: colossalai/testing/utils.py function parameterize (line 16) | def parameterize(argument: str, values: List[Any]) -> Callable: function rerun_on_exception (line 71) | def rerun_on_exception(exception_type: Exception = Exception, pattern: s... function rerun_if_address_is_in_use (line 157) | def rerun_if_address_is_in_use(): function skip_if_not_enough_gpus (line 183) | def skip_if_not_enough_gpus(min_gpus: int): function free_port (line 212) | def free_port() -> int: function spawn (line 229) | def spawn(func, nprocs=1, **kwargs): function clear_cache_before_run (line 255) | def clear_cache_before_run(): class DummyDataloader (line 280) | class DummyDataloader: method __init__ (line 281) | def __init__(self, data_gen_fn: Callable, length: int = 10): method __iter__ (line 286) | def __iter__(self): method __next__ (line 290) | def __next__(self): method __len__ (line 297) | def __len__(self): FILE: colossalai/utils/common.py function get_current_device (line 17) | def get_current_device(): function ensure_path_exists (line 24) | def ensure_path_exists(filename: str): function conditional_context (line 32) | def conditional_context(context_manager, enable=True): function is_ddp_ignored (line 40) | def is_ddp_ignored(p): function disposable (line 44) | def disposable(func: Callable) -> Callable: function free_storage (line 57) | def free_storage(data: torch.Tensor) -> None: function _cast_float (line 66) | def _cast_float(args, dtype: torch.dtype): function set_seed (line 76) | def set_seed(seed): function get_non_persistent_buffers_set (line 82) | def get_non_persistent_buffers_set( FILE: colossalai/utils/memory.py function _get_cpu_memory_info (line 14) | def _get_cpu_memory_info(): function colo_device_memory_capacity (line 47) | def colo_device_memory_capacity(device: torch.device) -> int: function colo_get_cpu_memory_capacity (line 66) | def colo_get_cpu_memory_capacity() -> int: FILE: colossalai/utils/model/utils.py function substitute_init_recursively (line 11) | def substitute_init_recursively(cls, func, visited: set): function call_to_str (line 19) | def call_to_str(base, *args, **kwargs): class InsertPostInitMethodToModuleSubClasses (line 41) | class InsertPostInitMethodToModuleSubClasses(object): method __init__ (line 42) | def __init__(self, default_dtype: Optional[torch.dtype] = None): method __enter__ (line 46) | def __enter__(self): method __exit__ (line 82) | def __exit__(self, exc_type, exc_value, traceback): method _post_init_method (line 105) | def _post_init_method(self, module, *args, **kwargs): method _pre_context_exec (line 108) | def _pre_context_exec(self): method _post_context_exec (line 111) | def _post_context_exec(self): FILE: colossalai/utils/multi_tensor_apply/multi_tensor_apply.py class MultiTensorApply (line 4) | class MultiTensorApply(object): method __init__ (line 15) | def __init__(self, chunk_size): method check_avail (line 23) | def check_avail(self): method __call__ (line 32) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args): FILE: colossalai/utils/rank_recorder/rank_recorder.py class Event (line 19) | class Event: method __init__ (line 20) | def __init__(self, start: int, end: int, name: str, rank: int) -> None: class Recorder (line 27) | class Recorder: method __init__ (line 28) | def __init__(self) -> None: method start (line 46) | def start(self, name: str, rank: int): method end (line 52) | def end(self): method get_history (line 63) | def get_history(self): method __call__ (line 66) | def __call__(self, name: str, rank: str): method __enter__ (line 71) | def __enter__(self): method __exit__ (line 76) | def __exit__(self, *args): method dump_record (line 79) | def dump_record(self): method merge_recode (line 96) | def merge_recode(self): method visualize_record (line 129) | def visualize_record(self): method exit_worker (line 157) | def exit_worker(self): FILE: colossalai/utils/safetensors.py function _object_to_tensor (line 24) | def _object_to_tensor(obj, device): function _tensor_to_object (line 35) | def _tensor_to_object(tensor, tensor_size): class TensorInfo (line 42) | class TensorInfo: class PreparedData (line 49) | class PreparedData: function _cast_to_tensor (line 55) | def _cast_to_tensor(obj): function _cast_to_object (line 61) | def _cast_to_object(tensor: torch.Tensor): function _flatten_optim_state_dict (line 65) | def _flatten_optim_state_dict(state_dict: dict, seperator: str = ".") ->... function _unflatten_optim_state_dict (line 93) | def _unflatten_optim_state_dict(flat_dict: dict, metadata: Optional[dict... function prepare (line 122) | def prepare( function save (line 162) | def save(path: str, state_dict: Dict[str, torch.Tensor], metadata: Optio... function save_nested (line 174) | def save_nested(path: str, state_dict: Dict[str, torch.Tensor]) -> None: function move_and_save (line 179) | def move_and_save( function load_flat (line 200) | def load_flat(checkpoint_path, seperator: str = "."): FILE: colossalai/utils/tensor_detector/tensor_detector.py class TensorDetector (line 13) | class TensorDetector: method __init__ (line 14) | def __init__( method get_tensor_mem (line 47) | def get_tensor_mem(self, tensor): method mem_format (line 55) | def mem_format(self, real_memory_size): method collect_tensors_state (line 65) | def collect_tensors_state(self): method print_tensors_state (line 110) | def print_tensors_state(self): method detect (line 170) | def detect(self, include_cpu=False): method close (line 180) | def close(self): FILE: colossalai/utils/timer.py class Timer (line 9) | class Timer: method __init__ (line 12) | def __init__(self): method has_history (line 19) | def has_history(self): method current_time (line 23) | def current_time(self) -> float: method start (line 27) | def start(self): method lap (line 34) | def lap(self): method stop (line 38) | def stop(self, keep_in_history: bool = False): method get_history_mean (line 56) | def get_history_mean(self): method get_history_sum (line 64) | def get_history_sum(self): method get_elapsed_time (line 72) | def get_elapsed_time(self): method reset (line 84) | def reset(self): class MultiTimer (line 91) | class MultiTimer: method __init__ (line 98) | def __init__(self, on: bool = True): method start (line 102) | def start(self, name: str): method stop (line 113) | def stop(self, name: str, keep_in_history: bool): method get_timer (line 125) | def get_timer(self, name): method reset (line 135) | def reset(self, name=None): method is_on (line 149) | def is_on(self): method set_status (line 152) | def set_status(self, mode: bool): method __iter__ (line 155) | def __iter__(self) -> Tuple[str, Timer]: FILE: colossalai/zero/gemini/chunk/chunk.py class TensorState (line 13) | class TensorState(Enum): class TensorInfo (line 35) | class TensorInfo: class ChunkFullError (line 41) | class ChunkFullError(Exception): function is_storage_empty (line 45) | def is_storage_empty(tensor: torch.Tensor) -> bool: function free_storage (line 49) | def free_storage(tensor: torch.Tensor) -> None: function alloc_storage (line 54) | def alloc_storage(tensor: torch.Tensor) -> None: class Chunk (line 59) | class Chunk: method __init__ (line 62) | def __init__( method memory_usage (line 173) | def memory_usage(self) -> Dict[str, int]: method device_type (line 194) | def device_type(self) -> str: method payload (line 203) | def payload(self) -> torch.Tensor: method payload_mem (line 215) | def payload_mem(self) -> int: method can_move (line 225) | def can_move(self) -> bool: method can_release (line 229) | def can_release(self) -> bool: method can_reduce (line 239) | def can_reduce(self): method has_inf_or_nan (line 243) | def has_inf_or_nan(self) -> bool: method set_l2_norm (line 253) | def set_l2_norm(self) -> None: method append_tensor (line 264) | def append_tensor(self, tensor: torch.Tensor): method close_chunk (line 290) | def close_chunk(self): method shard_move (line 321) | def shard_move(self, device: torch.device, force_copy: bool = False, n... method access_chunk (line 365) | def access_chunk(self, async_access: bool = False) -> Optional[dist.Wo... method release_chunk (line 375) | def release_chunk(self): method reduce (line 383) | def reduce(self, async_op: bool = False): method wait_async_reduce (line 421) | def wait_async_reduce(self) -> None: method tensor_trans_state (line 426) | def tensor_trans_state(self, tensor: torch.Tensor, tensor_state: Tenso... method copy_tensor_to_chunk_slice (line 445) | def copy_tensor_to_chunk_slice( method add_tensor_to_chunk_slice (line 463) | def add_tensor_to_chunk_slice(self, tensor: torch.Tensor, data_slice: ... method get_valid_length (line 478) | def get_valid_length(self) -> int: method init_pair (line 485) | def init_pair(self, friend_chunk: "Chunk") -> None: method optim_update (line 494) | def optim_update(self) -> None: method get_tensors (line 516) | def get_tensors(self) -> List[torch.Tensor]: method __gather (line 519) | def __gather(self, async_op: bool = False) -> Optional[dist.Work]: method __scatter (line 544) | def __scatter(self): method __paired_shard_move (line 559) | def __paired_shard_move(self, non_blocking=False): method __update_tensors_ptr (line 574) | def __update_tensors_ptr(self) -> None: method __update_one_tensor_info (line 582) | def __update_one_tensor_info(self, tensor_info: TensorInfo, next_state... method __update_tensors_state (line 587) | def __update_tensors_state(self, next_state: TensorState, prev_state: ... method __hash__ (line 592) | def __hash__(self) -> int: method __eq__ (line 595) | def __eq__(self, __o: object) -> bool: method __repr__ (line 598) | def __repr__(self, detailed: bool = True): method init_grad_chunk (line 640) | def init_grad_chunk(self) -> "Chunk": FILE: colossalai/zero/gemini/chunk/manager.py class ChunkManager (line 14) | class ChunkManager: method __init__ (line 23) | def __init__( method register_tensor (line 50) | def register_tensor( method close_all_groups (line 115) | def close_all_groups(self): method access_chunk (line 120) | def access_chunk(self, chunk: Chunk, async_access: bool = False) -> Op... method release_chunk (line 131) | def release_chunk(self, chunk: Chunk) -> None: method move_chunk (line 140) | def move_chunk(self, chunk: Chunk, device: torch.device, force_copy: b... method trans_tensor_state (line 148) | def trans_tensor_state(self, tensor: torch.Tensor, state: TensorState)... method reduce_chunk (line 153) | def reduce_chunk(self, chunk: Chunk, async_op: bool = False) -> bool: method fake_release_chunk (line 163) | def fake_release_chunk(self, chunk: Chunk) -> None: method copy_tensor_to_chunk_slice (line 171) | def copy_tensor_to_chunk_slice(self, tensor: torch.Tensor, data: torch... method get_chunk (line 182) | def get_chunk(self, tensor: torch.Tensor) -> Chunk: method get_cuda_movable_chunks (line 191) | def get_cuda_movable_chunks(self) -> List[Chunk]: method get_chunks (line 202) | def get_chunks(self, tensors: Iterable[torch.Tensor]) -> Tuple[Chunk, ... method add_extern_static_tensor (line 216) | def add_extern_static_tensor(self, tensor: torch.Tensor) -> None: method __repr__ (line 231) | def __repr__(self) -> str: method __get_chunk_group (line 242) | def __get_chunk_group(self, group_name: str) -> Deque[Chunk]: method __close_one_chunk (line 248) | def __close_one_chunk(self, chunk: Chunk): method __sub_memory_usage (line 253) | def __sub_memory_usage(self, usage: Dict[str, int]): method __add_memory_usage (line 257) | def __add_memory_usage(self, usage: Dict[str, int]): method __add_accessed_chunk (line 261) | def __add_accessed_chunk(self, chunk: Chunk, async_access: bool = Fals... method __sub_accessed_chunk (line 267) | def __sub_accessed_chunk(self, chunk: Chunk): method init_grad_chunk (line 272) | def init_grad_chunk(self, chunk: Chunk) -> Chunk: method rearrange_accumulated_grad_chunk (line 282) | def rearrange_accumulated_grad_chunk(self, chunk: Chunk) -> Chunk: FILE: colossalai/zero/gemini/chunk/search_utils.py function _filter_exlarge_params (line 14) | def _filter_exlarge_params(model: nn.Module, size_dict: Dict[int, List[i... function _get_unused_byte (line 41) | def _get_unused_byte(size_list: List[int], chunk_size: int) -> int: function _tensor_numel (line 63) | def _tensor_numel(local_param: ColoParameter) -> int: function classify_params_by_dp_degree (line 79) | def classify_params_by_dp_degree( function search_chunk_configuration (line 108) | def search_chunk_configuration( FILE: colossalai/zero/gemini/chunk/utils.py function safe_div (line 12) | def safe_div(a, b): function init_chunk_manager (line 18) | def init_chunk_manager( FILE: colossalai/zero/gemini/gemini_ddp.py class GeminiDDP (line 56) | class GeminiDDP(ModelWrapper): method __init__ (line 75) | def __init__( method remove_hooks (line 212) | def remove_hooks(self): method __del__ (line 221) | def __del__(self): method parameters (line 224) | def parameters(self, recurse: bool = True): method named_parameters (line 227) | def named_parameters(self, prefix: str = "", recurse: bool = True): method named_buffers (line 230) | def named_buffers(self, prefix: str = "", recurse: bool = True): method named_children (line 233) | def named_children(self): method named_modules (line 236) | def named_modules( method set_params_to_ignore (line 242) | def set_params_to_ignore(params_to_ignore: Iterable[torch.Tensor]) -> ... method _post_forward (line 260) | def _post_forward(self): method forward (line 274) | def forward(self, *args, **kwargs): method _inference_forward (line 295) | def _inference_forward(self, *args, **kwargs): method _setup_grads_ptr (line 312) | def _setup_grads_ptr(self): method _pre_backward (line 318) | def _pre_backward(self): method _post_backward (line 325) | def _post_backward(self): method backward (line 348) | def backward(self, loss: torch.Tensor): method backward_by_grad (line 354) | def backward_by_grad(self, tensor, grad, inputs: torch.Tensor = None, ... method grad_handle (line 358) | def grad_handle( method zero_grad (line 438) | def zero_grad(self, set_to_none: bool = False) -> None: method set_chunk_grad_device (line 441) | def set_chunk_grad_device(self, chunk: Chunk, device: torch.device) ->... method state_dict (line 445) | def state_dict(self, destination=None, prefix="", keep_vars=False, onl... method _get_chunk_to_save_data (line 472) | def _get_chunk_to_save_data(self, chunk: Chunk, only_rank_0: bool) -> ... method _get_param_to_save_data (line 516) | def _get_param_to_save_data(self, param_list: List[torch.nn.Parameter]... method _save_to_state_dict (line 534) | def _save_to_state_dict(self, destination, prefix, keep_vars, only_ran... method load_state_dict (line 587) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"... method _load_from_state_dict (line 644) | def _load_from_state_dict( method _init_chunks (line 819) | def _init_chunks(self, param_order, strict_ddp_mode: bool, cpu_offload... method _cast_buffers (line 876) | def _cast_buffers(self): method _preprocess_param (line 885) | def _preprocess_param(self, p: Union[nn.Parameter, ColoParameter, "Laz... method state_dict_shard (line 900) | def state_dict_shard( FILE: colossalai/zero/gemini/gemini_hook.py class TrainingPhase (line 15) | class TrainingPhase(Enum): class GeminiZeROHook (line 20) | class GeminiZeROHook(ColoParamOpHook): method __init__ (line 21) | def __init__(self, gemini_manager: GeminiManager) -> None: method pre_op (line 27) | def pre_op(self, params): method post_op (line 76) | def post_op(self, params): method pre_forward (line 86) | def pre_forward(self, params: List[torch.Tensor]) -> None: method post_forward (line 89) | def post_forward(self, params: List[torch.Tensor]) -> None: method pre_backward (line 92) | def pre_backward(self, params: List[torch.Tensor]) -> None: method post_backward (line 95) | def post_backward(self, params: List[torch.Tensor]) -> None: method switch_training_phase (line 99) | def switch_training_phase(self, training_phase: TrainingPhase = Traini... FILE: colossalai/zero/gemini/gemini_mgr.py class GeminiManager (line 13) | class GeminiManager: method __init__ (line 28) | def __init__( method reset_attributes (line 59) | def reset_attributes(self): method need_warmup (line 68) | def need_warmup(self) -> bool: method is_warmup (line 71) | def is_warmup(self): method memstats (line 74) | def memstats(self): method pre_iter (line 87) | def pre_iter(self, *args): method post_iter (line 91) | def post_iter(self): method adjust_layout (line 98) | def adjust_layout(self, chunks: Tuple[Chunk, ...], record_anyway: bool... method wait_chunks (line 123) | def wait_chunks(self, chunks: Iterable[Chunk]) -> Tuple[Chunk]: method add_work (line 133) | def add_work(self, chunk: Chunk, work: dist.Work): method _get_layout_info (line 139) | def _get_layout_info(self, compute_idx: int, warmup: bool, chunks: Tup... method _record_warmup_chunks_order (line 157) | def _record_warmup_chunks_order(self, chunks: Tuple[Chunk, ...], recor... method sample_overall_data (line 162) | def sample_overall_data(self): method record_model_data_volume (line 166) | def record_model_data_volume(self): method chunk_manager (line 171) | def chunk_manager(self): method cuda_margin_mem (line 175) | def cuda_margin_mem(self) -> Optional[float]: method placement_policy (line 181) | def placement_policy(self) -> PlacementPolicy: method compute_list (line 185) | def compute_list(self) -> List[Tuple[Chunk, ...]]: method compute_idx (line 189) | def compute_idx(self) -> int: method async_works (line 193) | def async_works(self) -> Dict[Chunk, dist.Work]: method is_cuda_margin_mem_avail (line 197) | def is_cuda_margin_mem_avail(self) -> bool: method setup_grads_device (line 200) | def setup_grads_device( FILE: colossalai/zero/gemini/gemini_optimizer.py class GeminiFP16MixedPrecisionMixin (line 46) | class GeminiFP16MixedPrecisionMixin(FP16MixedPrecisionMixin): method __init__ (line 47) | def __init__( method check_local_overflow (line 63) | def check_local_overflow(self) -> bool: method pre_zero_grad (line 66) | def pre_zero_grad(self) -> None: class GeminiOptimizer (line 70) | class GeminiOptimizer(OptimizerWrapper): method __init__ (line 100) | def __init__( method _set_grad_ptr (line 200) | def _set_grad_ptr(self): method _update_fp16_params (line 213) | def _update_fp16_params(self): method _clear_global_norm (line 223) | def _clear_global_norm(self) -> None: method _calc_global_norm (line 228) | def _calc_global_norm(self) -> float: method _get_combined_scale (line 254) | def _get_combined_scale(self): method zero_grad (line 266) | def zero_grad(self, *args, **kwargs): method step (line 270) | def step(self, *args, **kwargs): method clip_grad_norm (line 296) | def clip_grad_norm(self, model: torch.nn.Module, max_norm: float, norm... method backward (line 299) | def backward(self, loss: torch.Tensor): method backward_by_grad (line 303) | def backward_by_grad( method _maybe_move_fp32_params (line 313) | def _maybe_move_fp32_params(self): method _register_states_ (line 345) | def _register_states_(self): method __init__optimizer (line 353) | def __init__optimizer(self): method get_offsets (line 392) | def get_offsets(self, param_id: int) -> tuple: method collect_states (line 421) | def collect_states(self, param_id: int, only_rank_0: bool = True) -> d... method pack_optimizer_states_to_tensor (line 574) | def pack_optimizer_states_to_tensor( method load_from_compacted_states (line 616) | def load_from_compacted_states( method get_param_groups_for_saving (line 642) | def get_param_groups_for_saving(self) -> list: method state_dict (line 670) | def state_dict(self, only_rank_0: bool = True) -> dict: method load_param_groups (line 698) | def load_param_groups(self, saved_param_groups: list): method load_single_param_states (line 719) | def load_single_param_states(self, param_id: int, saved_states: dict): method load_param_states (line 778) | def load_param_states(self, param_states: dict): method optimizer_loading_epilogue (line 789) | def optimizer_loading_epilogue(self): method load_state_dict (line 797) | def load_state_dict(self, state_dict: dict): method state_shard (line 811) | def state_shard( method clip_grad_by_value (line 852) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None: method clip_grad_by_norm (line 855) | def clip_grad_by_norm( method get_grad_norm (line 867) | def get_grad_norm(self, norm_type=2, **kwargs): class GeminiAdamOptimizer (line 871) | class GeminiAdamOptimizer(GeminiOptimizer): method __init__ (line 872) | def __init__(self, model: torch.nn.Module, **defaults: Any) -> None: FILE: colossalai/zero/gemini/memory_tracer/chunk_memstats_collector.py class ChunkMemStatsCollector (line 10) | class ChunkMemStatsCollector(MemStatsCollector): method __init__ (line 11) | def __init__(self, chunk_manager: ChunkManager, memstats: Optional[Mem... method record_model_data_volume (line 24) | def record_model_data_volume(self) -> None: method cuda_margin_mem (line 33) | def cuda_margin_mem(self) -> float: FILE: colossalai/zero/gemini/memory_tracer/memory_monitor.py class MemoryMonitor (line 11) | class MemoryMonitor: method __init__ (line 16) | def __init__(self): method __len__ (line 20) | def __len__(self): method start (line 24) | def start(self): method finish (line 28) | def finish(self): method state_dict (line 31) | def state_dict(self): method save (line 37) | def save(self, filename): method clear (line 41) | def clear(self): class AsyncMemoryMonitor (line 46) | class AsyncMemoryMonitor(MemoryMonitor): method __init__ (line 76) | def __init__(self, power: int = 10): method set_interval (line 89) | def set_interval(self, power: int): method is_measuring (line 93) | def is_measuring(self): method start (line 96) | def start(self): method finish (line 100) | def finish(self): method _measure_usage (line 112) | def _measure_usage(self): class SyncCudaMemoryMonitor (line 125) | class SyncCudaMemoryMonitor(MemoryMonitor): method __init__ (line 131) | def __init__(self, power: int = 10): method start (line 134) | def start(self): method finish (line 138) | def finish(self) -> int: FILE: colossalai/zero/gemini/memory_tracer/memory_stats.py class MemStats (line 8) | class MemStats(object): method __init__ (line 9) | def __init__(self) -> None: method calc_max_cuda_non_model_data (line 37) | def calc_max_cuda_non_model_data(self): method record_max_cuda_model_data (line 44) | def record_max_cuda_model_data(self, val): method record_max_cuda_overall_data (line 47) | def record_max_cuda_overall_data(self, val): method max_overall_cuda (line 52) | def max_overall_cuda(self): method increase_preop_step (line 55) | def increase_preop_step(self, param_list: List[torch.nn.Parameter]): method param_used_step (line 72) | def param_used_step(self, param: torch.nn.Parameter) -> Optional[List[... method param_order (line 87) | def param_order(self): method non_model_data_list (line 93) | def non_model_data_list(self, device_type: str) -> List[int]: method max_non_model_data (line 101) | def max_non_model_data(self, device_type: str) -> float: method clear (line 109) | def clear(self): FILE: colossalai/zero/gemini/memory_tracer/memstats_collector.py class MemStatsCollector (line 8) | class MemStatsCollector: method __init__ (line 20) | def __init__(self, memstats: Optional[MemStats] = None) -> None: method next_period_non_model_data_usage (line 34) | def next_period_non_model_data_usage(self, device_type: str) -> int: method sampling_time (line 54) | def sampling_time(self): method start_collection (line 57) | def start_collection(self): method finish_collection (line 61) | def finish_collection(self): method record_model_data_volume (line 69) | def record_model_data_volume(self) -> None: method sample_overall_data (line 80) | def sample_overall_data(self) -> None: method clear (line 94) | def clear(self) -> None: FILE: colossalai/zero/gemini/memory_tracer/param_runtime_order.py class ParamGenerator (line 6) | class ParamGenerator(ABC): method append (line 7) | def append(self, param: torch.nn.Parameter): method generate (line 10) | def generate(self): method clear (line 13) | def clear(self): class OrderedParamGenerator (line 17) | class OrderedParamGenerator(ParamGenerator): method __init__ (line 23) | def __init__(self) -> None: method append (line 26) | def append(self, param: torch.nn.Parameter): method generate (line 29) | def generate(self): method is_empty (line 37) | def is_empty(self): method clear (line 40) | def clear(self): FILE: colossalai/zero/gemini/memory_tracer/runtime_mem_tracer.py class RuntimeMemTracer (line 11) | class RuntimeMemTracer: method __init__ (line 23) | def __init__(self, module: torch.nn.Module, dtype: torch.dtype = torch... method parameters_in_runtime_order (line 44) | def parameters_in_runtime_order(self): method memstats (line 47) | def memstats(self): method __call__ (line 50) | def __call__(self, *args, **kwargs): method _backup_params (line 53) | def _backup_params(self): method _restore_params (line 61) | def _restore_params(self): method _pre_forward (line 70) | def _pre_forward(self): method forward (line 76) | def forward(self, *args, **kwargs): method backward (line 84) | def backward(self, loss): method _post_backward (line 89) | def _post_backward(self): method _clear_cuda_mem_info (line 97) | def _clear_cuda_mem_info(self): method _cast_buffers_to_cuda_dtype (line 101) | def _cast_buffers_to_cuda_dtype(self): FILE: colossalai/zero/gemini/memory_tracer/static_memstats_collector.py class ModuleInfos (line 17) | class ModuleInfos: method __init__ (line 18) | def __init__( class StaticMemStatsCollector (line 27) | class StaticMemStatsCollector(ChunkMemStatsCollector): method __init__ (line 32) | def __init__(self, module: nn.Module, chunk_manager: ChunkManager) -> ... method init_mem_stats (line 37) | def init_mem_stats(self, *inputs): method refactor_module (line 80) | def refactor_module(self): method recover_module (line 85) | def recover_module(self): method register_opnodes_recursively (line 89) | def register_opnodes_recursively( FILE: colossalai/zero/gemini/memory_tracer/utils.py function colo_model_optimizer_usage (line 6) | def colo_model_optimizer_usage(optim) -> Tuple[int, int]: function colo_model_mem_usage (line 21) | def colo_model_mem_usage(model: torch.nn.Module) -> Tuple[int, int]: FILE: colossalai/zero/gemini/placement_policy.py class PlacementPolicy (line 17) | class PlacementPolicy(ABC): method __init__ (line 20) | def __init__( method evict_tensors (line 32) | def evict_tensors(self, can_evict_chunks: List[Chunk], **kwargs) -> Tu... method setup_grads_device (line 36) | def setup_grads_device( method get_prefetch_chunks (line 41) | def get_prefetch_chunks( class StaticPlacementPolicy (line 47) | class StaticPlacementPolicy(PlacementPolicy): method __init__ (line 48) | def __init__( method evict_tensors (line 69) | def evict_tensors(self, can_evict_chunks: List[Chunk], **kwargs) -> Tu... method setup_grads_device (line 86) | def setup_grads_device( method get_prefetch_chunks (line 109) | def get_prefetch_chunks( class AutoPlacementPolicy (line 128) | class AutoPlacementPolicy(PlacementPolicy): method __init__ (line 131) | def __init__( method evict_tensors (line 149) | def evict_tensors( method _sort_can_evict_chunks (line 215) | def _sort_can_evict_chunks(can_evict_chunks: tuple, compute_idx: int, ... method setup_grads_device (line 224) | def setup_grads_device( method get_prefetch_chunks (line 236) | def get_prefetch_chunks( class PlacementPolicyFactory (line 261) | class PlacementPolicyFactory: method create (line 268) | def create(policy_name: str) -> Type[PlacementPolicy]: method get_policy_names (line 274) | def get_policy_names(): FILE: colossalai/zero/gemini/utils.py function get_temp_total_chunk_on_cuda (line 14) | def get_temp_total_chunk_on_cuda(chunk: Chunk, dtype: torch.dtype): function _get_dfs_module_list (line 32) | def _get_dfs_module_list(module: nn.Module, memo: Optional[Set[nn.Module... function _get_shallow_copy_model (line 48) | def _get_shallow_copy_model(model: nn.Module): function get_static_torch_model (line 64) | def get_static_torch_model( FILE: colossalai/zero/low_level/_utils.py function flatten (line 10) | def flatten(input_): function unflatten (line 14) | def unflatten(flat, tensors): function count_numel (line 18) | def count_numel(tensor_list): function calculate_padding (line 25) | def calculate_padding(numel, unit_size): function shuffle_by_round_robin (line 30) | def shuffle_by_round_robin(tensor_list, num_partitions): function flatten_dense_tensors_with_padding (line 53) | def flatten_dense_tensors_with_padding(tensor_list, unit_size): function is_nccl_aligned (line 66) | def is_nccl_aligned(tensor): function get_grad_accumulate_object (line 70) | def get_grad_accumulate_object(tensor): function split_by_dtype (line 91) | def split_by_dtype(tensor_list): function reduce_tensor_dp_group (line 109) | def reduce_tensor_dp_group( function has_inf_or_nan (line 161) | def has_inf_or_nan(tensor): function release_param_grad (line 182) | def release_param_grad(tensor_list): function calculate_global_norm_from_list (line 187) | def calculate_global_norm_from_list(norm_list): function sync_tensor (line 195) | def sync_tensor(flat_tensor, tensor_list): function all_gather_into_flat_tensor_nd (line 215) | def all_gather_into_flat_tensor_nd( function get_nd_world_size (line 238) | def get_nd_world_size(group) -> int: function get_nd_rank (line 245) | def get_nd_rank(group) -> int: FILE: colossalai/zero/low_level/bookkeeping/base_store.py class BaseStore (line 8) | class BaseStore: method __init__ (line 9) | def __init__(self, torch_pg: Union[ProcessGroup, Tuple[ProcessGroup, .... method world_size (line 21) | def world_size(self): method local_rank (line 25) | def local_rank(self): FILE: colossalai/zero/low_level/bookkeeping/bucket_store.py class BucketStore (line 13) | class BucketStore(BaseStore): method __init__ (line 14) | def __init__( method reset_all (line 24) | def reset_all(self) -> None: method num_elements_in_bucket (line 40) | def num_elements_in_bucket(self) -> int: method reset_num_elements_in_bucket (line 49) | def reset_num_elements_in_bucket(self): method add_param_grad (line 54) | def add_param_grad(self, group_id: int, param: Tensor, padding_size: i... method build_grad_in_bucket (line 71) | def build_grad_in_bucket(self): method get_grad (line 94) | def get_grad(self) -> Dict: method get_flatten_grad (line 103) | def get_flatten_grad(self) -> Tensor: method get_param_id_of_grad (line 117) | def get_param_id_of_grad(self, grad: Tensor) -> int: method reset (line 129) | def reset(self): FILE: colossalai/zero/low_level/bookkeeping/gradient_store.py class GradientStore (line 8) | class GradientStore(BaseStore): method __init__ (line 9) | def __init__(self, *args, partition_grad: bool = False): method get_partitioned_gradients_by_param_id (line 26) | def get_partitioned_gradients_by_param_id(self, group_id: int, param_i... method append_gradients_by_param_id (line 43) | def append_gradients_by_param_id(self, grad: Tensor, group_id: int, pa... method add_gradients_by_param_id (line 61) | def add_gradients_by_param_id(self, grad: Tensor, grad_idx: int, group... method get_working_grads_by_group_id (line 74) | def get_working_grads_by_group_id(self, group_id: int) -> List: method get_working_grad_by_param_id (line 93) | def get_working_grad_by_param_id(self, param_id) -> Optional[Tensor]: method reset_grads_by_group_id (line 109) | def reset_grads_by_group_id(self, group_id: int): method reset_all_gradients (line 112) | def reset_all_gradients(self): method get_param_id_for_grad (line 116) | def get_param_id_for_grad(self, grad: Tensor) -> Optional[int]: FILE: colossalai/zero/low_level/bookkeeping/tensor_bucket.py class TensorBucket (line 12) | class TensorBucket: method __init__ (line 13) | def __init__(self, size): method max_size (line 20) | def max_size(self): method current_size (line 24) | def current_size(self): method is_full_or_oversized (line 27) | def is_full_or_oversized(self): method is_empty (line 30) | def is_empty(self): method add_to_bucket (line 33) | def add_to_bucket(self, tensor, allow_oversize=False, write_back_tenso... method will_exceed_max_size (line 45) | def will_exceed_max_size(self, tensor_size): method get_bucket (line 49) | def get_bucket(self): method empty (line 52) | def empty(self): method flatten (line 57) | def flatten(self): method unflatten (line 60) | def unflatten(self, flat_tensor): method unflatten_and_copy (line 63) | def unflatten_and_copy(self, flat_tensor): method all_gather (line 68) | def all_gather(self, group=None, fp8_communication: bool = False): FILE: colossalai/zero/low_level/low_level_optim.py class LowLevelZeroFP16MixedPrecisionMixin (line 40) | class LowLevelZeroFP16MixedPrecisionMixin(FP16MixedPrecisionMixin): method __init__ (line 41) | def __init__( method check_local_overflow (line 65) | def check_local_overflow(self) -> bool: class LowLevelZeroOptimizer (line 74) | class LowLevelZeroOptimizer(OptimizerWrapper): method __init__ (line 77) | def __init__( method __del__ (line 241) | def __del__(self): method dtype (line 246) | def dtype(self): method num_param_groups (line 250) | def num_param_groups(self): method _sanity_checks (line 253) | def _sanity_checks(self): method _create_master_param_current_rank (line 263) | def _create_master_param_current_rank(self, param_list): method _attach_reduction_hook (line 305) | def _attach_reduction_hook(self): method _run_reduction (line 327) | def _run_reduction(self): method _update_unpartitoned_grad (line 393) | def _update_unpartitoned_grad( method _update_partitoned_grad (line 402) | def _update_partitoned_grad( method _add_grad (line 415) | def _add_grad( method _add_to_bucket (line 431) | def _add_to_bucket(self, param, group_id): method backward (line 451) | def backward(self, loss, inputs=None, retain_graph=False): method backward_by_grad (line 472) | def backward_by_grad(self, tensor, grad, inputs: Tensor = None, retain... method zero_bucket_stores (line 494) | def zero_bucket_stores(self): method zero_grad_stores (line 498) | def zero_grad_stores(self): method zero_grad (line 502) | def zero_grad(self, set_to_none=True): method step (line 527) | def step(self, closure=None): method _compute_grad_norm (line 637) | def _compute_grad_norm( method _unscale_and_clip_grads (line 702) | def _unscale_and_clip_grads(self, grad_groups_flat, total_norm): method _sync_grad (line 722) | def _sync_grad(self): method _reduce_grad (line 736) | def _reduce_grad(self, partition_grad): method no_sync (line 746) | def no_sync(self): method _pack_state (line 758) | def _pack_state(self, state: Dict) -> Dict: method state_dict (line 779) | def state_dict( method load_state_dict (line 824) | def load_state_dict(self, state_dict: Dict): method state_dict_shard (line 853) | def state_dict_shard( method update_master_params (line 923) | def update_master_params(self, model: nn.Module) -> None: method get_working_to_master_map (line 942) | def get_working_to_master_map(self) -> Dict[int, torch.Tensor]: method get_master_to_working_map (line 945) | def get_master_to_working_map(self) -> Dict[int, torch.Tensor]: method get_param_padding_map (line 948) | def get_param_padding_map(self) -> Dict[int, torch.Tensor]: method record_param_padding_size (line 951) | def record_param_padding_size(self, param: Tensor, padding_size: int): method get_param_padding_size (line 961) | def get_param_padding_size(self, param: Tensor) -> int: method link_master_and_working_param (line 973) | def link_master_and_working_param(self, master_param: Tensor, working_... method get_padding_map (line 984) | def get_padding_map(self) -> Dict[int, Tensor]: method get_param_grad (line 993) | def get_param_grad(self, working_param: nn.Parameter) -> Tensor: method get_working_grads_by_group_id (line 1005) | def get_working_grads_by_group_id(self, group_id: int) -> List[Tensor]: method get_param_id_for_grad (line 1011) | def get_param_id_for_grad(self, grad: Tensor) -> int: method get_working_grad_by_param_id (line 1021) | def get_working_grad_by_param_id(self, param_id: int) -> Tensor: method get_partitioned_gradients_by_param_id (line 1025) | def get_partitioned_gradients_by_param_id(self, group_id: int, param_i... method _force_wait_all_gather (line 1029) | def _force_wait_all_gather(self): method get_grad_norm (line 1033) | def get_grad_norm(self, norm_type=2, **kwargs): FILE: colossalai/zero/low_level/zero_hook.py function wait_all_gather_handle (line 10) | def wait_all_gather_handle(p): function set_all_gather_handle (line 17) | def set_all_gather_handle(p, handle): class ZeroOpHook (line 21) | class ZeroOpHook(ColoParamOpHook): method pre_forward (line 22) | def pre_forward(self, params: List[Tensor]) -> None: method post_forward (line 26) | def post_forward(self, params: List[Tensor]) -> None: method pre_backward (line 29) | def pre_backward(self, params: List[Tensor]) -> None: method post_backward (line 32) | def post_backward(self, params: List[Tensor]) -> None: FILE: colossalai/zero/wrapper.py function zero_model_wrapper (line 10) | def zero_model_wrapper( function zero_optim_wrapper (line 52) | def zero_optim_wrapper( FILE: examples/community/fp8/mnist/main.py class Net (line 22) | class Net(nn.Module): method __init__ (line 23) | def __init__(self, use_te=False): method forward (line 37) | def forward(self, x): function train (line 55) | def train(args, model, device, train_loader, optimizer, epoch, use_fp8): function calibrate (line 77) | def calibrate(model, device, test_loader): function test (line 87) | def test(model, device, test_loader, use_fp8): function main (line 110) | def main(): FILE: examples/community/roberta/preprocessing/get_mask.py function map_to_numpy (line 17) | def map_to_numpy(data): class PreTrainingDataset (line 21) | class PreTrainingDataset: method __init__ (line 22) | def __init__( method tokenize (line 45) | def tokenize(self, doc): method create_training_instance (line 51) | def create_training_instance(self, instance): method create_masked_lm_predictions (line 111) | def create_masked_lm_predictions(self, tokens): method get_new_segment (line 159) | def get_new_segment(self, segment): method create_whole_masked_lm_predictions (line 190) | def create_whole_masked_lm_predictions(self, tokens): FILE: examples/community/roberta/preprocessing/mask.cpp type MaskedLMInstance (line 22) | struct MaskedLMInstance { method MaskedLMInstance (line 25) | MaskedLMInstance(int index, std::string label) { function get_new_segment (line 31) | auto get_new_segment( function startsWith (line 78) | bool startsWith(const std::string &s, const std::string &sub) { function create_whole_masked_lm_predictions (line 82) | auto create_whole_masked_lm_predictions( function PYBIND11_MODULE (line 186) | PYBIND11_MODULE(mask, m) { FILE: examples/community/roberta/preprocessing/sentence_split.py function split_sentence (line 13) | def split_sentence(document: str, flag: str = "all", limit: int = 510) -... function get_sent (line 50) | def get_sent(output_path, input_path, fin_list=[], host=-1, seq_len=512)... function getFileSize (line 85) | def getFileSize(filepath, shard): function get_start_end (line 110) | def get_start_end(real_shard, base=0, server_num=10, server_name="GPU"): FILE: examples/community/roberta/preprocessing/tokenize_mask.py function get_raw_instance (line 15) | def get_raw_instance(document, max_sequence_length=512): function split_numpy_chunk (line 58) | def split_numpy_chunk(path, tokenizer, pretrain_data, host): function split_numpy_chunk_pool (line 121) | def split_numpy_chunk_pool(input_path, output_path, pretrain_data, worke... FILE: examples/community/roberta/pretraining/arguments.py function parse_args (line 6) | def parse_args(): FILE: examples/community/roberta/pretraining/bert_dataset_provider.py class BertDatasetProviderInterface (line 1) | class BertDatasetProviderInterface: method get_shard (line 2) | def get_shard(self, index, shuffle=True): method release_shard (line 5) | def release_shard(self, index): method prefetch_shard (line 8) | def prefetch_shard(self, index): method get_batch (line 11) | def get_batch(self, batch_iter): method prefetch_batch (line 14) | def prefetch_batch(self): FILE: examples/community/roberta/pretraining/evaluation.py function evaluate (line 10) | def evaluate(model, args, logger, global_step, criterion): FILE: examples/community/roberta/pretraining/loss.py class LossForPretraining (line 6) | class LossForPretraining(torch.nn.Module): method __init__ (line 7) | def __init__(self, vocab_size): method forward (line 12) | def forward(self, prediction_scores, masked_lm_labels, next_sentence_l... FILE: examples/community/roberta/pretraining/model/bert.py function load_tf_weights_in_bert (line 105) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path): class BertEmbeddings (line 178) | class BertEmbeddings(nn.Module): method __init__ (line 181) | def __init__(self, config): method forward (line 201) | def forward( class BertSelfAttention (line 243) | class BertSelfAttention(nn.Module): method __init__ (line 244) | def __init__(self, config, position_embedding_type=None): method transpose_for_scores (line 268) | def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor: method forward (line 273) | def forward( class BertSelfOutput (line 368) | class BertSelfOutput(nn.Module): method __init__ (line 369) | def __init__(self, config): method forward (line 375) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten... class BertAttention (line 382) | class BertAttention(nn.Module): method __init__ (line 383) | def __init__(self, config, position_embedding_type=None): method prune_heads (line 389) | def prune_heads(self, heads): method forward (line 407) | def forward( class BertIntermediate (line 431) | class BertIntermediate(nn.Module): method __init__ (line 432) | def __init__(self, config): method forward (line 440) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class BertOutput (line 446) | class BertOutput(nn.Module): method __init__ (line 447) | def __init__(self, config): method forward (line 453) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten... class BertLayer (line 460) | class BertLayer(nn.Module): method __init__ (line 461) | def __init__(self, config): method forward (line 475) | def forward( method feed_forward_chunk (line 540) | def feed_forward_chunk(self, attention_output): class BertEncoder (line 546) | class BertEncoder(nn.Module): method __init__ (line 547) | def __init__(self, config): method forward (line 553) | def forward( class BertPooler (line 642) | class BertPooler(nn.Module): method __init__ (line 643) | def __init__(self, config): method forward (line 648) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class BertPredictionHeadTransform (line 657) | class BertPredictionHeadTransform(nn.Module): method __init__ (line 658) | def __init__(self, config): method forward (line 667) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class BertLMPredictionHead (line 674) | class BertLMPredictionHead(nn.Module): method __init__ (line 675) | def __init__(self, config): method forward (line 688) | def forward(self, hidden_states): class BertOnlyMLMHead (line 694) | class BertOnlyMLMHead(nn.Module): method __init__ (line 695) | def __init__(self, config): method forward (line 699) | def forward(self, sequence_output: torch.Tensor) -> torch.Tensor: class BertOnlyNSPHead (line 704) | class BertOnlyNSPHead(nn.Module): method __init__ (line 705) | def __init__(self, config): method forward (line 709) | def forward(self, pooled_output): class BertPreTrainingHeads (line 714) | class BertPreTrainingHeads(nn.Module): method __init__ (line 715) | def __init__(self, config): method forward (line 720) | def forward(self, sequence_output, pooled_output): class BertPreTrainedModel (line 726) | class BertPreTrainedModel(PreTrainedModel): method _init_weights (line 738) | def _init_weights(self, module): method _set_gradient_checkpointing (line 754) | def _set_gradient_checkpointing(self, module, value=False): class BertForPreTrainingOutput (line 760) | class BertForPreTrainingOutput(ModelOutput): class BertModel (line 863) | class BertModel(BertPreTrainedModel): method __init__ (line 876) | def __init__(self, config, add_pooling_layer=True): method get_input_embeddings (line 888) | def get_input_embeddings(self): method set_input_embeddings (line 891) | def set_input_embeddings(self, value): method _prune_heads (line 894) | def _prune_heads(self, heads_to_prune): method forward (line 909) | def forward( class BertForPreTraining (line 1046) | class BertForPreTraining(BertPreTrainedModel): method __init__ (line 1047) | def __init__(self, config): method get_output_embeddings (line 1056) | def get_output_embeddings(self): method set_output_embeddings (line 1059) | def set_output_embeddings(self, new_embeddings): method forward (line 1064) | def forward( class BertLMHeadModel (line 1150) | class BertLMHeadModel(BertPreTrainedModel): method __init__ (line 1154) | def __init__(self, config): method get_output_embeddings (line 1166) | def get_output_embeddings(self): method set_output_embeddings (line 1169) | def set_output_embeddings(self, new_embeddings): method forward (line 1179) | def forward( method prepare_inputs_for_generation (line 1264) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio... method _reorder_cache (line 1276) | def _reorder_cache(self, past, beam_idx): class BertForMaskedLM (line 1284) | class BertForMaskedLM(BertPreTrainedModel): method __init__ (line 1288) | def __init__(self, config): method get_output_embeddings (line 1303) | def get_output_embeddings(self): method set_output_embeddings (line 1306) | def set_output_embeddings(self, new_embeddings): method forward (line 1318) | def forward( method prepare_inputs_for_generation (line 1375) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None... class BertForNextSentencePrediction (line 1396) | class BertForNextSentencePrediction(BertPreTrainedModel): method __init__ (line 1397) | def __init__(self, config): method forward (line 1408) | def forward( class BertForSequenceClassification (line 1501) | class BertForSequenceClassification(BertPreTrainedModel): method __init__ (line 1502) | def __init__(self, config): method forward (line 1526) | def forward( class BertForMultipleChoice (line 1605) | class BertForMultipleChoice(BertPreTrainedModel): method __init__ (line 1606) | def __init__(self, config): method forward (line 1626) | def forward( class BertForTokenClassification (line 1700) | class BertForTokenClassification(BertPreTrainedModel): method __init__ (line 1703) | def __init__(self, config): method forward (line 1726) | def forward( class BertForQuestionAnswering (line 1786) | class BertForQuestionAnswering(BertPreTrainedModel): method __init__ (line 1789) | def __init__(self, config): method forward (line 1810) | def forward( FILE: examples/community/roberta/pretraining/model/deberta_v2.py class ContextPooler (line 60) | class ContextPooler(nn.Module): method __init__ (line 61) | def __init__(self, config): method forward (line 67) | def forward(self, hidden_states): method output_dim (line 78) | def output_dim(self): class XSoftmax (line 83) | class XSoftmax(torch.autograd.Function): method forward (line 112) | def forward(self, input, mask, dim): method backward (line 123) | def backward(self, grad_output): method symbolic (line 129) | def symbolic(g, self, mask, dim): class DropoutContext (line 147) | class DropoutContext(object): method __init__ (line 148) | def __init__(self): function get_mask (line 156) | def get_mask(input, local_context): class XDropout (line 176) | class XDropout(torch.autograd.Function): method forward (line 180) | def forward(ctx, input, local_ctx): method backward (line 190) | def backward(ctx, grad_output): class StableDropout (line 199) | class StableDropout(nn.Module): method __init__ (line 207) | def __init__(self, drop_prob): method forward (line 213) | def forward(self, x): method clear_context (line 224) | def clear_context(self): method init_context (line 228) | def init_context(self, reuse_mask=True, scale=1): method get_context (line 236) | def get_context(self): class DebertaV2SelfOutput (line 249) | class DebertaV2SelfOutput(nn.Module): method __init__ (line 250) | def __init__(self, config): method forward (line 256) | def forward(self, hidden_states, input_tensor): class DebertaV2Attention (line 264) | class DebertaV2Attention(nn.Module): method __init__ (line 265) | def __init__(self, config): method forward (line 271) | def forward( class DebertaV2Intermediate (line 301) | class DebertaV2Intermediate(nn.Module): method __init__ (line 302) | def __init__(self, config): method forward (line 310) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class DebertaV2Output (line 317) | class DebertaV2Output(nn.Module): method __init__ (line 318) | def __init__(self, config): method forward (line 325) | def forward(self, hidden_states, input_tensor): class DebertaV2Layer (line 333) | class DebertaV2Layer(nn.Module): method __init__ (line 334) | def __init__(self, config): method forward (line 340) | def forward( class ConvLayer (line 367) | class ConvLayer(nn.Module): method __init__ (line 368) | def __init__(self, config): method forward (line 380) | def forward(self, hidden_states, residual_states, input_mask): class DebertaV2Encoder (line 403) | class DebertaV2Encoder(nn.Module): method __init__ (line 406) | def __init__(self, config): method get_rel_embedding (line 435) | def get_rel_embedding(self): method get_attention_mask (line 445) | def get_attention_mask(self, attention_mask): method get_rel_pos (line 455) | def get_rel_pos(self, hidden_states, query_states=None, relative_pos=N... method forward (line 463) | def forward( function make_log_bucket_position (line 545) | def make_log_bucket_position(relative_pos, bucket_size, max_position): function build_relative_position (line 554) | def build_relative_position(query_size, key_size, bucket_size=-1, max_po... function c2p_dynamic_expand (line 585) | def c2p_dynamic_expand(c2p_pos, query_layer, relative_pos): function p2c_dynamic_expand (line 591) | def p2c_dynamic_expand(c2p_pos, query_layer, key_layer): function pos_dynamic_expand (line 597) | def pos_dynamic_expand(pos_index, p2c_att, key_layer): class DisentangledSelfAttention (line 601) | class DisentangledSelfAttention(nn.Module): method __init__ (line 612) | def __init__(self, config): method transpose_for_scores (line 651) | def transpose_for_scores(self, x, attention_heads): method forward (line 656) | def forward( method disentangled_attention_bias (line 740) | def disentangled_attention_bias(self, query_layer, key_layer, relative... class DebertaV2Embeddings (line 823) | class DebertaV2Embeddings(nn.Module): method __init__ (line 826) | def __init__(self, config): method forward (line 850) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No... class DebertaV2PreTrainedModel (line 898) | class DebertaV2PreTrainedModel(PreTrainedModel): method _init_weights (line 910) | def _init_weights(self, module): method _set_gradient_checkpointing (line 923) | def _set_gradient_checkpointing(self, module, value=False): class DebertaV2Model (line 994) | class DebertaV2Model(DebertaV2PreTrainedModel): method __init__ (line 995) | def __init__(self, config): method get_input_embeddings (line 1005) | def get_input_embeddings(self): method set_input_embeddings (line 1008) | def set_input_embeddings(self, new_embeddings): method _prune_heads (line 1011) | def _prune_heads(self, heads_to_prune): method forward (line 1025) | def forward( class DebertaV2ForMaskedLM (line 1107) | class DebertaV2ForMaskedLM(DebertaV2PreTrainedModel): method __init__ (line 1111) | def __init__(self, config): method get_output_embeddings (line 1120) | def get_output_embeddings(self): method set_output_embeddings (line 1123) | def set_output_embeddings(self, new_embeddings): method forward (line 1133) | def forward( class DebertaV2PredictionHeadTransform (line 1186) | class DebertaV2PredictionHeadTransform(nn.Module): method __init__ (line 1187) | def __init__(self, config): method forward (line 1196) | def forward(self, hidden_states): class DebertaV2LMPredictionHead (line 1204) | class DebertaV2LMPredictionHead(nn.Module): method __init__ (line 1205) | def __init__(self, config): method forward (line 1218) | def forward(self, hidden_states): class DebertaV2OnlyMLMHead (line 1225) | class DebertaV2OnlyMLMHead(nn.Module): method __init__ (line 1226) | def __init__(self, config): method forward (line 1230) | def forward(self, sequence_output): class DebertaV2ForSequenceClassification (line 1243) | class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel): method __init__ (line 1244) | def __init__(self, config): method get_input_embeddings (line 1262) | def get_input_embeddings(self): method set_input_embeddings (line 1265) | def set_input_embeddings(self, new_embeddings): method forward (line 1275) | def forward( class DebertaV2ForTokenClassification (line 1363) | class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel): method __init__ (line 1366) | def __init__(self, config): method forward (line 1384) | def forward( class DebertaV2ForQuestionAnswering (line 1440) | class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel): method __init__ (line 1443) | def __init__(self, config): method forward (line 1460) | def forward( class DebertaV2ForMultipleChoice (line 1540) | class DebertaV2ForMultipleChoice(DebertaV2PreTrainedModel): method __init__ (line 1541) | def __init__(self, config): method get_input_embeddings (line 1558) | def get_input_embeddings(self): method set_input_embeddings (line 1561) | def set_input_embeddings(self, new_embeddings): method forward (line 1571) | def forward( FILE: examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py class WorkerInitObj (line 16) | class WorkerInitObj(object): method __init__ (line 17) | def __init__(self, seed): method __call__ (line 20) | def __call__(self, id): function create_pretraining_dataset (line 25) | def create_pretraining_dataset( class pretraining_dataset (line 40) | class pretraining_dataset(Dataset): method __init__ (line 41) | def __init__(self, input_file, max_predictions_per_seq): method __len__ (line 49) | def __len__(self): method __getitem__ (line 53) | def __getitem__(self, index): class NvidiaBertDatasetProvider (line 66) | class NvidiaBertDatasetProvider(BertDatasetProviderInterface): method __init__ (line 67) | def __init__(self, args, evaluate=False): method get_shard (line 111) | def get_shard(self, index): method release_shard (line 132) | def release_shard(self): method prefetch_shard (line 136) | def prefetch_shard(self, index): method get_batch (line 148) | def get_batch(self, batch_iter): method prefetch_batch (line 151) | def prefetch_batch(self): method _get_shard_file (line 154) | def _get_shard_file(self, shard_index): method _get_shard_file_index (line 158) | def _get_shard_file_index(self, shard_index, global_rank): method shuffle_dataset (line 168) | def shuffle_dataset(self, epoch): FILE: examples/community/roberta/pretraining/pretrain_utils.py function get_new_state_dict (line 21) | def get_new_state_dict(state_dict, start_index=13): class LMModel (line 29) | class LMModel(nn.Module): method __init__ (line 30) | def __init__(self, model, config, args): method forward (line 39) | def forward(self, input_ids, token_type_ids=None, attention_mask=None): function get_model (line 44) | def get_model(args, logger): function get_optimizer (line 74) | def get_optimizer(model, lr): function get_lr_scheduler (line 87) | def get_lr_scheduler(optimizer, total_steps, warmup_steps=2000, last_epo... function save_ckpt (line 96) | def save_ckpt(model, optimizer, lr_scheduler, path, epoch, shard, global... FILE: examples/community/roberta/pretraining/run_pretraining.py function main (line 26) | def main(): FILE: examples/community/roberta/pretraining/utils/WandbLog.py class WandbLog (line 8) | class WandbLog: method init_wandb (line 10) | def init_wandb(cls, project, notes=None, name=time.strftime("%Y-%m-%d ... method log (line 14) | def log(cls, result, model=None, gradient=None): class TensorboardLog (line 24) | class TensorboardLog: method __init__ (line 25) | def __init__(self, location, name=time.strftime("%Y-%m-%d %H:%M:%S", t... method log_train (line 30) | def log_train(self, result, step): method log_eval (line 34) | def log_eval(self, result, step): method log_zeroshot (line 38) | def log_zeroshot(self, result, step): FILE: examples/community/roberta/pretraining/utils/exp_util.py function logging (line 11) | def logging(s, log_path, print_=True, log_=True): function get_logger (line 19) | def get_logger(log_path, **kwargs): function create_exp_dir (line 23) | def create_exp_dir(dir_path, scripts_to_save=None, debug=False): function get_cpu_mem (line 43) | def get_cpu_mem(): function get_gpu_mem (line 47) | def get_gpu_mem(): function get_mem_info (line 51) | def get_mem_info(prefix=""): function get_tflops (line 55) | def get_tflops(model_numel, batch_size, seq_len, step_time): function get_parameters_in_billions (line 59) | def get_parameters_in_billions(model, world_size=1): function throughput_calculator (line 72) | def throughput_calculator(numel, args, config, iteration_time, total_ite... function synchronize (line 98) | def synchronize(): function log_args (line 109) | def log_args(logger, args): FILE: examples/community/roberta/pretraining/utils/global_vars.py function set_global_variables (line 11) | def set_global_variables(launch_time, tensorboard_path): function _set_timers (line 16) | def _set_timers(): function _set_tensorboard_writer (line 23) | def _set_tensorboard_writer(launch_time, tensorboard_path): function get_timers (line 31) | def get_timers(): function get_tensorboard_writer (line 37) | def get_tensorboard_writer(): function _ensure_var_is_initialized (line 43) | def _ensure_var_is_initialized(var, name): function _ensure_var_is_not_initialized (line 48) | def _ensure_var_is_not_initialized(var, name): class _Timer (line 53) | class _Timer: method __init__ (line 56) | def __init__(self, name): method start (line 62) | def start(self): method stop (line 69) | def stop(self): method reset (line 76) | def reset(self): method elapsed (line 81) | def elapsed(self, reset=True): class Timers (line 98) | class Timers: method __init__ (line 101) | def __init__(self): method __call__ (line 104) | def __call__(self, name): method write (line 109) | def write(self, names, writer, iteration, normalizer=1.0, reset=False): method log (line 119) | def log(self, names, normalizer=1.0, reset=True): FILE: examples/community/roberta/pretraining/utils/logger.py class Logger (line 11) | class Logger: method __init__ (line 12) | def __init__(self, log_path, cuda=False, debug=False): method info (line 18) | def info(self, message, log_=True, print_=True, *args, **kwargs): method error (line 27) | def error(self, message, *args, **kwargs): FILE: examples/images/diffusion/ldm/data/base.py class Txt2ImgIterableBaseDataset (line 9) | class Txt2ImgIterableBaseDataset(IterableDataset): method __init__ (line 14) | def __init__(self, file_path: str, rank, world_size): method __len__ (line 36) | def __len__(self): method __iter__ (line 40) | def __iter__(self): method _sample_generator (line 45) | def _sample_generator(self, start, end): method _get_file_info (line 57) | def _get_file_info(self, file_path): FILE: examples/images/diffusion/ldm/data/cifar10.py function make_multi_folder_data (line 15) | def make_multi_folder_data(paths, caption_files=None, **kwargs): class FolderData (line 35) | class FolderData(Dataset): method __init__ (line 36) | def __init__( method __len__ (line 86) | def __len__(self): method __getitem__ (line 92) | def __getitem__(self, index): method process_im (line 120) | def process_im(self, im): function hf_dataset (line 125) | def hf_dataset( class TextOnly (line 171) | class TextOnly(Dataset): method __init__ (line 172) | def __init__(self, captions, output_size, image_key="image", caption_k... method __len__ (line 188) | def __len__(self): method __getitem__ (line 191) | def __getitem__(self, index): method _load_caption_file (line 196) | def _load_caption_file(self, filename): FILE: examples/images/diffusion/ldm/data/imagenet.py function synset2idx (line 23) | def synset2idx(path_to_yaml="data/index_synset.yaml"): class ImageNetBase (line 29) | class ImageNetBase(Dataset): method __init__ (line 30) | def __init__(self, config=None): method __len__ (line 42) | def __len__(self): method __getitem__ (line 45) | def __getitem__(self, i): method _prepare (line 48) | def _prepare(self): method _filter_relpaths (line 51) | def _filter_relpaths(self, relpaths): method _prepare_synset_to_human (line 71) | def _prepare_synset_to_human(self): method _prepare_idx_to_synset (line 78) | def _prepare_idx_to_synset(self): method _prepare_human_to_integer_label (line 84) | def _prepare_human_to_integer_label(self): method _load (line 97) | def _load(self): class ImageNetTrain (line 139) | class ImageNetTrain(ImageNetBase): method __init__ (line 150) | def __init__(self, process_images=True, data_root=None, **kwargs): method _prepare (line 155) | def _prepare(self): class ImageNetValidation (line 202) | class ImageNetValidation(ImageNetBase): method __init__ (line 216) | def __init__(self, process_images=True, data_root=None, **kwargs): method _prepare (line 221) | def _prepare(self): class ImageNetSR (line 276) | class ImageNetSR(Dataset): method __init__ (line 277) | def __init__(self, size=None, degradation=None, downscale_f=4, min_cro... method __len__ (line 339) | def __len__(self): method __getitem__ (line 342) | def __getitem__(self, i): class ImageNetSRTrain (line 378) | class ImageNetSRTrain(ImageNetSR): method __init__ (line 379) | def __init__(self, **kwargs): method get_base (line 382) | def get_base(self): class ImageNetSRValidation (line 391) | class ImageNetSRValidation(ImageNetSR): method __init__ (line 392) | def __init__(self, **kwargs): method get_base (line 395) | def get_base(self): FILE: examples/images/diffusion/ldm/data/lsun.py class LSUNBase (line 11) | class LSUNBase(Dataset): method __init__ (line 12) | def __init__( method __len__ (line 44) | def __len__(self): method __getitem__ (line 48) | def __getitem__(self, i): class LSUNChurchesTrain (line 84) | class LSUNChurchesTrain(LSUNBase): method __init__ (line 85) | def __init__(self, **kwargs): class LSUNChurchesValidation (line 91) | class LSUNChurchesValidation(LSUNBase): method __init__ (line 92) | def __init__(self, flip_p=0.0, **kwargs): class LSUNBedroomsTrain (line 100) | class LSUNBedroomsTrain(LSUNBase): method __init__ (line 101) | def __init__(self, **kwargs): class LSUNBedroomsValidation (line 107) | class LSUNBedroomsValidation(LSUNBase): method __init__ (line 108) | def __init__(self, flip_p=0.0, **kwargs): class LSUNCatsTrain (line 115) | class LSUNCatsTrain(LSUNBase): method __init__ (line 116) | def __init__(self, **kwargs): class LSUNCatsValidation (line 122) | class LSUNCatsValidation(LSUNBase): method __init__ (line 123) | def __init__(self, flip_p=0.0, **kwargs): FILE: examples/images/diffusion/ldm/data/teyvat.py function make_multi_folder_data (line 15) | def make_multi_folder_data(paths, caption_files=None, **kwargs): class FolderData (line 35) | class FolderData(Dataset): method __init__ (line 36) | def __init__( method __len__ (line 86) | def __len__(self): method __getitem__ (line 92) | def __getitem__(self, index): method process_im (line 120) | def process_im(self, im): function hf_dataset (line 125) | def hf_dataset( FILE: examples/images/diffusion/ldm/lr_scheduler.py class LambdaWarmUpCosineScheduler (line 4) | class LambdaWarmUpCosineScheduler: method __init__ (line 9) | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_... method schedule (line 18) | def schedule(self, n, **kwargs): method __call__ (line 33) | def __call__(self, n, **kwargs): class LambdaWarmUpCosineScheduler2 (line 37) | class LambdaWarmUpCosineScheduler2: method __init__ (line 43) | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths... method find_in_interval (line 54) | def find_in_interval(self, n): method schedule (line 61) | def schedule(self, n, **kwargs): method __call__ (line 78) | def __call__(self, n, **kwargs): class LambdaLinearScheduler (line 82) | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): method schedule (line 83) | def schedule(self, n, **kwargs): FILE: examples/images/diffusion/ldm/models/autoencoder.py class AutoencoderKL (line 12) | class AutoencoderKL(pl.LightningModule): method __init__ (line 13) | def __init__( method init_from_ckpt (line 52) | def init_from_ckpt(self, path, ignore_keys=list()): method ema_scope (line 64) | def ema_scope(self, context=None): method on_train_batch_end (line 78) | def on_train_batch_end(self, *args, **kwargs): method encode (line 82) | def encode(self, x): method decode (line 88) | def decode(self, z): method forward (line 93) | def forward(self, input, sample_posterior=True): method get_input (line 102) | def get_input(self, batch, k): method training_step (line 109) | def training_step(self, batch, batch_idx, optimizer_idx): method validation_step (line 144) | def validation_step(self, batch, batch_idx): method _validation_step (line 150) | def _validation_step(self, batch, batch_idx, postfix=""): method configure_optimizers (line 178) | def configure_optimizers(self): method get_last_layer (line 193) | def get_last_layer(self): method log_images (line 197) | def log_images(self, batch, only_inputs=False, log_ema=False, **kwargs): method to_rgb (line 222) | def to_rgb(self, x): class IdentityFirstStage (line 231) | class IdentityFirstStage(torch.nn.Module): method __init__ (line 232) | def __init__(self, *args, vq_interface=False, **kwargs): method encode (line 236) | def encode(self, x, *args, **kwargs): method decode (line 239) | def decode(self, x, *args, **kwargs): method quantize (line 242) | def quantize(self, x, *args, **kwargs): method forward (line 247) | def forward(self, x, *args, **kwargs): FILE: examples/images/diffusion/ldm/models/diffusion/classifier.py function disabled_train (line 21) | def disabled_train(self, mode=True): class NoisyLatentImageClassifier (line 27) | class NoisyLatentImageClassifier(pl.LightningModule): method __init__ (line 28) | def __init__( method init_from_ckpt (line 71) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method load_diffusion (line 90) | def load_diffusion(self): method load_classifier (line 97) | def load_classifier(self, ckpt_path, pool): method get_x_noisy (line 112) | def get_x_noisy(self, x, t, noise=None): method forward (line 123) | def forward(self, x_noisy, t, *args, **kwargs): method get_input (line 127) | def get_input(self, batch, k): method get_conditioning (line 136) | def get_conditioning(self, batch, k=None): method compute_top_k (line 153) | def compute_top_k(self, logits, labels, k, reduction="mean"): method on_train_epoch_start (line 160) | def on_train_epoch_start(self): method write_logs (line 165) | def write_logs(self, loss, logits, targets): method shared_step (line 178) | def shared_step(self, batch, t=None): method training_step (line 197) | def training_step(self, batch, batch_idx): method reset_noise_accs (line 201) | def reset_noise_accs(self): method on_validation_start (line 207) | def on_validation_start(self): method validation_step (line 211) | def validation_step(self, batch, batch_idx): method configure_optimizers (line 221) | def configure_optimizers(self): method log_images (line 236) | def log_images(self, batch, N=8, *args, **kwargs): FILE: examples/images/diffusion/ldm/models/diffusion/ddim.py class DDIMSampler (line 14) | class DDIMSampler(object): method __init__ (line 15) | def __init__(self, model, schedule="linear", **kwargs): method register_buffer (line 21) | def register_buffer(self, name, attr): method make_schedule (line 27) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi... method sample (line 65) | def sample( method ddim_sampling (line 138) | def ddim_sampling( method p_sample_ddim (line 221) | def p_sample_ddim( method encode (line 305) | def encode( method stochastic_encode (line 365) | def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): method decode (line 383) | def decode( FILE: examples/images/diffusion/ldm/models/diffusion/ddpm.py function disabled_train (line 48) | def disabled_train(self, mode=True): function uniform_on_device (line 54) | def uniform_on_device(r1, r2, shape, device): class DDPM (line 58) | class DDPM(pl.LightningModule): method __init__ (line 60) | def __init__( method register_schedule (line 178) | def register_schedule( method ema_scope (line 246) | def ema_scope(self, context=None): method init_from_ckpt (line 261) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method q_mean_variance (line 321) | def q_mean_variance(self, x_start, t): method predict_start_from_noise (line 333) | def predict_start_from_noise(self, x_t, t, noise): method predict_start_from_z_and_v (line 339) | def predict_start_from_z_and_v(self, x_t, t, v): method predict_eps_from_z_and_v (line 347) | def predict_eps_from_z_and_v(self, x_t, t, v): method q_posterior (line 353) | def q_posterior(self, x_start, x_t, t): method p_mean_variance (line 362) | def p_mean_variance(self, x, t, clip_denoised: bool): method p_sample (line 375) | def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): method p_sample_loop (line 384) | def p_sample_loop(self, shape, return_intermediates=False): method sample (line 400) | def sample(self, batch_size=16, return_intermediates=False): method q_sample (line 407) | def q_sample(self, x_start, t, noise=None): method get_v (line 414) | def get_v(self, x, noise, t): method get_loss (line 420) | def get_loss(self, pred, target, mean=True): method p_losses (line 435) | def p_losses(self, x_start, t, noise=None): method forward (line 466) | def forward(self, x, *args, **kwargs): method get_input (line 472) | def get_input(self, batch, k): method shared_step (line 483) | def shared_step(self, batch): method training_step (line 488) | def training_step(self, batch, batch_idx): method validation_step (line 511) | def validation_step(self, batch, batch_idx): method on_train_batch_end (line 519) | def on_train_batch_end(self, *args, **kwargs): method _get_rows_from_list (line 523) | def _get_rows_from_list(self, samples): method log_images (line 531) | def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=Non... method configure_optimizers (line 568) | def configure_optimizers(self): class LatentDiffusion (line 577) | class LatentDiffusion(DDPM): method __init__ (line 580) | def __init__( method configure_sharded_model (line 644) | def configure_sharded_model(self) -> None: method make_cond_schedule (line 687) | def make_cond_schedule( method on_train_batch_start (line 696) | def on_train_batch_start(self, batch, batch_idx): method register_schedule (line 717) | def register_schedule( method instantiate_first_stage (line 732) | def instantiate_first_stage(self, config): method instantiate_cond_stage (line 739) | def instantiate_cond_stage(self, config): method _get_denoise_row_from_list (line 758) | def _get_denoise_row_from_list(self, samples, desc="", force_no_decode... method get_first_stage_encoding (line 771) | def get_first_stage_encoding(self, encoder_posterior): method get_learned_conditioning (line 780) | def get_learned_conditioning(self, c): method meshgrid (line 793) | def meshgrid(self, h, w): method delta_border (line 800) | def delta_border(self, h, w): method get_weighting (line 814) | def get_weighting(self, h, w, Ly, Lx, device): method get_fold_unfold (line 835) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo... method get_input (line 894) | def get_input( method decode_first_stage (line 957) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize... method encode_first_stage (line 968) | def encode_first_stage(self, x): method shared_step (line 971) | def shared_step(self, batch, **kwargs): method forward (line 976) | def forward(self, x, c, *args, **kwargs): method apply_model (line 987) | def apply_model(self, x_noisy, t, cond, return_ids=False): method _predict_eps_from_xstart (line 1004) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart): method _prior_bpd (line 1009) | def _prior_bpd(self, x_start): method p_losses (line 1023) | def p_losses(self, x_start, cond, t, noise=None): method p_mean_variance (line 1061) | def p_mean_variance( method p_sample (line 1103) | def p_sample( method progressive_denoising (line 1152) | def progressive_denoising( method p_sample_loop (line 1240) | def p_sample_loop( method sample (line 1305) | def sample( method sample_log (line 1346) | def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs): method get_unconditional_conditioning (line 1358) | def get_unconditional_conditioning(self, batch_size, null_label=None): method log_images (line 1383) | def log_images( method configure_optimizers (line 1539) | def configure_optimizers(self): method to_rgb (line 1563) | def to_rgb(self, x): class DiffusionWrapper (line 1572) | class DiffusionWrapper(pl.LightningModule): method __init__ (line 1573) | def __init__(self, diff_model_config, conditioning_key): method forward (line 1580) | def forward(self, x, t, c_concat: list = None, c_crossattn: list = Non... class LatentUpscaleDiffusion (line 1614) | class LatentUpscaleDiffusion(LatentDiffusion): method __init__ (line 1615) | def __init__(self, *args, low_scale_config, low_scale_key="LR", noise_... method instantiate_low_stage (line 1623) | def instantiate_low_stage(self, config): method get_input (line 1631) | def get_input(self, batch, k, cond_key=None, bs=None, log_mode=False): method log_images (line 1662) | def log_images( class LatentFinetuneDiffusion (line 1780) | class LatentFinetuneDiffusion(LatentDiffusion): method __init__ (line 1786) | def __init__( method init_from_ckpt (line 1813) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method log_images (line 1847) | def log_images( class LatentInpaintDiffusion (line 1949) | class LatentInpaintDiffusion(LatentFinetuneDiffusion): method __init__ (line 1956) | def __init__(self, concat_keys=("mask", "masked_image"), masked_image_... method get_input (line 1962) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta... method log_images (line 1997) | def log_images(self, *args, **kwargs): class LatentDepth2ImageDiffusion (line 2005) | class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion): method __init__ (line 2010) | def __init__(self, depth_stage_config, concat_keys=("midas_in",), *arg... method get_input (line 2016) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta... method log_images (line 2056) | def log_images(self, *args, **kwargs): class LatentUpscaleFinetuneDiffusion (line 2066) | class LatentUpscaleFinetuneDiffusion(LatentFinetuneDiffusion): method __init__ (line 2071) | def __init__( method instantiate_low_stage (line 2083) | def instantiate_low_stage(self, config): method get_input (line 2091) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta... method log_images (line 2135) | def log_images(self, *args, **kwargs): FILE: examples/images/diffusion/ldm/models/diffusion/dpm_solver/dpm_solver.py class NoiseScheduleVP (line 7) | class NoiseScheduleVP: method __init__ (line 8) | def __init__( method marginal_log_mean_coeff (line 118) | def marginal_log_mean_coeff(self, t): method marginal_alpha (line 133) | def marginal_alpha(self, t): method marginal_std (line 139) | def marginal_std(self, t): method marginal_lambda (line 145) | def marginal_lambda(self, t): method inverse_lambda (line 153) | def inverse_lambda(self, lamb): function model_wrapper (line 182) | def model_wrapper( class DPM_Solver (line 340) | class DPM_Solver: method __init__ (line 341) | def __init__(self, model_fn, noise_schedule, predict_x0=False, thresho... method noise_prediction_fn (line 367) | def noise_prediction_fn(self, x, t): method data_prediction_fn (line 373) | def data_prediction_fn(self, x, t): method model_fn (line 388) | def model_fn(self, x, t): method get_time_steps (line 397) | def get_time_steps(self, skip_type, t_T, t_0, N, device): method get_orders_and_timesteps_for_singlestep_solver (line 427) | def get_orders_and_timesteps_for_singlestep_solver(self, steps, order,... method denoise_to_zero_fn (line 513) | def denoise_to_zero_fn(self, x, s): method dpm_solver_first_update (line 519) | def dpm_solver_first_update(self, x, s, t, model_s=None, return_interm... method singlestep_dpm_solver_second_update (line 562) | def singlestep_dpm_solver_second_update( method singlestep_dpm_solver_third_update (line 646) | def singlestep_dpm_solver_third_update( method multistep_dpm_solver_second_update (line 785) | def multistep_dpm_solver_second_update(self, x, model_prev_list, t_pre... method multistep_dpm_solver_third_update (line 845) | def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev... method singlestep_dpm_solver_update (line 896) | def singlestep_dpm_solver_update( method multistep_dpm_solver_update (line 927) | def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list,... method dpm_solver_adaptive (line 950) | def dpm_solver_adaptive( method sample (line 1012) | def sample( function interpolate_fn (line 1194) | def interpolate_fn(x, xp, yp): function expand_dims (line 1239) | def expand_dims(v, dims): FILE: examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py class DPMSolverSampler (line 10) | class DPMSolverSampler(object): method __init__ (line 11) | def __init__(self, model, **kwargs): method register_buffer (line 17) | def register_buffer(self, name, attr): method sample (line 24) | def sample( FILE: examples/images/diffusion/ldm/models/diffusion/plms.py class PLMSSampler (line 10) | class PLMSSampler(object): method __init__ (line 11) | def __init__(self, model, schedule="linear", **kwargs): method register_buffer (line 17) | def register_buffer(self, name, attr): method make_schedule (line 23) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi... method sample (line 63) | def sample( method plms_sampling (line 126) | def plms_sampling( method p_sample_plms (line 211) | def p_sample_plms( FILE: examples/images/diffusion/ldm/models/diffusion/sampling_util.py function append_dims (line 1) | def append_dims(x, target_dims): function norm_thresholding (line 10) | def norm_thresholding(x0, value): function spatial_norm_thresholding (line 15) | def spatial_norm_thresholding(x0, value): FILE: examples/images/diffusion/ldm/modules/attention.py function exists (line 20) | def exists(val): function uniq (line 24) | def uniq(arr): function default (line 28) | def default(val, d): function max_neg_value (line 34) | def max_neg_value(t): function init_ (line 38) | def init_(tensor): class GEGLU (line 46) | class GEGLU(nn.Module): method __init__ (line 47) | def __init__(self, dim_in, dim_out): method forward (line 51) | def forward(self, x): class FeedForward (line 56) | class FeedForward(nn.Module): method __init__ (line 57) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): method forward (line 65) | def forward(self, x): function zero_module (line 69) | def zero_module(module): function Normalize (line 78) | def Normalize(in_channels): class SpatialSelfAttention (line 82) | class SpatialSelfAttention(nn.Module): method __init__ (line 83) | def __init__(self, in_channels): method forward (line 93) | def forward(self, x): class CrossAttention (line 119) | class CrossAttention(nn.Module): method __init__ (line 120) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ... method forward (line 134) | def forward(self, x, context=None, mask=None): class MemoryEfficientCrossAttention (line 161) | class MemoryEfficientCrossAttention(nn.Module): method __init__ (line 163) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ... method forward (line 182) | def forward(self, x, context=None, mask=None): class BasicTransformerBlock (line 212) | class BasicTransformerBlock(nn.Module): method __init__ (line 218) | def __init__( method forward (line 250) | def forward(self, x, context=None): method _forward (line 253) | def _forward(self, x, context=None): class SpatialTransformer (line 260) | class SpatialTransformer(nn.Module): method __init__ (line 270) | def __init__( method forward (line 313) | def forward(self, x, context=None): FILE: examples/images/diffusion/ldm/modules/diffusionmodules/model.py function get_timestep_embedding (line 27) | def get_timestep_embedding(timesteps, embedding_dim): function nonlinearity (line 48) | def nonlinearity(x): function Normalize (line 53) | def Normalize(in_channels, num_groups=32): class Upsample (line 57) | class Upsample(nn.Module): method __init__ (line 58) | def __init__(self, in_channels, with_conv): method forward (line 64) | def forward(self, x): class Downsample (line 71) | class Downsample(nn.Module): method __init__ (line 72) | def __init__(self, in_channels, with_conv): method forward (line 79) | def forward(self, x): class ResnetBlock (line 89) | class ResnetBlock(nn.Module): method __init__ (line 90) | def __init__(self, *, in_channels, out_channels=None, conv_shortcut=Fa... method forward (line 110) | def forward(self, x, temb): class AttnBlock (line 133) | class AttnBlock(nn.Module): method __init__ (line 134) | def __init__(self, in_channels): method forward (line 144) | def forward(self, x): class MemoryEfficientAttnBlock (line 171) | class MemoryEfficientAttnBlock(nn.Module): method __init__ (line 179) | def __init__(self, in_channels): method forward (line 190) | def forward(self, x): class MemoryEfficientCrossAttentionWrapper (line 217) | class MemoryEfficientCrossAttentionWrapper(MemoryEfficientCrossAttention): method forward (line 218) | def forward(self, x, context=None, mask=None): function make_attn (line 226) | def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None): class Model (line 251) | class Model(nn.Module): method __init__ (line 252) | def __init__( method forward (line 360) | def forward(self, x, t=None, context=None): method get_last_layer (line 407) | def get_last_layer(self): class Encoder (line 411) | class Encoder(nn.Module): method __init__ (line 412) | def __init__( method forward (line 485) | def forward(self, x): class Decoder (line 513) | class Decoder(nn.Module): method __init__ (line 514) | def __init__( method forward (line 592) | def forward(self, z): class SimpleDecoder (line 628) | class SimpleDecoder(nn.Module): method __init__ (line 629) | def __init__(self, in_channels, out_channels, *args, **kwargs): method forward (line 645) | def forward(self, x): class UpsampleDecoder (line 658) | class UpsampleDecoder(nn.Module): method __init__ (line 659) | def __init__(self, in_channels, out_channels, ch, num_res_blocks, reso... method forward (line 688) | def forward(self, x): class LatentRescaler (line 702) | class LatentRescaler(nn.Module): method __init__ (line 703) | def __init__(self, factor, in_channels, mid_channels, out_channels, de... method forward (line 728) | def forward(self, x): class MergedRescaleEncoder (line 742) | class MergedRescaleEncoder(nn.Module): method __init__ (line 743) | def __init__( method forward (line 780) | def forward(self, x): class MergedRescaleDecoder (line 786) | class MergedRescaleDecoder(nn.Module): method __init__ (line 787) | def __init__( method forward (line 823) | def forward(self, x): class Upsampler (line 829) | class Upsampler(nn.Module): method __init__ (line 830) | def __init__(self, in_size, out_size, in_channels, out_channels, ch_mu... method forward (line 852) | def forward(self, x): class Resize (line 858) | class Resize(nn.Module): method __init__ (line 859) | def __init__(self, in_channels=None, learned=False, mode="bilinear"): method forward (line 872) | def forward(self, x, scale_factor=1.0): FILE: examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py function convert_module_to_f16 (line 22) | def convert_module_to_f16(x): function convert_module_to_f32 (line 26) | def convert_module_to_f32(x): class AttentionPool2d (line 31) | class AttentionPool2d(nn.Module): method __init__ (line 36) | def __init__( method forward (line 50) | def forward(self, x): class TimestepBlock (line 61) | class TimestepBlock(nn.Module): method forward (line 67) | def forward(self, x, emb): class TimestepEmbedSequential (line 73) | class TimestepEmbedSequential(nn.Sequential, TimestepBlock): method forward (line 79) | def forward(self, x, emb, context=None): class Upsample (line 90) | class Upsample(nn.Module): method __init__ (line 99) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd... method forward (line 108) | def forward(self, x): class TransposedUpsample (line 119) | class TransposedUpsample(nn.Module): method __init__ (line 122) | def __init__(self, channels, out_channels=None, ks=5): method forward (line 129) | def forward(self, x): class Downsample (line 133) | class Downsample(nn.Module): method __init__ (line 142) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd... method forward (line 155) | def forward(self, x): class ResBlock (line 160) | class ResBlock(TimestepBlock): method __init__ (line 176) | def __init__( method forward (line 236) | def forward(self, x, emb): method _forward (line 245) | def _forward(self, x, emb): class AttentionBlock (line 268) | class AttentionBlock(nn.Module): method __init__ (line 275) | def __init__( method forward (line 304) | def forward(self, x): method _forward (line 310) | def _forward(self, x): function count_flops_attn (line 319) | def count_flops_attn(model, _x, y): class QKVAttentionLegacy (line 339) | class QKVAttentionLegacy(nn.Module): method __init__ (line 344) | def __init__(self, n_heads): method forward (line 348) | def forward(self, qkv): method count_flops (line 365) | def count_flops(model, _x, y): class QKVAttention (line 369) | class QKVAttention(nn.Module): method __init__ (line 374) | def __init__(self, n_heads): method forward (line 378) | def forward(self, qkv): method count_flops (line 399) | def count_flops(model, _x, y): class UNetModel (line 403) | class UNetModel(nn.Module): method __init__ (line 433) | def __init__( method convert_to_fp16 (line 757) | def convert_to_fp16(self): method convert_to_fp32 (line 765) | def convert_to_fp32(self): method forward (line 773) | def forward(self, x, timesteps=None, context=None, y=None, **kwargs): FILE: examples/images/diffusion/ldm/modules/diffusionmodules/upscaling.py class AbstractLowScaleModel (line 10) | class AbstractLowScaleModel(nn.Module): method __init__ (line 12) | def __init__(self, noise_schedule_config=None): method register_schedule (line 17) | def register_schedule( method q_sample (line 46) | def q_sample(self, x_start, t, noise=None): method forward (line 53) | def forward(self, x): method decode (line 56) | def decode(self, x): class SimpleImageConcat (line 60) | class SimpleImageConcat(AbstractLowScaleModel): method __init__ (line 62) | def __init__(self): method forward (line 66) | def forward(self, x): class ImageConcatWithNoiseAugmentation (line 71) | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): method __init__ (line 72) | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cud... method forward (line 76) | def forward(self, x, noise_level=None): FILE: examples/images/diffusion/ldm/modules/diffusionmodules/util.py function make_beta_schedule (line 19) | def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_e... function make_ddim_timesteps (line 40) | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_... function make_ddim_sampling_parameters (line 57) | def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbos... function betas_for_alpha_bar (line 73) | def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.9... function extract_into_tensor (line 92) | def extract_into_tensor(a, t, x_shape): function checkpoint (line 98) | def checkpoint(func, inputs, params, flag): class CheckpointFunction (line 118) | class CheckpointFunction(torch.autograd.Function): method forward (line 120) | def forward(ctx, run_function, length, *args): method backward (line 134) | def backward(ctx, *output_grads): function timestep_embedding (line 154) | def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=Fal... function zero_module (line 177) | def zero_module(module): function scale_module (line 186) | def scale_module(module, scale): function mean_flat (line 195) | def mean_flat(tensor): function normalization (line 202) | def normalization(channels): class SiLU (line 213) | class SiLU(nn.Module): method forward (line 214) | def forward(self, x): class GroupNorm32 (line 218) | class GroupNorm32(nn.GroupNorm): method forward (line 219) | def forward(self, x): function conv_nd (line 223) | def conv_nd(dims, *args, **kwargs): function linear (line 236) | def linear(*args, **kwargs): function avg_pool_nd (line 243) | def avg_pool_nd(dims, *args, **kwargs): class HybridConditioner (line 256) | class HybridConditioner(nn.Module): method __init__ (line 257) | def __init__(self, c_concat_config, c_crossattn_config): method forward (line 262) | def forward(self, c_concat, c_crossattn): function noise_like (line 268) | def noise_like(shape, device, repeat=False): FILE: examples/images/diffusion/ldm/modules/distributions/distributions.py class AbstractDistribution (line 5) | class AbstractDistribution: method sample (line 6) | def sample(self): method mode (line 9) | def mode(self): class DiracDistribution (line 13) | class DiracDistribution(AbstractDistribution): method __init__ (line 14) | def __init__(self, value): method sample (line 17) | def sample(self): method mode (line 20) | def mode(self): class DiagonalGaussianDistribution (line 24) | class DiagonalGaussianDistribution(object): method __init__ (line 25) | def __init__(self, parameters, deterministic=False): method sample (line 35) | def sample(self): method kl (line 39) | def kl(self, other=None): method nll (line 55) | def nll(self, sample, dims=[1, 2, 3]): method mode (line 61) | def mode(self): function normal_kl (line 65) | def normal_kl(mean1, logvar1, mean2, logvar2): FILE: examples/images/diffusion/ldm/modules/ema.py class LitEma (line 5) | class LitEma(nn.Module): method __init__ (line 6) | def __init__(self, model, decay=0.9999, use_num_upates=True): method reset_num_updates (line 26) | def reset_num_updates(self): method forward (line 30) | def forward(self, model): method copy_to (line 51) | def copy_to(self, model): method store (line 60) | def store(self, parameters): method restore (line 69) | def restore(self, parameters): FILE: examples/images/diffusion/ldm/modules/encoders/modules.py class AbstractEncoder (line 9) | class AbstractEncoder(nn.Module): method __init__ (line 10) | def __init__(self): method encode (line 13) | def encode(self, *args, **kwargs): class IdentityEncoder (line 17) | class IdentityEncoder(AbstractEncoder): method encode (line 18) | def encode(self, x): class ClassEmbedder (line 22) | class ClassEmbedder(nn.Module): method __init__ (line 23) | def __init__(self, embed_dim, n_classes=1000, key="class", ucg_rate=0.1): method forward (line 30) | def forward(self, batch, key=None, disable_dropout=False): method get_unconditional_conditioning (line 42) | def get_unconditional_conditioning(self, bs, device="cuda"): function disabled_train (line 49) | def disabled_train(self, mode=True): class FrozenT5Embedder (line 55) | class FrozenT5Embedder(AbstractEncoder): method __init__ (line 58) | def __init__( method freeze (line 69) | def freeze(self): method forward (line 75) | def forward(self, text): method encode (line 91) | def encode(self, text): class FrozenCLIPEmbedder (line 95) | class FrozenCLIPEmbedder(AbstractEncoder): method __init__ (line 100) | def __init__( method freeze (line 123) | def freeze(self): method forward (line 129) | def forward(self, text): method encode (line 149) | def encode(self, text): class FrozenOpenCLIPEmbedder (line 153) | class FrozenOpenCLIPEmbedder(AbstractEncoder): method __init__ (line 164) | def __init__( method freeze (line 185) | def freeze(self): method forward (line 190) | def forward(self, text): method encode_with_transformer (line 195) | def encode_with_transformer(self, text): method text_transformer_forward (line 204) | def text_transformer_forward(self, x: torch.Tensor, attn_mask=None): method encode (line 214) | def encode(self, text): class FrozenCLIPT5Encoder (line 218) | class FrozenCLIPT5Encoder(AbstractEncoder): method __init__ (line 219) | def __init__( method encode (line 235) | def encode(self, text): method forward (line 238) | def forward(self, text): FILE: examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py function modcrop_np (line 28) | def modcrop_np(img, sf): function analytic_kernel (line 48) | def analytic_kernel(k): function anisotropic_Gaussian (line 64) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): function gm_blur_kernel (line 85) | def gm_blur_kernel(mean, cov, size=15): function shift_pixel (line 98) | def shift_pixel(x, sf, upper_left=True): function blur (line 127) | def blur(x, k): function gen_kernel (line 144) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),... function fspecial_gaussian (line 185) | def fspecial_gaussian(hsize, sigma): function fspecial_laplacian (line 199) | def fspecial_laplacian(alpha): function fspecial (line 208) | def fspecial(filter_type, *args, **kwargs): function bicubic_degradation (line 226) | def bicubic_degradation(x, sf=3): function srmd_degradation (line 238) | def srmd_degradation(x, k, sf=3): function dpsr_degradation (line 260) | def dpsr_degradation(x, k, sf=3): function classical_degradation (line 282) | def classical_degradation(x, k, sf=3): function add_sharpening (line 297) | def add_sharpening(img, weight=0.5, radius=50, threshold=10): function add_blur (line 323) | def add_blur(img, sf=4): function add_resize (line 337) | def add_resize(img, sf=4): function add_Gaussian_noise (line 368) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25): function add_speckle_noise (line 385) | def add_speckle_noise(img, noise_level1=2, noise_level2=25): function add_Poisson_noise (line 403) | def add_Poisson_noise(img): function add_JPEG_noise (line 417) | def add_JPEG_noise(img): function random_crop (line 426) | def random_crop(lq, hq, sf=4, lq_patchsize=64): function degradation_bsrgan (line 437) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None): function degradation_bsrgan_variant (line 532) | def degradation_bsrgan_variant(image, sf=4, isp_model=None): function degradation_bsrgan_plus (line 623) | def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True,... FILE: examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py function modcrop_np (line 28) | def modcrop_np(img, sf): function analytic_kernel (line 48) | def analytic_kernel(k): function anisotropic_Gaussian (line 64) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): function gm_blur_kernel (line 85) | def gm_blur_kernel(mean, cov, size=15): function shift_pixel (line 98) | def shift_pixel(x, sf, upper_left=True): function blur (line 127) | def blur(x, k): function gen_kernel (line 144) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),... function fspecial_gaussian (line 185) | def fspecial_gaussian(hsize, sigma): function fspecial_laplacian (line 199) | def fspecial_laplacian(alpha): function fspecial (line 208) | def fspecial(filter_type, *args, **kwargs): function bicubic_degradation (line 226) | def bicubic_degradation(x, sf=3): function srmd_degradation (line 238) | def srmd_degradation(x, k, sf=3): function dpsr_degradation (line 260) | def dpsr_degradation(x, k, sf=3): function classical_degradation (line 282) | def classical_degradation(x, k, sf=3): function add_sharpening (line 297) | def add_sharpening(img, weight=0.5, radius=50, threshold=10): function add_blur (line 323) | def add_blur(img, sf=4): function add_resize (line 341) | def add_resize(img, sf=4): function add_Gaussian_noise (line 372) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25): function add_speckle_noise (line 389) | def add_speckle_noise(img, noise_level1=2, noise_level2=25): function add_Poisson_noise (line 407) | def add_Poisson_noise(img): function add_JPEG_noise (line 421) | def add_JPEG_noise(img): function random_crop (line 430) | def random_crop(lq, hq, sf=4, lq_patchsize=64): function degradation_bsrgan (line 441) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None): function degradation_bsrgan_variant (line 536) | def degradation_bsrgan_variant(image, sf=4, isp_model=None, up=False): FILE: examples/images/diffusion/ldm/modules/image_degradation/utils_image.py function is_image_file (line 31) | def is_image_file(filename): function get_timestamp (line 35) | def get_timestamp(): function imshow (line 39) | def imshow(x, title=None, cbar=False, figsize=None): function surf (line 49) | def surf(Z, cmap="rainbow", figsize=None): function get_image_paths (line 69) | def get_image_paths(dataroot): function _get_paths_from_images (line 76) | def _get_paths_from_images(path): function patches_from_image (line 95) | def patches_from_image(img, p_size=512, p_overlap=64, p_max=800): function imssave (line 114) | def imssave(imgs, img_path): function split_imageset (line 127) | def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_si... function mkdir (line 156) | def mkdir(path): function mkdirs (line 161) | def mkdirs(paths): function mkdir_and_rename (line 169) | def mkdir_and_rename(path): function imread_uint (line 188) | def imread_uint(path, n_channels=3): function imsave (line 206) | def imsave(img, img_path): function imwrite (line 213) | def imwrite(img, img_path): function read_img (line 223) | def read_img(path): function uint2single (line 252) | def uint2single(img): function single2uint (line 256) | def single2uint(img): function uint162single (line 260) | def uint162single(img): function single2uint16 (line 264) | def single2uint16(img): function uint2tensor4 (line 274) | def uint2tensor4(img): function uint2tensor3 (line 281) | def uint2tensor3(img): function tensor2uint (line 288) | def tensor2uint(img): function single2tensor3 (line 301) | def single2tensor3(img): function single2tensor4 (line 306) | def single2tensor4(img): function tensor2single (line 311) | def tensor2single(img): function tensor2single3 (line 320) | def tensor2single3(img): function single2tensor5 (line 329) | def single2tensor5(img): function single32tensor5 (line 333) | def single32tensor5(img): function single42tensor4 (line 337) | def single42tensor4(img): function tensor2img (line 342) | def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)): function augment_img (line 379) | def augment_img(img, mode=0): function augment_img_tensor4 (line 399) | def augment_img_tensor4(img, mode=0): function augment_img_tensor (line 419) | def augment_img_tensor(img, mode=0): function augment_img_np3 (line 437) | def augment_img_np3(img, mode=0): function augment_imgs (line 465) | def augment_imgs(img_list, hflip=True, rot=True): function modcrop (line 490) | def modcrop(img_in, scale): function shave (line 506) | def shave(img_in, border=0): function rgb2ycbcr (line 525) | def rgb2ycbcr(img, only_y=True): function ycbcr2rgb (line 550) | def ycbcr2rgb(img): function bgr2ycbcr (line 571) | def bgr2ycbcr(img, only_y=True): function channel_convert (line 596) | def channel_convert(in_c, tar_type, img_list): function calculate_psnr (line 620) | def calculate_psnr(img1, img2, border=0): function calculate_ssim (line 641) | def calculate_ssim(img1, img2, border=0): function ssim (line 668) | def ssim(img1, img2): function cubic (line 698) | def cubic(x): function calculate_weights_indices (line 707) | def calculate_weights_indices(in_length, out_length, scale, kernel, kern... function imresize (line 766) | def imresize(img, scale, antialiasing=True): function imresize_np (line 841) | def imresize_np(img, scale, antialiasing=True): FILE: examples/images/diffusion/ldm/modules/midas/api.py function disabled_train (line 20) | def disabled_train(self, mode=True): function load_midas_transform (line 26) | def load_midas_transform(model_type): function load_model (line 71) | def load_model(model_type): class MiDaSInference (line 137) | class MiDaSInference(nn.Module): method __init__ (line 146) | def __init__(self, model_type): method forward (line 153) | def forward(self, x): FILE: examples/images/diffusion/ldm/modules/midas/midas/base_model.py class BaseModel (line 4) | class BaseModel(torch.nn.Module): method load (line 5) | def load(self, path): FILE: examples/images/diffusion/ldm/modules/midas/midas/blocks.py function _make_encoder (line 7) | def _make_encoder( function _make_scratch (line 51) | def _make_scratch(in_shape, out_shape, groups=1, expand=False): function _make_pretrained_efficientnet_lite3 (line 80) | def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): function _make_efficientnet_backbone (line 87) | def _make_efficientnet_backbone(effnet): function _make_resnet_backbone (line 98) | def _make_resnet_backbone(resnet): function _make_pretrained_resnext101_wsl (line 109) | def _make_pretrained_resnext101_wsl(use_pretrained): class Interpolate (line 114) | class Interpolate(nn.Module): method __init__ (line 117) | def __init__(self, scale_factor, mode, align_corners=False): method forward (line 131) | def forward(self, x): class ResidualConvUnit (line 146) | class ResidualConvUnit(nn.Module): method __init__ (line 149) | def __init__(self, features): method forward (line 163) | def forward(self, x): class FeatureFusionBlock (line 180) | class FeatureFusionBlock(nn.Module): method __init__ (line 183) | def __init__(self, features): method forward (line 194) | def forward(self, *xs): class ResidualConvUnit_custom (line 212) | class ResidualConvUnit_custom(nn.Module): method __init__ (line 215) | def __init__(self, features, activation, bn): method forward (line 239) | def forward(self, x): class FeatureFusionBlock_custom (line 267) | class FeatureFusionBlock_custom(nn.Module): method __init__ (line 270) | def __init__(self, features, activation, deconv=False, bn=False, expan... method forward (line 295) | def forward(self, *xs): FILE: examples/images/diffusion/ldm/modules/midas/midas/dpt_depth.py function _make_fusion_block (line 9) | def _make_fusion_block(features, use_bn): class DPT (line 20) | class DPT(BaseModel): method __init__ (line 21) | def __init__( method forward (line 59) | def forward(self, x): class DPTDepthModel (line 80) | class DPTDepthModel(DPT): method __init__ (line 81) | def __init__(self, path=None, non_negative=True, **kwargs): method forward (line 99) | def forward(self, x): FILE: examples/images/diffusion/ldm/modules/midas/midas/midas_net.py class MidasNet (line 13) | class MidasNet(BaseModel): method __init__ (line 16) | def __init__(self, path=None, features=256, non_negative=True): method forward (line 51) | def forward(self, x): FILE: examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py class MidasNet_small (line 13) | class MidasNet_small(BaseModel): method __init__ (line 16) | def __init__( method forward (line 90) | def forward(self, x): function fuse_model (line 123) | def fuse_model(m): FILE: examples/images/diffusion/ldm/modules/midas/midas/transforms.py function apply_min_size (line 7) | def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AR... class Resize (line 45) | class Resize(object): method __init__ (line 48) | def __init__( method constrain_to_multiple_of (line 90) | def constrain_to_multiple_of(self, x, min_val=0, max_val=None): method get_size (line 101) | def get_size(self, width, height): method __call__ (line 148) | def __call__(self, sample): class NormalizeImage (line 179) | class NormalizeImage(object): method __init__ (line 182) | def __init__(self, mean, std): method __call__ (line 186) | def __call__(self, sample): class PrepareForNet (line 192) | class PrepareForNet(object): method __init__ (line 195) | def __init__(self): method __call__ (line 198) | def __call__(self, sample): FILE: examples/images/diffusion/ldm/modules/midas/midas/vit.py class Slice (line 10) | class Slice(nn.Module): method __init__ (line 11) | def __init__(self, start_index=1): method forward (line 15) | def forward(self, x): class AddReadout (line 19) | class AddReadout(nn.Module): method __init__ (line 20) | def __init__(self, start_index=1): method forward (line 24) | def forward(self, x): class ProjectReadout (line 32) | class ProjectReadout(nn.Module): method __init__ (line 33) | def __init__(self, in_features, start_index=1): method forward (line 39) | def forward(self, x): class Transpose (line 46) | class Transpose(nn.Module): method __init__ (line 47) | def __init__(self, dim0, dim1): method forward (line 52) | def forward(self, x): function forward_vit (line 57) | def forward_vit(pretrained, x): function _resize_pos_embed (line 101) | def _resize_pos_embed(self, posemb, gs_h, gs_w): function forward_flex (line 118) | def forward_flex(self, x): function get_activation (line 154) | def get_activation(name): function get_readout_oper (line 161) | def get_readout_oper(vit_features, features, use_readout, start_index=1): function _make_vit_b16_backbone (line 174) | def _make_vit_b16_backbone( function _make_pretrained_vitl16_384 (line 286) | def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=... function _make_pretrained_vitb16_384 (line 299) | def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=... function _make_pretrained_deitb16_384 (line 306) | def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks... function _make_pretrained_deitb16_distil_384 (line 313) | def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore"... function _make_vit_b_rn50_backbone (line 326) | def _make_vit_b_rn50_backbone( function _make_pretrained_vitb_rn50_384 (line 451) | def _make_pretrained_vitb_rn50_384(pretrained, use_readout="ignore", hoo... FILE: examples/images/diffusion/ldm/modules/midas/utils.py function read_pfm (line 11) | def read_pfm(path): function write_pfm (line 59) | def write_pfm(path, image, scale=1): function read_image (line 96) | def read_image(path): function resize_image (line 115) | def resize_image(img): function resize_depth (line 143) | def resize_depth(depth, width, height): function write_depth (line 161) | def write_depth(path, depth, bits=1): FILE: examples/images/diffusion/ldm/util.py function log_txt_as_img (line 10) | def log_txt_as_img(wh, xc, size=10): function ismap (line 34) | def ismap(x): function isimage (line 40) | def isimage(x): function exists (line 46) | def exists(x): function default (line 50) | def default(val, d): function mean_flat (line 56) | def mean_flat(tensor): function count_params (line 64) | def count_params(model, verbose=False): function instantiate_from_config (line 71) | def instantiate_from_config(config): function get_obj_from_str (line 81) | def get_obj_from_str(string, reload=False): class AdamWwithEMAandWings (line 89) | class AdamWwithEMAandWings(optim.Optimizer): method __init__ (line 91) | def __init__( method __setstate__ (line 128) | def __setstate__(self, state): method step (line 134) | def step(self, closure=None): FILE: examples/images/diffusion/main.py class DataLoaderX (line 34) | class DataLoaderX(DataLoader): method __iter__ (line 36) | def __iter__(self): function get_parser (line 42) | def get_parser(**parser_kwargs): function nondefault_trainer_args (line 161) | def nondefault_trainer_args(opt): class WrappedDataset (line 173) | class WrappedDataset(Dataset): method __init__ (line 176) | def __init__(self, dataset): method __len__ (line 179) | def __len__(self): method __getitem__ (line 182) | def __getitem__(self, idx): function worker_init_fn (line 187) | def worker_init_fn(_): class DataModuleFromConfig (line 207) | class DataModuleFromConfig(pl.LightningDataModule): method __init__ (line 208) | def __init__( method prepare_data (line 242) | def prepare_data(self): method setup (line 247) | def setup(self, stage=None): method _train_dataloader (line 256) | def _train_dataloader(self): method _val_dataloader (line 273) | def _val_dataloader(self, shuffle=False): method _test_dataloader (line 288) | def _test_dataloader(self, shuffle=False): method _predict_dataloader (line 308) | def _predict_dataloader(self, shuffle=False): class SetupCallback (line 318) | class SetupCallback(Callback): method __init__ (line 321) | def __init__(self, resume, now, logdir, ckptdir, cfgdir, config, light... method on_keyboard_interrupt (line 332) | def on_keyboard_interrupt(self, trainer, pl_module): method on_fit_start (line 340) | def on_fit_start(self, trainer, pl_module): class ImageLogger (line 383) | class ImageLogger(Callback): method __init__ (line 384) | def __init__( method _testtube (line 415) | def _testtube( method log_local (line 432) | def log_local( method log_img (line 456) | def log_img(self, pl_module, batch, batch_idx, split="train"): method check_frequency (line 500) | def check_frequency(self, check_idx): method on_train_batch_end (line 512) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch... method on_validation_batch_end (line 518) | def on_validation_batch_end(self, trainer, pl_module, outputs, batch, ... class CUDACallback (line 527) | class CUDACallback(Callback): method on_train_start (line 530) | def on_train_start(self, trainer, pl_module): method on_train_end (line 534) | def on_train_end(self, trainer, pl_module): method on_train_epoch_start (line 537) | def on_train_epoch_start(self, trainer, pl_module): method on_train_epoch_end (line 543) | def on_train_epoch_end(self, trainer, pl_module): function melk (line 839) | def melk(*args, **kwargs): function divein (line 846) | def divein(*args, **kwargs): FILE: examples/images/diffusion/scripts/img2img.py function chunk (line 30) | def chunk(it, size): function load_model_from_config (line 35) | def load_model_from_config(config, ckpt, verbose=False): function load_img (line 54) | def load_img(path): function main (line 66) | def main(): FILE: examples/images/diffusion/scripts/inpaint.py function make_batch (line 14) | def make_batch(image, mask, device): FILE: examples/images/diffusion/scripts/knn2img.py function chunk (line 36) | def chunk(it, size): function load_model_from_config (line 41) | def load_model_from_config(config, ckpt, verbose=False): class Searcher (line 61) | class Searcher(object): method __init__ (line 62) | def __init__(self, database, retriever_version="ViT-L/14"): method train_searcher (line 73) | def train_searcher(self, k, metric="dot_product", searcher_savedir=None): method load_single_file (line 86) | def load_single_file(self, saved_embeddings): method load_multi_files (line 91) | def load_multi_files(self, data_archive): method load_database (line 99) | def load_database(self): method load_retriever (line 119) | def load_retriever( method load_searcher (line 129) | def load_searcher(self): method search (line 134) | def search(self, x, k): method __call__ (line 164) | def __call__(self, x, n): FILE: examples/images/diffusion/scripts/sample_diffusion.py function custom_to_pil (line 20) | def custom_to_pil(x): function custom_to_np (line 32) | def custom_to_np(x): function logs2pil (line 41) | def logs2pil(logs, keys=["sample"]): function convsample (line 59) | def convsample(model, shape, return_intermediates=True, verbose=True, ma... function convsample_ddim (line 67) | def convsample_ddim(model, steps, shape, eta=1.0): function make_convolutional_sample (line 82) | def make_convolutional_sample( function run (line 116) | def run(model, logdir, batch_size=50, vanilla=False, custom_steps=None, ... function save_logs (line 150) | def save_logs(logs, path, n_saved=0, key="sample", np_path=None): function get_parser (line 169) | def get_parser(): function load_model_from_config (line 202) | def load_model_from_config(config, sd): function load_model (line 210) | def load_model(config, ckpt, gpu, eval_mode): FILE: examples/images/diffusion/scripts/tests/test_watermark.py function testit (line 6) | def testit(img_path): FILE: examples/images/diffusion/scripts/train_searcher.py function search_bruteforce (line 13) | def search_bruteforce(searcher): function search_partioned_ah (line 17) | def search_partioned_ah( function search_ah (line 30) | def search_ah(searcher, dims_per_block, aiq_threshold, reorder_k): function load_datapool (line 36) | def load_datapool(dpath): function train_searcher (line 71) | def train_searcher( FILE: examples/images/diffusion/scripts/txt2img.py function chunk (line 32) | def chunk(it, size): function load_model_from_config (line 37) | def load_model_from_config(config, ckpt, verbose=False): function parse_args (line 56) | def parse_args(): function put_watermark (line 184) | def put_watermark(img, wm_encoder=None): function main (line 192) | def main(opt): FILE: examples/images/diffusion/scripts/utils.py class Linear8bit (line 6) | class Linear8bit(nn.Linear): method __init__ (line 7) | def __init__( method quant (line 31) | def quant(self): method forward (line 40) | def forward(self, x): function replace_module (line 54) | def replace_module(model): function getModelSize (line 70) | def getModelSize(model): FILE: examples/images/dreambooth/train_dreambooth.py function import_model_class_from_model_name_or_path (line 27) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_... function parse_args (line 47) | def parse_args(input_args=None): class DreamBoothDataset (line 240) | class DreamBoothDataset(Dataset): method __init__ (line 246) | def __init__( method __len__ (line 288) | def __len__(self): method __getitem__ (line 291) | def __getitem__(self, index): class PromptDataset (line 319) | class PromptDataset(Dataset): method __init__ (line 322) | def __init__(self, prompt, num_samples): method __len__ (line 326) | def __len__(self): method __getitem__ (line 329) | def __getitem__(self, index): function get_full_repo_name (line 336) | def get_full_repo_name(model_id: str, organization: Optional[str] = None... function main (line 346) | def main(args): FILE: examples/images/dreambooth/train_dreambooth_colossalai.py function import_model_class_from_model_name_or_path (line 33) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_... function parse_args (line 53) | def parse_args(input_args=None): class DreamBoothDataset (line 264) | class DreamBoothDataset(Dataset): method __init__ (line 270) | def __init__( method __len__ (line 315) | def __len__(self): method __getitem__ (line 318) | def __getitem__(self, index): class PromptDataset (line 346) | class PromptDataset(Dataset): method __init__ (line 349) | def __init__(self, prompt, num_samples): method __len__ (line 353) | def __len__(self): method __getitem__ (line 356) | def __getitem__(self, index): function get_full_repo_name (line 363) | def get_full_repo_name(model_id: str, organization: Optional[str] = None... function main (line 373) | def main(args): FILE: examples/images/dreambooth/train_dreambooth_colossalai_lora.py function import_model_class_from_model_name_or_path (line 36) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_... function parse_args (line 56) | def parse_args(input_args=None): class DreamBoothDataset (line 266) | class DreamBoothDataset(Dataset): method __init__ (line 272) | def __init__( method __len__ (line 314) | def __len__(self): method __getitem__ (line 317) | def __getitem__(self, index): class PromptDataset (line 345) | class PromptDataset(Dataset): method __init__ (line 348) | def __init__(self, prompt, num_samples): method __len__ (line 352) | def __len__(self): method __getitem__ (line 355) | def __getitem__(self, index): function get_full_repo_name (line 362) | def get_full_repo_name(model_id: str, organization: Optional[str] = None... function main (line 372) | def main(args): FILE: examples/images/dreambooth/train_dreambooth_inpaint.py function prepare_mask_and_masked_image (line 35) | def prepare_mask_and_masked_image(image, mask): function random_mask (line 53) | def random_mask(im_shape, ratio=1, mask_full_image=False): function parse_args (line 77) | def parse_args(): class DreamBoothDataset (line 255) | class DreamBoothDataset(Dataset): method __init__ (line 261) | def __init__( method __len__ (line 303) | def __len__(self): method __getitem__ (line 306) | def __getitem__(self, index): class PromptDataset (line 338) | class PromptDataset(Dataset): method __init__ (line 341) | def __init__(self, prompt, num_samples): method __len__ (line 345) | def __len__(self): method __getitem__ (line 348) | def __getitem__(self, index): function get_full_repo_name (line 355) | def get_full_repo_name(model_id: str, organization: Optional[str] = None... function main (line 365) | def main(): FILE: examples/images/resnet/train.py function build_dataloader (line 30) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug... function evaluate (line 54) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:... function train_epoch (line 73) | def train_epoch( function main (line 100) | def main(): FILE: examples/images/vit/args.py function parse_demo_args (line 4) | def parse_demo_args(): function parse_benchmark_args (line 54) | def parse_benchmark_args(): FILE: examples/images/vit/data.py class BeansDataset (line 6) | class BeansDataset(Dataset): method __init__ (line 7) | def __init__(self, image_processor, tp_size=1, split="train"): method __len__ (line 20) | def __len__(self): method __getitem__ (line 23) | def __getitem__(self, idx): method process_example (line 26) | def process_example(self, example): function beans_collator (line 32) | def beans_collator(batch): FILE: examples/images/vit/vit_benchmark.py function format_num (line 17) | def format_num(num: int, bytes=False): function get_data_batch (line 27) | def get_data_batch(batch_size, num_labels, num_channels=3, height=224, w... function colo_memory_cap (line 35) | def colo_memory_cap(size_in_GB): function main (line 45) | def main(): FILE: examples/images/vit/vit_train_demo.py function move_to_cuda (line 24) | def move_to_cuda(batch, device): function run_forward_backward (line 28) | def run_forward_backward( function train_epoch (line 54) | def train_epoch( function evaluate_model (line 89) | def evaluate_model( function main (line 136) | def main(): FILE: examples/inference/benchmark_ops/benchmark_context_attn_unpad.py function bench_kernel (line 38) | def bench_kernel( FILE: examples/inference/benchmark_ops/benchmark_decoding_attn.py function bench_kernel (line 44) | def bench_kernel( FILE: examples/inference/benchmark_ops/benchmark_flash_decoding_attention.py function prepare_data (line 43) | def prepare_data( function benchmark_flash_decoding_attention (line 70) | def benchmark_flash_decoding_attention( FILE: examples/inference/benchmark_ops/benchmark_fused_rotary_embdding_unpad.py function benchmark_rotary_emb (line 49) | def benchmark_rotary_emb( FILE: examples/inference/benchmark_ops/benchmark_kv_cache_memcopy.py function benchmark_kvcache_copy (line 39) | def benchmark_kvcache_copy( FILE: examples/inference/benchmark_ops/benchmark_rmsnorm.py function benchmark_rms_layernorm (line 44) | def benchmark_rms_layernorm( FILE: examples/inference/benchmark_ops/benchmark_rotary_embedding.py function torch_rotary_emb (line 26) | def torch_rotary_emb(x, cos, sin): function benchmark_rotary_emb (line 38) | def benchmark_rotary_emb( FILE: examples/inference/benchmark_ops/benchmark_xine_copy.py function benchmark_get_xine_cache (line 29) | def benchmark_get_xine_cache( FILE: examples/inference/client/locustfile.py class QuickstartUser (line 4) | class QuickstartUser(HttpUser): method completion (line 9) | def completion(self): method completion_streaming (line 14) | def completion_streaming(self): method chat (line 19) | def chat(self): method chat_streaming (line 33) | def chat_streaming(self): method generate_streaming (line 48) | def generate_streaming(self): method generate (line 53) | def generate(self): method health_check (line 58) | def health_check(self): FILE: examples/inference/llama/benchmark_llama.py function data_gen (line 73) | def data_gen(batch_size: int = 4, seq_len: int = 512): function print_details_info (line 78) | def print_details_info(model_config, args, whole_end2end, total_token_num): function benchmark_inference (line 104) | def benchmark_inference(args): function hybrid_inference (line 233) | def hybrid_inference(rank, world_size, port, args): function benchmark (line 240) | def benchmark(args): FILE: examples/inference/llama/benchmark_llama3.py function data_gen (line 80) | def data_gen(batch_size: int = 4, seq_len: int = 512): function print_details_info (line 85) | def print_details_info(model_config, whole_end2end, total_token_num, dty... function benchmark_inference (line 111) | def benchmark_inference(args): function inference (line 194) | def inference(rank, world_size, port, args): function benchmark (line 201) | def benchmark(args): FILE: examples/inference/llama/llama_generation.py function infer (line 23) | def infer(args): FILE: examples/inference/stable_diffusion/benchmark_sd3.py function log_generation_time (line 25) | def log_generation_time(log_data, log_file): function warmup (line 31) | def warmup(engine, args): function profile_context (line 41) | def profile_context(args): function log_and_profile (line 57) | def log_and_profile(h, w, avg_time, log_msg, args, model_name, mode, pro... function benchmark_colossalai (line 83) | def benchmark_colossalai(rank, world_size, port, args): function benchmark_diffusers (line 117) | def benchmark_diffusers(args): function benchmark (line 144) | def benchmark(args): FILE: examples/inference/stable_diffusion/compute_metric.py function read_image (line 15) | def read_image(path: str): class MultiImageDataset (line 27) | class MultiImageDataset(Dataset): method __init__ (line 28) | def __init__(self, root0, root1, is_gt=False): method __len__ (line 40) | def __len__(self): method __getitem__ (line 43) | def __getitem__(self, idx): FILE: examples/inference/stable_diffusion/sd3_generation.py function infer (line 23) | def infer(args): FILE: examples/language/bert/benchmark.py class RandintDataset (line 34) | class RandintDataset(Dataset): method __init__ (line 35) | def __init__(self, dataset_length: int, sequence_length: int, vocab_si... method __len__ (line 51) | def __len__(self): method __getitem__ (line 54) | def __getitem__(self, idx): function main (line 58) | def main(): FILE: examples/language/bert/benchmark_utils.py function format_num (line 20) | def format_num(num: int, bytes=False): function _is_valid (line 30) | def _is_valid(val): function get_call_arg_names (line 34) | def get_call_arg_names(module_or_fn): function measure_params (line 40) | def measure_params(model): function warm_up (line 51) | def warm_up( function fmt (line 74) | def fmt(d: dict): function benchmark (line 78) | def benchmark( FILE: examples/language/bert/data.py class GLUEDataBuilder (line 7) | class GLUEDataBuilder: method __init__ (line 44) | def __init__( method setup (line 67) | def setup(self): method prepare_data (line 81) | def prepare_data(self): method train_dataloader (line 85) | def train_dataloader(self): method val_dataloader (line 90) | def val_dataloader(self): method test_dataloader (line 100) | def test_dataloader(self): method convert_to_features (line 109) | def convert_to_features(self, example_batch): FILE: examples/language/bert/finetune.py function move_to_cuda (line 40) | def move_to_cuda(batch): function evaluate_model (line 45) | def evaluate_model( function train_epoch (line 128) | def train_epoch( function main (line 171) | def main(): FILE: examples/language/commons/utils.py function get_data (line 5) | def get_data(batch_size, seq_len, vocab_size): function get_tflops (line 11) | def get_tflops(model_numel, batch_size, seq_len, step_time): FILE: examples/language/data_utils.py class StatefulDistributedSampler (line 14) | class StatefulDistributedSampler(DistributedSampler): method __init__ (line 15) | def __init__( method __iter__ (line 27) | def __iter__(self) -> Iterator: method __len__ (line 33) | def __len__(self) -> int: method set_start_index (line 36) | def set_start_index(self, start_index: int) -> None: function prepare_dataloader (line 40) | def prepare_dataloader( function load_json (line 97) | def load_json(file_path: str): function save_json (line 102) | def save_json(data, file_path: str): class RandomDataset (line 107) | class RandomDataset(Dataset): method __init__ (line 108) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo... method __len__ (line 116) | def __len__(self): method __getitem__ (line 119) | def __getitem__(self, idx): FILE: examples/language/deepseek/benchmark.py function main (line 75) | def main(): FILE: examples/language/gpt/experiments/auto_offload/model_zoo.py class GPTLMModel (line 6) | class GPTLMModel(nn.Module): method __init__ (line 7) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads... method forward (line 20) | def forward(self, input_ids, attention_mask): class GPTLMLoss (line 25) | class GPTLMLoss(nn.Module): method __init__ (line 26) | def __init__(self): method forward (line 30) | def forward(self, logits, labels): function get_gpt2_components (line 37) | def get_gpt2_components(model_type: str, batch_size: int): FILE: examples/language/gpt/experiments/auto_offload/train_gpt_offload.py function parse_args (line 19) | def parse_args(): function train_gpt (line 29) | def train_gpt(args): function run (line 96) | def run(rank, world_size, port, args): FILE: examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py function get_cpu_mem (line 24) | def get_cpu_mem(): function get_gpu_mem (line 28) | def get_gpu_mem(): function get_mem_info (line 32) | def get_mem_info(prefix=""): function get_tflops (line 36) | def get_tflops(model_numel, batch_size, seq_len, step_time): function get_data (line 42) | def get_data(batch_size, seq_len, vocab_size): function main (line 48) | def main(): FILE: examples/language/gpt/experiments/auto_parallel/gpt_modules.py class GPT2MLP (line 10) | class GPT2MLP(nn.Module): method __init__ (line 11) | def __init__(self, intermediate_size, config): method forward (line 19) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -... class GPT2Attention (line 31) | class GPT2Attention(nn.Module): method __init__ (line 32) | def __init__(self, config, layer_idx=None): method _attn (line 62) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _split_heads (line 92) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 97) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 102) | def forward( class GPT2Block (line 117) | class GPT2Block(nn.Module): method __init__ (line 118) | def __init__(self, config, layer_idx=None): method forward (line 127) | def forward( class GPT2Model (line 151) | class GPT2Model(GPT2PreTrainedModel): method __init__ (line 152) | def __init__(self, config): method forward (line 167) | def forward( class GPT2LMHeadModel (line 213) | class GPT2LMHeadModel(GPT2PreTrainedModel): method __init__ (line 214) | def __init__(self, config): method forward (line 222) | def forward( class GPTLMLoss (line 236) | class GPTLMLoss(nn.Module): method __init__ (line 237) | def __init__(self): method forward (line 241) | def forward(self, logits, labels): FILE: examples/language/gpt/experiments/pipeline_parallel/model_zoo.py class GPTLMModel (line 6) | class GPTLMModel(nn.Module): method __init__ (line 7) | def __init__( method forward (line 30) | def forward(self, input_ids, attention_mask): function gpt2_medium (line 35) | def gpt2_medium(checkpoint=False): function gpt2_xl (line 39) | def gpt2_xl(checkpoint=True): function gpt2_10b (line 43) | def gpt2_10b(checkpoint=True): function gpt2_14b (line 47) | def gpt2_14b(checkpoint=True): function gpt2_20b (line 51) | def gpt2_20b(checkpoint=True): function gpt2_24b (line 55) | def gpt2_24b(checkpoint=True): function model_builder (line 59) | def model_builder(model_size: str) -> callable: FILE: examples/language/gpt/experiments/pipeline_parallel/train_gpt_pp.py function parse_args (line 18) | def parse_args(): class GPTLMLoss (line 33) | class GPTLMLoss(nn.Module): method __init__ (line 34) | def __init__(self): method forward (line 38) | def forward(self, logits, labels): function get_data (line 46) | def get_data(batch_size, seq_len, vocab_size): function get_tflops (line 52) | def get_tflops(model_numel, batch_size, seq_len, step_time): function get_annotated_model (line 57) | def get_annotated_model(model, data_kwargs, num_stages, num_microbatches): function create_partition_module (line 73) | def create_partition_module(pp_rank: int, num_stages: int, model, data_k... function partition (line 83) | def partition(model, data_kwargs, num_microbatches, pp_rank: int, chunk:... function run_master (line 88) | def run_master(args): FILE: examples/language/gpt/gemini/commons/model_zoo.py class GPTLMModel (line 6) | class GPTLMModel(nn.Module): method __init__ (line 7) | def __init__( method forward (line 30) | def forward(self, input_ids, attention_mask): function gpt2_medium (line 35) | def gpt2_medium(checkpoint=False): function gpt2_xl (line 39) | def gpt2_xl(checkpoint=True): function gpt2_10b (line 43) | def gpt2_10b(checkpoint=True): function gpt2_14b (line 47) | def gpt2_14b(checkpoint=True): function gpt2_20b (line 51) | def gpt2_20b(checkpoint=True): function gpt2_24b (line 55) | def gpt2_24b(checkpoint=True): function gpt2_30b (line 59) | def gpt2_30b(checkpoint=True): function gpt2_40b (line 63) | def gpt2_40b(checkpoint=True): function model_builder (line 67) | def model_builder(model_size: str) -> callable: FILE: examples/language/gpt/gemini/commons/utils.py class DummyProfiler (line 6) | class DummyProfiler: method __init__ (line 7) | def __init__(self): method step (line 10) | def step(self): function get_data (line 15) | def get_data(batch_size, seq_len, vocab_size): function get_tflops (line 21) | def get_tflops(model_numel, batch_size, seq_len, step_time): function get_time_stamp (line 25) | def get_time_stamp(): FILE: examples/language/gpt/gemini/train_gpt_demo.py function parse_args (line 26) | def parse_args(): class GPTLMLoss (line 57) | class GPTLMLoss(nn.Module): method __init__ (line 58) | def __init__(self): method forward (line 62) | def forward(self, logits, labels): function get_cpu_mem (line 69) | def get_cpu_mem(): function get_gpu_mem (line 73) | def get_gpu_mem(): function get_mem_info (line 77) | def get_mem_info(prefix=""): function get_model_size (line 81) | def get_model_size(model: nn.Module): function model_size_formatter (line 89) | def model_size_formatter(numel: int) -> str: function set_cpu_maximum_parallelism (line 103) | def set_cpu_maximum_parallelism(): function main (line 111) | def main(): FILE: examples/language/gpt/hybridparallelism/benchmark.py function main (line 35) | def main(): FILE: examples/language/gpt/hybridparallelism/data.py class GLUEDataBuilder (line 7) | class GLUEDataBuilder: method __init__ (line 44) | def __init__( method setup (line 69) | def setup(self): method prepare_data (line 83) | def prepare_data(self): method train_dataloader (line 87) | def train_dataloader(self): method val_dataloader (line 92) | def val_dataloader(self): method test_dataloader (line 101) | def test_dataloader(self): method convert_to_features (line 110) | def convert_to_features(self, example_batch): FILE: examples/language/gpt/hybridparallelism/finetune.py function move_to_cuda (line 37) | def move_to_cuda(batch): function evaluate_model (line 42) | def evaluate_model( function train_epoch (line 124) | def train_epoch( function main (line 170) | def main(): FILE: examples/language/gpt/titans/dataset/webtext.py class WebtextDataset (line 13) | class WebtextDataset(Dataset): method __init__ (line 14) | def __init__(self, path: Optional[str] = None, seq_len=1024) -> None: method __len__ (line 38) | def __len__(self): method __getitem__ (line 41) | def __getitem__(self, index): FILE: examples/language/gpt/titans/model/embed.py class VocabParallelEmbedding (line 18) | class VocabParallelEmbedding(torch.nn.Module): method __init__ (line 32) | def __init__( method zero_parameters (line 65) | def zero_parameters(self): method add_tokentype_embeddings (line 75) | def add_tokentype_embeddings(self, num_tokentypes): method forward (line 89) | def forward(self, input_ids, position_ids=None, tokentype_ids=None): method state_dict_for_save_checkpoint (line 112) | def state_dict_for_save_checkpoint(self, destination=None, prefix="", ... method load_state_dict (line 125) | def load_state_dict(self, state_dict, strict=True): class VocabParallelEmbedding1D (line 168) | class VocabParallelEmbedding1D(torch.nn.Module): method __init__ (line 179) | def __init__(self, num_embeddings, embedding_dim, dtype=None, init_met... method forward (line 203) | def forward(self, input_): class vocab_parallel_cross_entropy (line 231) | class vocab_parallel_cross_entropy(nn.Module): method __init__ (line 232) | def __init__(self): method forward (line 235) | def forward(self, vocab_parallel_logits, target): class _VocabParallelCrossEntropy (line 244) | class _VocabParallelCrossEntropy(torch.autograd.Function): method forward (line 246) | def forward(ctx, vocab_parallel_logits, target): method backward (line 299) | def backward(ctx, grad_output): class VocabUtility (line 319) | class VocabUtility: method vocab_range_from_per_partition_vocab_size (line 325) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size... method vocab_range_from_global_vocab_size (line 331) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_... class VocabParallelGPTLMHead1D (line 336) | class VocabParallelGPTLMHead1D(ParallelLayer): method __init__ (line 341) | def __init__(self, embed=None, vocab_size=None, dtype=None, embed_dim=... method forward (line 348) | def forward(self, x: Tensor) -> Tensor: class HiddenParallelEmbedding (line 357) | class HiddenParallelEmbedding(torch.nn.Module): method __init__ (line 371) | def __init__( method zero_parameters (line 411) | def zero_parameters(self): method add_tokentype_embeddings (line 421) | def add_tokentype_embeddings(self, num_tokentypes): method forward (line 435) | def forward(self, input_ids, position_ids=None, tokentype_ids=None): method state_dict_for_save_checkpoint (line 457) | def state_dict_for_save_checkpoint(self, destination=None, prefix="", ... method load_state_dict (line 470) | def load_state_dict(self, state_dict, strict=True): class HiddenParallelEmbedding1D (line 513) | class HiddenParallelEmbedding1D(torch.nn.Module): method __init__ (line 524) | def __init__(self, num_embeddings, embedding_dim, dtype=torch.float, p... method forward (line 543) | def forward(self, input_): class HiddenParallelGPTLMHead1D (line 555) | class HiddenParallelGPTLMHead1D(ParallelLayer): method __init__ (line 560) | def __init__( method forward (line 579) | def forward(self, x: Tensor) -> Tensor: FILE: examples/language/gpt/titans/model/gpt1d.py class GPTMLP1D (line 29) | class GPTMLP1D(ParallelLayer): method __init__ (line 30) | def __init__( method _forward (line 69) | def _forward(self, hidden_states: Tensor) -> Tensor: method _checkpoint_forward (line 77) | def _checkpoint_forward(self, hidden_states: Tensor) -> Tensor: method forward (line 80) | def forward(self, hidden_states: Tensor) -> Tensor: class GenericGPTSelfAttention1D (line 87) | class GenericGPTSelfAttention1D(ParallelLayer): method __init__ (line 88) | def __init__( method softmax_forward (line 118) | def softmax_forward(self, attention_scores, attention_mask, query_laye... method _forward (line 121) | def _forward(self, hidden_states: Tensor, attention_mask=None) -> Tensor: method _checkpoint_forward (line 148) | def _checkpoint_forward(self, hidden_states: Tensor, attention_mask=No... method forward (line 151) | def forward(self, hidden_states: Tensor, attention_mask=None) -> Tensor: class GPTSelfAttention1D (line 158) | class GPTSelfAttention1D(GenericGPTSelfAttention1D): method __init__ (line 159) | def __init__( method softmax_forward (line 188) | def softmax_forward(self, attention_scores, attention_mask, query_laye... class FusedGPTSelfAttention1D (line 201) | class FusedGPTSelfAttention1D(GenericGPTSelfAttention1D): method __init__ (line 202) | def __init__( method softmax_forward (line 231) | def softmax_forward(self, attention_scores, attention_mask, query_laye... class GenericGPTTransformerLayer1D (line 235) | class GenericGPTTransformerLayer1D(ParallelLayer): method __init__ (line 236) | def __init__( method _forward (line 277) | def _forward(self, hidden_states, attention_mask) -> Tensor: method forward (line 297) | def forward(self, hidden_states, attention_mask): class GPTTransformerLayer1D (line 304) | class GPTTransformerLayer1D(GenericGPTTransformerLayer1D): method __init__ (line 305) | def __init__( class FusedGPTTransformerLayer1D (line 338) | class FusedGPTTransformerLayer1D(GenericGPTTransformerLayer1D): method __init__ (line 339) | def __init__( FILE: examples/language/gpt/titans/model/pipeline_gpt1d.py class GenericPipelineGPT (line 29) | class GenericPipelineGPT(nn.Module): method __init__ (line 30) | def __init__(self, embedding=None, blocks=None, norm=None, head=None) ... method forward (line 40) | def forward(self, hidden_states=None, input_ids=None, attention_mask=N... class PipelineGPT1D (line 55) | class PipelineGPT1D(GenericPipelineGPT): method __init__ (line 56) | def __init__( class FusedPipelineGPT1D (line 110) | class FusedPipelineGPT1D(GenericPipelineGPT): method __init__ (line 111) | def __init__( method forward (line 164) | def forward(self, hidden_states=None, input_ids=None, attention_mask=N... class PipelineGPTHybrid (line 175) | class PipelineGPTHybrid(GenericPipelineGPT): method __init__ (line 176) | def __init__( function _filter_kwargs (line 228) | def _filter_kwargs(func, kwargs): function _build_generic_gpt_pipeline_1d (line 233) | def _build_generic_gpt_pipeline_1d(module_cls, num_layers, num_chunks, d... function _build_gpt_pipeline_1d (line 275) | def _build_gpt_pipeline_1d(num_layers, num_chunks, device=torch.device("... function _build_gpt_pipeline_hybrid (line 280) | def _build_gpt_pipeline_hybrid(num_layers, num_chunks, device=torch.devi... function GPT2_small_pipeline_1D (line 284) | def GPT2_small_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch.f... function GPT2_exlarge_pipeline_1D (line 295) | def GPT2_exlarge_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch... function GPT3_pipeline_1D (line 306) | def GPT3_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch.float, ... function GPT2_exlarge_pipeline_hybrid (line 318) | def GPT2_exlarge_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=t... function GPT2_small_pipeline_hybrid (line 329) | def GPT2_small_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=tor... function GPT3_pipeline_hybrid (line 340) | def GPT3_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=torch.flo... FILE: examples/language/gpt/titans/train_gpt.py function calc_local_model_size (line 22) | def calc_local_model_size(model: torch.nn.Module): function main (line 32) | def main(): FILE: examples/language/grok-1/grok1_policy.py class Grok1Policy (line 9) | class Grok1Policy(Policy): method config_sanity_check (line 10) | def config_sanity_check(self): method preprocess (line 13) | def preprocess(self) -> nn.Module: method module_policy (line 20) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... method postprocess (line 79) | def postprocess(self): class Grok1ModelPolicy (line 83) | class Grok1ModelPolicy(Grok1Policy): class Grok1ForCausalLMPolicy (line 87) | class Grok1ForCausalLMPolicy(Grok1Policy): method module_policy (line 88) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes... FILE: examples/language/grok-1/utils.py class Bcolors (line 6) | class Bcolors: function print_output (line 18) | def print_output(text, output): function inference (line 23) | def inference(model, tokenizer, text, **generate_kwargs): function get_default_parser (line 36) | def get_default_parser(): FILE: examples/language/llama/benchmark.py function main (line 62) | def main(): FILE: examples/language/mixtral/benchmark.py function main (line 55) | def main(): FILE: examples/language/model_utils.py function low_precision_init (line 8) | def low_precision_init(target_dtype: torch.dtype = torch.float16): function get_model_numel (line 17) | def get_model_numel(model: nn.Module) -> int: function format_numel_str (line 21) | def format_numel_str(numel: int) -> str: FILE: examples/language/opt/args.py function parse_demo_args (line 4) | def parse_demo_args(): function parse_benchmark_args (line 41) | def parse_benchmark_args(): FILE: examples/language/opt/data.py class NetflixDataset (line 6) | class NetflixDataset(Dataset): method __init__ (line 7) | def __init__(self, tokenizer): method __len__ (line 26) | def __len__(self): method __getitem__ (line 29) | def __getitem__(self, idx): function netflix_collator (line 33) | def netflix_collator(data): FILE: examples/language/opt/opt_benchmark.py function format_num (line 23) | def format_num(num: int, bytes=False): function get_data (line 33) | def get_data(batch_size, seq_len, vocab_size): function colo_memory_cap (line 39) | def colo_memory_cap(size_in_GB): function main (line 48) | def main(): FILE: examples/language/opt/opt_train_demo.py function move_to_cuda (line 28) | def move_to_cuda(batch, device): function train_epoch (line 32) | def train_epoch(epoch, model, optimizer, _criterion, lr_scheduler, datal... function main (line 67) | def main(): FILE: examples/language/palm/palm_pytorch/autoregressive_wrapper.py function exists (line 9) | def exists(val): function eval_decorator (line 13) | def eval_decorator(fn): function top_k (line 27) | def top_k(logits, thres=0.9): class AutoregressiveWrapper (line 35) | class AutoregressiveWrapper(nn.Module): method __init__ (line 36) | def __init__(self, net, max_seq_len=2048, pad_value=0): method generate (line 44) | def generate(self, start_tokens, seq_len, eos_token=None, temperature=... method forward (line 72) | def forward(self, x, **kwargs): FILE: examples/language/palm/palm_pytorch/palm_pytorch.py class LayerNorm (line 10) | class LayerNorm(nn.Module): method __init__ (line 11) | def __init__(self, dim, eps=1e-5): method forward (line 17) | def forward(self, x): class ParallelResidual (line 25) | class ParallelResidual(nn.Module): method __init__ (line 26) | def __init__(self, *fns): method forward (line 30) | def forward(self, x): class RotaryEmbedding (line 38) | class RotaryEmbedding(nn.Module): method __init__ (line 39) | def __init__(self, dim): method forward (line 44) | def forward(self, max_seq_len, *, device): function rotate_half (line 53) | def rotate_half(x): function apply_rotary_pos_emb (line 59) | def apply_rotary_pos_emb(pos, t): class SwiGLU (line 68) | class SwiGLU(nn.Module): method forward (line 69) | def forward(self, x): function FeedForward (line 74) | def FeedForward(dim, mult=4): class Attention (line 85) | class Attention(nn.Module): method __init__ (line 86) | def __init__(self, dim, dim_head=64, heads=8): method get_mask (line 103) | def get_mask(self, n, device): method get_rotary_embedding (line 111) | def get_rotary_embedding(self, n, device): method forward (line 119) | def forward(self, x): function PaLM (line 189) | def PaLM(*, dim, num_tokens, depth, dim_head=64, heads=8, ff_mult=4): FILE: examples/language/palm/train.py function parse_args (line 36) | def parse_args(): function cycle (line 75) | def cycle(loader): function decode_token (line 81) | def decode_token(token): function get_tflops (line 85) | def get_tflops(model_numel, batch_size, seq_len, step_time): function decode_tokens (line 89) | def decode_tokens(tokens): function get_model_size (line 93) | def get_model_size(model: nn.Module): function generate_dataset (line 109) | def generate_dataset(dummy_data: bool = False): class TextSamplerDataset (line 127) | class TextSamplerDataset(Dataset): method __init__ (line 128) | def __init__(self, data, seq_len): method __getitem__ (line 133) | def __getitem__(self, index): method __len__ (line 138) | def __len__(self): FILE: examples/language/performance_evaluator.py function divide (line 13) | def divide(x: float, y: float) -> float: function all_reduce_mean (line 22) | def all_reduce_mean(x: float, world_size: int) -> float: function get_profile_context (line 39) | def get_profile_context(enable_flag, warmup_steps, active_steps, save_di... class Timer (line 88) | class Timer: method __init__ (line 89) | def __init__(self) -> None: method start (line 93) | def start(self) -> None: method end (line 96) | def end(self) -> None: method reset (line 101) | def reset(self) -> None: class PerformanceEvaluator (line 105) | class PerformanceEvaluator: method __init__ (line 117) | def __init__( method on_step_start (line 142) | def on_step_start(self, step: int) -> None: method on_step_end (line 149) | def on_step_end(self, input_ids: Tensor, **kwargs) -> None: method on_fit_end (line 166) | def on_fit_end(self) -> None: FILE: examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py function _benchmark (line 14) | def _benchmark(rank, world_size, port): function auto_activation_checkpoint_batchsize_benchmark (line 49) | def auto_activation_checkpoint_batchsize_benchmark(): FILE: examples/tutorial/auto_parallel/auto_ckpt_solver_test.py function _benchmark (line 14) | def _benchmark(rank, world_size, port, args): function auto_activation_checkpoint_benchmark (line 71) | def auto_activation_checkpoint_benchmark(args): FILE: examples/tutorial/auto_parallel/auto_parallel_with_resnet.py function synthesize_data (line 13) | def synthesize_data(): function main (line 19) | def main(): FILE: examples/tutorial/auto_parallel/bench_utils.py function bench (line 14) | def bench( function bench_rotor (line 58) | def bench_rotor( class GPTLMModel (line 97) | class GPTLMModel(nn.Module): method __init__ (line 102) | def __init__( method forward (line 126) | def forward(self, input_ids, attention_mask): class GPTLMLoss (line 131) | class GPTLMLoss(nn.Module): method __init__ (line 136) | def __init__(self): method forward (line 140) | def forward(self, logits, labels): function gpt2_medium (line 147) | def gpt2_medium(checkpoint=False): function gpt2_xl (line 151) | def gpt2_xl(checkpoint=False): function gpt2_6b (line 155) | def gpt2_6b(checkpoint=False): function data_gen_gpt2 (line 159) | def data_gen_gpt2(batch_size, seq_len, vocab_size, device="cuda:0"): function data_gen_resnet (line 168) | def data_gen_resnet(batch_size, shape, device="cuda:0"): FILE: examples/tutorial/download_cifar10.py function main (line 6) | def main(): FILE: examples/tutorial/hybrid_parallel/train.py class DummyDataloader (line 17) | class DummyDataloader: method __init__ (line 18) | def __init__(self, length, batch_size): method generate (line 22) | def generate(self): method __iter__ (line 27) | def __iter__(self): method __next__ (line 31) | def __next__(self): method __len__ (line 38) | def __len__(self): function main (line 42) | def main(): FILE: examples/tutorial/large_batch_optimizer/train.py class DummyDataloader (line 13) | class DummyDataloader: method __init__ (line 14) | def __init__(self, length, batch_size): method generate (line 18) | def generate(self): method __iter__ (line 23) | def __iter__(self): method __next__ (line 27) | def __next__(self): method __len__ (line 34) | def __len__(self): function main (line 38) | def main(): FILE: examples/tutorial/new_api/cifar_resnet/train.py function build_dataloader (line 30) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug... function evaluate (line 54) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:... function train_epoch (line 73) | def train_epoch( function main (line 100) | def main(): FILE: examples/tutorial/new_api/cifar_vit/train.py function vit_cifar (line 32) | def vit_cifar(**kwargs): function build_dataloader (line 39) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug... function evaluate (line 74) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:... function train_epoch (line 93) | def train_epoch( function main (line 120) | def main(): FILE: examples/tutorial/new_api/glue_bert/data.py class GLUEDataBuilder (line 7) | class GLUEDataBuilder: method __init__ (line 44) | def __init__( method setup (line 67) | def setup(self): method prepare_data (line 81) | def prepare_data(self): method train_dataloader (line 85) | def train_dataloader(self): method val_dataloader (line 90) | def val_dataloader(self): method test_dataloader (line 99) | def test_dataloader(self): method convert_to_features (line 108) | def convert_to_features(self, example_batch): FILE: examples/tutorial/new_api/glue_bert/finetune.py function move_to_cuda (line 31) | def move_to_cuda(batch): function evaluate (line 36) | def evaluate( function train_epoch (line 81) | def train_epoch( function main (line 108) | def main(): FILE: examples/tutorial/opt/inference/batch.py class BatchManagerForGeneration (line 7) | class BatchManagerForGeneration(BatchManager): method __init__ (line 8) | def __init__(self, max_batch_size: int = 1, pad_token_id: int = 0) -> ... method _left_padding (line 13) | def _left_padding(self, batch_inputs): method _make_batch_key (line 28) | def _make_batch_key(entry: SubmitEntry) -> tuple: method make_batch (line 32) | def make_batch(self, q: Deque[SubmitEntry]) -> Tuple[TaskEntry, dict]: method split_batch (line 56) | def split_batch(self, task_entry: TaskEntry, trunc_lens: List[int] = [... FILE: examples/tutorial/opt/inference/benchmark/locustfile.py class GenerationUser (line 4) | class GenerationUser(HttpUser): method generate (line 6) | def generate(self): FILE: examples/tutorial/opt/inference/cache.py class MissCacheError (line 7) | class MissCacheError(Exception): class ListCache (line 11) | class ListCache: method __init__ (line 12) | def __init__(self, cache_size: int, list_size: int, fixed_keys: List[H... method get (line 29) | def get(self, key: Hashable) -> List[Any]: method add (line 42) | def add(self, key: Hashable, value: Any) -> None: method lock (line 59) | def lock(self): FILE: examples/tutorial/opt/inference/opt_fastapi.py class GenerationTaskReq (line 16) | class GenerationTaskReq(BaseModel): function generate (line 31) | async def generate(data: GenerationTaskReq, request: Request): function shutdown (line 60) | async def shutdown(*_): function get_model_fn (line 67) | def get_model_fn(model_name: str): function print_args (line 72) | def print_args(args: argparse.Namespace): FILE: examples/tutorial/opt/inference/opt_server.py class GenerationTaskReq (line 19) | class GenerationTaskReq(BaseModel): function generate (line 36) | async def generate(request: Request, body: GenerationTaskReq): function shutdown (line 66) | def shutdown(*_): function get_model_fn (line 70) | def get_model_fn(model_name: str): function print_args (line 75) | def print_args(args: argparse.Namespace): FILE: examples/tutorial/opt/inference/script/process-opt-175b/convert_ckpt.py function load_json (line 11) | def load_json(path: str): function parse_shape_info (line 16) | def parse_shape_info(flat_dir: str): function convert (line 31) | def convert(flat_dir: str, output_dir: str, part: int): FILE: examples/tutorial/opt/opt/context.py class barrier_context (line 7) | class barrier_context: method __init__ (line 20) | def __init__(self, executor_rank: int = 0, parallel_mode: ParallelMode... method __enter__ (line 26) | def __enter__(self): method __exit__ (line 30) | def __exit__(self, exc_type, exc_value, exc_traceback): FILE: examples/tutorial/opt/opt/run_clm.py function get_time_stamp (line 69) | def get_time_stamp(): function parse_args (line 74) | def parse_args(): function colo_memory_cap (line 251) | def colo_memory_cap(size_in_GB): class DummyDataloader (line 260) | class DummyDataloader: method __init__ (line 261) | def __init__(self, length, batch_size, seq_len, vocab_size): method generate (line 267) | def generate(self): method __iter__ (line 274) | def __iter__(self): method __next__ (line 278) | def __next__(self): method __len__ (line 285) | def __len__(self): function main (line 289) | def main(): FILE: examples/tutorial/sequence_parallel/data/__init__.py function cyclic_iter (line 12) | def cyclic_iter(iter): function build_train_valid_test_data_iterators (line 18) | def build_train_valid_test_data_iterators( FILE: examples/tutorial/sequence_parallel/data/bert_helper.py function _build_key_size_numel_dictionaries (line 9) | def _build_key_size_numel_dictionaries(keys, data): function broadcast_data (line 53) | def broadcast_data(keys, data, datatype): function get_batch (line 92) | def get_batch(data_iterator): function get_batch_for_sequence_parallel (line 117) | def get_batch_for_sequence_parallel(data_iterator): class SequenceParallelDataIterator (line 153) | class SequenceParallelDataIterator: method __init__ (line 154) | def __init__(self, data_iter): method __iter__ (line 157) | def __iter__(self): method __next__ (line 160) | def __next__(self): FILE: examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py class BertDataset (line 43) | class BertDataset(Dataset): method __init__ (line 44) | def __init__( method __len__ (line 89) | def __len__(self): method __getitem__ (line 92) | def __getitem__(self, idx): function get_samples_mapping_ (line 115) | def get_samples_mapping_( function build_training_sample (line 198) | def build_training_sample( FILE: examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py class BlendableDataset (line 24) | class BlendableDataset(torch.utils.data.Dataset): method __init__ (line 25) | def __init__(self, datasets, weights): method __len__ (line 58) | def __len__(self): method __getitem__ (line 61) | def __getitem__(self, idx): FILE: examples/tutorial/sequence_parallel/data/datasets/builder.py function _build_train_valid_test_datasets (line 14) | def _build_train_valid_test_datasets( function build_train_valid_test_datasets (line 108) | def build_train_valid_test_datasets( FILE: examples/tutorial/sequence_parallel/data/datasets/data_samplers.py function build_pretraining_data_loader (line 24) | def build_pretraining_data_loader(dataset, consumed_samples, micro_batch... class MegatronPretrainingSampler (line 54) | class MegatronPretrainingSampler: method __init__ (line 55) | def __init__( method __len__ (line 79) | def __len__(self): method get_start_end_idx (line 82) | def get_start_end_idx(self): method __iter__ (line 87) | def __iter__(self): class MegatronPretrainingRandomSampler (line 103) | class MegatronPretrainingRandomSampler: method __init__ (line 104) | def __init__(self, total_samples, consumed_samples, micro_batch_size, ... method __len__ (line 124) | def __len__(self): method __iter__ (line 127) | def __iter__(self): FILE: examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py function get_datasets_weights_and_num_samples (line 38) | def get_datasets_weights_and_num_samples(data_prefix, train_valid_test_n... function compile_helper (line 67) | def compile_helper(): function get_a_and_b_segments (line 82) | def get_a_and_b_segments(sample, np_rng): function truncate_segments (line 114) | def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens, ... function create_tokens_and_tokentypes (line 134) | def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id): function is_start_piece (line 164) | def is_start_piece(piece): function create_masked_lm_predictions (line 173) | def create_masked_lm_predictions( function pad_and_convert_to_numpy (line 362) | def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions, maske... function build_train_valid_test_datasets (line 393) | def build_train_valid_test_datasets( function _build_train_valid_test_datasets (line 464) | def _build_train_valid_test_datasets( function get_indexed_dataset_ (line 571) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): function get_train_valid_test_split_ (line 590) | def get_train_valid_test_split_(splits_string, size): FILE: examples/tutorial/sequence_parallel/data/datasets/helpers.cpp function build_blending_indices (line 35) | void build_blending_indices(py::array_t& dataset_index, function build_sample_idx (line 95) | py::array build_sample_idx(const py::array_t& sizes_, function get_target_sample_len (line 182) | inline int32_t get_target_sample_len(const int32_t short_seq_ratio, function build_mapping_impl (line 197) | py::array build_mapping_impl(const py::array_t& docs_, function build_mapping (line 425) | py::array build_mapping(const py::array_t& docs_, function build_blocks_mapping_impl (line 449) | py::array build_blocks_mapping_impl( function build_blocks_mapping (line 663) | py::array build_blocks_mapping( function PYBIND11_MODULE (line 685) | PYBIND11_MODULE(helpers, m) { FILE: examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py function make_attention_mask (line 11) | def make_attention_mask(source_block, target_block): function get_ict_dataset (line 23) | def get_ict_dataset(use_titles=True, query_in_block_prob=1): class ICTDataset (line 48) | class ICTDataset(Dataset): method __init__ (line 51) | def __init__( method __len__ (line 95) | def __len__(self): method __getitem__ (line 98) | def __getitem__(self, idx): method get_block (line 146) | def get_block(self, start_idx, end_idx, doc_idx): method get_null_block (line 156) | def get_null_block(self): method concat_and_pad_tokens (line 163) | def concat_and_pad_tokens(self, tokens, title=None): FILE: examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py function __best_fitting_dtype (line 22) | def __best_fitting_dtype(vocab_size=None): function get_available_dataset_impl (line 29) | def get_available_dataset_impl(): function infer_dataset_impl (line 33) | def infer_dataset_impl(path): function make_builder (line 49) | def make_builder(out_file, impl, vocab_size=None): function make_dataset (line 56) | def make_dataset(path, impl, skip_warmup=False): function dataset_exists (line 73) | def dataset_exists(path, impl): function read_longs (line 80) | def read_longs(f, n): function write_longs (line 86) | def write_longs(f, a): function code (line 93) | def code(dtype): function index_file_path (line 100) | def index_file_path(prefix_path): function data_file_path (line 104) | def data_file_path(prefix_path): function create_doc_idx (line 108) | def create_doc_idx(sizes): class IndexedDataset (line 116) | class IndexedDataset(torch.utils.data.Dataset): method __init__ (line 121) | def __init__(self, path): method read_index (line 127) | def read_index(self, path): method read_data (line 144) | def read_data(self, path): method check_index (line 147) | def check_index(self, i): method __del__ (line 151) | def __del__(self): method __getitem__ (line 156) | def __getitem__(self, idx): method __len__ (line 180) | def __len__(self): method num_tokens (line 183) | def num_tokens(self, index): method size (line 186) | def size(self, index): method exists (line 190) | def exists(path): method supports_prefetch (line 194) | def supports_prefetch(self): class IndexedCachedDataset (line 198) | class IndexedCachedDataset(IndexedDataset): method __init__ (line 199) | def __init__(self, path): method supports_prefetch (line 205) | def supports_prefetch(self): method prefetch (line 208) | def prefetch(self, indices): method __getitem__ (line 233) | def __getitem__(self, idx): class IndexedDatasetBuilder (line 250) | class IndexedDatasetBuilder(object): method __init__ (line 253) | def __init__(self, out_file, dtype=np.int32): method add_item (line 262) | def add_item(self, tensor): method end_document (line 269) | def end_document(self): method merge_file_ (line 272) | def merge_file_(self, another_file): method finalize (line 292) | def finalize(self, index_file): function _warmup_mmap_file (line 307) | def _warmup_mmap_file(path): class MMapIndexedDataset (line 313) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 314) | class Index(object): method writer (line 318) | def writer(cls, path, dtype): method __init__ (line 363) | def __init__(self, path, skip_warmup=False): method __del__ (line 400) | def __del__(self): method dtype (line 405) | def dtype(self): method sizes (line 409) | def sizes(self): method doc_idx (line 413) | def doc_idx(self): method __getitem__ (line 417) | def __getitem__(self, i): method __len__ (line 420) | def __len__(self): method __init__ (line 423) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 432) | def __getstate__(self): method __setstate__ (line 435) | def __setstate__(self, state): method _do_init (line 438) | def _do_init(self, path, skip_warmup): method __del__ (line 450) | def __del__(self): method __len__ (line 455) | def __len__(self): method __getitem__ (line 459) | def __getitem__(self, idx): method get (line 476) | def get(self, idx, offset=0, length=None): method sizes (line 490) | def sizes(self): method doc_idx (line 494) | def doc_idx(self): method get_doc_idx (line 497) | def get_doc_idx(self): method set_doc_idx (line 500) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 504) | def supports_prefetch(self): method exists (line 508) | def exists(path): class MMapIndexedDatasetBuilder (line 512) | class MMapIndexedDatasetBuilder(object): method __init__ (line 513) | def __init__(self, out_file, dtype=np.int64): method add_item (line 519) | def add_item(self, tensor): method end_document (line 524) | def end_document(self): method merge_file_ (line 527) | def merge_file_(self, another_file): method finalize (line 539) | def finalize(self, index_file): FILE: examples/tutorial/sequence_parallel/data/datasets/test/test_indexed_dataset.py function test_indexed_dataset (line 16) | def test_indexed_dataset(args): function test_indexed_dataset_get (line 42) | def test_indexed_dataset_get(args): function main (line 82) | def main(): FILE: examples/tutorial/sequence_parallel/data/dummy_dataloader.py class DummyDataloader (line 4) | class DummyDataloader: method __init__ (line 5) | def __init__(self, batch_size, vocab_size, seq_length): method generate (line 11) | def generate(self): method __iter__ (line 48) | def __iter__(self): method __next__ (line 51) | def __next__(self): FILE: examples/tutorial/sequence_parallel/data/tokenizer/__init__.py function initialize_tokenizer (line 23) | def initialize_tokenizer(vocab_file, tokenizer_type, vocab_extra_ids=0): function get_tokenizer (line 30) | def get_tokenizer(): function get_padded_vocab_size (line 35) | def get_padded_vocab_size(): FILE: examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py function validate_case_matches_checkpoint (line 27) | def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): function convert_to_unicode (line 76) | def convert_to_unicode(text): function printable_text (line 96) | def printable_text(text): function load_vocab (line 119) | def load_vocab(vocab_file): function convert_by_vocab (line 134) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 142) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 146) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 150) | def whitespace_tokenize(text): class FullTokenizer (line 159) | class FullTokenizer(object): method __init__ (line 162) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 168) | def tokenize(self, text): method convert_tokens_to_ids (line 176) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 179) | def convert_ids_to_tokens(self, ids): method convert_tokens_to_string (line 183) | def convert_tokens_to_string(tokens, clean_up_tokenization_spaces=True): method vocab_size (line 211) | def vocab_size(self): class BasicTokenizer (line 215) | class BasicTokenizer(object): method __init__ (line 218) | def __init__(self, do_lower_case=True): method tokenize (line 226) | def tokenize(self, text): method _run_strip_accents (line 250) | def _run_strip_accents(self, text): method _run_split_on_punc (line 261) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 281) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 294) | def _is_chinese_char(self, cp): method _clean_text (line 318) | def _clean_text(self, text): class WordpieceTokenizer (line 332) | class WordpieceTokenizer(object): method __init__ (line 335) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 340) | def tokenize(self, text): function _is_whitespace (line 394) | def _is_whitespace(char): function _is_control (line 406) | def _is_control(char): function _is_punctuation (line 418) | def _is_punctuation(char): FILE: examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py function build_tokenizer (line 25) | def build_tokenizer(vocab_file, tokenizer_type, vocab_extra_ids=0): function _vocab_size_with_padding (line 44) | def _vocab_size_with_padding(orig_vocab_size, make_vocab_size_divisible_... class AbstractTokenizer (line 65) | class AbstractTokenizer(ABC): method __init__ (line 68) | def __init__(self, name): method vocab_size (line 74) | def vocab_size(self): method vocab (line 79) | def vocab(self): method inv_vocab (line 84) | def inv_vocab(self): method tokenize (line 88) | def tokenize(self, text): method detokenize (line 91) | def detokenize(self, token_ids): method cls (line 95) | def cls(self): method sep (line 99) | def sep(self): method pad (line 103) | def pad(self): method eod (line 107) | def eod(self): method mask (line 111) | def mask(self): class _BertWordPieceTokenizer (line 115) | class _BertWordPieceTokenizer(AbstractTokenizer): method __init__ (line 118) | def __init__(self, vocab_file, lower_case=True, vocab_extra_ids=0): method add_token (line 147) | def add_token(self, token): method add_additional_special_tokens (line 154) | def add_additional_special_tokens(self, tokens_list): method vocab_size (line 160) | def vocab_size(self): method vocab (line 164) | def vocab(self): method inv_vocab (line 168) | def inv_vocab(self): method tokenize (line 171) | def tokenize(self, text): method decode (line 175) | def decode(self, ids): method decode_token_ids (line 179) | def decode_token_ids(self, token_ids): method cls (line 194) | def cls(self): method sep (line 198) | def sep(self): method pad (line 202) | def pad(self): method mask (line 206) | def mask(self): method bos_token (line 210) | def bos_token(self): method eos_token (line 215) | def eos_token(self): method additional_special_tokens (line 220) | def additional_special_tokens(self): method bos_token_id (line 225) | def bos_token_id(self): method eos_token_id (line 230) | def eos_token_id(self): method additional_special_tokens_ids (line 235) | def additional_special_tokens_ids(self): method additional_special_tokens (line 240) | def additional_special_tokens(self, value): FILE: examples/tutorial/sequence_parallel/loss_func/bert_loss.py class BertLoss (line 9) | class BertLoss(nn.Module): method forward (line 10) | def forward(self, lm_loss, sop_logits, loss_mask, sentence_order): FILE: examples/tutorial/sequence_parallel/loss_func/cross_entropy.py class _VocabCrossEntropy (line 5) | class _VocabCrossEntropy(torch.autograd.Function): method forward (line 8) | def forward(ctx, vocab_parallel_logits, target): method backward (line 47) | def backward(ctx, grad_output): function vocab_cross_entropy (line 67) | def vocab_cross_entropy(vocab_logits, target): FILE: examples/tutorial/sequence_parallel/loss_func/utils.py function ensure_divisibility (line 4) | def ensure_divisibility(numerator, denominator): function divide (line 9) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 16) | def split_tensor_along_last_dim(tensor, num_partitions, contiguous_split... class VocabUtility (line 36) | class VocabUtility: method vocab_range_from_per_partition_vocab_size (line 42) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size... method vocab_range_from_global_vocab_size (line 48) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_... FILE: examples/tutorial/sequence_parallel/lr_scheduler/annealing_lr.py class AnnealingLR (line 21) | class AnnealingLR(object): method __init__ (line 24) | def __init__( method get_lr (line 59) | def get_lr(self): method step (line 92) | def step(self, increment=1): method state_dict (line 99) | def state_dict(self): method _check_and_set (line 110) | def _check_and_set(self, cls_value, sd_value, name): method load_state_dict (line 122) | def load_state_dict(self, sd): FILE: examples/tutorial/sequence_parallel/model/bert.py class BertForPretrain (line 18) | class BertForPretrain(nn.Module): method __init__ (line 19) | def __init__( method _init_normal (line 75) | def _init_normal(self, tensor): method _output_init_normal (line 78) | def _output_init_normal(self, tensor): method reset_parameters (line 81) | def reset_parameters(self): method forward (line 102) | def forward(self, input_ids, attention_masks, tokentype_ids, lm_labels): class PipelineBertForPretrain (line 129) | class PipelineBertForPretrain(nn.Module): method __init__ (line 130) | def __init__( method _init_normal (line 201) | def _init_normal(self, tensor): method _output_init_normal (line 204) | def _output_init_normal(self, tensor): method reset_parameters (line 207) | def reset_parameters(self): method forward (line 230) | def forward(self, input_ids, attention_masks, tokentype_ids, lm_labels): function _filter_kwargs (line 266) | def _filter_kwargs(func, kwargs): function build_pipeline_bert (line 271) | def build_pipeline_bert(num_layers, num_chunks, device=torch.device("cud... FILE: examples/tutorial/sequence_parallel/model/layers/bert_layer.py function attention_mask_func (line 12) | def attention_mask_func(attention_scores, attention_mask): class BertLayer (line 17) | class BertLayer(nn.Module): method __init__ (line 23) | def __init__( method forward (line 66) | def forward(self, hidden_states, attention_mask): FILE: examples/tutorial/sequence_parallel/model/layers/dropout.py function bias_dropout_add (line 4) | def bias_dropout_add(x, bias, residual, prob, training): function get_bias_dropout_add (line 11) | def get_bias_dropout_add(training): FILE: examples/tutorial/sequence_parallel/model/layers/embedding.py class VocabEmbedding (line 7) | class VocabEmbedding(torch.nn.Module): method __init__ (line 8) | def __init__(self, num_embeddings, embedding_dim): method forward (line 24) | def forward(self, hidden_state): method __repr__ (line 36) | def __repr__(self): class Embedding (line 40) | class Embedding(nn.Module): method __init__ (line 53) | def __init__(self, hidden_size, vocab_size, max_sequence_length, embed... method word_embedding_weight (line 77) | def word_embedding_weight(self): method forward (line 80) | def forward(self, input_ids, position_ids, tokentype_ids=None): FILE: examples/tutorial/sequence_parallel/model/layers/head.py class BertLMHead (line 14) | class BertLMHead(nn.Module): method __init__ (line 22) | def __init__( method forward (line 34) | def forward(self, hidden_states, word_embeddings_weight, lm_labels): class BertBinaryHead (line 45) | class BertBinaryHead(nn.Module): method __init__ (line 46) | def __init__(self, hidden_size): method forward (line 51) | def forward(self, hidden_states): class BertDualHead (line 60) | class BertDualHead(nn.Module): method __init__ (line 61) | def __init__(self, hidden_size, vocab_size, add_binary_head): method forward (line 70) | def forward(self, hidden_states, word_embeddings_weight, lm_labels): FILE: examples/tutorial/sequence_parallel/model/layers/init_method.py function init_normal (line 6) | def init_normal(tensor, sigma): function output_init_normal (line 11) | def output_init_normal(tensor, sigma, num_layers): FILE: examples/tutorial/sequence_parallel/model/layers/linear.py class Linear (line 8) | class Linear(nn.Module): method __init__ (line 27) | def __init__(self, input_size, output_size, bias=True, skip_bias_add=F... method forward (line 50) | def forward(self, input_): method __repr__ (line 60) | def __repr__(self): FILE: examples/tutorial/sequence_parallel/model/layers/mlp.py class TransformerMLP (line 9) | class TransformerMLP(nn.Module): method __init__ (line 17) | def __init__(self, hidden_size, mlp_ratio, fuse_gelu=True): method forward (line 29) | def forward(self, hidden_states): FILE: examples/tutorial/sequence_parallel/model/layers/pooler.py class Pooler (line 7) | class Pooler(nn.Module): method __init__ (line 19) | def __init__(self, hidden_size): method forward (line 23) | def forward(self, hidden_states, sequence_index=0): FILE: examples/tutorial/sequence_parallel/model/layers/preprocess.py class PreProcessor (line 8) | class PreProcessor(nn.Module): method __init__ (line 9) | def __init__(self, sub_seq_length): method bert_position_ids (line 13) | def bert_position_ids(self, token_ids): method bert_extended_attention_mask (line 24) | def bert_extended_attention_mask(self, attention_mask): method forward (line 47) | def forward(self, input_ids=None, attention_mask=None): FILE: examples/tutorial/sequence_parallel/train.py function process_batch_data (line 21) | def process_batch_data(batch_data): function parse_args (line 31) | def parse_args(): function pipeline_data_process_func (line 37) | def pipeline_data_process_func(stage_output, micro_batch_data): function main (line 48) | def main(): FILE: extensions/base_extension.py class _Extension (line 9) | class _Extension(ABC): method __init__ (line 10) | def __init__(self, name: str, support_aot: bool, support_jit: bool, pr... method name (line 17) | def name(self): method support_aot (line 21) | def support_aot(self): method support_jit (line 25) | def support_jit(self): method get_jit_extension_folder_path (line 29) | def get_jit_extension_folder_path(): method is_available (line 61) | def is_available(self) -> bool: method assert_compatible (line 67) | def assert_compatible(self) -> None: method build_aot (line 73) | def build_aot(self) -> Union["CppExtension", "CUDAExtension"]: method build_jit (line 77) | def build_jit(self) -> Callable: method load (line 81) | def load(self) -> Callable: FILE: extensions/cpp_extension.py class _CppExtension (line 13) | class _CppExtension(_Extension): method __init__ (line 14) | def __init__(self, name: str, priority: int = 1): method csrc_abs_path (line 25) | def csrc_abs_path(self, path): method pybind_abs_path (line 28) | def pybind_abs_path(self, path): method relative_to_abs_path (line 31) | def relative_to_abs_path(self, code_path: str) -> str: method strip_empty_entries (line 51) | def strip_empty_entries(self, args): method import_op (line 57) | def import_op(self): method build_aot (line 63) | def build_aot(self) -> "CppExtension": method build_jit (line 73) | def build_jit(self) -> None: method sources_files (line 112) | def sources_files(self) -> List[str]: method include_dirs (line 118) | def include_dirs(self) -> List[str]: method cxx_flags (line 125) | def cxx_flags(self) -> List[str]: method load (line 130) | def load(self): FILE: extensions/csrc/common/data_type.h function namespace (line 8) | namespace colossalAI { FILE: extensions/csrc/common/mp_type_traits.h function namespace (line 12) | namespace colossalAI { FILE: extensions/csrc/common/target.h type class (line 12) | enum class type class (line 17) | enum class function BitLen (line 25) | enum class BitLen : int { FILE: extensions/csrc/common/vec_type_traits.h function namespace (line 13) | namespace colossalAI { FILE: extensions/csrc/funcs/binary_functor.h function namespace (line 16) | namespace colossalAI { FILE: extensions/csrc/funcs/cast_functor.h function namespace (line 22) | namespace colossalAI { FILE: extensions/csrc/funcs/reduce_function.h function namespace (line 11) | namespace funcs { FILE: extensions/csrc/funcs/ternary_functor.h function namespace (line 17) | namespace colossalAI { FILE: extensions/csrc/funcs/unary_functor.h function namespace (line 15) | namespace colossalAI { FILE: extensions/csrc/kernel/arm/cpu_adam_arm.cpp function PYBIND11_MODULE (line 300) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/csrc/kernel/arm/cpu_adam_arm.h function float32x4_t (line 14) | inline float32x4_t simd_load_offset(const void *ptr, at::ScalarType dtype, function float32x4_t (line 34) | inline float32x4_t simd_load(void const *ptr, at::ScalarType dtype) { function simd_store_offset (line 38) | inline void simd_store_offset(void *ptr, at::ScalarType dtype, float32x4... function simd_store (line 62) | inline void simd_store(void *ptr, at::ScalarType dtype, float32x4_t data) { function float32x4_t (line 66) | inline float32x4_t simd_set(float value) { function scalar_load_offset (line 73) | inline float scalar_load_offset(const void *ptr, at::ScalarType dtype, function scalar_store_offset (line 90) | inline void scalar_store_offset(void *ptr, at::ScalarType dtype, float d... function class (line 129) | class AdamOptimizer { function update_state (line 183) | inline void update_state(float lr, float epsilon, float weight_decay, FILE: extensions/csrc/kernel/cuda/attention/attention_utils.h function namespace (line 32) | namespace colossalAI { FILE: extensions/csrc/kernel/cuda/utils/gpu_launch_config.h function namespace (line 8) | namespace colossalAI { FILE: extensions/csrc/kernel/cuda/utils/nvgpu_dev_info.h function namespace (line 12) | namespace colossalAI { FILE: extensions/csrc/kernel/cuda/utils/vec_copy.h function namespace (line 7) | namespace colossalAI { FILE: extensions/csrc/kernel/x86/cpu_adam.cpp function PYBIND11_MODULE (line 442) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/csrc/kernel/x86/cpu_adam.h function class (line 93) | class Adam_Optimizer { function update_state (line 131) | inline void update_state(float lr, float epsilon, float weight_decay, function simd_load (line 146) | inline void simd_load(bool is_half, float *ptr, __half *h_ptr, function simd_store (line 155) | inline void simd_store(bool is_half, float *ptr, __half *h_ptr, FILE: extensions/cuda_extension.py class _CudaExtension (line 18) | class _CudaExtension(_CppExtension): method nvcc_flags (line 20) | def nvcc_flags(self) -> List[str]: method is_available (line 26) | def is_available(self) -> bool: method assert_compatible (line 38) | def assert_compatible(self) -> None: method get_cuda_home_include (line 48) | def get_cuda_home_include(self): method include_dirs (line 59) | def include_dirs(self) -> List[str]: method build_jit (line 65) | def build_jit(self) -> None: method build_aot (line 106) | def build_aot(self) -> "CUDAExtension": FILE: extensions/pybind/cpu_adam/cpu_adam_arm.py class CpuAdamArmExtension (line 7) | class CpuAdamArmExtension(_CppExtension): method __init__ (line 8) | def __init__(self): method is_available (line 11) | def is_available(self) -> bool: method assert_compatible (line 15) | def assert_compatible(self) -> None: method sources_files (line 22) | def sources_files(self): method include_dirs (line 28) | def include_dirs(self) -> List[str]: method cxx_flags (line 31) | def cxx_flags(self): method nvcc_flags (line 41) | def nvcc_flags(self): FILE: extensions/pybind/cpu_adam/cpu_adam_x86.py class CpuAdamX86Extension (line 7) | class CpuAdamX86Extension(_CudaExtension): method __init__ (line 8) | def __init__(self): method is_available (line 11) | def is_available(self) -> bool: method assert_compatible (line 14) | def assert_compatible(self) -> None: method sources_files (line 22) | def sources_files(self): method cxx_flags (line 28) | def cxx_flags(self): method nvcc_flags (line 41) | def nvcc_flags(self): FILE: extensions/pybind/flash_attention/flash_attention_dao_cuda.py class FlashAttentionDaoCudaExtension (line 4) | class FlashAttentionDaoCudaExtension(_Extension): method __init__ (line 5) | def __init__(self): method is_available (line 8) | def is_available(self) -> bool: method assert_compatible (line 21) | def assert_compatible(self) -> bool: method build_aot (line 24) | def build_aot(self) -> None: method build_jit (line 29) | def build_jit(self) -> None: method load (line 34) | def load(self): FILE: extensions/pybind/flash_attention/flash_attention_npu.py class FlashAttentionNpuExtension (line 6) | class FlashAttentionNpuExtension(_Extension): method __init__ (line 7) | def __init__(self): method is_available (line 10) | def is_available(self) -> bool: method assert_compatible (line 18) | def assert_compatible(self) -> bool: method build_aot (line 21) | def build_aot(self) -> None: method build_jit (line 26) | def build_jit(self) -> None: method load (line 31) | def load(self): FILE: extensions/pybind/flash_attention/flash_attention_sdpa_cuda.py class FlashAttentionSdpaCudaExtension (line 4) | class FlashAttentionSdpaCudaExtension(_Extension): method __init__ (line 5) | def __init__(self): method is_available (line 8) | def is_available(self) -> bool: method assert_compatible (line 18) | def assert_compatible(self) -> bool: method build_aot (line 21) | def build_aot(self) -> None: method build_jit (line 24) | def build_jit(self) -> None: method load (line 27) | def load(self): FILE: extensions/pybind/inference/inference.cpp function PYBIND11_MODULE (line 81) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/inference/inference_ops_cuda.py class InferenceOpsCudaExtension (line 5) | class InferenceOpsCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method cxx_flags (line 25) | def cxx_flags(self): method nvcc_flags (line 29) | def nvcc_flags(self): FILE: extensions/pybind/layernorm/layer_norm.cpp function compute_n1_n2 (line 14) | void compute_n1_n2(at::Tensor input, at::IntArrayRef normalized_shape, i... function check_args (line 28) | void check_args(at::IntArrayRef normalized_shape, at::Tensor gamma, function check_args (line 34) | void check_args(at::Tensor input, at::IntArrayRef normalized_shape, int ... function check_args (line 65) | void check_args(at::Tensor input, at::IntArrayRef normalized_shape, function layer_norm_affine (line 84) | std::vector layer_norm_affine(at::Tensor input, function layer_norm_gradient_affine (line 113) | std::vector layer_norm_gradient_affine( function PYBIND11_MODULE (line 137) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/layernorm/layernorm_cuda.py class LayerNormCudaExtension (line 5) | class LayerNormCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method include_dirs (line 15) | def include_dirs(self): method cxx_flags (line 19) | def cxx_flags(self): method nvcc_flags (line 22) | def nvcc_flags(self): FILE: extensions/pybind/moe/moe.cpp function moe_dispatch_forward (line 33) | torch::Tensor moe_dispatch_forward(int s, int ec, int h, function moe_dispatch_backward (line 43) | torch::Tensor moe_dispatch_backward(int s, int ec, int h, function moe_combine_forward (line 54) | torch::Tensor moe_combine_forward(int s, int e, int c, int h, function moe_combine_backward (line 67) | std::vector moe_combine_backward(int s, int e, int c, int h, function moe_cumsum (line 82) | torch::Tensor moe_cumsum(torch::Tensor mask) { function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/moe/moe_cuda.py class MoeCudaExtension (line 5) | class MoeCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method cxx_flags (line 15) | def cxx_flags(self): method nvcc_flags (line 18) | def nvcc_flags(self): FILE: extensions/pybind/optimizer/fused_optimizer_cuda.py class FusedOptimizerCudaExtension (line 5) | class FusedOptimizerCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method cxx_flags (line 22) | def cxx_flags(self): method nvcc_flags (line 26) | def nvcc_flags(self): FILE: extensions/pybind/optimizer/optimizer.cpp function PYBIND11_MODULE (line 38) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/softmax/scaled_masked_softmax.cpp function fwd (line 19) | torch::Tensor fwd(torch::Tensor const& input, torch::Tensor const& mask, function bwd (line 30) | torch::Tensor bwd(torch::Tensor const& output_grads, function PYBIND11_MODULE (line 45) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/softmax/scaled_masked_softmax_cuda.py class ScaledMaskedSoftmaxCudaExtension (line 5) | class ScaledMaskedSoftmaxCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method cxx_flags (line 15) | def cxx_flags(self): method nvcc_flags (line 18) | def nvcc_flags(self): FILE: extensions/pybind/softmax/scaled_upper_triang_masked_softmax.cpp function fwd (line 15) | torch::Tensor fwd(torch::Tensor const& input, float scale_factor) { function bwd (line 24) | torch::Tensor bwd(torch::Tensor const& output_grads, function PYBIND11_MODULE (line 39) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: extensions/pybind/softmax/scaled_upper_triangle_masked_softmax_cuda.py class ScaledUpperTriangleMaskedSoftmaxCudaExtension (line 5) | class ScaledUpperTriangleMaskedSoftmaxCudaExtension(_CudaExtension): method __init__ (line 6) | def __init__(self): method sources_files (line 9) | def sources_files(self): method cxx_flags (line 18) | def cxx_flags(self): method nvcc_flags (line 21) | def nvcc_flags(self): FILE: extensions/triton_extension.py class _TritonExtension (line 6) | class _TritonExtension(_Extension): method __init__ (line 7) | def __init__(self, name: str, priority: int = 1): method is_hardware_compatible (line 10) | def is_hardware_compatible(self) -> bool: method load (line 20) | def load(self): FILE: extensions/utils.py function print_rank_0 (line 8) | def print_rank_0(message: str) -> None: function get_cuda_version_in_pytorch (line 26) | def get_cuda_version_in_pytorch() -> List[int]: function get_cuda_bare_metal_version (line 45) | def get_cuda_bare_metal_version(cuda_dir) -> List[int]: function check_system_pytorch_cuda_match (line 84) | def check_system_pytorch_cuda_match(cuda_dir): function get_pytorch_version (line 104) | def get_pytorch_version() -> List[int]: function check_pytorch_version (line 120) | def check_pytorch_version(min_major_version, min_minor_version) -> bool: function check_cuda_availability (line 142) | def check_cuda_availability(): function set_cuda_arch_list (line 154) | def set_cuda_arch_list(cuda_dir): function get_cuda_cc_flag (line 193) | def get_cuda_cc_flag() -> List[str]: function append_nvcc_threads (line 217) | def append_nvcc_threads(nvcc_extra_args: List[str]) -> List[str]: FILE: setup.py function fetch_requirements (line 23) | def fetch_requirements(path) -> List[str]: function fetch_readme (line 37) | def fetch_readme() -> str: function get_version (line 48) | def get_version() -> str: FILE: tests/conftest.py function pytest_runtest_setup (line 6) | def pytest_runtest_setup(item): FILE: tests/kit/model_zoo/custom/base.py class CheckpointModule (line 5) | class CheckpointModule(nn.Module): method __init__ (line 6) | def __init__(self, checkpoint: bool = False): method _forward (line 11) | def _forward(self, *args, **kwargs): method forward (line 14) | def forward(self, *args, **kwargs): method train (line 20) | def train(self, mode: bool = True): method eval (line 24) | def eval(self): FILE: tests/kit/model_zoo/custom/hanging_param_model.py class HangingParamModule (line 9) | class HangingParamModule(CheckpointModule): method __init__ (line 15) | def __init__(self, checkpoint=False) -> None: method forward (line 21) | def forward(self, x): function data_gen (line 28) | def data_gen(): function loss_fn (line 32) | def loss_fn(x): function output_transform (line 38) | def output_transform(x: torch.Tensor): FILE: tests/kit/model_zoo/custom/nested_model.py class SubNet (line 9) | class SubNet(nn.Module): method __init__ (line 10) | def __init__(self, out_features) -> None: method forward (line 14) | def forward(self, x, weight): class NestedNet (line 18) | class NestedNet(CheckpointModule): method __init__ (line 19) | def __init__(self, checkpoint=False) -> None: method forward (line 25) | def forward(self, x): function data_gen (line 33) | def data_gen(): function loss_fn (line 37) | def loss_fn(x): function output_transform (line 43) | def output_transform(x: torch.Tensor): FILE: tests/kit/model_zoo/custom/repeated_computed_layers.py class NetWithRepeatedlyComputedLayers (line 9) | class NetWithRepeatedlyComputedLayers(CheckpointModule): method __init__ (line 15) | def __init__(self, checkpoint=False) -> None: method forward (line 22) | def forward(self, x): function data_gen (line 28) | def data_gen(): function loss_fn (line 32) | def loss_fn(x): function output_transform (line 38) | def output_transform(x: torch.Tensor): FILE: tests/kit/model_zoo/custom/simple_mlp.py class Net (line 15) | class Net(nn.Module): method __init__ (line 16) | def __init__(self, in_dim=_IN_DIM, hid_dim=_HID_DIM, identity=True, dt... method forward (line 26) | def forward(self, x): class TPNet (line 30) | class TPNet(nn.Module): method __init__ (line 31) | def __init__( method forward (line 48) | def forward(self, x): function data_gen (line 52) | def data_gen(): function output_transform (line 56) | def output_transform(x: torch.Tensor): FILE: tests/kit/model_zoo/custom/simple_net.py class SimpleNet (line 9) | class SimpleNet(CheckpointModule): method __init__ (line 14) | def __init__(self, checkpoint=False) -> None: method forward (line 23) | def forward(self, x): function data_gen (line 33) | def data_gen(): function loss_fn (line 37) | def loss_fn(x): function output_transform (line 43) | def output_transform(x: torch.Tensor): FILE: tests/kit/model_zoo/diffusers/diffusers.py function data_clip_model (line 24) | def data_clip_model(): function data_clip_text (line 34) | def data_clip_text(): function data_clip_vision (line 40) | def data_clip_vision(): FILE: tests/kit/model_zoo/executor.py function run_fwd (line 10) | def run_fwd( function run_fwd_bwd (line 34) | def run_fwd_bwd( FILE: tests/kit/model_zoo/registry.py class ModelAttribute (line 9) | class ModelAttribute: class ModelZooRegistry (line 22) | class ModelZooRegistry(dict): method register (line 27) | def register( method get_sub_registry (line 64) | def get_sub_registry( FILE: tests/kit/model_zoo/torchaudio/torchaudio.py function conformer_data_gen_fn (line 18) | def conformer_data_gen_fn(): function emformer_data_gen_fn (line 53) | def emformer_data_gen_fn(): function wavernn_data_gen_fn (line 82) | def wavernn_data_gen_fn(): function tacotron_data_gen_fn (line 108) | def tacotron_data_gen_fn(): function wav2vec_data_gen_fn (line 130) | def wav2vec_data_gen_fn(): FILE: tests/kit/model_zoo/torchrec/torchrec.py function gen_kt (line 15) | def gen_kt(): function gen_kjt (line 21) | def gen_kjt(): function interaction_arch_data_gen_fn (line 31) | def interaction_arch_data_gen_fn(): function simple_dfm_data_gen_fn (line 36) | def simple_dfm_data_gen_fn(): function sparse_arch_data_gen_fn (line 41) | def sparse_arch_data_gen_fn(): function output_transform_fn (line 46) | def output_transform_fn(x): function get_ebc (line 56) | def get_ebc(): function sparse_arch_model_fn (line 63) | def sparse_arch_model_fn(): function simple_deep_fmnn_model_fn (line 68) | def simple_deep_fmnn_model_fn(): function dlrm_model_fn (line 73) | def dlrm_model_fn(): function dlrm_sparsearch_model_fn (line 78) | def dlrm_sparsearch_model_fn(): FILE: tests/kit/model_zoo/torchvision/torchvision.py function swin_s (line 16) | def swin_s(): FILE: tests/kit/model_zoo/transformers/albert.py function data_gen_fn (line 13) | def data_gen_fn(): function data_gen_for_pretrain (line 20) | def data_gen_for_pretrain(): function data_gen_for_qa (line 74) | def data_gen_for_qa(): function data_gen_for_mcq (line 81) | def data_gen_for_mcq(): FILE: tests/kit/model_zoo/transformers/bert.py function data_gen (line 12) | def data_gen(): function data_gen_for_lm (line 27) | def data_gen_for_lm(): function data_gen_for_pretraining (line 35) | def data_gen_for_pretraining(): function data_gen_for_sequence_classification (line 43) | def data_gen_for_sequence_classification(): function data_gen_for_token_classification (line 51) | def data_gen_for_token_classification(): function data_gen_for_mcq (line 59) | def data_gen_for_mcq(): function data_gen_for_qa (line 345) | def data_gen_for_qa(): FILE: tests/kit/model_zoo/transformers/blip2.py function data_gen (line 12) | def data_gen(): FILE: tests/kit/model_zoo/transformers/bloom.py function data_gen (line 11) | def data_gen(): function data_gen_for_lm (line 24) | def data_gen_for_lm(): function data_gen_for_token_classification (line 32) | def data_gen_for_token_classification(): function data_gen_for_sequence_classification (line 40) | def data_gen_for_sequence_classification(): function data_gen_for_question_answering (line 47) | def data_gen_for_question_answering(): FILE: tests/kit/model_zoo/transformers/chatglm2.py function data_gen (line 12) | def data_gen(): function data_gen_for_conditional_generation (line 18) | def data_gen_for_conditional_generation(): function init_chatglm (line 54) | def init_chatglm(): FILE: tests/kit/model_zoo/transformers/command.py function data_gen (line 18) | def data_gen(): function data_gen_for_causal_lm (line 36) | def data_gen_for_causal_lm(): FILE: tests/kit/model_zoo/transformers/deepseek.py function data_gen (line 13) | def data_gen(): function data_gen_for_lm (line 27) | def data_gen_for_lm(): function data_gen_for_sequence_classification (line 35) | def data_gen_for_sequence_classification(): function init_deepseek (line 51) | def init_deepseek(): FILE: tests/kit/model_zoo/transformers/deepseek_v3.py function data_gen (line 13) | def data_gen(): function data_gen_for_lm (line 27) | def data_gen_for_lm(): function init_deepseek (line 43) | def init_deepseek(): FILE: tests/kit/model_zoo/transformers/falcon.py function data_gen (line 11) | def data_gen(): function data_gen_for_lm (line 24) | def data_gen_for_lm(): function data_gen_for_token_classification (line 32) | def data_gen_for_token_classification(): function data_gen_for_sequence_classification (line 40) | def data_gen_for_sequence_classification(): function data_gen_for_question_answering (line 47) | def data_gen_for_question_answering(): FILE: tests/kit/model_zoo/transformers/gpt.py function data_gen (line 13) | def data_gen(): function data_gen_for_lm (line 26) | def data_gen_for_lm(): function data_gen_for_question_answering (line 43) | def data_gen_for_question_answering(): function data_gen_for_token_classification (line 54) | def data_gen_for_token_classification(): function data_gen_for_sequence_classification (line 62) | def data_gen_for_sequence_classification(): function date_gen_for_double_heads (line 69) | def date_gen_for_double_heads(): FILE: tests/kit/model_zoo/transformers/gptj.py function data_gen (line 13) | def data_gen(): function data_gen_for_lm (line 27) | def data_gen_for_lm(): function data_gen_for_question_answering (line 35) | def data_gen_for_question_answering(): function data_gen_for_sequence_classification (line 46) | def data_gen_for_sequence_classification(): FILE: tests/kit/model_zoo/transformers/llama.py function data_gen (line 18) | def data_gen(): function data_gen_for_causal_lm (line 40) | def data_gen_for_causal_lm(): FILE: tests/kit/model_zoo/transformers/mistral.py function data_gen (line 12) | def data_gen(): function data_gen_for_lm (line 26) | def data_gen_for_lm(): function data_gen_for_sequence_classification (line 34) | def data_gen_for_sequence_classification(): FILE: tests/kit/model_zoo/transformers/mixtral.py function data_gen (line 13) | def data_gen(): function data_gen_for_lm (line 27) | def data_gen_for_lm(): function data_gen_for_sequence_classification (line 35) | def data_gen_for_sequence_classification(): FILE: tests/kit/model_zoo/transformers/opt.py function data_gen (line 13) | def data_gen(): function data_gen_for_causal_lm (line 19) | def data_gen_for_causal_lm(): function data_gen_for_sequence_classification (line 28) | def data_gen_for_sequence_classification(): function data_gen_for_question_answering (line 37) | def data_gen_for_question_answering(): FILE: tests/kit/model_zoo/transformers/qwen2.py function data_gen (line 18) | def data_gen(): function data_gen_for_causal_lm (line 37) | def data_gen_for_causal_lm(): FILE: tests/kit/model_zoo/transformers/qwen3.py function data_gen (line 18) | def data_gen(): function data_gen_for_causal_lm (line 66) | def data_gen_for_causal_lm(): FILE: tests/kit/model_zoo/transformers/sam.py function data_gen (line 12) | def data_gen(): FILE: tests/kit/model_zoo/transformers/t5.py function data_gen_for_encoder_only (line 12) | def data_gen_for_encoder_only(): function data_gen_for_conditional_generation (line 24) | def data_gen_for_conditional_generation(): function data_gen_for_t5_model (line 34) | def data_gen_for_t5_model(): function data_gen_for_token_classification (line 43) | def data_gen_for_token_classification(): FILE: tests/kit/model_zoo/transformers/vit.py function data_gen (line 14) | def data_gen(): function data_gen_for_image_classification (line 19) | def data_gen_for_image_classification(): function data_gen_for_masked_image_modeling (line 25) | def data_gen_for_masked_image_modeling(): FILE: tests/kit/model_zoo/transformers/whisper.py function data_gen (line 12) | def data_gen(): function data_gen_for_conditional_generation (line 30) | def data_gen_for_conditional_generation(): function data_gen_for_audio_classification (line 40) | def data_gen_for_audio_classification(): FILE: tests/test_analyzer/test_fx/test_bias_addition.py class LinearModel (line 14) | class LinearModel(torch.nn.Module): method __init__ (line 15) | def __init__(self, in_features, out_features, bias): method forward (line 19) | def forward(self, x): class ConvModel (line 24) | class ConvModel(torch.nn.Module): method __init__ (line 25) | def __init__(self, in_channel, out_channels, kernel_size, bias) -> None: method forward (line 34) | def forward(self, x, select=0): class SiuModel (line 42) | class SiuModel(torch.nn.Module): method __init__ (line 43) | def __init__(self, bias) -> None: method forward (line 48) | def forward(self, x, select=torch.Tensor([0])): class AddmmModel (line 58) | class AddmmModel(torch.nn.Module): method __init__ (line 59) | def __init__(self, alpha, beta) -> None: method forward (line 64) | def forward(self, x): function test_siu_model (line 75) | def test_siu_model(bias, bias_addition_split, shape, select): function test_addmm_model (line 97) | def test_addmm_model(alpha, beta, bias_addition_split, shape): FILE: tests/test_analyzer/test_fx/test_mod_dir.py class LinearModel (line 12) | class LinearModel(torch.nn.Module): method __init__ (line 13) | def __init__(self, in_features, out_features, bias): method forward (line 17) | def forward(self, x): class ConvModel (line 22) | class ConvModel(torch.nn.Module): method __init__ (line 23) | def __init__(self, in_channel, out_channels, kernel_size, bias) -> None: method forward (line 32) | def forward(self, x): class AModel (line 38) | class AModel(torch.nn.Module): method __init__ (line 39) | def __init__(self, bias) -> None: method forward (line 45) | def forward(self, x): function test_mod_dir (line 58) | def test_mod_dir(bias, bias_addition_split, shape): FILE: tests/test_analyzer/test_fx/test_nested_ckpt.py class MyModule (line 14) | class MyModule(nn.Module): method __init__ (line 15) | def __init__(self): method checkpoint_0 (line 23) | def checkpoint_0(self, x): method checkpoint_0_0 (line 26) | def checkpoint_0_0(self, x): method checkpoint_0_0_0 (line 29) | def checkpoint_0_0_0(self, x): method checkpoint_0_0_0_0 (line 32) | def checkpoint_0_0_0_0(self, x): method checkpoint_0_0_1 (line 35) | def checkpoint_0_0_1(self, x): method checkpoint_0_1 (line 38) | def checkpoint_0_1(self, x): method forward (line 41) | def forward(self, x): function test_nested_ckpt (line 47) | def test_nested_ckpt(): FILE: tests/test_analyzer/test_fx/test_shape_prop.py function linear_impl (line 15) | def linear_impl(*args, **kwargs): function _check_gm_validity (line 23) | def _check_gm_validity(gm: torch.fx.GraphModule): function test_torchvision_shape_prop (line 37) | def test_torchvision_shape_prop(m): function test_timm_shape_prop (line 52) | def test_timm_shape_prop(m): FILE: tests/test_analyzer/test_fx/test_symbolic_profile.py function _check_gm_validity (line 15) | def _check_gm_validity(gm: torch.fx.GraphModule): function test_torchvision_profile (line 23) | def test_torchvision_profile(m, verbose=False, bias_addition_split=False): function test_timm_profile (line 38) | def test_timm_profile(m, verbose=False, bias_addition_split=False): FILE: tests/test_analyzer/test_subclasses/test_aten.py function compare_all (line 64) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor) -> Any: function run_and_compare (line 76) | def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requ... function test_meta_aten (line 89) | def test_meta_aten(): FILE: tests/test_analyzer/test_subclasses/test_flop_tensor.py function test_flop_count_module (line 17) | def test_flop_count_module(m): function test_flop_count_function (line 47) | def test_flop_count_function(func, args, kwargs): FILE: tests/test_analyzer/test_subclasses/test_meta_mode.py function compare_all (line 15) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor): function run_and_compare (line 27) | def run_and_compare(model): function test_meta_mode_shape (line 42) | def test_meta_mode_shape(m): FILE: tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py function _run_C_solver_consistency_test (line 29) | def _run_C_solver_consistency_test(rank, world_size, port): function test_C_solver_consistency (line 76) | def test_C_solver_consistency(): FILE: tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py function _is_activation_checkpoint_available (line 37) | def _is_activation_checkpoint_available(gm: GraphModule): function _is_all_gradient_close (line 43) | def _is_all_gradient_close(m: torch.nn.Module, gm: GraphModule): function _is_graph_linearized (line 50) | def _is_graph_linearized(gm: GraphModule): function check_backward_consistency (line 60) | def check_backward_consistency( function _run_ckpt_solver (line 77) | def _run_ckpt_solver(rank, world_size, port): function test_ckpt_solver (line 109) | def test_ckpt_solver(): function _run_ckpt_solver_torch11 (line 113) | def _run_ckpt_solver_torch11(rank, world_size, port): function test_ckpt_solver_torch11 (line 144) | def test_ckpt_solver_torch11(): FILE: tests/test_auto_parallel/test_ckpt_solvers/test_linearize.py function test_linearize (line 32) | def test_linearize(): function test_linearize_torch11 (line 96) | def test_linearize_torch11(): FILE: tests/test_auto_parallel/test_offload/model_utils.py class GPTLMModel (line 8) | class GPTLMModel(nn.Module): method __init__ (line 9) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads... method forward (line 22) | def forward(self, input_ids, attention_mask): class LMLoss (line 27) | class LMLoss(nn.Module): method __init__ (line 28) | def __init__(self): method forward (line 32) | def forward(self, logits, labels): class BertLMModel (line 39) | class BertLMModel(nn.Module): method __init__ (line 40) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads... method forward (line 53) | def forward(self, input_ids, attention_mask): function get_bert_components (line 59) | def get_bert_components(): function get_gpt2_components (line 78) | def get_gpt2_components(): FILE: tests/test_auto_parallel/test_offload/test_perf.py function exam_fwd_bwd (line 26) | def exam_fwd_bwd(model_name: str, memory_budget: float, solver_name: str): function run_dist (line 146) | def run_dist(rank, world_size, port): function test_perf (line 154) | def test_perf(): FILE: tests/test_auto_parallel/test_offload/test_solver.py function solver_test (line 19) | def solver_test(model_name: str, memory_budget: float, solver_name: str): FILE: tests/test_auto_parallel/test_pass/test_node_converting_pass.py class TestModule (line 11) | class TestModule(torch.nn.Module): method forward (line 12) | def forward(self, x): function insert_narrow (line 17) | def insert_narrow(gm, x_node): function test_node_args_converting_pass (line 29) | def test_node_args_converting_pass(): FILE: tests/test_auto_parallel/test_pass/test_size_value_converting_pass.py class TestModule (line 13) | class TestModule(torch.nn.Module): method forward (line 14) | def forward(self, x): function insert_narrow (line 19) | def insert_narrow(gm, x_node): function recover_narrow (line 28) | def recover_narrow(gm, narrow_node): function test_size_value_converting_pass (line 39) | def test_size_value_converting_pass(): FILE: tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py class LinearModel (line 17) | class LinearModel(torch.nn.Module): method __init__ (line 18) | def __init__(self, in_features, out_features): method forward (line 22) | def forward(self, x): class ConvModel (line 29) | class ConvModel(torch.nn.Module): method __init__ (line 30) | def __init__(self, in_channels, out_channels, kernel_size, bias=True): method forward (line 36) | def forward(self, x): function check_linear_module (line 43) | def check_linear_module(rank, world_size, port): function check_conv_module (line 60) | def check_conv_module(rank, world_size, port): function test_bias_addition_module (line 81) | def test_bias_addition_module(): FILE: tests/test_auto_parallel/test_tensor_shard/test_broadcast.py function test_is_broadcastable (line 12) | def test_is_broadcastable(): function test_get_broadcast_shape (line 26) | def test_get_broadcast_shape(): function test_recover_sharding_spec_for_broadcast_shape (line 40) | def test_recover_sharding_spec_for_broadcast_shape(): FILE: tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py class GPT2MLPWithCkpt (line 24) | class GPT2MLPWithCkpt(nn.Module): method __init__ (line 25) | def __init__(self, intermediate_size, hidden_size): method forward (line 32) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -... function check_act_ckpt (line 40) | def check_act_ckpt(rank, world_size, port): function test_mlp_layer (line 69) | def test_mlp_layer(): FILE: tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py class MLP (line 20) | class MLP(torch.nn.Module): method __init__ (line 21) | def __init__(self, in_features): method forward (line 26) | def forward(self, x): function check_compatibility_with_ddp (line 33) | def check_compatibility_with_ddp(rank, world_size, port): function test_compatibility_with_ddp (line 102) | def test_compatibility_with_ddp(): FILE: tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py class MLP (line 22) | class MLP(torch.nn.Module): method __init__ (line 23) | def __init__(self, in_features): method forward (line 28) | def forward(self, x): function check_auto_parallel_with_gemini (line 35) | def check_auto_parallel_with_gemini(rank, world_size, port): function test_auto_parallel_with_gemini (line 109) | def test_auto_parallel_with_gemini(): FILE: tests/test_auto_parallel/test_tensor_shard/test_find_repeat_block.py class RepeatBlock (line 21) | class RepeatBlock(nn.Module): method __init__ (line 22) | def __init__(self, intermediate_size, hidden_size): method forward (line 28) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -... class RepeatModel (line 36) | class RepeatModel(nn.Module): method __init__ (line 37) | def __init__(self, intermediate_size, hidden_size, num_layers): method forward (line 41) | def forward(self, x): class NonRepeatBlock (line 48) | class NonRepeatBlock(nn.Module): method __init__ (line 49) | def __init__(self, intermediate_size, hidden_size, layer_index): method forward (line 56) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -... class NonRepeatModel (line 64) | class NonRepeatModel(nn.Module): method __init__ (line 65) | def __init__(self, intermediate_size, hidden_size, num_layers): method forward (line 69) | def forward(self, x): function test_repeat_blocks (line 79) | def test_repeat_blocks(model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/gpt_modules.py class GPT2MLP (line 10) | class GPT2MLP(nn.Module): method __init__ (line 11) | def __init__(self, intermediate_size, config): method forward (line 21) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -... class GPT2Attention (line 35) | class GPT2Attention(nn.Module): method __init__ (line 36) | def __init__(self, config, layer_idx=None): method _attn (line 66) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _split_heads (line 99) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 104) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 109) | def forward( class GPT2Block (line 131) | class GPT2Block(nn.Module): method __init__ (line 132) | def __init__(self, config, layer_idx=None): method forward (line 141) | def forward( class GPT2Model (line 166) | class GPT2Model(GPT2PreTrainedModel): method __init__ (line 169) | def __init__(self, config): method forward (line 184) | def forward( class GPT2LMHeadModel (line 234) | class GPT2LMHeadModel(GPT2PreTrainedModel): method __init__ (line 237) | def __init__(self, config): method forward (line 249) | def forward( class GPTLMLoss (line 264) | class GPTLMLoss(nn.Module): method __init__ (line 265) | def __init__(self): method forward (line 269) | def forward(self, logits, labels): FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py function _check_module_grad (line 50) | def _check_module_grad( function check_attention_layer (line 74) | def check_attention_layer(rank, model_cls, world_size, port): function test_mlp_layer (line 195) | def test_mlp_layer(model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/test_solver_with_gpt_module.py function test_self_attention_block (line 23) | def test_self_attention_block(model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_liveness_analysis.py class LinearModel (line 13) | class LinearModel(nn.Module): method __init__ (line 14) | def __init__(self): method forward (line 20) | def forward(self, x1, x2): function test_liveness_analysis (line 31) | def test_liveness_analysis(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_activation_metainfo.py function test_activation_meta_info (line 21) | def test_activation_meta_info(func): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py class BinaryElementwiseOpModule (line 13) | class BinaryElementwiseOpModule(nn.Module): method __init__ (line 14) | def __init__(self, token=torch.add, shape=64) -> None: method forward (line 19) | def forward(self, input): function _binary_elementwise_mem_test (line 23) | def _binary_elementwise_mem_test(rank, world_size, port): function test_binary_elementwise_meta_concrete_info_match (line 60) | def test_binary_elementwise_meta_concrete_info_match(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py class ConvFunctionModule (line 13) | class ConvFunctionModule(nn.Module): method __init__ (line 14) | def __init__(self, in_channels=4, out_channels=64, kernel_size=3): method forward (line 18) | def forward(self, input): function _conv_module_mem_test (line 22) | def _conv_module_mem_test(rank, world_size, port, bias): function test_conv_meta_concrete_info_match (line 60) | def test_conv_meta_concrete_info_match(bias=False): function _conv_function_mem_test (line 64) | def _conv_function_mem_test(rank, world_size, port): function test_conv_function_concrete_info_match (line 101) | def test_conv_function_concrete_info_match(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_embedding_metainfo.py function test_embedding_meta_info (line 14) | def test_embedding_meta_info(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py class MyModule (line 13) | class MyModule(nn.Module): method __init__ (line 14) | def __init__(self, in_features=64, out_features=128): method forward (line 18) | def forward(self, input): function _linear_module_mem_test (line 22) | def _linear_module_mem_test(rank, world_size, port): function test_linear_module_meta_concrete_info_match (line 56) | def test_linear_module_meta_concrete_info_match(): function _linear_function_mem_test (line 60) | def _linear_function_mem_test(rank, world_size, port): function test_linear_function_meta_concrete_info_match (line 94) | def test_linear_function_meta_concrete_info_match(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_matmul_metainfo.py function test_matmul_function_meta_info (line 29) | def test_matmul_function_meta_info(tensor_shapes): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py function _batchnorm_module_mem_test (line 17) | def _batchnorm_module_mem_test(rank, world_size, port): function test_batchnorm_meta_concrete_info_match (line 54) | def test_batchnorm_meta_concrete_info_match(): function test_layernorm_meta_info (line 66) | def test_layernorm_meta_info(tensor_shape): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py function _adaptiveavgpool_module_mem_test (line 13) | def _adaptiveavgpool_module_mem_test(rank, world_size, port): function test_adaptiveavgpool_meta_concrete_info_match (line 50) | def test_adaptiveavgpool_meta_concrete_info_match(): function _maxpool_module_mem_test (line 54) | def _maxpool_module_mem_test(rank, world_size, port): function test_maxpool_meta_concrete_info_match (line 91) | def test_maxpool_meta_concrete_info_match(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_tensor_metainfo.py class SplitModule (line 13) | class SplitModule(nn.Module): method __init__ (line 14) | def __init__(self) -> None: method forward (line 17) | def forward(self, x): function test_tensor_meta_info (line 23) | def test_tensor_meta_info(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_where_metainfo.py function test_where_meta_info (line 14) | def test_where_meta_info(): FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/utils.py function mem_test_for_node_strategy (line 24) | def mem_test_for_node_strategy( function print_results (line 154) | def print_results( FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py class AddBMMTensorMethodModule (line 15) | class AddBMMTensorMethodModule(nn.Module): method __init__ (line 16) | def __init__(self, using_kwargs): method forward (line 20) | def forward(self, bias, x1, x2): class AddBMMTorchFunctionModule (line 28) | class AddBMMTorchFunctionModule(nn.Module): method __init__ (line 29) | def __init__(self, using_kwargs): method forward (line 33) | def forward(self, bias, x1, x2): function check_2d_device_mesh (line 41) | def check_2d_device_mesh(rank, world_size, port, module, bias_shape, usi... function check_1d_device_mesh (line 151) | def check_1d_device_mesh(rank, module, bias_shape, using_kwargs, world_s... function test_2d_device_mesh (line 252) | def test_2d_device_mesh(module, bias_shape, using_kwargs): function test_1d_device_mesh (line 269) | def test_1d_device_mesh(module, bias_shape, using_kwargs): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py class AddmmModel (line 21) | class AddmmModel(nn.Module): method __init__ (line 22) | def __init__(self): method forward (line 25) | def forward(self, input, m1, m2): class AddmmModel_with_param (line 30) | class AddmmModel_with_param(nn.Module): method __init__ (line 31) | def __init__(self, weight_shape, bias_shape): method forward (line 36) | def forward(self, m1): function check_addmm_function_handler (line 41) | def check_addmm_function_handler(rank, world_size, port, input_shape, mo... function test_addmm_handler (line 186) | def test_addmm_handler(input_shape, model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py function check_bn_module_handler (line 17) | def check_bn_module_handler(rank, world_size, port): function test_bn_module_handler (line 113) | def test_bn_module_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py class LinearModule (line 24) | class LinearModule(torch.nn.Module): method __init__ (line 25) | def __init__(self, weight_shape): method forward (line 30) | def forward(self, x): function check_linear_module_handler (line 35) | def check_linear_module_handler(rank, world_size, port): function test_linear_handler (line 165) | def test_linear_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py class LinearModule (line 21) | class LinearModule(torch.nn.Module): method __init__ (line 22) | def __init__(self, in_features, out_features, bias): method forward (line 26) | def forward(self, x): function check_linear_module_handler (line 31) | def check_linear_module_handler(rank, world_size, port, bias): function test_linear_handler (line 154) | def test_linear_handler(bias=True): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py function check_binary_elementwise_handler_with_tensor (line 17) | def check_binary_elementwise_handler_with_tensor(rank, world_size, port,... class BEOpModelWithNodeConst (line 124) | class BEOpModelWithNodeConst(nn.Module): method __init__ (line 125) | def __init__(self, op): method forward (line 129) | def forward(self, x1): class BEOpModelWithIntConst (line 135) | class BEOpModelWithIntConst(nn.Module): method __init__ (line 136) | def __init__(self, op, const): method forward (line 141) | def forward(self, x1): function check_binary_elementwise_handler_with_int (line 146) | def check_binary_elementwise_handler_with_int(rank, world_size, port, op... function test_binary_elementwise_handler_with_tensor (line 234) | def test_binary_elementwise_handler_with_tensor(op, other_dim): function test_binary_elementwise_handler_with_int (line 249) | def test_binary_elementwise_handler_with_int(op, model_cls, other_dim): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py class BMMTensorMethodModule (line 17) | class BMMTensorMethodModule(nn.Module): method forward (line 18) | def forward(self, x1, x2): class BMMTorchFunctionModule (line 22) | class BMMTorchFunctionModule(nn.Module): method forward (line 23) | def forward(self, x1, x2): function check_2d_device_mesh (line 27) | def check_2d_device_mesh(rank, module, world_size, port): function check_1d_device_mesh (line 122) | def check_1d_device_mesh(rank, module, world_size, port): function test_bmm_handler (line 206) | def test_bmm_handler(module): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py function check_conv_module_handler (line 17) | def check_conv_module_handler(rank, world_size, port, bias): class ConvModel (line 145) | class ConvModel(nn.Module): method __init__ (line 146) | def __init__(self): method forward (line 149) | def forward(self, input, others, bias=None): function check_conv_function_handler (line 154) | def check_conv_function_handler(rank, world_size, port, bias): function test_conv_module_handler (line 302) | def test_conv_module_handler(bias=False): function test_conv_function_handler (line 312) | def test_conv_function_handler(bias=False): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_default_reshape_handler.py class ReshapeModel (line 14) | class ReshapeModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, input, other): function test_reshape_handler (line 26) | def test_reshape_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py class EmbeddingModule (line 24) | class EmbeddingModule(nn.Module): method __init__ (line 25) | def __init__(self, num_embeddings, embedding_dims): method forward (line 29) | def forward(self, input): function check_embedding_module_handler (line 34) | def check_embedding_module_handler(rank, world_size, port): class EmbeddingFunction (line 142) | class EmbeddingFunction(nn.Module): method __init__ (line 143) | def __init__(self): method forward (line 146) | def forward(self, input, others): function check_embedding_function_handler (line 151) | def check_embedding_function_handler(rank, world_size, port): function test_embedding_module_handler (line 272) | def test_embedding_module_handler(): function test_embedding_function_handler (line 279) | def test_embedding_function_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getattr_handler.py class GetattrModel (line 14) | class GetattrModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 19) | def forward(self, input): function test_getattr_handler (line 26) | def test_getattr_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py class GetItemFromTensorModel (line 21) | class GetItemFromTensorModel(nn.Module): method __init__ (line 22) | def __init__(self, getitem_index): method forward (line 26) | def forward(self, input, other): function check_getitem_from_tensor_handler (line 32) | def check_getitem_from_tensor_handler(rank, getitem_index, world_size, p... function test_getitem_from_tensor_handler (line 101) | def test_getitem_from_tensor_handler(getitem_index): class GetItemFromTupleModel (line 105) | class GetItemFromTupleModel(nn.Module): method __init__ (line 106) | def __init__(self): method forward (line 109) | def forward(self, input): function test_getitem_from_tuple_handler (line 117) | def test_getitem_from_tuple_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py function check_ln_module_handler (line 18) | def check_ln_module_handler(rank, world_size, port): function test_ln_module_handler (line 103) | def test_ln_module_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py function check_linear_module_handler (line 24) | def check_linear_module_handler(rank, world_size, port, bias, input_shape): class LinearModel (line 163) | class LinearModel(nn.Module): method __init__ (line 164) | def __init__(self): method forward (line 167) | def forward(self, input, others, bias=None): function check_linear_function_handler (line 172) | def check_linear_function_handler(rank, world_size, port, bias, input_sh... function test_linear_handler (line 314) | def test_linear_handler(input_shape, bias=False): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_matmul_handler.py class MatMulModule (line 24) | class MatMulModule(nn.Module): method forward (line 25) | def forward(self, x1, x2): function test_matmul_node_handler (line 50) | def test_matmul_node_handler(tensor_shapes): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_norm_pooling_handler.py function test_norm_pool_handler (line 15) | def test_norm_pool_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_output_handler.py class OutputModel (line 14) | class OutputModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, x): function test_output_handler (line 26) | def test_output_handler(output_option): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py class ConvReshapeModel (line 20) | class ConvReshapeModel(nn.Module): method __init__ (line 21) | def __init__(self, reshape_dims, call_function): method forward (line 26) | def forward(self, input, other): class LinearReshapeModel (line 36) | class LinearReshapeModel(nn.Module): method __init__ (line 37) | def __init__(self, reshape_dims, call_function): method forward (line 42) | def forward(self, input, other): function check_view_handler (line 52) | def check_view_handler(rank, world_size, port, call_function, reshape_di... function test_view_handler (line 323) | def test_view_handler(call_function, reshape_dims, model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_placeholder_handler.py class PlaceholderModel (line 14) | class PlaceholderModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, input): function test_placeholder_handler (line 25) | def test_placeholder_handler(placeholder_option): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_shard_option.py class LinearModel (line 14) | class LinearModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, input, others, bias=None): function check_shard_option (line 23) | def check_shard_option(shard_option): function test_shard_option (line 109) | def test_shard_option(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py class LinearSplitModel (line 19) | class LinearSplitModel(nn.Module): method __init__ (line 20) | def __init__(self, softmax_dim): method forward (line 24) | def forward(self, input, other): function check_split_handler (line 30) | def check_split_handler(rank, world_size, port, softmax_dim, model_cls): function test_split_handler (line 174) | def test_split_handler(softmax_dim, model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py class ConvSplitModel (line 19) | class ConvSplitModel(nn.Module): method __init__ (line 20) | def __init__(self, split_size, split_dim): method forward (line 25) | def forward(self, input, other): class LinearSplitModel (line 31) | class LinearSplitModel(nn.Module): method __init__ (line 32) | def __init__(self, split_size, split_dim): method forward (line 37) | def forward(self, input, other): function check_split_handler (line 43) | def check_split_handler(rank, world_size, port, split_size, split_dim, m... function test_split_handler (line 252) | def test_split_handler(split_size, split_dim, model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py class LinearSumModel (line 18) | class LinearSumModel(nn.Module): method __init__ (line 19) | def __init__(self, sum_dims, keepdim): method forward (line 24) | def forward(self, input, other): function check_sum_handler (line 33) | def check_sum_handler(rank, world_size, port, sum_dims, keepdim): function test_sum_handler (line 226) | def test_sum_handler(sum_dims, keepdim): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_tensor_constructor.py class TensorConstructorModel (line 13) | class TensorConstructorModel(nn.Module): method __init__ (line 14) | def __init__(self): method forward (line 17) | def forward(self, x): function test_where_handler (line 25) | def test_where_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_unary_element_wise_handler.py class ReLuModel (line 14) | class ReLuModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 19) | def forward(self, input, other): function test_elementwise_handler (line 27) | def test_elementwise_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py class ConvViewModel (line 20) | class ConvViewModel(nn.Module): method __init__ (line 21) | def __init__(self, tgt_shape): method forward (line 25) | def forward(self, input, other): class LinearViewModel (line 31) | class LinearViewModel(nn.Module): method __init__ (line 32) | def __init__(self, tgt_shape): method forward (line 36) | def forward(self, input, other): function check_view_handler (line 42) | def check_view_handler(rank, tgt_shape, model_cls, world_size, port): function test_view_handler (line 250) | def test_view_handler(tgt_shape, model_cls): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_where_handler.py class ConvModel (line 14) | class ConvModel(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, condition, x, y): function test_where_handler (line 25) | def test_where_handler(): FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/utils.py function _build_model_to_compare (line 20) | def _build_model_to_compare( function numerical_test_for_node_strategy (line 66) | def numerical_test_for_node_strategy( function assert_close_helper (line 184) | def assert_close_helper( FILE: tests/test_auto_parallel/test_tensor_shard/test_solver_with_resnet_v2.py function test_cost_graph (line 19) | def test_cost_graph(): FILE: tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py function _benchmark_evoformer_stack_gm (line 19) | def _benchmark_evoformer_stack_gm( function _benchmark_evoformer_stack_origin (line 67) | def _benchmark_evoformer_stack_origin( function _benchmark_memory (line 90) | def _benchmark_memory(model, inputs): function _benchmark_speed (line 99) | def _benchmark_speed(model, inputs, loop=5): function benchmark_evoformer_stack (line 112) | def benchmark_evoformer_stack(data_args): FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py function assert_codegen_run (line 19) | def assert_codegen_run( function run_test (line 89) | def run_test( FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_evoformer_block.py function get_model (line 20) | def get_model(): function get_data (line 44) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]: function get_chunk_target (line 60) | def get_chunk_target() -> Dict: function test_evoformer_block (line 88) | def test_evoformer_block(data_args, max_memory): FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_evoformer_stack.py function get_model (line 20) | def get_model(): function get_data (line 48) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]: function test_evoformer_stack (line 71) | def test_evoformer_stack(data_args, max_memory): FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_extramsa_block.py function get_model (line 19) | def get_model(): function get_data (line 44) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]: function test_extramsa_block (line 67) | def test_extramsa_block(data_args, max_memory): FILE: tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py function _benchmark_autochunk_unet_gm (line 20) | def _benchmark_autochunk_unet_gm( function _benchmark_autochunk_unet_origin (line 73) | def _benchmark_autochunk_unet_origin( function _benchmark_memory (line 98) | def _benchmark_memory(model, inputs): function _benchmark_speed (line 107) | def _benchmark_speed(model, inputs, loop=5): function benchmark_autochunk_unet (line 120) | def benchmark_autochunk_unet(batch=1, height=448, width=448): FILE: tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py function assert_codegen_run (line 18) | def assert_codegen_run( function run_test (line 95) | def run_test( FILE: tests/test_autochunk/test_autochunk_diffuser/test_autochunk_unet.py function get_data (line 31) | def get_data(shape: tuple) -> Tuple[List, List]: function test_evoformer_block (line 52) | def test_evoformer_block(model, shape, max_memory): FILE: tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py function _benchmark_autochunk_gpt_gm (line 20) | def _benchmark_autochunk_gpt_gm( function _benchmark_autochunk_gpt_origin (line 73) | def _benchmark_autochunk_gpt_origin( function _benchmark_memory (line 98) | def _benchmark_memory(model, inputs): function _benchmark_speed (line 107) | def _benchmark_speed(model, inputs, loop=5): function benchmark_autochunk_gpt (line 120) | def benchmark_autochunk_gpt(batch=1, seq=512, n_embd=768, n_head=12): FILE: tests/test_autochunk/test_autochunk_transformer/test_autochunk_gpt.py function get_data (line 24) | def get_data(shape: tuple) -> Tuple[List, List]: function test_autochunk_gpt (line 44) | def test_autochunk_gpt(model, shape, max_memory): FILE: tests/test_autochunk/test_autochunk_transformer/test_autochunk_transformer_utils.py function assert_codegen_run (line 17) | def assert_codegen_run( function assert_allclose (line 81) | def assert_allclose(out_model: Any, out_gm: Any) -> None: function run_test (line 97) | def run_test( FILE: tests/test_autochunk/test_autochunk_vit/test_autochunk_vit.py function get_data (line 21) | def get_data() -> Tuple[List, List]: function test_evoformer_block (line 34) | def test_evoformer_block(model, max_memory): FILE: tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py function assert_codegen_run (line 18) | def assert_codegen_run( function run_test (line 85) | def run_test( FILE: tests/test_booster/test_accelerator.py function test_accelerator (line 9) | def test_accelerator(device): FILE: tests/test_booster/test_mixed_precision/test_fp16_torch.py function run_torch_amp (line 10) | def run_torch_amp(rank, world_size, port): function test_torch_ddp_plugin (line 39) | def test_torch_ddp_plugin(): FILE: tests/test_booster/test_plugin/test_3d_plugin.py class RandomDataset (line 22) | class RandomDataset(Dataset): method __init__ (line 23) | def __init__(self, num_samples: int = 100, max_length: int = 512, voca... method __len__ (line 32) | def __len__(self): method __getitem__ (line 35) | def __getitem__(self, idx): function move_to_cuda (line 43) | def move_to_cuda(batch): function run_fn (line 48) | def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn) -> O... function check_3d_plugin (line 87) | def check_3d_plugin(init_method: str = "none", early_stop: bool = True): function run_grad_acc_test (line 195) | def run_grad_acc_test(test_args): function run_dist (line 268) | def run_dist(rank, world_size, port, early_stop: bool = True): function test_3d_plugin (line 276) | def test_3d_plugin(early_stop: bool = True): FILE: tests/test_booster/test_plugin/test_dp_plugin_base.py class DPPluginWrapper (line 17) | class DPPluginWrapper(DPPluginBase): method configure (line 20) | def configure( method control_checkpoint_io (line 30) | def control_checkpoint_io(self) -> bool: method control_device (line 33) | def control_device(self) -> bool: method control_precision (line 36) | def control_precision(self) -> bool: method get_checkpoint_io (line 39) | def get_checkpoint_io(self) -> CheckpointIO: method support_no_sync (line 42) | def support_no_sync(self) -> bool: method supported_devices (line 45) | def supported_devices(self) -> List[str]: method supported_precisions (line 48) | def supported_precisions(self) -> List[str]: method no_sync (line 51) | def no_sync(self, model: nn.Module) -> Iterator[None]: method enable_lora (line 54) | def enable_lora(self, model: nn.Module, pretrained_dir: str, lora_conf... method support_lora (line 57) | def support_lora(self) -> bool: function check_dataloader_sharding (line 61) | def check_dataloader_sharding(): function run_dist (line 86) | def run_dist(rank, world_size, port): function test_dp_plugin_dataloader (line 93) | def test_dp_plugin_dataloader(): FILE: tests/test_booster/test_plugin/test_gemini_plugin.py function run_fn (line 19) | def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero... function check_gemini_plugin (line 75) | def check_gemini_plugin( function run_dist (line 164) | def run_dist(rank, world_size, port, early_stop: bool = True): function test_gemini_plugin (line 171) | def test_gemini_plugin(early_stop: bool = True): FILE: tests/test_booster/test_plugin/test_low_level_zero_plugin.py function run_fn (line 26) | def run_fn(stage, model_fn, data_gen_fn, output_transform_fn, lora_confi... function check_low_level_zero_plugin (line 62) | def check_low_level_zero_plugin(stage: int, early_stop: bool = True): function check_low_level_zero_lora (line 103) | def check_low_level_zero_lora(stage, model_name, early_stop: bool = True): function run_dist (line 132) | def run_dist(rank, world_size, port, early_stop: bool = True): function test_low_level_zero_plugin (line 140) | def test_low_level_zero_plugin(early_stop: bool = True): FILE: tests/test_booster/test_plugin/test_torch_ddp_plugin.py function run_fn (line 18) | def run_fn(model_fn, data_gen_fn, output_transform_fn): function check_torch_ddp_plugin (line 43) | def check_torch_ddp_plugin(): class DummyModel (line 56) | class DummyModel(nn.Module): method __init__ (line 57) | def __init__(self): method forward (line 61) | def forward(self, x): function check_torch_ddp_no_sync (line 65) | def check_torch_ddp_no_sync(): function run_dist (line 110) | def run_dist(rank, world_size, port): function test_torch_ddp_plugin (line 118) | def test_torch_ddp_plugin(): FILE: tests/test_booster/test_plugin/test_torch_fsdp_plugin.py function run_fn (line 20) | def run_fn(model_fn, data_gen_fn, output_transform_fn): function check_torch_fsdp_plugin (line 51) | def check_torch_fsdp_plugin(): function run_dist (line 74) | def run_dist(rank, world_size, port): function test_torch_fsdp_plugin (line 82) | def test_torch_fsdp_plugin(): FILE: tests/test_checkpoint_io/test_gemini_checkpoint_io.py function exam_state_dict_with_origin (line 39) | def exam_state_dict_with_origin( function exam_state_dict (line 94) | def exam_state_dict( function exam_lazy_from_pretrained (line 193) | def exam_lazy_from_pretrained(): function run_dist (line 210) | def run_dist(rank, world_size, port): function test_gemini_ckpIO (line 219) | def test_gemini_ckpIO(): FILE: tests/test_checkpoint_io/test_gemini_torch_compability.py function exam_torch_load_from_gemini (line 24) | def exam_torch_load_from_gemini(shard: bool, model_name: str): function exam_gemini_load_from_torch (line 90) | def exam_gemini_load_from_torch(shard: bool, model_name: str): function run_dist (line 165) | def run_dist(rank, world_size, port): function test_gemini_ckpIO (line 174) | def test_gemini_ckpIO(world_size): FILE: tests/test_checkpoint_io/test_general_checkpoint_io.py function test_unsharded_checkpoint (line 23) | def test_unsharded_checkpoint(use_safetensors: bool, use_async: bool): function test_sharded_model_checkpoint (line 77) | def test_sharded_model_checkpoint(use_safetensors: bool, use_async: bool): function test_sharded_optimizer_checkpoint (line 117) | def test_sharded_optimizer_checkpoint(use_async: bool): function test_sharded_optimizer_multiple_param_groups (line 186) | def test_sharded_optimizer_multiple_param_groups(use_async: bool): FILE: tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py function exam_state_dict (line 48) | def exam_state_dict( function run_dist (line 146) | def run_dist(rank, world_size, port): function test_hybrid_ckpIO (line 154) | def test_hybrid_ckpIO(world_size): FILE: tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py function check_low_level_zero_checkpointIO (line 33) | def check_low_level_zero_checkpointIO(stage: int, shard: bool, offload: ... function run_fn (line 95) | def run_fn(stage, shard, offload, model_fn, data_gen_fn, output_transfor... function check_low_level_zero_lora_checkpointIO (line 159) | def check_low_level_zero_lora_checkpointIO( function run_dist (line 192) | def run_dist(rank, world_size, port): function test_low_level_zero_checkpointIO (line 201) | def test_low_level_zero_checkpointIO(): FILE: tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py function exam_from_pretrained (line 23) | def exam_from_pretrained(plugin_type: str, model_name: str, shard=True, ... function run_dist (line 70) | def run_dist(rank, world_size, port): function test_huggingface_compatibility (line 78) | def test_huggingface_compatibility(world_size): FILE: tests/test_checkpoint_io/test_safetensors_async_io.py function gen_optim_state_dict (line 13) | def gen_optim_state_dict(): function gen_model_state_dict (line 108) | def gen_model_state_dict(): function test_create_pin (line 118) | def test_create_pin(empty: bool, num_threads: int): function test_save_load (line 139) | def test_save_load(): FILE: tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py function check_torch_ddp_checkpointIO (line 19) | def check_torch_ddp_checkpointIO(shard: bool, size_per_shard: int, use_a... function run_dist (line 74) | def run_dist(rank, world_size, port): function test_torch_ddp_checkpointIO (line 80) | def test_torch_ddp_checkpointIO(): FILE: tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py function compare_nested_dict (line 18) | def compare_nested_dict(dict1, dict2): function check_torch_fsdp_ckpt (line 47) | def check_torch_fsdp_ckpt(use_async: bool): function run_dist (line 153) | def run_dist(rank, world_size, port): function test_torch_fsdp_ckpt (line 161) | def test_torch_fsdp_ckpt(): FILE: tests/test_checkpoint_io/utils.py function shared_tempdir (line 9) | def shared_tempdir() -> Iterator[str]: FILE: tests/test_cluster/test_device_mesh_manager.py function check_device_mesh_manager (line 7) | def check_device_mesh_manager(rank, world_size, port): function test_device_mesh_manager (line 28) | def test_device_mesh_manager(): FILE: tests/test_cluster/test_process_group_mesh.py function check_process_group_mesh_with_cases (line 9) | def check_process_group_mesh_with_cases(): function run_dist (line 127) | def run_dist(rank, world_size, port): function test_process_group_mesh (line 138) | def test_process_group_mesh(): FILE: tests/test_config/test_load_config.py function test_load_config (line 9) | def test_load_config(): FILE: tests/test_device/test_alpha_beta.py function check_alpha_beta (line 9) | def check_alpha_beta(rank, world_size, port, physical_devices): function test_profile_alpha_beta (line 22) | def test_profile_alpha_beta(physical_devices): FILE: tests/test_device/test_device_mesh.py function test_device_mesh (line 10) | def test_device_mesh(): function check_1d_device_mesh (line 23) | def check_1d_device_mesh(): function check_2d_device_mesh (line 39) | def check_2d_device_mesh(): function check_init_from_process_group (line 77) | def check_init_from_process_group(rank, world_size, port): function test_device_mesh_from_process_group (line 83) | def test_device_mesh_from_process_group(): FILE: tests/test_device/test_extract_alpha_beta.py function check_extract_alpha_beta (line 9) | def check_extract_alpha_beta(rank, world_size, port, physical_devices): function test_profile_alpha_beta (line 25) | def test_profile_alpha_beta(physical_devices): FILE: tests/test_device/test_init_logical_pg.py function check_layer (line 11) | def check_layer(rank, world_size, port): function test_logical_pg (line 32) | def test_logical_pg(): FILE: tests/test_device/test_search_logical_device_mesh.py function check_alpha_beta (line 9) | def check_alpha_beta(rank, world_size, port, physical_devices): function test_profile_alpha_beta (line 25) | def test_profile_alpha_beta(physical_devices): FILE: tests/test_fp8/test_all_to_all_single.py function check_all2all (line 16) | def check_all2all(shape, dtype, async_op): function check_all2all_uneven (line 32) | def check_all2all_uneven(shape, dtype, async_op): function run_dist (line 65) | def run_dist(rank, world_size, port): function test_all_to_all_single (line 72) | def test_all_to_all_single(): FILE: tests/test_fp8/test_fp8_all_to_all.py function check_4gpu (line 17) | def check_4gpu(shape, scatter_dim, dtype, fp8_format): function run_dist (line 29) | def run_dist(rank, world_size, port): function test_all_to_all (line 35) | def test_all_to_all(): FILE: tests/test_fp8/test_fp8_all_to_all_single.py function check_4gpu (line 18) | def check_4gpu(shape, dtype, fp8_format): function run_dist (line 27) | def run_dist(rank, world_size, port): function test_all_to_all_single (line 33) | def test_all_to_all_single(): FILE: tests/test_fp8/test_fp8_allgather.py function check_4gpu (line 20) | def check_4gpu(shape, dtype, fp8_format, async_op): function run_dist (line 35) | def run_dist(rank, world_size, port): function test_all_gather (line 41) | def test_all_gather(): FILE: tests/test_fp8/test_fp8_allreduce.py function check_4gpu (line 27) | def check_4gpu(shape, dtype, fp8_format, async_op): function run_dist (line 45) | def run_dist(rank, world_size, port): function test_all_reduce (line 51) | def test_all_reduce(): FILE: tests/test_fp8/test_fp8_cast.py function test_fp8_cast (line 13) | def test_fp8_cast(shape, dtype, fp8_format): FILE: tests/test_fp8/test_fp8_ddp_comm_hook.py function setup (line 14) | def setup(rank, world_size): function cleanup (line 22) | def cleanup(): class ToyModel (line 26) | class ToyModel(nn.Module): method __init__ (line 27) | def __init__(self): method forward (line 33) | def forward(self, x): function demo_basic (line 37) | def demo_basic(rank, world_size): function run_demo (line 79) | def run_demo(demo_fn, world_size): FILE: tests/test_fp8/test_fp8_fsdp_comm_hook.py function cleanup (line 16) | def cleanup(): class ToyModel (line 20) | class ToyModel(nn.Module): method __init__ (line 21) | def __init__(self): method forward (line 27) | def forward(self, x): function run_model (line 33) | def run_model(mode): function demo_basic (line 92) | def demo_basic(rank, world_size, port): function test_fsdp (line 101) | def test_fsdp(): FILE: tests/test_fp8/test_fp8_hook.py function new_linear_fp8 (line 17) | def new_linear_fp8(x, w, bias=None): class FP8TestHook (line 23) | class FP8TestHook(FP8Hook): method rewrite_op (line 24) | def rewrite_op(self, func): function test_fp8_hook (line 39) | def test_fp8_hook(): FILE: tests/test_fp8/test_fp8_linear.py function test_fp8_linear (line 18) | def test_fp8_linear(use_bias: bool, use_batch: bool): FILE: tests/test_fp8/test_fp8_reduce_scatter.py function check_4gpu (line 18) | def check_4gpu(shape, scatter_dim, dtype, fp8_format, async_op): function run_dist (line 34) | def run_dist(rank, world_size, port): function test_reduce_scatter (line 40) | def test_reduce_scatter(): FILE: tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py class MLP (line 23) | class MLP(torch.nn.Module): method __init__ (line 24) | def __init__(self): method forward (line 29) | def forward(self, x): class relu (line 33) | class relu(torch.nn.Module): method __init__ (line 34) | def __init__(self) -> None: method forward (line 38) | def forward(self, x): class MyModule (line 42) | class MyModule(torch.nn.Module): method __init__ (line 43) | def __init__(self): method ckpt2 (line 49) | def ckpt2(self, x): method ckpt3 (line 52) | def ckpt3(self, x, y): method forward (line 55) | def forward(self, x, y): function _run_act_ckpt_codegen (line 65) | def _run_act_ckpt_codegen(rank, world_size, port): function test_act_ckpt_codegen (line 124) | def test_act_ckpt_codegen(): function _run_act_ckpt_python_code_torch11 (line 128) | def _run_act_ckpt_python_code_torch11(rank, world_size, port): function test_act_ckpt_python_code_torch11 (line 186) | def test_act_ckpt_python_code_torch11(): FILE: tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py class MyModule (line 19) | class MyModule(torch.nn.Module): method __init__ (line 20) | def __init__(self): method forward (line 29) | def forward(self, x): function _run_act_ckpt_codegen (line 33) | def _run_act_ckpt_codegen(rank, world_size, port): function test_act_ckpt_codegen (line 93) | def test_act_ckpt_codegen(): function _run_act_ckpt_python_code_torch11 (line 97) | def _run_act_ckpt_python_code_torch11(rank, world_size, port): function test_act_ckpt_python_code_torch11 (line 159) | def test_act_ckpt_python_code_torch11(): FILE: tests/test_fx/test_codegen/test_offload_codegen.py class MyNet (line 24) | class MyNet(torch.nn.Module): method __init__ (line 25) | def __init__(self) -> None: method forward (line 35) | def forward(self, x): function _is_all_gradient_close (line 46) | def _is_all_gradient_close(m: torch.nn.Module, gm: GraphModule) -> bool: function _test_fwd_and_bwd (line 53) | def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data:... function _run_offload_codegen (line 67) | def _run_offload_codegen(rank, world_size, port): function test_act_ckpt_codegen (line 121) | def test_act_ckpt_codegen(): function _run_offload_codegen_torch11 (line 125) | def _run_offload_codegen_torch11(rank, world_size, port): function test_act_ckpt_python_code_torch11 (line 180) | def test_act_ckpt_python_code_torch11(): FILE: tests/test_fx/test_coloproxy.py class Conv1D (line 10) | class Conv1D(nn.Module): method __init__ (line 11) | def __init__(self, nf, nx): method forward (line 19) | def forward(self, x): function test_coloproxy (line 27) | def test_coloproxy(): FILE: tests/test_fx/test_comm_size_compute.py class MLP (line 19) | class MLP(torch.nn.Module): method __init__ (line 20) | def __init__(self, dim: int): method forward (line 27) | def forward(self, x): function test_comm_size_compute (line 36) | def test_comm_size_compute(): FILE: tests/test_fx/test_graph_manipulation.py class MLP (line 8) | class MLP(torch.nn.Module): method __init__ (line 9) | def __init__(self, dim: int): method forward (line 17) | def forward(self, x): function test_graph_manipulation (line 27) | def test_graph_manipulation(): FILE: tests/test_fx/test_meta/test_aten.py function compare_all (line 63) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor) -> Any: function run_and_compare (line 75) | def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requ... function test_meta_aten (line 88) | def test_meta_aten(): FILE: tests/test_fx/test_meta/test_backward.py function test_torchvision_models (line 44) | def test_torchvision_models(): function test_timm_models (line 53) | def test_timm_models(): FILE: tests/test_fx/test_meta/test_meta_trace.py function test_torchvision_models_trace (line 44) | def test_torchvision_models_trace(): function test_timm_models_trace (line 53) | def test_timm_models_trace(): FILE: tests/test_fx/test_meta_info_prop.py function meta_check (line 16) | def meta_check(meta_info_spec: TensorMetadata, orig_tensor: torch.Tensor): function test_meta_info_prop (line 24) | def test_meta_info_prop(): FILE: tests/test_fx/test_parallel_1d.py class MLP (line 15) | class MLP(torch.nn.Module): method __init__ (line 16) | def __init__(self, dim: int): method forward (line 23) | def forward(self, x): function check_layer (line 34) | def check_layer(rank, world_size, port): function test_1d (line 53) | def test_1d(): FILE: tests/test_fx/test_pipeline/test_hf_model/hf_utils.py function split_model_and_compare_output (line 17) | def split_model_and_compare_output(model, data_gen): FILE: tests/test_fx/test_pipeline/test_hf_model/test_albert.py function test_single_sentence_albert (line 11) | def test_single_sentence_albert(): FILE: tests/test_fx/test_pipeline/test_hf_model/test_bert.py function test_single_sentence_bert (line 11) | def test_single_sentence_bert(): FILE: tests/test_fx/test_pipeline/test_hf_model/test_gpt.py function test_gpt (line 13) | def test_gpt(): FILE: tests/test_fx/test_pipeline/test_hf_model/test_opt.py function test_opt (line 11) | def test_opt(): FILE: tests/test_fx/test_pipeline/test_hf_model/test_t5.py function test_t5 (line 11) | def test_t5(): FILE: tests/test_fx/test_pipeline/test_timm_model/test_timm.py function test_timm_models_without_control_flow (line 8) | def test_timm_models_without_control_flow(): function test_timm_models_with_control_flow (line 28) | def test_timm_models_with_control_flow(): FILE: tests/test_fx/test_pipeline/test_timm_model/timm_utils.py function split_model_and_compare_output (line 18) | def split_model_and_compare_output(model, data, meta_args=None): FILE: tests/test_fx/test_pipeline/test_topo/test_topo.py function test_opt (line 11) | def test_opt(): FILE: tests/test_fx/test_pipeline/test_topo/topo_utils.py class MLP (line 18) | class MLP(torch.nn.Module): method __init__ (line 19) | def __init__(self, config={}): method forward (line 28) | def forward(self, x): function split_model_and_get_DAG (line 34) | def split_model_and_get_DAG(model, data_gen): function check_input (line 62) | def check_input(top_module, input_partition: Partition): function check_submod (line 75) | def check_submod(top_module, part_id, mid_partition: Partition): function check_topo (line 92) | def check_topo(top_module, topo: Topo): FILE: tests/test_fx/test_pipeline/test_torchvision/test_torchvision.py function test_torchvision_models (line 23) | def test_torchvision_models(): FILE: tests/test_fx/test_pipeline_passes.py class MLP (line 17) | class MLP(torch.nn.Module): method __init__ (line 18) | def __init__(self, dim: int): method forward (line 25) | def forward(self, x): function pipeline_pass_test_helper (line 33) | def pipeline_pass_test_helper(model, data, pass_func): function test_pipeline_passes (line 43) | def test_pipeline_passes(): FILE: tests/test_fx/test_profiler/gpt_utils.py class GPTLMModel (line 5) | class GPTLMModel(nn.Module): method __init__ (line 6) | def __init__( method forward (line 30) | def forward(self, input_ids, attention_mask): class GPTLMLoss (line 35) | class GPTLMLoss(nn.Module): method __init__ (line 36) | def __init__(self): method forward (line 40) | def forward(self, logits, labels): function gpt2_medium (line 47) | def gpt2_medium(checkpoint=False): function gpt2_xl (line 51) | def gpt2_xl(checkpoint=False): FILE: tests/test_fx/test_profiler/test_profiler_meta_info_prop.py function extract_forward_mem (line 22) | def extract_forward_mem(gm: torch.fx.GraphModule): function extract_forward_flops (line 32) | def extract_forward_flops(gm: torch.fx.GraphModule): function gen_tm_data (line 41) | def gen_tm_data(batch_size: int, shape: Tuple[int, int, int], device="cu... function gen_gpt_data (line 47) | def gen_gpt_data(batch_size, seq_len, vocab_size, device="cpu"): function run_tm_forward (line 53) | def run_tm_forward(gm: torch.fx.GraphModule): function run_gpt_forward (line 91) | def run_gpt_forward(gm: torch.fx.GraphModule): function test_meta_info_prop (line 130) | def test_meta_info_prop(): function test_gpt_meta_info_prop (line 195) | def test_gpt_meta_info_prop(): FILE: tests/test_fx/test_tracer/test_activation_checkpoint_annotation.py class MLP (line 9) | class MLP(torch.nn.Module): method __init__ (line 10) | def __init__(self): method forward (line 15) | def forward(self, x): class MyModule (line 22) | class MyModule(torch.nn.Module): method __init__ (line 23) | def __init__(self): method forward (line 29) | def forward(self, x): function test_activation_checkpoint_annotation (line 37) | def test_activation_checkpoint_annotation(): FILE: tests/test_fx/test_tracer/test_bias_addition_module.py class LinearModel (line 7) | class LinearModel(torch.nn.Module): method __init__ (line 8) | def __init__(self, in_features, out_features): method forward (line 12) | def forward(self, x): class ConvModel (line 19) | class ConvModel(torch.nn.Module): method __init__ (line 20) | def __init__(self, in_channels, out_channels, kernel_size, bias=True): method forward (line 26) | def forward(self, x): function test_linear_module (line 34) | def test_linear_module(): function test_conv_module (line 71) | def test_conv_module(): FILE: tests/test_fx/test_tracer/test_control_flow.py class ControlFlowModel (line 9) | class ControlFlowModel(nn.Module): method __init__ (line 10) | def __init__(self): method forward (line 15) | def forward(self, x, y): function test_control_flow (line 26) | def test_control_flow(): FILE: tests/test_fx/test_tracer/test_functional_conv.py function test_conv (line 9) | def test_conv(): FILE: tests/test_fx/test_tracer/test_hf_model/hf_tracer_utils.py function trace_model_and_compare_output (line 9) | def trace_model_and_compare_output(model, data_gen, ignore_data: List[st... FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_albert.py function test_albert (line 15) | def test_albert(): FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_bert.py function test_bert (line 12) | def test_bert(): FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_diffuser.py function assert_dict (line 10) | def assert_dict(da, db, assert_fn): function trace_and_compare (line 20) | def trace_and_compare(model_cls, data, output_fn): function test_diffusers (line 45) | def test_diffusers(): function test_torch_diffusers (line 58) | def test_torch_diffusers(): FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_gpt.py function test_gpt (line 12) | def test_gpt(): FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_opt.py function test_opt (line 12) | def test_opt(): FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_t5.py function test_t5 (line 12) | def test_t5(): FILE: tests/test_fx/test_tracer/test_patched_module.py function _run (line 7) | def _run(data, module, patch_fn): function _assert_output_shape (line 20) | def _assert_output_shape(data, module, patch_fn, expect_exception, outpu... function test_linear (line 37) | def test_linear(): function test_rnn (line 49) | def test_rnn(): function test_embedding (line 66) | def test_embedding(): function test_conv1d (line 147) | def test_conv1d(): function test_conv2d (line 187) | def test_conv2d(): function test_conv3d (line 234) | def test_conv3d(): function test_conv_transpose1d (line 281) | def test_conv_transpose1d(): function test_conv_transpose2d (line 309) | def test_conv_transpose2d(): function test_conv_transpose3d (line 337) | def test_conv_transpose3d(): function test_pool1d (line 365) | def test_pool1d(): function test_pool2d (line 399) | def test_pool2d(): function test_pool3d (line 436) | def test_pool3d(): function test_adaptive_pooling_1d (line 474) | def test_adaptive_pooling_1d(): function test_adaptive_pooling_2d (line 495) | def test_adaptive_pooling_2d(): function test_adaptive_pooling_3d (line 516) | def test_adaptive_pooling_3d(): FILE: tests/test_fx/test_tracer/test_patched_op.py function _run (line 9) | def _run(data, patch_fn): function _assert_output_shape (line 17) | def _assert_output_shape(data, patch_fn, expect_exception, output_shape): function test_repeat_interleave (line 29) | def test_repeat_interleave(): function test_torch_max (line 67) | def test_torch_max(): FILE: tests/test_fx/test_tracer/test_timm_model/test_timm_model.py function trace_and_compare (line 10) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No... function test_timm_models (line 55) | def test_timm_models(): FILE: tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py function test_torchaudio_models (line 13) | def test_torchaudio_models(): FILE: tests/test_fx/test_tracer/test_torchaudio_model/torchaudio_utils.py function trace_and_compare (line 6) | def trace_and_compare(model, data_gen, output_transform_fn, need_meta=Fa... FILE: tests/test_fx/test_tracer/test_torchrec_model/test_deepfm_model.py function trace_and_compare (line 11) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No... function test_torchrec_deepfm_models (line 54) | def test_torchrec_deepfm_models(): FILE: tests/test_fx/test_tracer/test_torchrec_model/test_dlrm_model.py function trace_and_compare (line 11) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No... function test_torchrec_dlrm_models (line 54) | def test_torchrec_dlrm_models(): FILE: tests/test_fx/test_tracer/test_torchvision_model/test_torchvision_model.py function test_torchvision_models (line 9) | def test_torchvision_models(): FILE: tests/test_infer/_utils.py function build_model (line 6) | def build_model( function run_infer (line 29) | def run_infer(original_model, sharded_model, data_gen_fn, output_transfo... FILE: tests/test_infer/test_async_engine/test_async_engine.py class MockSequence (line 10) | class MockSequence: class MockEngine (line 14) | class MockEngine: method __init__ (line 15) | def __init__(self): method async_step (line 21) | async def async_step(self): method add_single_request (line 25) | def add_single_request(self, **kwargs): method generate (line 29) | def generate(self, request_id): method stop_generating (line 32) | def stop_generating(self): method add_request (line 35) | def add_request(self, **kwargs): method abort_request (line 39) | def abort_request(self, request_id): class MockAsyncInferenceEngine (line 44) | class MockAsyncInferenceEngine(AsyncInferenceEngine): method _init_engine (line 45) | def _init_engine(self, *args, **kwargs): function test_new_requests_event (line 50) | async def test_new_requests_event(): FILE: tests/test_infer/test_async_engine/test_request_tracer.py class SampleEvent (line 7) | class SampleEvent: method __init__ (line 8) | def __init__(self): method set (line 11) | def set(self): method clear (line 14) | def clear(self): function test_request_tracer (line 18) | def test_request_tracer(): FILE: tests/test_infer/test_batch_bucket.py function test_bucket (line 30) | def test_bucket(test_config): FILE: tests/test_infer/test_config_and_struct.py function check_config_and_inference (line 9) | def check_config_and_inference(): function run_dist (line 34) | def run_dist(rank, world_size, port): function test_config_and_inference (line 41) | def test_config_and_inference(): FILE: tests/test_infer/test_continuous_batching.py function setup_seed (line 14) | def setup_seed(seed): function generate_inputs (line 21) | def generate_inputs(num_sequences, min_length, max_length): function check_inference_engine (line 35) | def check_inference_engine(n_multiple, max_batch_size, max_input_len, ma... function run_dist (line 59) | def run_dist(rank, world_size, port): function test_continuous_batching (line 66) | def test_continuous_batching(): FILE: tests/test_infer/test_cuda_graph.py function setup_seed (line 14) | def setup_seed(seed): function check_inference_engine (line 21) | def check_inference_engine(use_cuda_graph=False, batch_size=32): function check_output_consistency (line 74) | def check_output_consistency(batch_size): function run_dist (line 82) | def run_dist(rank, world_size, port): function test_cuda_graph_infer (line 91) | def test_cuda_graph_infer(): FILE: tests/test_infer/test_drafter.py function tokenizer (line 15) | def tokenizer(): function test_drafter (line 20) | def test_drafter(tokenizer, spec_num: int): function test_spec_dec (line 45) | def test_spec_dec(tokenizer): FILE: tests/test_infer/test_inference_engine.py function setup_seed (line 18) | def setup_seed(seed): function check_inference_engine (line 26) | def check_inference_engine(use_engine=False, prompt_template=None, do_sa... function run_engine (line 88) | def run_engine(world_size, **kwargs): function check_spec_dec (line 96) | def check_spec_dec(num_layers, max_length): function run_dist (line 166) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs): function test_tp_engine (line 179) | def test_tp_engine(prompt_template, do_sample): function test_spec_dec (line 202) | def test_spec_dec(num_layers, max_length): FILE: tests/test_infer/test_kernels/cuda/test_convert_fp8.py function test_fp8_conversion (line 27) | def test_fp8_conversion( FILE: tests/test_infer/test_kernels/cuda/test_flash_decoding_attention.py function prepare_data (line 26) | def prepare_data( function numpy_allclose (line 52) | def numpy_allclose(x, y, rtol, atol): function test_flash_decoding_attention (line 67) | def test_flash_decoding_attention( function test_vllm_flash_decoding_attention (line 209) | def test_vllm_flash_decoding_attention( FILE: tests/test_infer/test_kernels/cuda/test_get_cos_and_sin.py function numpy_equal (line 11) | def numpy_equal(x, y): function test_get_cos_and_sin (line 22) | def test_get_cos_and_sin(BATCH_SIZE, MAX_SEQ_LEN, HEAD_DIM, dtype): FILE: tests/test_infer/test_kernels/cuda/test_kv_cache_memcpy.py function prepare_data (line 17) | def prepare_data( function run_decode_copy_kv_to_caches (line 46) | def run_decode_copy_kv_to_caches( function run_context_copy_kv_to_cache (line 101) | def run_context_copy_kv_to_cache( function test_kv_cache_memcopy (line 145) | def test_kv_cache_memcopy( FILE: tests/test_infer/test_kernels/cuda/test_rms_layernorm.py function test_rms_layernorm (line 13) | def test_rms_layernorm(M: int, N: int): FILE: tests/test_infer/test_kernels/cuda/test_rotary_embdding_unpad.py function numpy_allclose (line 14) | def numpy_allclose(x, y, rtol, atol): function test_rotary_emb (line 27) | def test_rotary_emb(BATCH_SIZE, SEQ_LEN, H, K_H, D, dtype): FILE: tests/test_infer/test_kernels/cuda/test_silu_and_mul.py function test_silu_and_mul (line 14) | def test_silu_and_mul(SHAPE_X, SHAPE_Y, SHAPE_Z, dtype): FILE: tests/test_infer/test_kernels/triton/kernel_utils.py function repeat_kv (line 10) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function create_attention_mask (line 22) | def create_attention_mask(kv_lengths: torch.Tensor, bsz: int, q_len: int... function torch_attn_ref (line 41) | def torch_attn_ref( function mock_alloc_block_table_and_kvcache (line 81) | def mock_alloc_block_table_and_kvcache( function mock_alloc_block_table_and_kvcache_v2 (line 115) | def mock_alloc_block_table_and_kvcache_v2( function mock_alloc_block_table_and_kvcache_v3 (line 149) | def mock_alloc_block_table_and_kvcache_v3( function mock_alloc_block_table_and_kvcache_vllm (line 193) | def mock_alloc_block_table_and_kvcache_vllm( function mock_alloc_single_token (line 238) | def mock_alloc_single_token(block_tables: torch.Tensor, context_lengths:... function generate_caches_and_block_tables (line 262) | def generate_caches_and_block_tables( function generate_caches_and_block_tables_v2 (line 278) | def generate_caches_and_block_tables_v2( function generate_caches_and_block_tables_v3 (line 294) | def generate_caches_and_block_tables_v3( function generate_caches_and_block_tables_vllm (line 314) | def generate_caches_and_block_tables_vllm( function convert_kv_unpad_to_padded (line 334) | def convert_kv_unpad_to_padded( FILE: tests/test_infer/test_kernels/triton/test_context_attn_unpad.py function _fill_with_neg_inf (line 27) | def _fill_with_neg_inf(t): function generate_alibi_mask (line 32) | def generate_alibi_mask(slopes, num_heads, max_seq_len, device): function torch_attn_unpad (line 44) | def torch_attn_unpad( function test_context_attention (line 97) | def test_context_attention( FILE: tests/test_infer/test_kernels/triton/test_decoding_attn.py function numpy_allclose (line 31) | def numpy_allclose(x, y, rtol, atol): function prepare_data (line 38) | def prepare_data( function test_flash_decoding (line 80) | def test_flash_decoding( FILE: tests/test_infer/test_kernels/triton/test_fused_rotary_embedding.py function test_fused_rotary_emb (line 24) | def test_fused_rotary_emb(): FILE: tests/test_infer/test_kernels/triton/test_kvcache_copy.py function prepare_data (line 26) | def prepare_data( function test_copy_kv_to_caches (line 80) | def test_copy_kv_to_caches( FILE: tests/test_infer/test_kernels/triton/test_rmsnorm_triton.py function test_layer_norm (line 25) | def test_layer_norm(M, N): FILE: tests/test_infer/test_kernels/triton/test_rotary_embdding_unpad.py function torch_rotary_emb (line 23) | def torch_rotary_emb(x, cos, sin): function test_rotary_emb (line 43) | def test_rotary_emb(BATCH_SIZE, SEQ_LEN, H, D, dtype, use_new_kcache_lay... FILE: tests/test_infer/test_kernels/triton/test_xine_copy.py function get_cos_sin (line 19) | def get_cos_sin(lengths, cos_cache, sin_cache, is_prompts, dtype): function test_get_xine_cache (line 48) | def test_get_xine_cache(BATCH_SIZE, MAX_SEQ_LEN, HEAD_DIM, dtype): FILE: tests/test_infer/test_kvcache_manager.py function test_logical_blocks (line 23) | def test_logical_blocks(test_config): function check_cache_manager (line 69) | def check_cache_manager(test_config): function run_dist (line 166) | def run_dist(rank, world_size, port): function test_cache_manager (line 173) | def test_cache_manager(): FILE: tests/test_infer/test_models/test_attention.py function test_copy_to_cache (line 12) | def test_copy_to_cache(): function test_convert_kvcache (line 30) | def test_convert_kvcache(): function test_context_attention (line 41) | def test_context_attention(): function test_decoding_attention (line 94) | def test_decoding_attention(): FILE: tests/test_infer/test_models/test_baichuan.py function setup_seed (line 20) | def setup_seed(seed): function check_inference_engine (line 28) | def check_inference_engine(use_engine=False, do_sample=False, use_cuda_k... function run_engine (line 80) | def run_engine(world_size, **kwargs): function run_dist (line 88) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs): function check_tp_engine (line 101) | def check_tp_engine(prompt_template, do_sample, use_cuda_kernel): function test_inference_engine (line 133) | def test_inference_engine(): FILE: tests/test_infer/test_models/test_custom_model.py function test_model (line 43) | def test_model(model, prompt_template, do_sample, use_cuda_kernel): function run_engine (line 82) | def run_engine(world_size, **kwargs): function run_dist (line 89) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs): function _run_engine (line 98) | def _run_engine(model, use_engine=False, do_sample=False, use_cuda_kerne... function setup_seed (line 152) | def setup_seed(seed): FILE: tests/test_infer/test_request_handler.py function check_running_list (line 11) | def check_running_list(): function check_request_handler (line 56) | def check_request_handler(): function run_dist (line 92) | def run_dist(rank, world_size, port): function test_running_list_and_request_handler (line 100) | def test_running_list_and_request_handler(): FILE: tests/test_infer/test_rpc_engine.py function setup_seed (line 14) | def setup_seed(seed): function check_inference_engine (line 22) | def check_inference_engine(tp_size, use_engine=False, prompt_template=No... function run_engine (line 74) | def run_engine(tp_size, **kwargs): function test_tp_engine (line 84) | def test_tp_engine(prompt_template, do_sample): FILE: tests/test_infer/test_streamingllm.py function data_gen (line 14) | def data_gen(batch_size: int = 4, seq_len: int = 512): function setup_seed (line 19) | def setup_seed(seed): function check_streamingllm (line 27) | def check_streamingllm(): function run_dist (line 103) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs): function test_engine (line 113) | def test_engine(): FILE: tests/test_lazy/lazy_init_utils.py function set_seed (line 21) | def set_seed(seed: int) -> None: function assert_model_equal (line 27) | def assert_model_equal(m1: torch.nn.Module, m2: torch.nn.Module) -> None: function assert_forward_equal (line 41) | def assert_forward_equal( function check_lazy_init (line 69) | def check_lazy_init( function assert_dist_model_equal (line 93) | def assert_dist_model_equal( FILE: tests/test_lazy/test_from_pretrained.py function test_lazy_from_pretrained (line 9) | def test_lazy_from_pretrained(): FILE: tests/test_lazy/test_models.py function test_models_lazy_init (line 17) | def test_models_lazy_init(subset, default_device): FILE: tests/test_lazy/test_ops.py function test_lazy_ops (line 13) | def test_lazy_ops(): FILE: tests/test_legacy/test_amp/test_naive_fp16.py function check_equal (line 12) | def check_equal(a, b): function run_naive_amp (line 19) | def run_naive_amp(): function run_dist (line 79) | def run_dist(rank, world_size, port): function test_naive_amp (line 87) | def test_naive_amp(): FILE: tests/test_legacy/test_amp/test_torch_fp16.py function run_torch_amp (line 12) | def run_torch_amp(): function run_dist (line 78) | def run_dist(rank, world_size, port): function test_torch_amp (line 86) | def test_torch_amp(): FILE: tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py function check_layer (line 17) | def check_layer(rank, world_size, port): function test_object_list_p2p (line 50) | def test_object_list_p2p(): FILE: tests/test_legacy/test_comm/test_comm.py function check_all_gather (line 17) | def check_all_gather(): function check_reduce_scatter (line 28) | def check_reduce_scatter(): function check_all_reduce (line 39) | def check_all_reduce(): function check_layer (line 50) | def check_layer(rank, world_size, port): function test_comm (line 66) | def test_comm(): FILE: tests/test_legacy/test_comm/test_object_list_p2p.py function check_send_recv_forward (line 28) | def check_send_recv_forward(): function check_send_recv_backward (line 48) | def check_send_recv_backward(): function check_send_recv_forward_backward (line 68) | def check_send_recv_forward_backward(): function check_layer (line 90) | def check_layer(rank, world_size, port): function test_object_list_p2p (line 101) | def test_object_list_p2p(): FILE: tests/test_legacy/test_comm/test_object_list_p2p_v2.py function check_send_recv_forward (line 30) | def check_send_recv_forward(): function check_send_recv_backward (line 60) | def check_send_recv_backward(): function check_small_pipeline (line 85) | def check_small_pipeline(): function check_layer (line 105) | def check_layer(rank, world_size, port): function test_object_list_p2p (line 119) | def test_object_list_p2p(): FILE: tests/test_legacy/test_context/test_hybrid_parallel.py function check_data_parallel_rank (line 18) | def check_data_parallel_rank(rank): function check_pipeline_parallel_rank (line 32) | def check_pipeline_parallel_rank(rank): function check_model_parallel_rank (line 44) | def check_model_parallel_rank(rank): function check_tensor_parallel_rank (line 51) | def check_tensor_parallel_rank(rank): function get_tp_info (line 60) | def get_tp_info(): function check_2d_tensor_parallel_rank (line 68) | def check_2d_tensor_parallel_rank(rank): function check_2p5d_tensor_parallel_rank (line 82) | def check_2p5d_tensor_parallel_rank(rank): function check_3d_tensor_parallel_rank (line 100) | def check_3d_tensor_parallel_rank(rank): function init_context (line 116) | def init_context(config_path, rank, world_size, backend, port, host): function run_dist (line 130) | def run_dist(rank, world_size, port, backend, port_list, host): function test_context (line 139) | def test_context(): FILE: tests/test_legacy/test_data/test_cifar10_dataset.py function test_cifar10_dataset (line 11) | def test_cifar10_dataset(): FILE: tests/test_legacy/test_data/test_data_parallel_sampler.py function run_data_sampler (line 29) | def run_data_sampler(rank, world_size, port): function test_data_sampler (line 60) | def test_data_sampler(): FILE: tests/test_legacy/test_data/test_deterministic_dataloader.py function run_data_sampler (line 38) | def run_data_sampler(rank, world_size, port): function test_data_sampler (line 70) | def test_data_sampler(): FILE: tests/test_legacy/test_engine/test_engine.py function run_train (line 17) | def run_train(model_name, amp_mode): function run_engine (line 52) | def run_engine(rank, world_size, port): function test_engine (line 62) | def test_engine(): FILE: tests/test_legacy/test_engine/test_gradient_accumluation.py function run_no_pipeline (line 27) | def run_no_pipeline(rank, world_size, port): function test_engine (line 90) | def test_engine(): FILE: tests/test_legacy/test_layers/test_1d/checks_1d/check_layer_1d.py function check_linear_col (line 24) | def check_linear_col(): function check_linear_row (line 93) | def check_linear_row(): function check_embed (line 161) | def check_embed(): function check_vocab_parallel_embed (line 203) | def check_vocab_parallel_embed(): function check_classifier_no_given_weight (line 245) | def check_classifier_no_given_weight(): function check_vocab_parallel_classifier_no_given_weight (line 311) | def check_vocab_parallel_classifier_no_given_weight(): function check_classifier_given_embed_weight (line 371) | def check_classifier_given_embed_weight(): function check_vocab_parallel_classifier_given_embed_weight (line 422) | def check_vocab_parallel_classifier_given_embed_weight(): function check_vocab_parallel_loss (line 474) | def check_vocab_parallel_loss(): function check_linear_row_stream_inference (line 510) | def check_linear_row_stream_inference(): FILE: tests/test_legacy/test_layers/test_1d/checks_1d/common.py function check_equal (line 15) | def check_equal(A, B): FILE: tests/test_legacy/test_layers/test_1d/test_1d.py function check_layer (line 18) | def check_layer(rank, world_size, port): function test_1d (line 40) | def test_1d(): FILE: tests/test_legacy/test_layers/test_2d/checks_2d/check_layer_2d.py function check_linear (line 24) | def check_linear(): function check_layernorm (line 105) | def check_layernorm(): function check_embed (line 156) | def check_embed(): function check_patch_embed (line 203) | def check_patch_embed(): function check_vocab_parallel_embed (line 276) | def check_vocab_parallel_embed(): function check_classifier_no_given_weight (line 323) | def check_classifier_no_given_weight(): function check_vocab_parallel_classifier_no_given_weight (line 401) | def check_vocab_parallel_classifier_no_given_weight(): function check_classifier_given_embed_weight (line 469) | def check_classifier_given_embed_weight(): function check_vocab_parallel_classifier_given_embed_weight (line 521) | def check_vocab_parallel_classifier_given_embed_weight(): function check_loss (line 575) | def check_loss(): function check_vocab_parallel_loss (line 610) | def check_vocab_parallel_loss(): FILE: tests/test_legacy/test_layers/test_2d/checks_2d/check_operation_2d.py function check_AB (line 15) | def check_AB(): function check_ABT (line 97) | def check_ABT(): function check_ATB (line 177) | def check_ATB(): FILE: tests/test_legacy/test_layers/test_2d/checks_2d/common.py function check_equal (line 15) | def check_equal(A, B): FILE: tests/test_legacy/test_layers/test_2d/test_2d.py function check_operations (line 31) | def check_operations(): function check_layer (line 37) | def check_layer(): function check_layer_and_operation (line 51) | def check_layer_and_operation(rank, world_size, port): function test_2d (line 66) | def test_2d(): FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py function check_linear (line 25) | def check_linear(): function check_layernorm (line 106) | def check_layernorm(): function check_embed (line 158) | def check_embed(): function check_patch_embed (line 206) | def check_patch_embed(): function check_vocab_parallel_embed (line 280) | def check_vocab_parallel_embed(): function check_classifier_no_given_weight (line 328) | def check_classifier_no_given_weight(): function check_vocab_parallel_classifier_no_given_weight (line 407) | def check_vocab_parallel_classifier_no_given_weight(): function check_classifier_given_embed_weight (line 474) | def check_classifier_given_embed_weight(): function check_vocab_parallel_classifier_given_embed_weight (line 526) | def check_vocab_parallel_classifier_given_embed_weight(): function check_loss (line 580) | def check_loss(): function check_vocab_parallel_loss (line 615) | def check_vocab_parallel_loss(): FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py function check_AB (line 12) | def check_AB(): function check_ABT (line 95) | def check_ABT(): function check_ATB (line 177) | def check_ATB(): FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/common.py function check_equal (line 13) | def check_equal(A, B): FILE: tests/test_legacy/test_layers/test_2p5d/test_2p5d.py function check_operations (line 19) | def check_operations(): function check_layer (line 25) | def check_layer(): function check_layer_and_operation (line 39) | def check_layer_and_operation(rank, world_size, port): function test_2p5d (line 54) | def test_2p5d(): FILE: tests/test_legacy/test_layers/test_3d/checks_3d/check_layer_3d.py function check_linear (line 31) | def check_linear(): function check_layernorm (line 119) | def check_layernorm(): function check_classifier_no_given_weight (line 206) | def check_classifier_no_given_weight(): function check_vocab_parallel_classifier_no_given_weight (line 306) | def check_vocab_parallel_classifier_no_given_weight(): function check_classifier_given_embed_weight (line 413) | def check_classifier_given_embed_weight(): function check_vocab_parallel_classifier_given_embed_weight (line 497) | def check_vocab_parallel_classifier_given_embed_weight(): function check_patch_embed (line 581) | def check_patch_embed(): function check_embed (line 679) | def check_embed(): function check_vocab_parallel_embed (line 747) | def check_vocab_parallel_embed(): function check_loss (line 823) | def check_loss(): function check_vocab_parallel_loss (line 876) | def check_vocab_parallel_loss(): FILE: tests/test_legacy/test_layers/test_3d/checks_3d/common.py function check_equal (line 16) | def check_equal(A, B): FILE: tests/test_legacy/test_layers/test_3d/test_3d.py function check_layer (line 32) | def check_layer(): function check_layer_and_operation (line 45) | def check_layer_and_operation(rank, world_size, port): function test_3d (line 59) | def test_3d(): FILE: tests/test_legacy/test_layers/test_cache_embedding.py function set_seed (line 25) | def set_seed(seed): function synthesize_1d_sparse_feature (line 34) | def synthesize_1d_sparse_feature( function test_cachemgr (line 59) | def test_cachemgr(): function test_reorder_with_freq (line 88) | def test_reorder_with_freq(): function test_freq_aware_embed (line 119) | def test_freq_aware_embed(use_LFU: bool): function test_lfu_strategy (line 168) | def test_lfu_strategy(init_freq: bool): function gather_tensor (line 215) | def gather_tensor(tensor, rank, world_size): function run_parallel_freq_aware_embed_tablewise (line 224) | def run_parallel_freq_aware_embed_tablewise(rank, world_size): function run_parallel_freq_aware_embed_columnwise (line 310) | def run_parallel_freq_aware_embed_columnwise(rank, world_size): function run_dist (line 380) | def run_dist(rank, world_size, port): function test_parallel_freq_aware_embed (line 389) | def test_parallel_freq_aware_embed(world_size): FILE: tests/test_legacy/test_layers/test_sequence/checks_seq/check_layer_seq.py function check_selfattention (line 9) | def check_selfattention(): FILE: tests/test_legacy/test_layers/test_sequence/test_sequence.py function check_ring_qk (line 14) | def check_ring_qk(rank, world_size): function check_ring_av (line 66) | def check_ring_av(rank, world_size): function run_test (line 120) | def run_test(rank, world_size, port): function test_sequence (line 132) | def test_sequence(): FILE: tests/test_legacy/test_moe/moe_utils.py function delete_moe_info (line 15) | def delete_moe_info(model): class MoeModel (line 21) | class MoeModel(nn.Module): method __init__ (line 22) | def __init__(self, ep_group: ProcessGroup = None): method forward (line 29) | def forward(self, x): class MoeGradientHandler (line 37) | class MoeGradientHandler(BaseGradientHandler): method __init__ (line 49) | def __init__(self, model, optimizer=None): method handle_gradient (line 52) | def handle_gradient(self): function assert_not_equal_in_group (line 73) | def assert_not_equal_in_group(tensor, process_group=None): function run_fwd_bwd (line 88) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas... function sync_local_from_ep (line 105) | def sync_local_from_ep(local_model, ep_model, assert_grad_flag: bool = F... FILE: tests/test_legacy/test_moe/test_grad_handler.py function run_test (line 18) | def run_test(rank, world_size, port): function test_grad_handler (line 76) | def test_grad_handler(): FILE: tests/test_legacy/test_moe/test_moe_group.py function run_moe_init (line 17) | def run_moe_init(expert_parallel): function _run_test (line 62) | def _run_test(rank, world_size, port, expert_parallel): function test_moe_initialization (line 77) | def test_moe_initialization(expert_parallel): FILE: tests/test_legacy/test_moe/test_moe_hybrid_zero.py function run_fwd_bwd (line 15) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas... function run_zero_optim_test (line 32) | def run_zero_optim_test(local_rank, world_size, stage=1): function run_dist (line 83) | def run_dist(rank, world_size, port): function test_moe_zero_optim (line 93) | def test_moe_zero_optim(world_size): FILE: tests/test_legacy/test_moe/test_moe_load_balance.py function split_ddp_grad (line 17) | def split_ddp_grad(grad, world_size): function run_fwd_bwd (line 27) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas... function run_zero_optim_test (line 44) | def run_zero_optim_test(local_rank, world_size, stage=1): function run_hybrid_zero_optim_test (line 100) | def run_hybrid_zero_optim_test(local_rank, world_size, stage=1): function run_dist (line 166) | def run_dist(rank, world_size, port): function test_moe_load_balance (line 184) | def test_moe_load_balance(world_size): FILE: tests/test_legacy/test_pipeline/rpc_test_utils.py function color_debug (line 18) | def color_debug(text, prefix=" ", color="blue"): class MLP (line 23) | class MLP(nn.Module): method __init__ (line 24) | def __init__(self, dim: int, layers: int): method forward (line 31) | def forward(self, x): class DAG_MLP (line 37) | class DAG_MLP(nn.Module): method __init__ (line 38) | def __init__(self, dim: int, layers: int): method forward (line 46) | def forward(self, x, y): class RpcTestModel (line 53) | class RpcTestModel(nn.Module): method __init__ (line 54) | def __init__(self, stage_id, actual_stage_num, feat_num, h) -> None: method forward (line 69) | def forward(self, x) -> torch.Tensor: function parse_args (line 78) | def parse_args(): function pg_parse_args (line 96) | def pg_parse_args(): function run_worker (line 109) | def run_worker(rank, args, master_func): function rpc_run (line 144) | def rpc_run(args, master_func): FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_chimera.py function partition (line 13) | def partition(pp_rank: int, chunk: int, stage_num: int): function run_master (line 19) | def run_master(args): FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_optimizer.py function partition (line 14) | def partition(pp_rank: int, chunk: int, stage_num: int): function run_master (line 20) | def run_master(args): FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_pipeline.py function partition (line 11) | def partition(pp_rank: int, chunk: int, stage_num: int): function run_master (line 17) | def run_master(args): FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_value_correctness.py function partition (line 12) | def partition(pp_rank: int, chunk: int, stage_num: int): function run_master (line 18) | def run_master(args): FILE: tests/test_legacy/test_pipeline/test_middleware_1f1b.py function create_partition_module (line 25) | def create_partition_module(pp_rank: int, stage_num: int, model, data_kw... function partition (line 40) | def partition(model, data_kwargs: dict, pp_rank: int, chunk: int, stage_... function run_master (line 46) | def run_master(model_cls, world_size, forward_only): function run_worker (line 103) | def run_worker(rank, world_size, port, model_cls, forward_only, master_f... function test_pp_middleware_fwd (line 129) | def test_pp_middleware_fwd(model_cls, forward_only): FILE: tests/test_legacy/test_pipeline/test_pipelinable.py class MLP (line 11) | class MLP(torch.nn.Module): method __init__ (line 12) | def __init__(self, dim: int = 256): method forward (line 20) | def forward(self, x): function run_pipelinable (line 28) | def run_pipelinable(rank, world_size, port): function test_pipelinable (line 53) | def test_pipelinable(): FILE: tests/test_legacy/test_pipeline/test_pipeline_process_group.py function run_worker (line 12) | def run_worker(rank, args): FILE: tests/test_legacy/test_tensor/common_utils/_utils.py function set_seed (line 14) | def set_seed(seed): function check_equal (line 24) | def check_equal(A, B): function replace_parameter_add_grad (line 28) | def replace_parameter_add_grad(layer, weight=None, bias=None): function broadcast_tensor_chunk (line 39) | def broadcast_tensor_chunk(tensor, chunk_size=1, local_rank=0): function tensor_equal (line 45) | def tensor_equal(t_a: torch.Tensor, t_b: torch.Tensor, rtol: float = 1e-... function tensor_shard_equal (line 50) | def tensor_shard_equal( function split_param_single_dim_tp1d (line 70) | def split_param_single_dim_tp1d(dim, param, pg): function split_param_row_tp1d (line 77) | def split_param_row_tp1d(param, pg): function split_param_col_tp1d (line 81) | def split_param_col_tp1d(param, pg): function debug_print (line 85) | def debug_print(ranks, *args): FILE: tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py function run (line 12) | def run(): function check_mem (line 34) | def check_mem(): function run_dist (line 50) | def run_dist(rank, world_size, port): function test_dist_spec_mgr (line 59) | def test_dist_spec_mgr(world_size): FILE: tests/test_legacy/test_tensor/test_parameter.py function test_multiinheritance (line 11) | def test_multiinheritance(): FILE: tests/test_legacy/test_trainer/test_pipeline/test_p2p.py function check_equal (line 30) | def check_equal(A, B): function check_forward (line 34) | def check_forward(output_tensor, rank, logger): function check_backward (line 46) | def check_backward(output_grad, rank, logger): function check_forward_backward (line 58) | def check_forward_backward(output_tensor, output_grad, rank, logger): function check_comm (line 74) | def check_comm(size, rank, prev_rank, next_rank, logger): function run_check (line 88) | def run_check(rank, world_size, port): function test_p2p (line 104) | def test_p2p(): FILE: tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py function run_schedule (line 25) | def run_schedule(rank, world_size, port): function test_pipeline_schedule (line 84) | def test_pipeline_schedule(): FILE: tests/test_legacy/test_trainer/test_trainer_with_non_pipe_schedule.py function run_trainer (line 20) | def run_trainer(model_name): function run_dist (line 50) | def run_dist(rank, world_size, port): function test_trainer_no_pipeline (line 58) | def test_trainer_no_pipeline(): FILE: tests/test_legacy/test_trainer/test_trainer_with_pipe_schedule.py function run_trainer_with_pipeline (line 31) | def run_trainer_with_pipeline(rank, world_size, port): function test_trainer_with_pipeline (line 91) | def test_trainer_with_pipeline(): FILE: tests/test_legacy/test_utils/test_activation_checkpointing.py function forward (line 13) | def forward(x, weight): function forward_inplace_ckpt (line 20) | def forward_inplace_ckpt(x, weight, cpu_offload=False): function forward_inplace (line 33) | def forward_inplace(x, weight): function test_activation_checkpointing (line 45) | def test_activation_checkpointing(cpu_offload, use_reentrant): FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py function build_pipeline (line 20) | def build_pipeline(model): function check_equal (line 36) | def check_equal(A, B): function check_checkpoint_1d (line 40) | def check_checkpoint_1d(rank, world_size, port): function test_checkpoint_1d (line 74) | def test_checkpoint_1d(): FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py function build_pipeline (line 20) | def build_pipeline(model): function check_equal (line 36) | def check_equal(A, B): function check_checkpoint_2d (line 40) | def check_checkpoint_2d(rank, world_size, port): function test_checkpoint_2d (line 74) | def test_checkpoint_2d(): FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py function build_pipeline (line 20) | def build_pipeline(model): function check_equal (line 36) | def check_equal(A, B): function check_checkpoint_2p5d (line 40) | def check_checkpoint_2p5d(rank, world_size, port): function test_checkpoint_2p5d (line 74) | def test_checkpoint_2p5d(): FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py function build_pipeline (line 20) | def build_pipeline(model): function check_equal (line 36) | def check_equal(A, B): function check_checkpoint_3d (line 40) | def check_checkpoint_3d(rank, world_size, port): function test_checkpoint_3d (line 74) | def test_checkpoint_3d(): FILE: tests/test_legacy/test_utils/test_memory.py function _run_colo_set_process_memory_fraction_and_colo_device_memory_capacity (line 9) | def _run_colo_set_process_memory_fraction_and_colo_device_memory_capacit... function run_dist (line 16) | def run_dist(rank, world_size, port): function test_memory_utils (line 23) | def test_memory_utils(world_size): FILE: tests/test_legacy/test_utils/test_norm_gradient_clipping.py function close (line 15) | def close(num: float, other: float, rtol: float = 1e-5, atol: float = 1e... function shard_param (line 19) | def shard_param(p: ColoParameter) -> None: function check_grad_equal (line 25) | def check_grad_equal(p: Parameter, colo_p: ColoParameter) -> None: function run_grad_clip_norm (line 37) | def run_grad_clip_norm(world_size: int, dtype: torch.dtype, device: str,... function run_dist (line 63) | def run_dist(rank, world_size, port): function test_zero_clip_grad (line 73) | def test_zero_clip_grad(world_size: int): FILE: tests/test_legacy/test_zero/test_commons.py function run_tensor_move (line 9) | def run_tensor_move(rank, world_size, port): function test_tensor_move (line 36) | def test_tensor_move(): FILE: tests/test_lora/test_lora.py function check_fwd_bwd (line 20) | def check_fwd_bwd(model_fn, data_gen_fn, output_transform_fn, loss_fn, t... function run_lora_test (line 90) | def run_lora_test(): function run_dist (line 101) | def run_dist(rank, world_size, port): function test_torch_ddp_lora (line 107) | def test_torch_ddp_lora(): FILE: tests/test_moe/moe_utils.py function assert_loose_close (line 12) | def assert_loose_close(a, b, dtype: torch.dtype = torch.float32, name=""): function loose_close (line 16) | def loose_close(a, b, dtype: torch.dtype = torch.float32): function check_model_equal (line 36) | def check_model_equal(model1, model2, dtype): function distributed_debug_mode (line 43) | def distributed_debug_mode(num_stacks: int = 1, funcs_to_patch: Optional... FILE: tests/test_moe/test_deepseek_layer.py function check_deepseek_moe_layer (line 19) | def check_deepseek_moe_layer(): function run_dist (line 66) | def run_dist(rank: int, world_size: int, port: int): function test_deepseek_moe_layer (line 73) | def test_deepseek_moe_layer(world_size: int): FILE: tests/test_moe/test_kernel.py function check_equal (line 16) | def check_equal(tensor_a, tensor_b, atol=1e-06): function run_moe_cumsum (line 20) | def run_moe_cumsum(): function run_moe_dispatch_combine_fwd_bwd (line 36) | def run_moe_dispatch_combine_fwd_bwd(data_type=torch.float32, hidden_siz... function test_moe_kernel (line 91) | def test_moe_kernel(data_type): FILE: tests/test_moe/test_mixtral_layer.py function check_mixtral_moe_layer (line 20) | def check_mixtral_moe_layer(): function run_dist (line 61) | def run_dist(rank: int, world_size: int, port: int): function test_mixtral_moe_layer (line 68) | def test_mixtral_moe_layer(world_size: int): FILE: tests/test_moe/test_moe_checkpoint.py function get_optimizer_snapshot (line 26) | def get_optimizer_snapshot(optim): function check_optimizer_snapshot_equal (line 42) | def check_optimizer_snapshot_equal(snapshot1, snapshot2, param2name, moe... function check_moe_checkpoint (line 90) | def check_moe_checkpoint(test_config): function run_dist (line 163) | def run_dist(rank: int, world_size: int, port: int): function test_mixtral_moe_layer (line 170) | def test_mixtral_moe_layer(world_size: int): FILE: tests/test_moe/test_moe_ep_tp.py function run_zero_with_original_model (line 26) | def run_zero_with_original_model(stage: int, ep_size: int): function run_dist (line 118) | def run_dist(rank, world_size, port): function test_moe_ep_tp (line 127) | def test_moe_ep_tp(world_size): FILE: tests/test_moe/test_moe_ep_zero.py function run_zero_with_original_model (line 26) | def run_zero_with_original_model(stage: int, ep_size: int): function run_dist (line 105) | def run_dist(rank, world_size, port): function test_moe_ep_zero (line 114) | def test_moe_ep_zero(world_size): FILE: tests/test_optimizer/_utils.py function force_assign_grad (line 21) | def force_assign_grad(p, g_dtype, grad=None): function setup_param_groups (line 29) | def setup_param_groups(model: nn.Module) -> list: function setup_flatten_param_groups_sharding_spec_shape (line 45) | def setup_flatten_param_groups_sharding_spec_shape(model: nn.Module) -> ... function set_master_param_to_shard_param (line 62) | def set_master_param_to_shard_param(master_param_list) -> dict: function set_dist_grad (line 67) | def set_dist_grad( function check_optim_states (line 103) | def check_optim_states(org_optim, sharded_optim): function check_bert_fwd_bwd (line 112) | def check_bert_fwd_bwd( function run_bert_test (line 200) | def run_bert_test(test_config, optim_class, sharded_optim_class): function _run_bert_test (line 221) | def _run_bert_test(rank, world_size, port, optim_class, sharded_optim_cl... function check_optim_on_bert (line 226) | def check_optim_on_bert(optim_class, sharded_optim_class): function check_dist_optim_state (line 230) | def check_dist_optim_state(org_optimizer, sharded_optimizer): function check_dist_param (line 307) | def check_dist_param(org_model, sharded_model, weight_layer_for_check, a... function check_dist_grad (line 315) | def check_dist_grad(sharded_optimizer, org_model, sharded_model, weight_... FILE: tests/test_optimizer/test_adam_kernel.py class AdamKernel (line 29) | class AdamKernel: method __init__ (line 30) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ... method update (line 39) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens... class TorchAdamKernel (line 43) | class TorchAdamKernel(AdamKernel): method update (line 44) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens... class FusedAdamKernel (line 65) | class FusedAdamKernel(AdamKernel): method __init__ (line 66) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ... method update (line 74) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens... class CPUAdamKernel (line 91) | class CPUAdamKernel(AdamKernel): method __init__ (line 92) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ... method update (line 100) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens... function check_adam_kernel (line 117) | def check_adam_kernel( function test_fused_adam_kernel (line 153) | def test_fused_adam_kernel(adamw, weight_decay, p_dtype, g_dtype): function test_cpu_adam_kernel (line 167) | def test_cpu_adam_kernel(adamw, weight_decay, p_dtype, g_dtype): FILE: tests/test_optimizer/test_adam_optim.py function set_grad (line 30) | def set_grad(model: nn.Module, torch_model: nn.Module, g_dtype: torch.dt... function test_adam_optim_on_bert (line 40) | def test_adam_optim_on_bert( FILE: tests/test_optimizer/test_dist_adafactor.py function correctness_verify (line 52) | def correctness_verify(tensor1: torch.Tensor, tensor2: torch.Tensor, dty... class MlpModel (line 68) | class MlpModel(nn.Module): method __init__ (line 69) | def __init__(self): method forward (line 74) | def forward(self, x): class TPModel (line 80) | class TPModel(nn.Module): method __init__ (line 81) | def __init__(self, linear1, linear2, tp_group=None): method forward (line 88) | def forward(self, x): function exam_dist_adafactor_base (line 96) | def exam_dist_adafactor_base(dtype: torch.dtype, tp_zero_size: tuple[int... function exam_dist_adafactor_zero (line 196) | def exam_dist_adafactor_zero(dtype: torch.dtype, tp_zero_size: tuple[int... function exam_bert_test_on_lowlevelzero_plugin (line 321) | def exam_bert_test_on_lowlevelzero_plugin(test_config): function exam_bert_test_on_hybrid_plugin (line 405) | def exam_bert_test_on_hybrid_plugin(test_config): function run_dist (line 456) | def run_dist(rank, world_size, port): function test_dist_adafactor (line 467) | def test_dist_adafactor(): FILE: tests/test_optimizer/test_dist_came.py function correctness_verify (line 44) | def correctness_verify(tensor1: torch.Tensor, tensor2: torch.Tensor, dty... function exam_dist_came_base (line 63) | def exam_dist_came_base(dtype: torch.dtype, tp_zero_size: tuple[int, int]): function exam_bert_test_on_lowlevelzero_plugin (line 181) | def exam_bert_test_on_lowlevelzero_plugin(test_config): function exam_bert_test_on_hybrid_plugin (line 279) | def exam_bert_test_on_hybrid_plugin(test_config): function run_dist (line 346) | def run_dist(rank, world_size, port): function test_dist_came (line 356) | def test_dist_came(): FILE: tests/test_optimizer/test_dist_galore.py function assert_grad_close (line 64) | def assert_grad_close(tp_model, torch_model, tp_group): function assert_distributed_close (line 85) | def assert_distributed_close(tp_model, torch_model, rtol, atol, tp_group): function force_assign_grad (line 103) | def force_assign_grad(p, g_dtype, grad=None): function run_dist_galore_basic (line 113) | def run_dist_galore_basic(p_g_dtype: tuple[torch.dtype, torch.dtype], tp... function run_dist_galore_fwd_bwd (line 178) | def run_dist_galore_fwd_bwd(p_g_dtype: tuple[torch.dtype, torch.dtype], ... function check_dist_galore (line 269) | def check_dist_galore(rank, world_size, port): function test_dist_galore (line 297) | def test_dist_galore(): FILE: tests/test_optimizer/test_dist_lamb.py function assert_distributed_close (line 34) | def assert_distributed_close(tp_model, torch_model, rtol, atol, tp_group): function set_dist_grad (line 52) | def set_dist_grad( function run_dist_lamb_basic (line 89) | def run_dist_lamb_basic( function run_dist_lamb_fwd_bwd (line 159) | def run_dist_lamb_fwd_bwd( function check_dist_lamb (line 257) | def check_dist_lamb(rank, world_size, port): function test_dist_lamb (line 275) | def test_dist_lamb(): FILE: tests/test_optimizer/test_lr_scheduler.py function test_lr_scheduler_save_load (line 7) | def test_lr_scheduler_save_load(): FILE: tests/test_optimizer/test_nvme.py function move_some_params_to_cuda (line 9) | def move_some_params_to_cuda(model, torch_model): function check_params_equal (line 16) | def check_params_equal(model, torch_model): function test_nvme_adam (line 27) | def test_nvme_adam(nvme_offload_fraction, nvme_offload_dir, adam_cls): FILE: tests/test_pipeline/test_p2p_communication.py function check_p2p_communication (line 15) | def check_p2p_communication(): function run_dist (line 71) | def run_dist(rank, world_size, port): function test_pipeline_p2p (line 78) | def test_pipeline_p2p(): FILE: tests/test_pipeline/test_pipeline_utils/test_t5_pipeline_utils.py class _ShardConfig (line 8) | class _ShardConfig(ShardConfig): method __post_init__ (line 9) | def __post_init__(self): class _PipelineStageManager (line 13) | class _PipelineStageManager(PipelineStageManager): method __init__ (line 14) | def __init__(self): method num_stages (line 21) | def num_stages(self): function test_t5_pipeline_distribution (line 25) | def test_t5_pipeline_distribution(): function test_t5_pipeline_layers (line 47) | def test_t5_pipeline_layers(): FILE: tests/test_pipeline/test_pipeline_utils/test_whisper_pipeline_utils.py class _ShardConfig (line 8) | class _ShardConfig(ShardConfig): method __post_init__ (line 9) | def __post_init__(self): class _PipelineStageManager (line 13) | class _PipelineStageManager(PipelineStageManager): method __init__ (line 14) | def __init__(self): method num_stages (line 21) | def num_stages(self): function test_whisper_pipeline_distribution (line 25) | def test_whisper_pipeline_distribution(): function test_whisper_pipeline_layers (line 47) | def test_whisper_pipeline_layers(): FILE: tests/test_pipeline/test_schedule/test_interleaved.py class MlpModel (line 23) | class MlpModel(nn.Module): method __init__ (line 24) | def __init__(self): method forward (line 28) | def forward(self, x): function pp_linear_fwd (line 34) | def pp_linear_fwd( function run_pp (line 50) | def run_pp( function test_pp (line 154) | def test_pp(num_microbatch: int, batch_size: int, num_model_chunk: int): FILE: tests/test_pipeline/test_schedule/test_oneF_oneB.py class MlpModel (line 23) | class MlpModel(nn.Module): method __init__ (line 24) | def __init__(self): method forward (line 28) | def forward(self, x): function pp_linear_fwd (line 34) | def pp_linear_fwd( function examine_pp (line 48) | def examine_pp(num_microbatch: int, batch_size: int): function run_dist (line 145) | def run_dist( function test_pp (line 161) | def test_pp(num_microbatch: int, batch_size: int, world_size: int): FILE: tests/test_pipeline/test_schedule/test_pipeline_schedule_utils.py function test_get_batch_size (line 6) | def test_get_batch_size(): function test_get_micro_batch (line 16) | def test_get_micro_batch(): function test_merge_batch (line 37) | def test_merge_batch(): FILE: tests/test_pipeline/test_schedule/test_zerobubble_pp.py class MlpModel (line 39) | class MlpModel(nn.Module): method __init__ (line 40) | def __init__( method forward (line 51) | def forward( method no_sync (line 78) | def no_sync(self): function assert_optim_param_groups (line 82) | def assert_optim_param_groups(optim_base_param_groups, optim_pp_param_gr... function get_model_numel (line 89) | def get_model_numel(model: torch.nn.Module) -> Tuple[int, int]: function run_fwd_bwd_iter_input (line 114) | def run_fwd_bwd_iter_input(test_config): function run_fwd_bwd_vschedule_with_optim (line 580) | def run_fwd_bwd_vschedule_with_optim(test_config): function run_with_booster_moehybridplugin (line 762) | def run_with_booster_moehybridplugin(config: Tuple[int, ...]): function run_with_booster_hybridplugin (line 921) | def run_with_booster_hybridplugin(config: Tuple[int, ...]): function run_dist (line 1067) | def run_dist(rank, world_size, port): function test_pp (line 1076) | def test_pp(): FILE: tests/test_pipeline/test_stage_manager.py function check_stage_manager (line 10) | def check_stage_manager(): function run_dist (line 66) | def run_dist(rank, world_size, port): function test_pipeline_stage_manager (line 73) | def test_pipeline_stage_manager(): FILE: tests/test_shardformer/test_flash_attention.py function attention_ref (line 22) | def attention_ref(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, att... function gen_padded_kwargs (line 33) | def gen_padded_kwargs(dtype: torch.dtype): function gen_padded_causal_kwargs (line 42) | def gen_padded_causal_kwargs(dtype: torch.dtype): function gen_causal_kwargs (line 53) | def gen_causal_kwargs(dtype: torch.dtype): function gen_custom_kwargs (line 57) | def gen_custom_kwargs(dtype: torch.dtype): function post_process_kwargs_for_raw_attn (line 67) | def post_process_kwargs_for_raw_attn(attn_kwargs: dict): function check_attn_func (line 75) | def check_attn_func(dtype: torch.dtype, attn_func, attn_kwargs: dict, pa... function test_flash_attn_func (line 102) | def test_flash_attn_func(dtype: torch.dtype): FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_test (line 148) | def run_test(test_config): function run_3d_test (line 183) | def run_3d_test(test_config): function check_grad_clip_norm (line 194) | def check_grad_clip_norm(rank, world_size, port): function check_grad_clip_norm_3d (line 200) | def check_grad_clip_norm_3d(rank, world_size, port): function test_grad_clip_norm (line 209) | def test_grad_clip_norm(): function test_grad_clip_norm_3d (line 216) | def test_grad_clip_norm_3d(): FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_test (line 116) | def run_test(test_config): function run_3d_test (line 141) | def run_3d_test(test_config): function check_grad_clip_norm (line 152) | def check_grad_clip_norm(rank, world_size, port): function check_grad_clip_norm_3d (line 158) | def check_grad_clip_norm_3d(rank, world_size, port): function test_grad_clip_norm (line 167) | def test_grad_clip_norm(): function test_grad_clip_norm_3d (line 174) | def test_grad_clip_norm_3d(): FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py function check_forward_backward (line 24) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_test (line 137) | def run_test(test_config): function run_3d_test (line 174) | def run_3d_test(test_config): function check_grad_clip_norm (line 185) | def check_grad_clip_norm(rank, world_size, port): function check_grad_clip_norm_3d (line 191) | def check_grad_clip_norm_3d(rank, world_size, port): function test_grad_clip_norm (line 200) | def test_grad_clip_norm(): function test_grad_clip_norm_3d (line 207) | def test_grad_clip_norm_3d(): FILE: tests/test_shardformer/test_layer/test_dist_crossentropy.py function check_dist_crossentropy (line 15) | def check_dist_crossentropy(rank, world_size, port, ignore_index): function test_dist_crossentropy (line 49) | def test_dist_crossentropy(): FILE: tests/test_shardformer/test_layer/test_dist_log_prob.py function log_probs_from_logits (line 14) | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) ->... function check_dist_log_prob (line 30) | def check_dist_log_prob(rank, world_size, port): function test_dist_log_prob (line 62) | def test_dist_log_prob(): FILE: tests/test_shardformer/test_layer/test_dropout.py function check_dropout_parallel_input (line 10) | def check_dropout_parallel_input(): function check_dropout_replicated_input (line 42) | def check_dropout_replicated_input(): function run_dist (line 58) | def run_dist(rank, world_size, port): function test_dropout (line 65) | def test_dropout(): FILE: tests/test_shardformer/test_layer/test_embedding.py function check_embedding_1d (line 15) | def check_embedding_1d(lazy_init: bool): function run_dist (line 45) | def run_dist(rank, world_size, port): function test_embedding_1d (line 51) | def test_embedding_1d(): FILE: tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py class Conv1D (line 20) | class Conv1D(nn.Module): method __init__ (line 31) | def __init__(self, nf, nx): method forward (line 38) | def forward(self, x): function check_linear_conv_1d_col (line 45) | def check_linear_conv_1d_col(lazy_init: bool, seq_parallel_mode: str): function check_linear_conv_1d_row (line 86) | def check_linear_conv_1d_row(lazy_init: bool, seq_parallel_mode: bool): function check_linear_conv_1d_without_weight_grad_store (line 122) | def check_linear_conv_1d_without_weight_grad_store(lazy_init: bool, seq_... function check_linear_conv_1d_with_weight_grad_store (line 155) | def check_linear_conv_1d_with_weight_grad_store(lazy_init: bool, seq_par... function check_gpt2_qkv_fused_linear_1d (line 193) | def check_gpt2_qkv_fused_linear_1d(lazy_init: bool, seq_parallel_mode: b... function run_dist (line 200) | def run_dist(rank, world_size, port): function test_linearconv (line 208) | def test_linearconv(): FILE: tests/test_shardformer/test_layer/test_layernorm.py function check_layernorm (line 14) | def check_layernorm(lazy_init: bool): function run_dist (line 43) | def run_dist(rank, world_size, port): function test_layernorm (line 49) | def test_layernorm(): FILE: tests/test_shardformer/test_layer/test_linear_1d.py function check_linear_1d_col (line 19) | def check_linear_1d_col(lazy_init: bool, seq_parallel_mode: bool, overla... function check_linear_1d_row (line 75) | def check_linear_1d_row(lazy_init: bool, seq_parallel_mode: bool): function check_linear_without_weight_grad_store (line 121) | def check_linear_without_weight_grad_store(lazy_init: bool, seq_parallel... function check_linear_with_weight_grad_store (line 161) | def check_linear_with_weight_grad_store(lazy_init: bool, seq_parallel_mo... function check_linear_col_plus_row (line 208) | def check_linear_col_plus_row(lazy_init: bool, seq_parallel_mode: bool, ... function run_dist_linear_test (line 269) | def run_dist_linear_test(lazy_init, seq_parallel_mode, overlap): function check_dist_linear (line 277) | def check_dist_linear(rank, world_size, port): function test_linear (line 283) | def test_linear(): FILE: tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py function check_linear_1d_col (line 19) | def check_linear_1d_col(lazy_init: bool): function check_linear_1d_row (line 54) | def check_linear_1d_row(lazy_init: bool): function check_linear_1d_col_row (line 89) | def check_linear_1d_col_row(lazy_init: bool): function check_linear_1d_base (line 124) | def check_linear_1d_base(lazy_init: bool): function run_dist (line 155) | def run_dist(rank, world_size, port): function test_linearconv (line 165) | def test_linearconv(): FILE: tests/test_shardformer/test_layer/test_ring_attn.py function check_ring_attn (line 21) | def check_ring_attn(seq_len, bs, nheads, d, dtype, inner_ring_size): function check_packed_seq (line 86) | def check_packed_seq(seqlen, bs, nheads, d, dtype): function launch_single_ring (line 167) | def launch_single_ring(rank, world_size, port): function launch_double_ring (line 173) | def launch_double_ring(rank, world_size, port): function test_ring_attn (line 180) | def test_ring_attn(world_size): function test_double_ring (line 186) | def test_double_ring(world_size): FILE: tests/test_shardformer/test_layer/test_sequence_parallel.py class SequenceParallelAttention (line 15) | class SequenceParallelAttention(torch.nn.Module): method __init__ (line 25) | def __init__( method attn (line 49) | def attn(self, q, k, v): method forward (line 60) | def forward(self, x) -> Tensor: function seq_parallel_attn (line 94) | def seq_parallel_attn(seq_len, hidden_dim, head_num, batch_size): function run_seq_parallel_attn (line 163) | def run_seq_parallel_attn(seq_len, hidden_dim, head_num, batch_size): function check_all2all_attn (line 167) | def check_all2all_attn(rank, world_size, port): function test_all_to_all_attention (line 173) | def test_all_to_all_attention(): FILE: tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py function check_vocab_embedding_1d (line 15) | def check_vocab_embedding_1d(lazy_init: bool): function run_dist (line 47) | def run_dist(rank, world_size, port): function test_vocab_embedding (line 53) | def test_vocab_embedding(): FILE: tests/test_shardformer/test_model/_utils.py function build_model (line 31) | def build_model( function build_pipeline_model (line 63) | def build_pipeline_model( function run_forward (line 91) | def run_forward(original_model, sharded_model, data_gen_fn, output_trans... function check_state_dict (line 109) | def check_state_dict(org_model: Module, sharded_model: Module, name: str... function build_model_from_hybrid_plugin (line 120) | def build_model_from_hybrid_plugin( function build_model_from_low_level_zero_plugin (line 174) | def build_model_from_low_level_zero_plugin( function run_forward_backward_with_hybrid_plugin (line 200) | def run_forward_backward_with_hybrid_plugin( function run_forward_backward_with_low_level_zero_plugin (line 263) | def run_forward_backward_with_low_level_zero_plugin( function check_output_hidden_state (line 301) | def check_output_hidden_state( function check_loss (line 334) | def check_loss(org_loss: Tensor, sharded_loss: Tensor, atol: float = 1e-... function check_weight (line 338) | def check_weight( function get_grad_tensors_for_check (line 368) | def get_grad_tensors_for_check( function check_grad (line 408) | def check_grad( function unwrap_model (line 436) | def unwrap_model( function check_all_grad_tensors (line 450) | def check_all_grad_tensors(check_tensors): FILE: tests/test_shardformer/test_model/test_shard_bert.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_bert_test (line 145) | def run_bert_test(test_config): function run_bert_3d_test (line 189) | def run_bert_3d_test(test_config): function check_bert (line 200) | def check_bert(rank, world_size, port): function check_bert_3d (line 206) | def check_bert_3d(rank, world_size, port): function test_bert (line 215) | def test_bert(): function test_bert_3d (line 222) | def test_bert_3d(): FILE: tests/test_shardformer/test_model/test_shard_blip2.py function check_forward_backward (line 17) | def check_forward_backward(org_model, sharded_model, data_gen_fn, output... function run_blip2_test (line 72) | def run_blip2_test( function check_blip2 (line 99) | def check_blip2(rank, world_size, port): function test_blip2 (line 114) | def test_blip2(): FILE: tests/test_shardformer/test_model/test_shard_bloom.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_bloom_test (line 143) | def run_bloom_test(test_config): function run_bloom_3d_test (line 178) | def run_bloom_3d_test(test_config): function check_bloom (line 189) | def check_bloom(rank, world_size, port): function check_bloom_3d (line 195) | def check_bloom_3d(rank, world_size, port): function test_bloom (line 204) | def test_bloom(): function test_bloom_3d (line 211) | def test_bloom_3d(): FILE: tests/test_shardformer/test_model/test_shard_chatglm2.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_chatglm_test (line 229) | def run_chatglm_test(test_config): function run_chatglm_3d_test (line 273) | def run_chatglm_3d_test(test_config): function check_chatglm (line 289) | def check_chatglm(rank, world_size, port): function check_chatglm_3d (line 301) | def check_chatglm_3d(rank, world_size, port): function test_chatglm (line 316) | def test_chatglm(): function test_chatglm_3d (line 323) | def test_chatglm_3d(): FILE: tests/test_shardformer/test_model/test_shard_command.py function check_forward_backward (line 29) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_command_test (line 280) | def run_command_test(test_config): function run_command_3d_test (line 334) | def run_command_3d_test(test_config): function check_command (line 345) | def check_command(rank, world_size, port): function check_command_3d (line 351) | def check_command_3d(rank, world_size, port): function test_command (line 360) | def test_command(): function test_command_3d (line 367) | def test_command_3d(): FILE: tests/test_shardformer/test_model/test_shard_deepseek.py function run_deepseek_commom (line 28) | def run_deepseek_commom(parallel_config: Tuple[int, ...]): function run_deepseek_test (line 186) | def run_deepseek_test(config: Tuple[int, ...]): function run_deepseek_3d_test (line 211) | def run_deepseek_3d_test(config: Tuple[int, ...]): function check_deepseek (line 215) | def check_deepseek(rank, world_size, port): function check_deepseek_3d (line 220) | def check_deepseek_3d(rank, world_size, port): function test_deepseek (line 228) | def test_deepseek(world_size): function test_deepseek_3d (line 235) | def test_deepseek_3d(world_size): FILE: tests/test_shardformer/test_model/test_shard_deepseek_v3.py function check_forward_backward (line 26) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_deepseek_v3_test (line 70) | def run_deepseek_v3_test(config: Tuple[int, ...]): function check_deepseek_v3 (line 91) | def check_deepseek_v3(rank, world_size, port): function test_deepseek_v3 (line 99) | def test_deepseek_v3(world_size): FILE: tests/test_shardformer/test_model/test_shard_falcon.py function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_falcon_test (line 112) | def run_falcon_test(test_config): function run_falcon_3d_test (line 147) | def run_falcon_3d_test(test_config): function check_falcon (line 158) | def check_falcon(rank, world_size, port): function check_falcon_3d (line 164) | def check_falcon_3d(rank, world_size, port): function test_falcon (line 173) | def test_falcon(): function test_falcon_3d (line 180) | def test_falcon_3d(): FILE: tests/test_shardformer/test_model/test_shard_gpt2.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_gpt2_test (line 209) | def run_gpt2_test(test_config): function run_gpt2_3d_test (line 259) | def run_gpt2_3d_test(test_config): function check_gpt2 (line 279) | def check_gpt2(rank, world_size, port): function check_gpt2_3d (line 291) | def check_gpt2_3d(rank, world_size, port): function test_gpt2 (line 306) | def test_gpt2(): function test_gpt2_3d (line 313) | def test_gpt2_3d(): FILE: tests/test_shardformer/test_model/test_shard_gptj.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_gptj_test (line 183) | def run_gptj_test(test_config): function run_gptj_3d_test (line 224) | def run_gptj_3d_test(test_config): function check_gptj (line 240) | def check_gptj(rank, world_size, port): function check_gptj_3d (line 252) | def check_gptj_3d(rank, world_size, port): function test_gptj (line 268) | def test_gptj(): function test_gptj_3d (line 275) | def test_gptj_3d(): FILE: tests/test_shardformer/test_model/test_shard_llama.py function check_forward_backward (line 30) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_llama_test (line 282) | def run_llama_test(test_config): function run_llama_3d_test (line 353) | def run_llama_3d_test(test_config): function check_llama (line 368) | def check_llama(rank, world_size, port): function check_llama_3d (line 374) | def check_llama_3d(rank, world_size, port): function test_llama (line 383) | def test_llama(): function test_llama_3d (line 390) | def test_llama_3d(): FILE: tests/test_shardformer/test_model/test_shard_mistral.py function check_forward_backward (line 27) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_mistral_test (line 161) | def run_mistral_test(test_config): function check_mistral (line 172) | def check_mistral(rank, world_size, port): function test_mistral (line 180) | def test_mistral(): FILE: tests/test_shardformer/test_model/test_shard_mixtral.py function run_mixtral_commom (line 29) | def run_mixtral_commom(config: Tuple[int, ...]): function run_mixtral_test (line 179) | def run_mixtral_test(config: Tuple[int, ...]): function run_mixtral_3d_test (line 203) | def run_mixtral_3d_test(config: Tuple[int, ...]): function check_mixtral (line 208) | def check_mixtral(rank, world_size, port): function check_mixtral_3d (line 213) | def check_mixtral_3d(rank, world_size, port): function test_mixtral (line 221) | def test_mixtral(world_size): function test_mixtral_3d (line 228) | def test_mixtral_3d(world_size): FILE: tests/test_shardformer/test_model/test_shard_opt.py function check_forward_backward (line 26) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_opt_test (line 178) | def run_opt_test(test_config): function run_opt_3d_test (line 217) | def run_opt_3d_test(test_config): function check_OPTModel (line 233) | def check_OPTModel(rank, world_size, port): function check_opt_3d (line 245) | def check_opt_3d(rank, world_size, port): function test_OPTModel (line 260) | def test_OPTModel(): function test_opt_3d (line 267) | def test_opt_3d(): FILE: tests/test_shardformer/test_model/test_shard_qwen2.py function check_forward_backward (line 27) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_qwen2_test (line 212) | def run_qwen2_test(test_config): function run_qwen2_3d_test (line 261) | def run_qwen2_3d_test(test_config): function check_qwen2 (line 276) | def check_qwen2(rank, world_size, port): function check_qwen2_3d (line 282) | def check_qwen2_3d(rank, world_size, port): function test_qwen2 (line 292) | def test_qwen2(): function test_qwen2_3d (line 300) | def test_qwen2_3d(): FILE: tests/test_shardformer/test_model/test_shard_qwen3.py function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_qwen3_test (line 208) | def run_qwen3_test(test_config): function run_qwen3_3d_test (line 257) | def run_qwen3_3d_test(test_config): function check_qwen3 (line 272) | def check_qwen3(rank, world_size, port): function check_qwen3_3d (line 278) | def check_qwen3_3d(rank, world_size, port): function test_qwen3 (line 288) | def test_qwen3(): function test_qwen3_3d (line 296) | def test_qwen3_3d(): FILE: tests/test_shardformer/test_model/test_shard_sam.py function check_forward_backward (line 17) | def check_forward_backward(org_model, sharded_model, data_gen_fn, output... function run_sam_test (line 47) | def run_sam_test(enable_fused_normalization, enable_tensor_parallelism, ... function check_sam (line 58) | def check_sam(rank, world_size, port): function test_sam (line 67) | def test_sam(): FILE: tests/test_shardformer/test_model/test_shard_t5.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_t5_test (line 162) | def run_t5_test(test_config): function run_t5_3d_test (line 212) | def run_t5_3d_test(test_config): function check_t5 (line 228) | def check_t5(rank, world_size, port): function check_t5_3d (line 240) | def check_t5_3d(rank, world_size, port): function test_t5 (line 255) | def test_t5(): function test_t5_3d (line 262) | def test_t5_3d(): FILE: tests/test_shardformer/test_model/test_shard_vit.py function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_vit_test (line 132) | def run_vit_test(test_config): function run_vit_3d_test (line 158) | def run_vit_3d_test(test_config): function check_vit (line 168) | def check_vit(rank, world_size, port): function check_vit_3d (line 174) | def check_vit_3d(rank, world_size, port): function test_vit (line 183) | def test_vit(): function test_vit_3d (line 190) | def test_vit_3d(): FILE: tests/test_shardformer/test_model/test_shard_whisper.py function check_forward_backward (line 21) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l... function run_whisper_test (line 150) | def run_whisper_test(test_config): function run_whisper_3d_test (line 187) | def run_whisper_3d_test(test_config): function check_whisper (line 197) | def check_whisper(rank, world_size, port): function check_whisper_3d (line 203) | def check_whisper_3d(rank, world_size, port): function test_whisper (line 212) | def test_whisper(): function test_whisper_3d (line 219) | def test_whisper_3d(): FILE: tests/test_shardformer/test_shard_utils.py class Net (line 7) | class Net(nn.Module): method __init__ (line 8) | def __init__(self) -> None: function test_release_layer (line 14) | def test_release_layer(): FILE: tests/test_shardformer/test_with_torch_ddp.py function check_shardformer_with_ddp (line 18) | def check_shardformer_with_ddp(lazy_init: bool): function run_dist (line 72) | def run_dist(rank, world_size, port): function test_gpt2 (line 81) | def test_gpt2(): FILE: tests/test_smoothquant/test_llama_attention.py function torch_context_attention (line 30) | def torch_context_attention(xq, xk, xv, bs, seqlen, num_head, head_dim): function test_llama_context_attention (line 56) | def test_llama_context_attention(): FILE: tests/test_smoothquant/test_llama_mlp.py function torch_llama_mlp (line 29) | def torch_llama_mlp(gate_proj, up_proj, down_proj, x): function test_llama_mlp (line 49) | def test_llama_mlp(): FILE: tests/test_smoothquant/test_smoothquant_linear.py function test_linear (line 20) | def test_linear(): FILE: tests/test_smoothquant/test_sq_rotary_embedding.py function torch_rotary_emb (line 19) | def torch_rotary_emb(x, cos, sin): function test_rotary_emb (line 33) | def test_rotary_emb(): FILE: tests/test_tensor/test_comm_spec_apply.py function check_all_gather (line 13) | def check_all_gather(device_mesh, rank): function check_shard (line 40) | def check_shard(device_mesh, rank): function check_all_to_all (line 66) | def check_all_to_all(device_mesh, rank): function check_all_reduce_fwd (line 111) | def check_all_reduce_fwd(device_mesh, rank): function check_all_reduce_bwd (line 138) | def check_all_reduce_bwd(device_mesh, rank): function check_all_reduce_in_flatten_device_mesh (line 156) | def check_all_reduce_in_flatten_device_mesh(device_mesh, rank): function check_comm (line 179) | def check_comm(rank, world_size, port): function test_comm_spec (line 209) | def test_comm_spec(): FILE: tests/test_tensor/test_dtensor/test_comm_spec.py function check_all_gather (line 12) | def check_all_gather(process_groups_dict, rank): function check_shard (line 31) | def check_shard(process_groups_dict, rank): function check_all_to_all (line 51) | def check_all_to_all(process_groups_dict, rank): function check_all_reduce_fwd (line 92) | def check_all_reduce_fwd(process_groups_dict, rank): function check_all_reduce_bwd (line 113) | def check_all_reduce_bwd(process_groups_dict, rank): function check_comm (line 125) | def check_comm(rank, world_size, port): function test_comm_spec (line 155) | def test_comm_spec(): FILE: tests/test_tensor/test_dtensor/test_dtensor.py class TestModel (line 10) | class TestModel(torch.nn.Module): method __init__ (line 11) | def __init__(self, in_features, out_features): method forward (line 16) | def forward(self, x): function check_dtensor (line 22) | def check_dtensor(rank, world_size, port): function test_dtensor (line 81) | def test_dtensor(): FILE: tests/test_tensor/test_dtensor/test_dtensor_sharding_spec.py function test_dtensor_sharding_spec (line 7) | def test_dtensor_sharding_spec(): FILE: tests/test_tensor/test_dtensor/test_layout_converter.py function check_one_step_transform (line 21) | def check_one_step_transform(rank, world_size, port): function check_layout_converting (line 83) | def check_layout_converting(rank, world_size, port): function check_layout_converting_apply (line 142) | def check_layout_converting_apply(rank, world_size, port): function test_layout_converter (line 176) | def test_layout_converter(): FILE: tests/test_tensor/test_mix_gather.py function check_mix_gather_S0S1 (line 14) | def check_mix_gather_S0S1(device_mesh, rank): function check_two_all_gather_S0S1 (line 48) | def check_two_all_gather_S0S1(device_mesh, rank): function check_mix_gather_S1S0 (line 90) | def check_mix_gather_S1S0(device_mesh, rank): function check_two_all_gather_S1S0 (line 124) | def check_two_all_gather_S1S0(device_mesh, rank): function check_mix_gather_S01R (line 166) | def check_mix_gather_S01R(device_mesh, rank): function check_two_all_gather_S01R (line 193) | def check_two_all_gather_S01R(device_mesh, rank): function check_mix_gather_RS01 (line 231) | def check_mix_gather_RS01(device_mesh, rank): function check_two_all_gather_RS01 (line 259) | def check_two_all_gather_RS01(device_mesh, rank): function check_comm (line 297) | def check_comm(rank, world_size, port): function test_mix_gather (line 328) | def test_mix_gather(): FILE: tests/test_tensor/test_padded_tensor.py function check_padded_tensor (line 11) | def check_padded_tensor(rank, world_size, port): function test_padded_tensor (line 40) | def test_padded_tensor(): FILE: tests/test_tensor/test_shape_consistency.py function test_one_step_transform (line 18) | def test_one_step_transform(): function test_shape_consistency (line 94) | def test_shape_consistency(): FILE: tests/test_tensor/test_shape_consistency_apply.py function check_apply (line 12) | def check_apply(rank, world_size, port): function test_apply (line 70) | def test_apply(): FILE: tests/test_tensor/test_sharding_spec.py function test_sharding_spec (line 7) | def test_sharding_spec(): FILE: tests/test_zero/test_gemini/test_chunk_mgrv2.py function exam_chunk_memory (line 17) | def exam_chunk_memory(keep_gathered, pin_memory): function run_dist (line 51) | def run_dist(rank, world_size, port): function test_chunk_manager (line 59) | def test_chunk_manager(world_size): FILE: tests/test_zero/test_gemini/test_chunkv2.py function dist_sum (line 14) | def dist_sum(x): function add_param (line 20) | def add_param(param_list, param_cp_list, *args, **kwargs): function check_equal (line 26) | def check_equal(param, param_cp): function exam_chunk_basic (line 38) | def exam_chunk_basic(init_device, keep_gathered, pin_memory, async_op): function run_dist (line 114) | def run_dist(rank, world_size, port): function test_chunk_function (line 122) | def test_chunk_function(world_size): FILE: tests/test_zero/test_gemini/test_gemini_use_rmt.py function run_gemini_use_rmt (line 20) | def run_gemini_use_rmt(placement_policy, keep_gather, model_name: str, u... function run_dist (line 82) | def run_dist(rank, world_size, port): function test_gemini_use_rmt (line 91) | def test_gemini_use_rmt(world_size): FILE: tests/test_zero/test_gemini/test_grad_accum.py function check_grad (line 23) | def check_grad(model: GeminiDDP, torch_model: torch.nn.Module): function exam_gemini_grad_acc (line 53) | def exam_gemini_grad_acc( function run_dist (line 150) | def run_dist(rank, world_size, port): function test_grad_accumulation (line 157) | def test_grad_accumulation(): FILE: tests/test_zero/test_gemini/test_grad_clip.py function check_param (line 39) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module): function exam_grad_clipping (line 57) | def exam_grad_clipping( function run_dist (line 125) | def run_dist(rank, world_size, port): function test_grad_clip (line 133) | def test_grad_clip(world_size): FILE: tests/test_zero/test_gemini/test_inference.py function check_param (line 27) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module): function multi_chunk_init (line 40) | def multi_chunk_init(model: torch.nn.Module, placement_config: dict): function single_chunk_init (line 49) | def single_chunk_init(model: torch.nn.Module, placement_config: dict): function exam_inference (line 61) | def exam_inference(placement_config: dict, model_name: str, model_init_f... function run_dist (line 112) | def run_dist(rank, world_size, port): function test_inference (line 119) | def test_inference(world_size): FILE: tests/test_zero/test_gemini/test_optim.py function check_param (line 38) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module, dtype: t... function exam_model_step (line 67) | def exam_model_step( function exam_tiny_example (line 132) | def exam_tiny_example(placement_config, model_name: str, mixed_precision... function run_dist (line 183) | def run_dist(rank, world_size, port): function test_optim (line 192) | def test_optim(world_size): FILE: tests/test_zero/test_gemini/test_runtime_mem_tracer.py function test_runtime_mem_tracer (line 14) | def test_runtime_mem_tracer(): FILE: tests/test_zero/test_gemini/test_search.py function exam_search_chunk_size (line 28) | def exam_search_chunk_size(): function exam_chunk_manager (line 40) | def exam_chunk_manager(): function run_dist (line 58) | def run_dist(rank, world_size, port): function test_search (line 67) | def test_search(world_size): FILE: tests/test_zero/test_gemini/test_zeroddp_state_dict.py function ignore_the_first_parameter (line 18) | def ignore_the_first_parameter(model: torch.nn.Module): function exam_state_dict (line 29) | def exam_state_dict(placement_config, keep_gathered, model_name: str, ma... function run_dist (line 76) | def run_dist(rank, world_size, port): function test_zero_ddp (line 84) | def test_zero_ddp(world_size): FILE: tests/test_zero/test_gemini/test_zerooptim_state_dict.py function exam_zero_optim_state_dict (line 23) | def exam_zero_optim_state_dict(placement_config, keep_gathered): function run_dist (line 70) | def run_dist(rank, world_size, port): function test_zero_optim (line 79) | def test_zero_optim(world_size): FILE: tests/test_zero/test_low_level/test_coll_nd.py function check_all_gather_2d (line 14) | def check_all_gather_2d(): function run_dist (line 29) | def run_dist(rank, world_size, port): function test_comm_nd (line 37) | def test_comm_nd(): FILE: tests/test_zero/test_low_level/test_grad_acc.py class MlpModel (line 17) | class MlpModel(nn.Module): method __init__ (line 18) | def __init__(self): method forward (line 23) | def forward(self, x): function exam_zero_1_2_grad_acc (line 29) | def exam_zero_1_2_grad_acc(): function exam_zero_1_grad_acc (line 76) | def exam_zero_1_grad_acc(sync): function run_dist (line 136) | def run_dist(rank, world_size, port): function test_grad_accumulation (line 145) | def test_grad_accumulation(): FILE: tests/test_zero/test_low_level/test_mem_leak.py class MlpModel (line 10) | class MlpModel(nn.Module): method __init__ (line 11) | def __init__(self): method forward (line 15) | def forward(self, x): class TestLowLevelZeroOptimizer (line 23) | class TestLowLevelZeroOptimizer(LowLevelZeroOptimizer): method __del__ (line 24) | def __del__(self): function exam_mem_leak (line 30) | def exam_mem_leak(world_size): function run_dist (line 48) | def run_dist(rank, world_size, port): function test_zero_1_2 (line 56) | def test_zero_1_2(): FILE: tests/test_zero/test_low_level/test_zero1_2.py class MlpModel (line 17) | class MlpModel(nn.Module): method __init__ (line 18) | def __init__(self): method forward (line 24) | def forward(self, x): function loose_close (line 30) | def loose_close(a, b, dtype: torch.dtype = torch.float32): function split_ddp_grad (line 46) | def split_ddp_grad(grad, world_size): function exam_zero_1_2 (line 57) | def exam_zero_1_2(fp8_communication: bool): function exam_zero_1_torch_ddp (line 129) | def exam_zero_1_torch_ddp(dtype: torch.dtype, master_weights: bool, extr... function run_dist (line 214) | def run_dist(rank, world_size, port): function test_zero_1_2 (line 223) | def test_zero_1_2(): FILE: tests/test_zero/test_low_level/test_zero_ckpt.py class MlpModel (line 17) | class MlpModel(nn.Module): method __init__ (line 18) | def __init__(self): method forward (line 23) | def forward(self, x): function loose_close (line 29) | def loose_close(a, b, dtype: torch.dtype = torch.float32): function exam_zero_1_torch_ddp_ckpt (line 46) | def exam_zero_1_torch_ddp_ckpt(extra_dp_size: int): function run_dist (line 120) | def run_dist(rank, world_size, port): function test_zero_ckpt (line 128) | def test_zero_ckpt():