SYMBOL INDEX (12172 symbols across 1438 files)

FILE: .github/workflows/scripts/check_doc_i18n.py
  function compare_dirs (line 5) | def compare_dirs(dir1, dir2):

FILE: .github/workflows/scripts/example_checks/check_dispatch_inputs.py
  function check_inputs (line 5) | def check_inputs(input_list):
  function main (line 13) | def main():

FILE: .github/workflows/scripts/example_checks/check_example_weekly.py
  function show_files (line 4) | def show_files(path, all_files):
  function join (line 19) | def join(input_list, sep=None):
  function main (line 23) | def main():

FILE: .github/workflows/scripts/example_checks/detect_changed_example.py
  function main (line 4) | def main():

FILE: .github/workflows/scripts/generate_leaderboard_and_send_to_lark.py
  class Counter (line 12) | class Counter(dict):
    method record (line 21) | def record(self, item: str):
    method to_sorted_list (line 27) | def to_sorted_list(self):
  function get_utc_time_one_week_ago (line 33) | def get_utc_time_one_week_ago():
  function datetime2str (line 42) | def datetime2str(dt):
  function str2datetime (line 49) | def str2datetime(string):
  function plot_bar_chart (line 56) | def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str,...
  function get_organization_repositories (line 69) | def get_organization_repositories(github_token, organization_name) -> Li...
  function get_issue_pull_request_comments (line 90) | def get_issue_pull_request_comments(github_token: str, org_name: str, re...
  function get_discussion_comments (line 141) | def get_discussion_comments(github_token: str, org_name: str, repo_name:...
  function generate_user_engagement_leaderboard_image (line 315) | def generate_user_engagement_leaderboard_image(
  function generate_contributor_leaderboard_image (line 378) | def generate_contributor_leaderboard_image(github_token, org_name, repo_...
  function upload_image_to_lark (line 467) | def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
  function generate_lark_tenant_access_token (line 486) | def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
  function send_image_to_lark (line 500) | def send_image_to_lark(image_key: str, webhook_url: str) -> None:
  function send_message_to_lark (line 512) | def send_message_to_lark(message: str, webhook_url: str):

FILE: .github/workflows/scripts/generate_release_draft.py
  function parse_args (line 14) | def parse_args():
  function get_latest_tag_commit (line 21) | def get_latest_tag_commit(headers=None):
  function get_commit_info (line 29) | def get_commit_info(commit_hash, headers=None):
  function get_all_commit_info (line 35) | def get_all_commit_info(since, headers=None):
  function collate_release_info (line 54) | def collate_release_info(commit_info_list):
  function generate_release_post_markdown (line 78) | def generate_release_post_markdown(current_version, last_version, releas...

FILE: .github/workflows/scripts/send_message_to_lark.py
  function parse_args (line 6) | def parse_args():
  function send_message_to_lark (line 13) | def send_message_to_lark(message, webhook_url):

FILE: .github/workflows/scripts/update_setup_for_nightly.py
  function open_setup_file (line 4) | def open_setup_file():
  function replace_nightly_package_info (line 10) | def replace_nightly_package_info(file_lines):
  function write_setup_file (line 22) | def write_setup_file(file_lines):
  function main (line 27) | def main():

FILE: applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py
  class SeparatorStyle (line 20) | class SeparatorStyle(Enum):
  class Conversation (line 25) | class Conversation:
    method clear (line 33) | def clear(self):
    method get_prompt (line 36) | def get_prompt(self, length: int = None):
    method save_prompt (line 51) | def save_prompt(self):
    method append_message (line 63) | def append_message(self, role, message):
    method copy (line 66) | def copy(self):
    method dict (line 76) | def dict(self):

FILE: applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py
  class RandomDataset (line 7) | class RandomDataset(Dataset):
    method __init__ (line 8) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo...
    method __len__ (line 16) | def __len__(self):
    method __getitem__ (line 19) | def __getitem__(self, idx):

FILE: applications/Colossal-LLaMA/colossal_llama/dataset/loader.py
  function load_tokenized_dataset (line 19) | def load_tokenized_dataset(
  class DataCollatorForSupervisedDataset (line 51) | class DataCollatorForSupervisedDataset(object):
    method __call__ (line 63) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[...
  class StatefulDistributedSampler (line 141) | class StatefulDistributedSampler(DistributedSampler):
    method __init__ (line 146) | def __init__(
    method __iter__ (line 165) | def __iter__(self) -> Iterator:
    method __len__ (line 171) | def __len__(self) -> int:
    method set_start_index (line 174) | def set_start_index(self, start_index: int) -> None:

FILE: applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py
  function supervised_tokenize_pretrain (line 30) | def supervised_tokenize_pretrain(
  function supervised_tokenize_sft (line 73) | def supervised_tokenize_sft(
  class ClosedToConstantLengthSplicedDataset (line 188) | class ClosedToConstantLengthSplicedDataset(IterableDataset):
    method __init__ (line 194) | def __init__(
    method __len__ (line 226) | def __len__(self) -> int:
    method __iter__ (line 229) | def __iter__(self) -> Iterable[Dict[str, List[int]]]:

FILE: applications/Colossal-LLaMA/colossal_llama/model/init_model.py
  function main (line 18) | def main():

FILE: applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py
  function expand_vocab_tokenizer (line 23) | def expand_vocab_tokenizer(
  function main (line 62) | def main():

FILE: applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py
  function load_json (line 20) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]:
  function save_json (line 28) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ...
  function save_checkpoint (line 36) | def save_checkpoint(
  function load_checkpoint (line 71) | def load_checkpoint(

FILE: applications/Colossal-LLaMA/colossal_llama/utils/froze.py
  function freeze_non_embeds_parameters (line 7) | def freeze_non_embeds_parameters(model: LlamaForCausalLM) -> None:
  function unfreeze_parameters (line 16) | def unfreeze_parameters(model: LlamaForCausalLM) -> None:

FILE: applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py
  function unwrap (line 18) | def unwrap(model):
  function neftune_post_forward_hook (line 25) | def neftune_post_forward_hook(module, input, output):
  function activate_neftune (line 51) | def activate_neftune(model, neftune_noise_alpha=0.1):
  function deactivate_neftune (line 65) | def deactivate_neftune(model, neftune_hook_handle):

FILE: applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py
  function get_prompt_template (line 13) | def get_prompt_template(
  function streaming_chat (line 52) | def streaming_chat(
  function stream_generate (line 141) | def stream_generate(

FILE: applications/Colossal-LLaMA/colossal_llama/utils/utils.py
  function all_reduce_mean (line 11) | def all_reduce_mean(tensor: torch.Tensor, plugin: Plugin = None) -> torc...
  function get_model_numel (line 21) | def get_model_numel(model: torch.nn.Module) -> int:
  function format_numel_str (line 25) | def format_numel_str(numel: int) -> str:

FILE: applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py
  function main (line 26) | def main():

FILE: applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py
  function main (line 23) | def main():

FILE: applications/Colossal-LLaMA/inference/inference_example.py
  function load_model (line 12) | def load_model(model_path, device="cuda", **kwargs):
  function generate (line 26) | def generate(args):

FILE: applications/Colossal-LLaMA/inference/stream_chat_example.py
  function main (line 9) | def main(args):

FILE: applications/Colossal-LLaMA/setup.py
  function fetch_requirements (line 4) | def fetch_requirements(path):
  function fetch_readme (line 9) | def fetch_readme():
  function fetch_version (line 14) | def fetch_version():

FILE: applications/Colossal-LLaMA/train.py
  function train (line 40) | def train(args) -> None:

FILE: applications/ColossalChat/benchmarks/benchmark_ppo.py
  function get_model_numel (line 39) | def get_model_numel(model: torch.nn.Module, plugin: str, tp: int) -> int:
  function get_gpt_config (line 46) | def get_gpt_config(model_name: str) -> OPTConfig:
  function benchmark_train (line 65) | def benchmark_train(args):

FILE: applications/ColossalChat/benchmarks/dummy_dataset.py
  class DummyLLMDataset (line 6) | class DummyLLMDataset(Dataset):
    method __init__ (line 7) | def __init__(self, keys, seq_len, size=500, gen_fn={}):
    method _generate_data (line 14) | def _generate_data(self):
    method __len__ (line 23) | def __len__(self):
    method __getitem__ (line 26) | def __getitem__(self, idx):

FILE: applications/ColossalChat/benchmarks/ray/1mmt_dummy.py
  function get_free_port (line 23) | def get_free_port():
  function get_local_ip (line 29) | def get_local_ip():
  function main (line 35) | def main(args):

FILE: applications/ColossalChat/benchmarks/ray/mmmt_dummy.py
  function get_free_port (line 23) | def get_free_port():
  function get_local_ip (line 29) | def get_local_ip():
  function main (line 35) | def main(args):

FILE: applications/ColossalChat/coati/dataset/conversation.py
  class Conversation (line 15) | class Conversation:
    method from_config (line 24) | def from_config(cls, tokenizer: PreTrainedTokenizer, config: Dict):
    method clear (line 35) | def clear(self):
    method get_conversation_template_keys (line 39) | def get_conversation_template_keys(cls):
    method __str__ (line 42) | def __str__(self):
    method get_prompt (line 49) | def get_prompt(self, length: int = None, add_generation_prompt=False) ...
    method save_prompt (line 75) | def save_prompt(self):
    method append_message (line 78) | def append_message(self, role: str, message: str):
    method copy (line 92) | def copy(self):
  function setup_conversation_template (line 96) | def setup_conversation_template(

FILE: applications/ColossalChat/coati/dataset/loader.py
  function load_tokenized_dataset (line 24) | def load_tokenized_dataset(
  class DataCollatorForSupervisedDataset (line 58) | class DataCollatorForSupervisedDataset(object):
    method __call__ (line 69) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[...
  class DataCollatorForPromptDataset (line 146) | class DataCollatorForPromptDataset(DataCollatorForSupervisedDataset):
    method __call__ (line 147) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[...
  class DataCollatorForPreferenceDataset (line 170) | class DataCollatorForPreferenceDataset(object):
    method __call__ (line 180) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[...
  class DataCollatorForKTODataset (line 241) | class DataCollatorForKTODataset(object):
    method __call__ (line 255) | def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[...
  class StatefulDistributedSampler (line 325) | class StatefulDistributedSampler(DistributedSampler):
    method __init__ (line 326) | def __init__(
    method __iter__ (line 338) | def __iter__(self) -> Iterator:
    method __len__ (line 344) | def __len__(self) -> int:
    method set_start_index (line 347) | def set_start_index(self, start_index: int) -> None:
  function apply_chat_template_and_mask (line 351) | def apply_chat_template_and_mask(
  class RawConversationDataset (line 420) | class RawConversationDataset(Dataset):
    method __init__ (line 426) | def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, ma...
    method __len__ (line 436) | def __len__(self) -> int:
    method __getitem__ (line 439) | def __getitem__(self, index: int):
  function collate_fn_grpo (line 447) | def collate_fn_grpo(batch):

FILE: applications/ColossalChat/coati/dataset/tokenization_utils.py
  function tokenize_sft (line 26) | def tokenize_sft(
  function tokenize_prompt (line 133) | def tokenize_prompt(
  function apply_rlhf_data_format (line 203) | def apply_rlhf_data_format(template: Conversation, tokenizer: Any):
  function tokenize_rlhf (line 226) | def tokenize_rlhf(
  function tokenize_kto (line 342) | def tokenize_kto(

FILE: applications/ColossalChat/coati/dataset/utils.py
  function is_rank_0 (line 11) | def is_rank_0() -> bool:
  function _make_r_io_base (line 15) | def _make_r_io_base(f, mode: str):
  function jload (line 21) | def jload(f, mode="r"):
  function read_string_by_schema (line 29) | def read_string_by_schema(data: Dict[str, Any], schema: str) -> str:
  function pad_to_max_len (line 46) | def pad_to_max_len(
  function chuncate_sequence (line 71) | def chuncate_sequence(sequence: List[torch.Tensor], max_length: int, dty...
  function find_first_occurrence_subsequence (line 82) | def find_first_occurrence_subsequence(seq: torch.Tensor, subseq: torch.T...
  function tokenize_and_concatenate (line 91) | def tokenize_and_concatenate(
  function split_templated_prompt_into_chunks (line 137) | def split_templated_prompt_into_chunks(messages: List[Dict[str, str]], p...

FILE: applications/ColossalChat/coati/distributed/comm.py
  function ray_broadcast_object (line 11) | def ray_broadcast_object(obj: Any, src: int = 0, device=None, group_name...
  function ray_broadcast_tensor_dict (line 36) | def ray_broadcast_tensor_dict(
  class SharedVariableActor (line 79) | class SharedVariableActor:
    method __init__ (line 80) | def __init__(self, number_of_readers: int = 0, buffer_size_limit: int ...
    method pickup_rollout_task (line 90) | def pickup_rollout_task(self, num_tasks: int):
    method append_data (line 108) | def append_data(self, data):
    method get_data (line 113) | def get_data(self, data_uid: int):
    method acquire_process_lock (line 134) | def acquire_process_lock(self, key: str):
    method release_process_lock (line 145) | def release_process_lock(self, key: str):
    method set_signal (line 150) | def set_signal(self, key: str, signal: str):
    method get_signal (line 153) | def get_signal(self):

FILE: applications/ColossalChat/coati/distributed/consumer.py
  class BaseConsumer (line 24) | class BaseConsumer:
    method __init__ (line 25) | def __init__(
    method setup (line 69) | def setup(self) -> None:
    method state_dict (line 108) | def state_dict(self) -> Dict[str, torch.Tensor]:
    method step (line 111) | def step(self, step_idx: int, **kwargs) -> Optional[float]:
    method prepare_mini_batch (line 114) | def prepare_mini_batch(self, effective_group_to_raw_group_mapping: Dic...
    method calculate_effective_group_to_raw_group_mapping (line 138) | def calculate_effective_group_to_raw_group_mapping(self, step):
    method loop (line 149) | def loop(self) -> None:
    method __del__ (line 358) | def __del__(self):
  class SimpleConsumer (line 364) | class SimpleConsumer(BaseConsumer):
    method __init__ (line 365) | def __init__(
    method setup (line 405) | def setup(self):
    method step (line 409) | def step(self, step_idx: int, pbar: Any, **kwargs) -> Optional[float]:
    method state_dict (line 430) | def state_dict(self):

FILE: applications/ColossalChat/coati/distributed/grpo_consumer.py
  class GRPOConsumer (line 19) | class GRPOConsumer(BaseConsumer):
    method __init__ (line 20) | def __init__(
    method setup (line 143) | def setup(self):
    method step (line 174) | def step(self, step_idx: int, pbar: Any, **kwargs) -> Optional[float]:
    method state_dict (line 607) | def state_dict(self):

FILE: applications/ColossalChat/coati/distributed/inference_backend.py
  class BaseInferenceBackend (line 22) | class BaseInferenceBackend:
    method __init__ (line 23) | def __init__(self, model_config: Dict[str, Any], generate_config: Dict...
    method generate (line 26) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens...
    method load_state_dict (line 42) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:
  class TransformersInferenceBackend (line 46) | class TransformersInferenceBackend(BaseInferenceBackend):
    method __init__ (line 56) | def __init__(
    method generate (line 74) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens...
    method load_state_dict (line 125) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:
  class SGLangInferenceBackend (line 129) | class SGLangInferenceBackend(BaseInferenceBackend):
    method __init__ (line 130) | def __init__(
    method generate (line 152) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens...
    method load_state_dict (line 179) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:
  class VLLMInferenceBackend (line 186) | class VLLMInferenceBackend(BaseInferenceBackend):
    method __init__ (line 195) | def __init__(
    method generate (line 219) | def generate(self, input_ids: torch.Tensor, attention_mask: torch.Tens...
    method load_state_dict (line 283) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:

FILE: applications/ColossalChat/coati/distributed/launch.py
  function get_jsonl_size_fast (line 21) | def get_jsonl_size_fast(path: str) -> int:
  function get_dp_size_fast (line 28) | def get_dp_size_fast(n_procs: int, plugin_config: Dict[str, Any]) -> int:
  function launch_distributed (line 36) | def launch_distributed(

FILE: applications/ColossalChat/coati/distributed/launch_zero_bubble.py
  function get_jsonl_size_fast (line 16) | def get_jsonl_size_fast(path: str) -> int:
  function get_dp_size_fast (line 23) | def get_dp_size_fast(n_procs: int, plugin_config: Dict[str, Any]) -> int:
  function launch_distributed (line 31) | def launch_distributed(

FILE: applications/ColossalChat/coati/distributed/loss.py
  class PolicyLoss (line 8) | class PolicyLoss(nn.Module):
    method __init__ (line 13) | def __init__(
    method forward (line 29) | def forward(

FILE: applications/ColossalChat/coati/distributed/producer.py
  class BaseProducer (line 34) | class BaseProducer:
    method __init__ (line 35) | def __init__(
    method setup (line 198) | def setup(self) -> None:
    method rollout (line 212) | def rollout(self, input_ids: torch.Tensor, attention_mask: torch.Tenso...
    method load_state_dict (line 215) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:
    method loop (line 218) | def loop(self) -> None:
    method __del__ (line 417) | def __del__(self):
  class SimpleProducer (line 422) | class SimpleProducer(BaseProducer):
    method __init__ (line 423) | def __init__(
    method rollout (line 483) | def rollout(self, input_ids, attention_mask, **kwargs):
    method __del__ (line 506) | def __del__(self):
    method load_state_dict (line 512) | def load_state_dict(self, state_dict):

FILE: applications/ColossalChat/coati/distributed/profiling_utils.py
  class CustomProfiler (line 5) | class CustomProfiler:
    method __init__ (line 6) | def __init__(self, name, disabled=True):
    method _log (line 13) | def _log(self, message):
    method log (line 20) | def log(self, message):
    method enter (line 27) | def enter(self, event_name):
    method exit (line 30) | def exit(self, event_name):
    method close (line 33) | def close(self):

FILE: applications/ColossalChat/coati/distributed/reward/code_reward/testing_util.py
  function truncatefn (line 43) | def truncatefn(s, length=300):
  class CODE_TYPE (line 51) | class CODE_TYPE(Enum):
  class Capturing (line 59) | class Capturing(list):
    method __enter__ (line 60) | def __enter__(self):
    method __exit__ (line 67) | def __exit__(self, *args):
  function only_int_check (line 73) | def only_int_check(val):
  function string_int_check (line 77) | def string_int_check(val):
  function combined_int_check (line 81) | def combined_int_check(val):
  function clean_traceback (line 85) | def clean_traceback(error_traceback):
  function run_test (line 92) | def run_test(in_outs, test=None, debug=False, timeout=15, run_all_tests=...
  function custom_compare_ (line 551) | def custom_compare_(output, ground_truth):
  function stripped_string_compare (line 566) | def stripped_string_compare(s1, s2):
  function call_method (line 572) | def call_method(method, inputs):
  function reliability_guard (line 598) | def reliability_guard(maximum_memory_bytes=None):

FILE: applications/ColossalChat/coati/distributed/reward/code_reward/utils.py
  function _temp_run (line 27) | def _temp_run(sample, generation, debug, result, metadata_list, timeout):
  function check_correctness (line 39) | def check_correctness(in_outs: Optional[dict], generation, timeout=10, d...
  function check_correctness_code_api (line 61) | def check_correctness_code_api(

FILE: applications/ColossalChat/coati/distributed/reward/reward_fn.py
  function verify_math_representation (line 36) | def verify_math_representation(completion, gt_answer):
  function verify_model_answer (line 76) | def verify_model_answer(decoded_final_answer, gt_answer, ans_acc, acc_sc...
  function math_reward_fn (line 99) | def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
  function boxed_math_reward_fn (line 160) | def boxed_math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
  function code_reward_fn (line 225) | def code_reward_fn(input_ids, test_cases, response_idx, **kwargs):

FILE: applications/ColossalChat/coati/distributed/reward/reward_utils.py
  function validate_response_structure (line 20) | def validate_response_structure(processed_str: str, tags: Dict = None) -...
  function extract_solution (line 58) | def extract_solution(solution_str: str) -> Tuple[Optional[str], str]:
  function extract_boxed_solution (line 79) | def extract_boxed_solution(text: str) -> Optional[str]:

FILE: applications/ColossalChat/coati/distributed/reward/verifiable_reward.py
  class VerifiableReward (line 11) | class VerifiableReward:
    method __init__ (line 12) | def __init__(self, reward_fns: List[callable], **kwargs: List[Dict[str...
    method __call__ (line 16) | def __call__(

FILE: applications/ColossalChat/coati/distributed/utils.py
  function unbind_batch (line 11) | def unbind_batch(batch: Dict[str, torch.Tensor]) -> List[Dict[str, torch...
  function bind_batch (line 25) | def bind_batch(batches: List[Dict[str, torch.Tensor]]) -> Dict[str, torc...
  function pre_send (line 32) | def pre_send(batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
  function post_recv (line 41) | def post_recv(batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
  function update_by_default (line 50) | def update_by_default(data: Dict[str, Any], default: Dict[str, Any]) -> ...
  function log_probs_from_logits (line 58) | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) ->...
  function memory_efficient_logprob (line 74) | def memory_efficient_logprob(
  function entropy_from_logits (line 113) | def entropy_from_logits(logits: torch.Tensor) -> torch.Tensor:
  function masked_mean (line 123) | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) ...
  function masked_sum (line 143) | def masked_sum(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -...
  function safe_append_to_jsonl_file (line 160) | def safe_append_to_jsonl_file(file_path, data):

FILE: applications/ColossalChat/coati/distributed/zero_bubble/consumer.py
  class BaseConsumer (line 21) | class BaseConsumer:
    method __init__ (line 22) | def __init__(
    method setup (line 69) | def setup(self) -> None:
    method get_ddp_config (line 94) | def get_ddp_config(self) -> Dict[str, Any]:
    method init_collective_group (line 110) | def init_collective_group(
    method state_dict (line 123) | def state_dict(self) -> Dict[str, torch.Tensor]:
    method step (line 126) | def step(self, **kwargs) -> Optional[float]:
    method prepare_mini_batch (line 129) | def prepare_mini_batch(self, effective_group_to_raw_group_mapping: Dic...
    method calculate_effective_group_to_raw_group_mapping (line 153) | def calculate_effective_group_to_raw_group_mapping(self):
    method loop (line 160) | def loop(self) -> None:
    method __del__ (line 345) | def __del__(self):

FILE: applications/ColossalChat/coati/distributed/zero_bubble/distributor.py
  class Distributor (line 13) | class Distributor:
    method __init__ (line 14) | def __init__(
    method init_collective_group (line 31) | def init_collective_group(
    method loop (line 44) | def loop(self):
    method get_weight_version (line 123) | def get_weight_version(self):

FILE: applications/ColossalChat/coati/distributed/zero_bubble/grpo_consumer.py
  class GRPOConsumer (line 19) | class GRPOConsumer(BaseConsumer):
    method __init__ (line 20) | def __init__(
    method setup (line 134) | def setup(self):
    method step (line 164) | def step(self, pbar: Any, **kwargs) -> Optional[float]:
    method state_dict (line 531) | def state_dict(self):

FILE: applications/ColossalChat/coati/distributed/zero_bubble/producer.py
  class BaseProducer (line 33) | class BaseProducer:
    method __init__ (line 34) | def __init__(
    method init_collective_group (line 193) | def init_collective_group(
    method rollout (line 206) | def rollout(self, input_ids: torch.Tensor, attention_mask: torch.Tenso...
    method load_state_dict (line 209) | def load_state_dict(self, state_dict: Dict[str, torch.Tensor]) -> None:
    method loop (line 212) | def loop(self) -> None:
    method __del__ (line 441) | def __del__(self):
  class SimpleProducer (line 446) | class SimpleProducer(BaseProducer):
    method __init__ (line 447) | def __init__(
    method rollout (line 510) | def rollout(self, input_ids, attention_mask, **kwargs):
    method __del__ (line 533) | def __del__(self):
    method load_state_dict (line 539) | def load_state_dict(self, state_dict):

FILE: applications/ColossalChat/coati/experience_buffer/base.py
  class ExperienceBuffer (line 7) | class ExperienceBuffer(ABC):
    method __init__ (line 15) | def __init__(self, sample_batch_size: int, limit: int = 0) -> None:
    method append (line 22) | def append(self, experience: Experience) -> None:
    method clear (line 26) | def clear(self) -> None:
    method sample (line 30) | def sample(self) -> Experience:
    method __len__ (line 34) | def __len__(self) -> int:
    method __getitem__ (line 38) | def __getitem__(self, idx: int) -> Any:
    method collate_fn (line 42) | def collate_fn(self, batch: Any) -> Experience:

FILE: applications/ColossalChat/coati/experience_buffer/naive.py
  class NaiveExperienceBuffer (line 15) | class NaiveExperienceBuffer(ExperienceBuffer):
    method __init__ (line 24) | def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload...
    method append (line 34) | def append(self, experience: Experience) -> None:
    method clear (line 49) | def clear(self) -> None:
    method sample (line 53) | def sample(self) -> Experience:
    method __len__ (line 69) | def __len__(self) -> int:
    method __getitem__ (line 72) | def __getitem__(self, idx: int) -> BufferItem:
    method collate_fn (line 75) | def collate_fn(self, batch) -> Experience:

FILE: applications/ColossalChat/coati/experience_buffer/utils.py
  class BufferItem (line 10) | class BufferItem:
  function split_experience_batch (line 35) | def split_experience_batch(experience: Experience) -> List[BufferItem]:
  function _zero_pad_sequences (line 53) | def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = "left...
  function make_experience_batch (line 64) | def make_experience_batch(items: List[BufferItem]) -> Experience:

FILE: applications/ColossalChat/coati/experience_maker/base.py
  class Experience (line 11) | class Experience:
    method to_device (line 38) | def to_device(self, device: torch.device) -> None:
    method pin_memory (line 50) | def pin_memory(self):
  class ExperienceMaker (line 64) | class ExperienceMaker(ABC):
    method __init__ (line 69) | def __init__(
    method make_experience (line 79) | def make_experience(self, input_ids: torch.Tensor, attention_mask: tor...

FILE: applications/ColossalChat/coati/experience_maker/naive.py
  function is_rank_0 (line 24) | def is_rank_0() -> bool:
  class NaiveExperienceMaker (line 28) | class NaiveExperienceMaker(ExperienceMaker):
    method __init__ (line 33) | def __init__(
    method calculate_advantage (line 64) | def calculate_advantage(self, value: torch.Tensor, reward: torch.Tenso...
    method make_experience (line 87) | def make_experience(

FILE: applications/ColossalChat/coati/models/base.py
  class BaseModel (line 12) | class BaseModel(nn.Module):
    method __init__ (line 22) | def __init__(self, pretrained: str = None, config: Optional[Pretrained...
    method resize_token_embeddings (line 46) | def resize_token_embeddings(self, *args, **kwargs):

FILE: applications/ColossalChat/coati/models/critic.py
  class Critic (line 13) | class Critic(BaseModel):
    method __init__ (line 22) | def __init__(self, pretrained: str = None, config: Optional[Pretrained...
    method forward (line 27) | def forward(self, input_ids: torch.LongTensor, attention_mask: Optiona...
    method get_input_embeddings (line 36) | def get_input_embeddings(self):
    method get_output_embeddings (line 39) | def get_output_embeddings(self):

FILE: applications/ColossalChat/coati/models/generation.py
  function _prepare_logits_processor (line 19) | def _prepare_logits_processor(
  function _is_sequence_finished (line 44) | def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool:
  function update_model_kwargs_fn (line 61) | def update_model_kwargs_fn(outputs: dict, new_mask, **model_kwargs) -> d...
  function prepare_inputs_fn (line 92) | def prepare_inputs_fn(input_ids: torch.Tensor, **model_kwargs) -> dict:
  function _sample (line 97) | def _sample(
  function generate (line 200) | def generate(
  function _sample_streaming (line 262) | def _sample_streaming(
  function generate_streaming (line 378) | def generate_streaming(

FILE: applications/ColossalChat/coati/models/lora.py
  class LoraManager (line 22) | class LoraManager:
  class LoraConfig (line 30) | class LoraConfig:
    method from_file (line 40) | def from_file(cls, config_file: str):
  class LoraBase (line 48) | class LoraBase(lora.LoRALayer, nn.Module):
    method __init__ (line 49) | def __init__(
    method reset_parameters (line 68) | def reset_parameters(self):
    method train (line 103) | def train(self, mode: bool = True):
  class LoraLinear (line 124) | class LoraLinear(LoraBase):
    method __init__ (line 127) | def __init__(
    method forward (line 160) | def forward(self, x: torch.Tensor):
  class LoraEmbedding (line 169) | class LoraEmbedding(LoraBase):
    method __init__ (line 172) | def __init__(
    method _embed (line 218) | def _embed(self, x: torch.Tensor, weight) -> torch.Tensor:
    method forward (line 229) | def forward(self, x: torch.Tensor):
    method train (line 239) | def train(self, mode: bool = True):
  function _lora_linear_wrapper (line 260) | def _lora_linear_wrapper(linear: nn.Linear, lora_config: LoraConfig) -> ...
  function _convert_to_lora_recursively (line 287) | def _convert_to_lora_recursively(module: nn.Module, parent_name: str, lo...
  function convert_to_lora_module (line 337) | def convert_to_lora_module(module: nn.Module, lora_config: LoraConfig) -...

FILE: applications/ColossalChat/coati/models/loss.py
  class GPTLMLoss (line 14) | class GPTLMLoss(nn.Module):
    method __init__ (line 19) | def __init__(self):
    method forward (line 24) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  class PolicyLoss (line 31) | class PolicyLoss(nn.Module):
    method __init__ (line 36) | def __init__(self, clip_eps: float = 0.2, skip_threshold: float = 20.0...
    method forward (line 41) | def forward(
  class ValueLoss (line 70) | class ValueLoss(nn.Module):
    method __init__ (line 75) | def __init__(self, clip_eps: float = 0.2) -> None:
    method forward (line 79) | def forward(
  class DpoLoss (line 97) | class DpoLoss(nn.Module):
    method __init__ (line 106) | def __init__(self, beta: float = 0.1, gamma: float = 0.0):
    method forward (line 118) | def forward(
  class LogSigLoss (line 174) | class LogSigLoss(nn.Module):
    method forward (line 180) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te...
  class LogExpLoss (line 184) | class LogExpLoss(nn.Module):
    method forward (line 190) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te...
  class OddsRatioLoss (line 195) | class OddsRatioLoss(nn.Module):
    method forward (line 201) | def forward(
  class KTOLoss (line 219) | class KTOLoss(nn.Module):
    method __init__ (line 220) | def __init__(self, beta: float = 0.1, desirable_weight: float = 1.0, u...
    method forward (line 232) | def forward(

FILE: applications/ColossalChat/coati/models/reward_model.py
  class RewardModel (line 13) | class RewardModel(BaseModel):
    method __init__ (line 23) | def __init__(self, pretrained: str = None, config: Optional[Pretrained...
    method forward (line 28) | def forward(
    method get_input_embeddings (line 43) | def get_input_embeddings(self):
    method get_output_embeddings (line 46) | def get_output_embeddings(self):

FILE: applications/ColossalChat/coati/models/rlvr_reward_model.py
  class RLVRRewardModel (line 10) | class RLVRRewardModel:
    method __init__ (line 19) | def __init__(self, reward_fn_list: List[Callable], **kwargs) -> None:
    method __call__ (line 23) | def __call__(
    method to (line 46) | def to(self, device):
    method eval (line 49) | def eval(self):

FILE: applications/ColossalChat/coati/models/utils.py
  function get_model_numel (line 9) | def get_model_numel(model: torch.nn.Module) -> int:
  function compute_reward (line 13) | def compute_reward(
  function _log_probs_from_logits (line 41) | def _log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -...
  function calc_action_log_probs (line 57) | def calc_action_log_probs(logits: torch.Tensor, sequences: torch.LongTen...
  function masked_mean (line 72) | def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) ...
  function calc_masked_log_probs (line 92) | def calc_masked_log_probs(
  function load_json (line 115) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]:
  function save_json (line 123) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ...
  function disable_dropout (line 131) | def disable_dropout(model: torch.nn.Module):
  function repad_to_left (line 147) | def repad_to_left(tensor, tokenizer):

FILE: applications/ColossalChat/coati/quant/llama_gptq/loader.py
  function load_quant (line 8) | def load_quant(model: nn.Module, checkpoint: str, wbits: int, groupsize:...

FILE: applications/ColossalChat/coati/quant/llama_gptq/model_utils.py
  function find_layers (line 6) | def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=""):

FILE: applications/ColossalChat/coati/quant/llama_gptq/quant.py
  function quantize (line 10) | def quantize(x, scale, zero, maxq):
  class Quantizer (line 15) | class Quantizer(nn.Module):
    method __init__ (line 16) | def __init__(self, shape=1):
    method configure (line 22) | def configure(self, bits, perchannel=False, sym=True, mse=False, norm=...
    method find_params (line 31) | def find_params(self, x, weight=False):
    method quantize (line 110) | def quantize(self, x):
    method enabled (line 115) | def enabled(self):
    method ready (line 118) | def ready(self):
  class QuantLinear (line 130) | class QuantLinear(nn.Module):
    method __init__ (line 131) | def __init__(self, bits, groupsize, infeatures, outfeatures):
    method pack (line 150) | def pack(self, linear, scales, zeros):
    method forward (line 239) | def forward(self, x):
  function make_quant (line 274) | def make_quant(module, names, bits, groupsize, name=""):

FILE: applications/ColossalChat/coati/quant/utils.py
  function _noop (line 6) | def _noop(*args, **kwargs):
  function low_resource_init (line 11) | def low_resource_init():

FILE: applications/ColossalChat/coati/ray/callbacks/base.py
  class TrainerCallback (line 6) | class TrainerCallback(ABC):
    method on_fit_start (line 11) | def on_fit_start(self) -> None:
    method on_fit_end (line 14) | def on_fit_end(self) -> None:
    method on_episode_start (line 17) | def on_episode_start(self, episode: int) -> None:
    method on_episode_end (line 20) | def on_episode_end(self, episode: int) -> None:
    method on_epoch_start (line 23) | def on_epoch_start(self, epoch: int) -> None:
    method on_epoch_end (line 26) | def on_epoch_end(self, epoch: int) -> None:
    method on_batch_start (line 29) | def on_batch_start(self) -> None:
    method on_batch_end (line 32) | def on_batch_end(self, metrics: dict, experience: Experience) -> None:
    method on_update_start (line 35) | def on_update_start(self) -> None:
    method on_update_end (line 38) | def on_update_end(self) -> None:
  class MakerCallback (line 42) | class MakerCallback(ABC):
    method on_loop_start (line 43) | def on_loop_start(self) -> None:
    method on_loop_end (line 46) | def on_loop_end(self) -> None:
    method on_make_experience_start (line 49) | def on_make_experience_start(self) -> None:
    method on_make_experience_end (line 52) | def on_make_experience_end(self, experience: Experience) -> None:
    method on_send_start (line 55) | def on_send_start(self) -> None:
    method on_send_end (line 58) | def on_send_end(self) -> None:
    method on_batch_start (line 61) | def on_batch_start(self) -> None:
    method on_batch_end (line 64) | def on_batch_end(self) -> None:

FILE: applications/ColossalChat/coati/ray/callbacks/performance_evaluator.py
  function get_world_size (line 11) | def get_world_size() -> int:
  function print_rank_0 (line 17) | def print_rank_0(*args, **kwargs) -> None:
  function all_reduce_mean (line 23) | def all_reduce_mean(x: float, world_size: int) -> float:
  class Timer (line 32) | class Timer:
    method __init__ (line 33) | def __init__(self) -> None:
    method start (line 37) | def start(self) -> None:
    method end (line 40) | def end(self) -> None:
    method reset (line 43) | def reset(self) -> None:
  class ExperienceMakerPerformanceEvaluator (line 47) | class ExperienceMakerPerformanceEvaluator(MakerCallback):
    method __init__ (line 48) | def __init__(
    method on_make_experience_start (line 68) | def on_make_experience_start(self) -> None:
    method on_make_experience_end (line 71) | def on_make_experience_end(self, experience: Experience) -> None:
    method on_send_start (line 92) | def on_send_start(self) -> None:
    method on_send_end (line 95) | def on_send_end(self) -> None:
    method on_batch_start (line 98) | def on_batch_start(self) -> None:
    method on_batch_end (line 101) | def on_batch_end(self) -> None:
    method on_loop_end (line 104) | def on_loop_end(self) -> None:
  class TrainerPerformanceEvaluator (line 127) | class TrainerPerformanceEvaluator(TrainerCallback):
    method __init__ (line 128) | def __init__(
    method on_episode_start (line 153) | def on_episode_start(self, episodes: int) -> None:
    method on_episode_end (line 159) | def on_episode_end(self, episodes: int) -> None:
    method on_batch_start (line 164) | def on_batch_start(self) -> None:
    method on_batch_end (line 169) | def on_batch_end(self, metrics: dict, experience: Experience) -> None:
    method on_update_start (line 183) | def on_update_start(self) -> None:
    method on_update_end (line 188) | def on_update_end(self) -> None:
    method on_fit_end (line 193) | def on_fit_end(self) -> None:

FILE: applications/ColossalChat/coati/ray/detached_replay_buffer.py
  class DetachedReplayBuffer (line 11) | class DetachedReplayBuffer:
    method __init__ (line 24) | def __init__(self, sample_batch_size: int, limit: int = 0) -> None:
    method append (line 31) | def append(self, experience: Experience) -> None:
    method extend (line 39) | def extend(self, items: List[BufferItem]) -> None:
    method clear (line 50) | def clear(self) -> None:
    method sample (line 58) | def sample(self, worker_rank=0, to_device="cpu") -> Experience:
    method _sample_and_erase (line 64) | def _sample_and_erase(self) -> Experience:
    method get_length (line 68) | def get_length(self) -> int:

FILE: applications/ColossalChat/coati/ray/detached_trainer_base.py
  class DetachedTrainer (line 17) | class DetachedTrainer(ABC):
    method __init__ (line 33) | def __init__(
    method update_target_holder_list (line 51) | def update_target_holder_list(self):
    method _update_remote_makers (line 59) | def _update_remote_makers(self, fully_update: bool = False, **kwargs):
    method sync_models_to_remote_makers (line 62) | def sync_models_to_remote_makers(self, **kwargs):
    method training_step (line 66) | def training_step(self, experience: Experience) -> Dict[str, Any]:
    method _learn (line 69) | def _learn(self, update_steps: int, train_epochs: int) -> None:
    method _learn_epoch (line 86) | def _learn_epoch(self, pbar: tqdm, data: List[Experience]) -> None:
    method fit (line 105) | def fit(self, total_steps: int, update_steps: int, train_epochs: int =...
    method buffer_get_length (line 117) | def buffer_get_length(self):
    method buffer_append (line 124) | def buffer_append(self, experience: Experience):
    method buffer_extend (line 131) | def buffer_extend(self, items: List[BufferItem]):
    method _buffer_sample (line 138) | def _buffer_sample(self):
    method _on_fit_start (line 141) | def _on_fit_start(self) -> None:
    method _on_fit_end (line 145) | def _on_fit_end(self) -> None:
    method _on_episode_start (line 149) | def _on_episode_start(self, episode: int) -> None:
    method _on_episode_end (line 153) | def _on_episode_end(self, episode: int) -> None:
    method _on_epoch_start (line 157) | def _on_epoch_start(self, epoch: int) -> None:
    method _on_epoch_end (line 161) | def _on_epoch_end(self, epoch: int) -> None:
    method _on_batch_start (line 165) | def _on_batch_start(self) -> None:
    method _on_batch_end (line 169) | def _on_batch_end(self, metrics: dict, experience: Experience) -> None:
    method _on_update_start (line 173) | def _on_update_start(self) -> None:
    method _on_update_end (line 177) | def _on_update_end(self) -> None:

FILE: applications/ColossalChat/coati/ray/detached_trainer_ppo.py
  class DetachedPPOTrainer (line 22) | class DetachedPPOTrainer(DetachedTrainer):
    method __init__ (line 43) | def __init__(
    method _update_remote_makers (line 104) | def _update_remote_makers(self, fully_update: bool = False, **config):
    method training_step (line 142) | def training_step(self, experience: Experience) -> Dict[str, float]:
    method strategy_save_actor (line 167) | def strategy_save_actor(self, path: str, only_rank0: bool = False) -> ...
    method strategy_save_critic (line 170) | def strategy_save_critic(self, path: str, only_rank0: bool = False) ->...
    method strategy_save_actor_optim (line 173) | def strategy_save_actor_optim(self, path: str, only_rank0: bool = Fals...
    method strategy_save_critic_optim (line 176) | def strategy_save_critic_optim(self, path: str, only_rank0: bool = Fal...
    method _get_model_state_dict_shard (line 179) | def _get_model_state_dict_shard(self, model: torch.nn.Module, fully_up...
    method _get_model_lora_config_dict (line 187) | def _get_model_lora_config_dict(self, model: torch.nn.Module):

FILE: applications/ColossalChat/coati/ray/experience_maker_holder.py
  class ExperienceMakerHolder (line 22) | class ExperienceMakerHolder:
    method __init__ (line 31) | def __init__(
    method _get_ready (line 93) | def _get_ready(self):
    method _fully_initialized (line 97) | def _fully_initialized(self):
    method _init_target_trainer_list (line 100) | def _init_target_trainer_list(self):
    method _make_experience (line 108) | def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -...
    method _send_items (line 117) | def _send_items(self, experience: Experience) -> None:
    method _inference_step (line 128) | def _inference_step(self, batch) -> None:
    method workingloop (line 141) | def workingloop(self, dataloader_fn: Callable[[], Iterable], num_epoch...
    method update_experience_maker (line 171) | def update_experience_maker(
    method _on_make_experience_start (line 231) | def _on_make_experience_start(self) -> None:
    method _on_make_experience_end (line 235) | def _on_make_experience_end(self, experience: Experience) -> None:
    method _on_loop_start (line 239) | def _on_loop_start(self) -> None:
    method _on_loop_end (line 243) | def _on_loop_end(self) -> None:
    method _on_send_start (line 247) | def _on_send_start(self) -> None:
    method _on_send_end (line 251) | def _on_send_end(self) -> None:
    method _on_batch_start (line 255) | def _on_batch_start(self) -> None:
    method _on_batch_end (line 259) | def _on_batch_end(self) -> None:
  function _set_default_generate_kwargs (line 264) | def _set_default_generate_kwargs(generate_kwargs: dict, actor: Actor) ->...

FILE: applications/ColossalChat/coati/ray/lora_constructor.py
  class LoRAConfig (line 10) | class LoRAConfig:
  class LoRAConstructor (line 17) | class LoRAConstructor:
    method __init__ (line 39) | def __init__(self):
    method register_lora_config (line 42) | def register_lora_config(self, lora_config_dict: Dict[str, Any]):
    method reconstruct_increase (line 45) | def reconstruct_increase(self, state_dict_lora: Dict[str, Any], lora_c...
    method _compute (line 72) | def _compute(self, lora_A, lora_B, config=LoRAConfig()):
    method load_state_dict_increase (line 82) | def load_state_dict_increase(self, model: nn.Module, state_dict_increa...
    method filter_state_dict_lora (line 90) | def filter_state_dict_lora(state_dict: Dict[str, Any], keep_non_lora=F...
    method extract_lora_config (line 107) | def extract_lora_config(model: nn.Module) -> Dict[str, LoRAConfig]:

FILE: applications/ColossalChat/coati/ray/utils.py
  function is_rank_0 (line 16) | def is_rank_0() -> bool:
  function get_rank (line 20) | def get_rank() -> int:
  function get_world_size (line 24) | def get_world_size() -> int:
  function get_actor_from_args (line 28) | def get_actor_from_args(model: str, pretrained: str = None, config=None,...
  function get_critic_from_args (line 42) | def get_critic_from_args(model: str, pretrained: str = None, config=None...
  function get_reward_model_from_args (line 56) | def get_reward_model_from_args(model: str, pretrained: str = None, confi...
  function get_strategy_from_args (line 70) | def get_strategy_from_args(strategy: str):
  function get_tokenizer_from_args (line 88) | def get_tokenizer_from_args(model: str, **kwargs):
  function set_dist_env (line 105) | def set_dist_env(env_info: Dict[str, str]):
  function get_model_numel (line 113) | def get_model_numel(model: nn.Module) -> int:
  function get_receivers_per_sender (line 118) | def get_receivers_per_sender(sender_idx: int, num_senders: int, num_rece...
  function state_dict_to (line 133) | def state_dict_to(

FILE: applications/ColossalChat/coati/trainer/base.py
  class SLTrainer (line 24) | class SLTrainer(ABC):
    method __init__ (line 35) | def __init__(
    method _train (line 53) | def _train(self, epoch):
    method _eval (line 57) | def _eval(self, epoch):
    method _before_fit (line 61) | def _before_fit(self):
    method fit (line 64) | def fit(self, *args, **kwargs):
  class OLTrainer (line 71) | class OLTrainer(ABC):
    method __init__ (line 83) | def __init__(
    method _fit_ctx (line 102) | def _fit_ctx(self) -> None:
    method _episode_ctx (line 112) | def _episode_ctx(self, episode: int) -> None:
    method _on_make_experience_start (line 121) | def _on_make_experience_start(self) -> None:
    method _on_make_experience_end (line 125) | def _on_make_experience_end(self, experience: Experience) -> None:
    method _on_learn_epoch_start (line 129) | def _on_learn_epoch_start(self, epoch: int) -> None:
    method _on_learn_epoch_end (line 133) | def _on_learn_epoch_end(self, epoch: int) -> None:
    method _on_learn_batch_start (line 137) | def _on_learn_batch_start(self) -> None:
    method _on_learn_batch_end (line 141) | def _on_learn_batch_end(self, experience: Experience) -> None:
    method _make_experience (line 146) | def _make_experience(self, collect_step: int):
    method _learn (line 153) | def _learn(self, update_step: int):
    method _setup_update_phrase_dataload (line 161) | def _setup_update_phrase_dataload(self):
    method _save_checkpoint (line 168) | def _save_checkpoint(self, episode: int = 0):
    method _collect_phase (line 174) | def _collect_phase(self, collect_step: int):
    method _update_phase (line 180) | def _update_phase(self, update_step: int):
    method _before_fit (line 185) | def _before_fit(self, *args, **kwargs):
    method fit (line 188) | def fit(

FILE: applications/ColossalChat/coati/trainer/callbacks/base.py
  class Callback (line 6) | class Callback(ABC):
    method on_fit_start (line 11) | def on_fit_start(self) -> None:
    method on_fit_end (line 14) | def on_fit_end(self) -> None:
    method on_episode_start (line 17) | def on_episode_start(self, episode: int) -> None:
    method on_episode_end (line 20) | def on_episode_end(self, episode: int) -> None:
    method on_make_experience_start (line 23) | def on_make_experience_start(self) -> None:
    method on_make_experience_end (line 26) | def on_make_experience_end(self, experience: Experience) -> None:
    method on_learn_epoch_start (line 29) | def on_learn_epoch_start(self, epoch: int) -> None:
    method on_learn_epoch_end (line 32) | def on_learn_epoch_end(self, epoch: int) -> None:
    method on_learn_batch_start (line 35) | def on_learn_batch_start(self) -> None:
    method on_learn_batch_end (line 38) | def on_learn_batch_end(self, experience: Experience) -> None:

FILE: applications/ColossalChat/coati/trainer/callbacks/performance_evaluator.py
  function get_world_size (line 11) | def get_world_size() -> int:
  function save_eval_result_rank_0 (line 17) | def save_eval_result_rank_0(s: str, save_path: str, **kwargs) -> None:
  function divide (line 24) | def divide(x: float, y: float) -> float:
  function all_reduce_mean (line 33) | def all_reduce_mean(x: float, world_size: int) -> float:
  class Timer (line 42) | class Timer:
    method __init__ (line 43) | def __init__(self) -> None:
    method start (line 47) | def start(self) -> None:
    method end (line 50) | def end(self) -> None:
    method reset (line 55) | def reset(self) -> None:
  class PerformanceEvaluator (line 59) | class PerformanceEvaluator(Callback):
    method __init__ (line 71) | def __init__(
    method on_episode_start (line 102) | def on_episode_start(self, episode: int) -> None:
    method on_episode_end (line 108) | def on_episode_end(self, episode: int) -> None:
    method on_make_experience_start (line 113) | def on_make_experience_start(self) -> None:
    method on_make_experience_end (line 118) | def on_make_experience_end(self, experience: Experience) -> None:
    method on_learn_batch_start (line 141) | def on_learn_batch_start(self) -> None:
    method on_learn_batch_end (line 146) | def on_learn_batch_end(self, experience: Experience) -> None:
    method on_fit_end (line 160) | def on_fit_end(self) -> None:

FILE: applications/ColossalChat/coati/trainer/dpo.py
  class DPOTrainer (line 29) | class DPOTrainer(SLTrainer):
    method __init__ (line 49) | def __init__(
    method _before_fit (line 86) | def _before_fit(
    method _train (line 123) | def _train(self, epoch: int):
    method _eval (line 406) | def _eval(self, epoch: int):

FILE: applications/ColossalChat/coati/trainer/grpo.py
  function _set_default_generate_kwargs (line 33) | def _set_default_generate_kwargs(actor: PreTrainedModel) -> Dict:
  class GRPOTrainer (line 53) | class GRPOTrainer(OLTrainer):
    method __init__ (line 78) | def __init__(
    method _before_fit (line 164) | def _before_fit(
    method _setup_update_phrase_dataload (line 195) | def _setup_update_phrase_dataload(self):
    method _make_experience (line 210) | def _make_experience(self, collect_step: int) -> Experience:
    method _training_step (line 228) | def _training_step(self, experience: Experience):
    method _learn (line 331) | def _learn(self, update_step: int):
    method _save_checkpoint (line 361) | def _save_checkpoint(self, num_train_step: int = 0):

FILE: applications/ColossalChat/coati/trainer/kto.py
  class KTOTrainer (line 28) | class KTOTrainer(SLTrainer):
    method __init__ (line 50) | def __init__(
    method _before_fit (line 89) | def _before_fit(
    method _train (line 119) | def _train(self, epoch: int):
    method _eval (line 265) | def _eval(self, epoch: int):

FILE: applications/ColossalChat/coati/trainer/orpo.py
  class ORPOTrainer (line 27) | class ORPOTrainer(SLTrainer):
    method __init__ (line 46) | def __init__(
    method _before_fit (line 79) | def _before_fit(
    method _train (line 109) | def _train(self, epoch: int):
    method _eval (line 240) | def _eval(self, epoch: int):

FILE: applications/ColossalChat/coati/trainer/ppo.py
  function _set_default_generate_kwargs (line 33) | def _set_default_generate_kwargs(actor: PreTrainedModel) -> Dict:
  class PPOTrainer (line 54) | class PPOTrainer(OLTrainer):
    method __init__ (line 81) | def __init__(
    method _before_fit (line 155) | def _before_fit(
    method _setup_update_phrase_dataload (line 186) | def _setup_update_phrase_dataload(self):
    method _make_experience (line 201) | def _make_experience(self, collect_step: int) -> Experience:
    method _training_step (line 217) | def _training_step(self, experience: Experience):
    method _learn (line 340) | def _learn(self, update_step: int):
    method _save_checkpoint (line 371) | def _save_checkpoint(self, episode: int = 0):

FILE: applications/ColossalChat/coati/trainer/rm.py
  class RewardModelTrainer (line 26) | class RewardModelTrainer(SLTrainer):
    method __init__ (line 46) | def __init__(
    method _before_fit (line 77) | def _before_fit(
    method _train (line 107) | def _train(self, epoch):
    method _eval (line 199) | def _eval(self, epoch):

FILE: applications/ColossalChat/coati/trainer/sft.py
  class SFTTrainer (line 25) | class SFTTrainer(SLTrainer):
    method __init__ (line 38) | def __init__(
    method _before_fit (line 65) | def _before_fit(
    method _train (line 98) | def _train(self, epoch: int):
    method _eval (line 181) | def _eval(self, epoch: int):

FILE: applications/ColossalChat/coati/trainer/utils.py
  class AnnealingScheduler (line 15) | class AnnealingScheduler:
    method __init__ (line 16) | def __init__(self, start, end, warmup_steps=100, annealing_step=2000):
    method get_temperature (line 23) | def get_temperature(self):
    method step_forward (line 32) | def step_forward(self):
  class CycledDataLoader (line 36) | class CycledDataLoader:
    method __init__ (line 52) | def __init__(
    method next (line 61) | def next(self):
  function is_rank_0 (line 81) | def is_rank_0() -> bool:
  function to_device (line 91) | def to_device(x: Any, device: torch.device) -> Any:
  function all_reduce_mean (line 111) | def all_reduce_mean(tensor: torch.Tensor, plugin: Plugin = None) -> torc...
  function all_reduce_sum (line 131) | def all_reduce_sum(tensor: torch.Tensor, plugin: Plugin = None) -> torch...
  function all_gather_tensors (line 149) | def all_gather_tensors(local_tensor_list: torch.Tensor, plugin: Plugin =...

FILE: applications/ColossalChat/coati/utils/accumulative_meter.py
  class AccumulativeMeanVariable (line 6) | class AccumulativeMeanVariable:
    method __init__ (line 11) | def __init__(self):
    method add (line 15) | def add(self, value, count_update=1):
    method get (line 26) | def get(self):
    method reset (line 35) | def reset(self):
  class AccumulativeMeanMeter (line 43) | class AccumulativeMeanMeter:
    method __init__ (line 56) | def __init__(self):
    method add (line 59) | def add(self, name, value, count_update=1):
    method get (line 64) | def get(self, name):
    method reset (line 67) | def reset(self):

FILE: applications/ColossalChat/coati/utils/ckpt_io.py
  function load_json (line 20) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]:
  function save_json (line 28) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ...
  function save_checkpoint (line 36) | def save_checkpoint(
  function load_checkpoint (line 72) | def load_checkpoint(

FILE: applications/ColossalChat/coati/utils/reward_score/competition.py
  function math_competition_reward_fn (line 6) | def math_competition_reward_fn(input_ids, attention_mask, **kwargs):

FILE: applications/ColossalChat/coati/utils/reward_score/gsm8k.py
  function gsm8k_reward_fn (line 6) | def gsm8k_reward_fn(input_ids, attention_mask, **kwargs):

FILE: applications/ColossalChat/coati/utils/reward_score/utils.py
  function validate_response_structure (line 20) | def validate_response_structure(processed_str: str, tags: Dict = None) -...
  function extract_solution (line 58) | def extract_solution(solution_str: str) -> Tuple[Optional[str], str]:

FILE: applications/ColossalChat/examples/community/peft/easy_dataset.py
  function _tokenize_fn (line 13) | def _tokenize_fn(strings: Sequence[str], tokenizer: AutoTokenizer, max_l...
  function preprocess (line 37) | def preprocess(sources: Sequence[str], targets: Sequence[str], tokenizer...
  class EasySupervisedDataset (line 50) | class EasySupervisedDataset(Dataset):
    method __init__ (line 51) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt...
    method __len__ (line 71) | def __len__(self):
    method __getitem__ (line 74) | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
    method __repr__ (line 77) | def __repr__(self):
    method __str__ (line 80) | def __str__(self):
  class EasyPromptsDataset (line 84) | class EasyPromptsDataset(Dataset):
    method __init__ (line 85) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt...
    method __len__ (line 100) | def __len__(self):
    method __getitem__ (line 103) | def __getitem__(self, idx):
    method __repr__ (line 106) | def __repr__(self):
    method __str__ (line 109) | def __str__(self):
  class EasyRewardDataset (line 113) | class EasyRewardDataset(Dataset):
    method __init__ (line 114) | def __init__(self, train_file: str, tokenizer: AutoTokenizer, special_...
    method __len__ (line 146) | def __len__(self):
    method __getitem__ (line 150) | def __getitem__(self, idx):
    method __repr__ (line 159) | def __repr__(self):
    method __str__ (line 162) | def __str__(self):
  class EasySFTDataset (line 172) | class EasySFTDataset(Dataset):
    method __init__ (line 173) | def __init__(self, data_file: str, tokenizer: AutoTokenizer, max_lengt...
    method __len__ (line 227) | def __len__(self):
    method __getitem__ (line 231) | def __getitem__(self, idx):
    method __repr__ (line 235) | def __repr__(self):
    method __str__ (line 239) | def __str__(self):

FILE: applications/ColossalChat/examples/community/peft/easy_models.py
  class Actor (line 13) | class Actor(Module):
    method __init__ (line 21) | def __init__(self, model: nn.Module) -> None:
    method generate (line 26) | def generate(
    method forward (line 48) | def forward(
    method get_base_model (line 57) | def get_base_model(self):
  class BLOOMActor (line 61) | class BLOOMActor(Actor):
    method __init__ (line 73) | def __init__(
    method print_trainable_parameters (line 92) | def print_trainable_parameters(self):

FILE: applications/ColossalChat/examples/community/peft/train_peft_prompts.py
  function main (line 22) | def main(args):

FILE: applications/ColossalChat/examples/community/peft/train_peft_sft.py
  function train (line 22) | def train(args):

FILE: applications/ColossalChat/examples/community/ray/ray_job_script.py
  function main (line 6) | def main(api_server_endpoint="http://127.0.0.1:8265"):

FILE: applications/ColossalChat/examples/community/ray/train_prompts_on_ray.py
  class ExperienceCompositionRefs (line 28) | class ExperienceCompositionRefs:
    method __init__ (line 29) | def __init__(
  class ExperienceMaker (line 44) | class ExperienceMaker:
    method __init__ (line 45) | def __init__(self, kl_coef) -> None:
    method make_experience (line 49) | def make_experience(self, experiment_computation_refs: ExperienceCompo...
  class DistributedTorchRayActor (line 65) | class DistributedTorchRayActor:
    method __init__ (line 66) | def __init__(self, world_size, rank, local_rank, master_addr, master_p...
    method _get_current_node_ip (line 83) | def _get_current_node_ip():
    method _get_free_port (line 87) | def _get_free_port():
    method get_master_addr_port (line 92) | def get_master_addr_port(self):
  class BasePPORole (line 96) | class BasePPORole(DistributedTorchRayActor):
    method add_experience_maker (line 97) | def add_experience_maker(self, kl_coef: float = 0.1):
    method make_experience (line 100) | def make_experience(self, experience_computation_ref: ExperienceCompos...
    method _init_strategy (line 103) | def _init_strategy(self, strategy: str):
    method _init_optimizer (line 114) | def _init_optimizer(self):
    method _prepare_model_with_strategy (line 120) | def _prepare_model_with_strategy(self, has_optimizer: bool):
    method _load_model_from_pretrained (line 127) | def _load_model_from_pretrained(self, model_class: Type[LoRAModule], p...
    method init_model_from_pretrained (line 130) | def init_model_from_pretrained(
    method eval (line 137) | def eval(self):
  class TrainablePPORole (line 141) | class TrainablePPORole(BasePPORole):
    method _load_model_from_pretrained (line 142) | def _load_model_from_pretrained(self, model_class, pretrain):
    method _train (line 146) | def _train(self):
    method _training_step (line 149) | def _training_step(self, experience: Experience):
    method learn_on_experiences (line 152) | def learn_on_experiences(self, experience_refs):
  class RayPPOActor (line 163) | class RayPPOActor(TrainablePPORole):
    method set_loss_function (line 164) | def set_loss_function(self, eps_clip: float):
    method load_tokenizer_from_pretrained (line 167) | def load_tokenizer_from_pretrained(self, model_type: str, pretrained):
    method setup_generate_kwargs (line 186) | def setup_generate_kwargs(self, generate_kwargs: dict):
    method load_csv_prompt_file_from_url_to_sampler (line 193) | def load_csv_prompt_file_from_url_to_sampler(self, prompt_url):
    method _generate (line 199) | def _generate(self, input_ids, **generate_kwargs):
    method sample_prompts_and_make_sequence (line 202) | def sample_prompts_and_make_sequence(self, experience_batch_size):
    method calculate_action_log_probs (line 211) | def calculate_action_log_probs(self, sequence_attention_action_mask):
    method _training_step (line 215) | def _training_step(self, experience):
    method save_checkpoint (line 226) | def save_checkpoint(self, save_path, should_save_optimizer: bool):
    method generate_answer (line 238) | def generate_answer(self, prompt, max_length=30, num_return_sequences=5):
  class RayPPOCritic (line 250) | class RayPPOCritic(TrainablePPORole):
    method set_loss_function (line 251) | def set_loss_function(self, value_clip: float):
    method _training_step (line 254) | def _training_step(self, experience):
    method calculate_value (line 267) | def calculate_value(self, sequence_attention_action_mask):
  class RayPPORewardModel (line 273) | class RayPPORewardModel(BasePPORole):
    method _load_model_from_pretrained (line 274) | def _load_model_from_pretrained(self, model_class, pretrain):
    method calculate_r (line 282) | def calculate_r(self, sequence_attention_action_mask):
  class RayPPOInitialModel (line 288) | class RayPPOInitialModel(BasePPORole):
    method _load_model_from_pretrained (line 289) | def _load_model_from_pretrained(self, model_class, pretrain):
    method calculate_base_action_log_probs (line 294) | def calculate_base_action_log_probs(self, sequence_attention_action_ma...
  class PPORayActorGroup (line 299) | class PPORayActorGroup:
    method __init__ (line 305) | def __init__(self, num_nodes, num_gpus_per_node, ray_actor_type: Type[...
    method _initiate_actors (line 311) | def _initiate_actors(self):
    method async_init_model_from_pretrained (line 344) | def async_init_model_from_pretrained(
  class TrainableModelRayActorGroup (line 353) | class TrainableModelRayActorGroup(PPORayActorGroup):
    method async_learn_on_experiences (line 354) | def async_learn_on_experiences(self, experience_refs):
  class PPOActorRayActorGroup (line 363) | class PPOActorRayActorGroup(TrainableModelRayActorGroup):
    method __init__ (line 364) | def __init__(self, num_nodes, num_gpus_per_node) -> None:
    method async_prepare_for_sequence_generation (line 367) | def async_prepare_for_sequence_generation(self, model: str, pretrain: ...
    method load_csv_prompt_file_from_url_to_sampler (line 374) | def load_csv_prompt_file_from_url_to_sampler(self, csv_url):
    method async_sample_prompts_and_make_sequence (line 377) | def async_sample_prompts_and_make_sequence(self, experience_batch_size):
    method async_calculate_action_log_probs (line 380) | def async_calculate_action_log_probs(self, sequences_attention_mask_ac...
    method set_loss_function (line 390) | def set_loss_function(self, eps_clip: float = 0.2):
    method save_checkpoint (line 393) | def save_checkpoint(self, save_path, should_save_optimizer):
  class PPOCriticRayActorGroup (line 397) | class PPOCriticRayActorGroup(TrainableModelRayActorGroup):
    method __init__ (line 398) | def __init__(self, num_nodes, num_gpus_per_node) -> None:
    method async_calculate_value (line 401) | def async_calculate_value(self, sequences_attention_mask_action_mask_r...
    method set_loss_function (line 411) | def set_loss_function(self, value_clip: float = 0.4):
  class PPOInitialRayActorGroup (line 415) | class PPOInitialRayActorGroup(PPORayActorGroup):
    method __init__ (line 416) | def __init__(self, num_nodes, num_gpus_per_node) -> None:
    method async_calculate_base_action_log_probs (line 419) | def async_calculate_base_action_log_probs(self, sequences_attention_ma...
  class PPORewardRayActorGroup (line 430) | class PPORewardRayActorGroup(PPORayActorGroup):
    method __init__ (line 431) | def __init__(self, num_nodes, num_gpus_per_node) -> None:
    method async_calculate_r (line 434) | def async_calculate_r(self, sequences_attention_mask_action_mask_refs):
  function main (line 445) | def main(args):

FILE: applications/ColossalChat/examples/data_preparation_scripts/prepare_dataset.py
  function main (line 52) | def main():

FILE: applications/ColossalChat/examples/inference/chatio.py
  class ChatIO (line 17) | class ChatIO(abc.ABC):
    method prompt_for_input (line 19) | def prompt_for_input(self, role: str) -> str:
    method prompt_for_output (line 23) | def prompt_for_output(self, role: str):
    method stream_output (line 27) | def stream_output(self, output_stream):
  class SimpleChatIO (line 31) | class SimpleChatIO(ChatIO):
    method prompt_for_input (line 32) | def prompt_for_input(self, role) -> str:
    method prompt_for_output (line 35) | def prompt_for_output(self, role: str):
    method stream_output (line 38) | def stream_output(self, output_stream):
  class RichChatIO (line 51) | class RichChatIO(ChatIO):
    method __init__ (line 52) | def __init__(self):
    method prompt_for_input (line 57) | def prompt_for_input(self, role) -> str:
    method prompt_for_output (line 68) | def prompt_for_output(self, role: str) -> str:
    method stream_output (line 71) | def stream_output(self, output_stream):
  class DummyChatIO (line 107) | class DummyChatIO(ChatIO):
    method __init__ (line 112) | def __init__(self):
    method prompt_for_input (line 116) | def prompt_for_input(self, role) -> str:
    method prompt_for_output (line 127) | def prompt_for_output(self, role: str) -> str:
    method stream_output (line 130) | def stream_output(self, output_stream):

FILE: applications/ColossalChat/examples/inference/inference.py
  function get_gpu_memory (line 17) | def get_gpu_memory(max_gpus=None):
  function load_model_and_tokenizer (line 42) | def load_model_and_tokenizer(model_path, tokenizer_path, device="cuda", ...
  function _set_default_generate_kwargs (line 64) | def _set_default_generate_kwargs(model: PreTrainedModel) -> Dict:
  function generation_wrapper (line 85) | def generation_wrapper(*args, **kwargs):
  function main (line 92) | def main(args):

FILE: applications/ColossalChat/examples/inference/web_chatbot/locustfile.py
  class GenerationUser (line 17) | class GenerationUser(HttpUser):
    method generate (line 19) | def generate(self):

FILE: applications/ColossalChat/examples/inference/web_chatbot/server.py
  class GenerationTaskReq (line 24) | class GenerationTaskReq(BaseModel):
  function generate_streamingly (line 57) | def generate_streamingly(prompt, max_length, max_new_tokens, top_k, top_...
  function event_generator (line 92) | async def event_generator(request: Request, generator: Generator):
  function generate (line 105) | def generate(data: GenerationTaskReq, request: Request):
  function generate_no_stream (line 116) | def generate_no_stream(data: GenerationTaskReq, request: Request):

FILE: applications/ColossalChat/examples/inference/web_chatbot/utils.py
  function update_model_kwargs_fn (line 12) | def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict:
  class Dialogue (line 33) | class Dialogue(BaseModel):
  class ChatPromptProcessor (line 38) | class ChatPromptProcessor:
    method __init__ (line 41) | def __init__(self, censored_words: List[str] = []):
    method preprocess_prompt (line 45) | def preprocess_prompt(self, history: List[Dialogue]) -> str:
    method postprocess_output (line 53) | def postprocess_output(self, output: str) -> str:
    method has_censored_words (line 56) | def has_censored_words(self, text: str) -> bool:
  class LockedIterator (line 63) | class LockedIterator:
    method __init__ (line 64) | def __init__(self, it, lock: Lock) -> None:
    method __iter__ (line 68) | def __iter__(self):
    method __next__ (line 71) | def __next__(self):
  function load_json (line 76) | def load_json(path: str):

FILE: applications/ColossalChat/examples/training_scripts/lora_finetune.py
  function all_reduce_mean (line 39) | def all_reduce_mean(loss: torch.Tensor, plugin: Plugin) -> torch.Tensor:
  function train (line 46) | def train(args) -> None:

FILE: applications/ColossalChat/examples/training_scripts/train_dpo.py
  function train (line 25) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_grpo.py
  function train (line 41) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_kto.py
  function train (line 25) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_orpo.py
  function train (line 25) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_ppo.py
  function train (line 50) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_rm.py
  function train (line 27) | def train(args):

FILE: applications/ColossalChat/examples/training_scripts/train_sft.py
  function train (line 26) | def train(args):

FILE: applications/ColossalChat/setup.py
  function fetch_requirements (line 4) | def fetch_requirements(path):
  function fetch_readme (line 9) | def fetch_readme():
  function fetch_version (line 14) | def fetch_version():

FILE: applications/ColossalChat/start_code_verifier.py
  class CheckCorrectnessRequest (line 10) | class CheckCorrectnessRequest(BaseModel):
  class CheckCorrectnessResponse (line 18) | class CheckCorrectnessResponse(BaseModel):
  function check_correctness_api (line 24) | def check_correctness_api(request: CheckCorrectnessRequest):

FILE: applications/ColossalChat/tests/test_lora.py
  class SimpleNN (line 9) | class SimpleNN(nn.Module):
    method __init__ (line 10) | def __init__(self, input_size, hidden_size, num_classes):
    method forward (line 16) | def forward(self, x):
  function test_overfit (line 23) | def test_overfit():
  function test_lora_linear_accuracy (line 68) | def test_lora_linear_accuracy():
  function test_lora_embedding_accuracy (line 89) | def test_lora_embedding_accuracy():

FILE: applications/ColossalEval/colossal_eval/dataset/agieval.py
  function get_prompt (line 55) | def get_prompt(line: Dict, dataset_name: str, logger: DistributedLogger)...
  function combine_prompt (line 103) | def combine_prompt(prompt_path, dataset_name, load_explanation=True, cha...
  class AGIEvalDataset (line 180) | class AGIEvalDataset(BaseDataset):
    method load (line 200) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/dataset/base.py
  class BaseDataset (line 9) | class BaseDataset:
    method __init__ (line 18) | def __init__(self, path, logger, *args, **kwargs):
    method save (line 21) | def save(self, save_path):
    method load (line 26) | def load(path, logger: DistributedLogger, *args, **kwargs):
  class DistributedDataset (line 30) | class DistributedDataset(Dataset):
    method __init__ (line 31) | def __init__(self, data):
    method __len__ (line 34) | def __len__(self):
    method __getitem__ (line 37) | def __getitem__(self, idx):

FILE: applications/ColossalEval/colossal_eval/dataset/ceval.py
  function get_few_shot_data (line 78) | def get_few_shot_data(data: List[Dict], subject):
  class CEvalDataset (line 85) | class CEvalDataset(BaseDataset):
    method load (line 93) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/dataset/cmmlu.py
  function get_few_shot_data (line 89) | def get_few_shot_data(data: List[Dict], subject):
  class CMMLUDataset (line 96) | class CMMLUDataset(BaseDataset):
    method load (line 104) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/dataset/colossalai.py
  function get_data_per_category (line 24) | def get_data_per_category(data):
  class ColossalDataset (line 33) | class ColossalDataset(BaseDataset):
    method load (line 40) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis...

FILE: applications/ColossalEval/colossal_eval/dataset/cvalues.py
  class CValuesDataset (line 23) | class CValuesDataset(BaseDataset):
    method load (line 31) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis...

FILE: applications/ColossalEval/colossal_eval/dataset/gaokaobench.py
  function get_all_classes (line 44) | def get_all_classes(instruction: str):
  class GaoKaoBenchDataset (line 58) | class GaoKaoBenchDataset(BaseDataset):
    method load (line 72) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis...

FILE: applications/ColossalEval/colossal_eval/dataset/gsm.py
  function get_few_shot_data (line 80) | def get_few_shot_data():
  class GSMDataset (line 88) | class GSMDataset(BaseDataset):
    method load (line 96) | def load(

FILE: applications/ColossalEval/colossal_eval/dataset/longbench.py
  class LongBenchDataset (line 68) | class LongBenchDataset(BaseDataset):
    method load (line 80) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis...

FILE: applications/ColossalEval/colossal_eval/dataset/mmlu.py
  function get_few_shot_data (line 19) | def get_few_shot_data(data: List[Dict], subject):
  class MMLUDataset (line 26) | class MMLUDataset(BaseDataset):
    method load (line 34) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/dataset/mtbench.py
  class MTBenchDataset (line 23) | class MTBenchDataset(BaseDataset):
    method __init__ (line 30) | def __init__(self, path, logger: DistributedLogger, *args, **kwargs):
    method load (line 35) | def load(path: str, logger: DistributedLogger, *args, **kwargs) -> Lis...

FILE: applications/ColossalEval/colossal_eval/dataset/safetybench_en.py
  function get_query_str (line 36) | def get_query_str(question, options, choices_templates=CHOICE_TEMP, pad=...
  function process_test (line 55) | def process_test(sample_list, pad_choices=False):
  function process_dev (line 83) | def process_dev(sample_dict, pad_choices=False):
  function get_few_shot_data (line 107) | def get_few_shot_data(data: List[Dict]):
  function add_few_shot_to_test (line 114) | def add_few_shot_to_test(dataset):
  class SafetyBenchENDataset (line 125) | class SafetyBenchENDataset(BaseDataset):
    method load (line 133) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/dataset/safetybench_zh.py
  function get_query_str (line 36) | def get_query_str(question, options, choices_templates=CHOICE_TEMP, pad=...
  function process_test (line 55) | def process_test(sample_list, pad_choices=False):
  function process_dev (line 83) | def process_dev(sample_dict, pad_choices=False):
  function get_few_shot_data (line 107) | def get_few_shot_data(data: List[Dict]):
  function add_few_shot_to_test (line 114) | def add_few_shot_to_test(dataset):
  class SafetyBenchZHDataset (line 125) | class SafetyBenchZHDataset(BaseDataset):
    method load (line 133) | def load(path: str, logger: DistributedLogger, few_shot: bool, *args, ...

FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py
  class DatasetEvaluator (line 39) | class DatasetEvaluator(object):
    method __init__ (line 45) | def __init__(self, config_path: str, save_path: str):
    method _calculate_label_metrics (line 49) | def _calculate_label_metrics(self, metric: str, category: str):
    method _calculate_combined_metrics (line 93) | def _calculate_combined_metrics(self, metric: str, category: str):
    method _calculate_other_metrics (line 148) | def _calculate_other_metrics(self, metric: str, category: str):
    method _calculate_gpt_metrics (line 174) | def _calculate_gpt_metrics(self, metric: str, category: str):
    method _calculate_loss_metrics (line 192) | def _calculate_loss_metrics(self, metric: str, category: str):
    method _evaluate (line 245) | def _evaluate(self):
    method get_evaluation_results (line 282) | def get_evaluation_results(

FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/gpt_judge.py
  function load_mt_prompts (line 28) | def load_mt_prompts(prompt_file: str):
  function get_mt_prompt (line 37) | def get_mt_prompt(prompts: Dict[str, str], multiturn: bool, math: bool):
  function chat_compeletion_openai (line 48) | def chat_compeletion_openai(messages: List[Dict], temperature: float = 0...
  function get_mtbench_judgements (line 69) | def get_mtbench_judgements(question: Dict[str, Any], prompts: Dict[str, ...
  function mtbench_single_judge (line 119) | def mtbench_single_judge(data: List[Dict], config_path: str):

FILE: applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/metrics.py
  function _fix_fracs (line 205) | def _fix_fracs(string):
  function _fix_a_slash_b (line 237) | def _fix_a_slash_b(string):
  function _remove_right_units (line 252) | def _remove_right_units(string):
  function _fix_sqrt (line 262) | def _fix_sqrt(string):
  function _strip_string (line 277) | def _strip_string(string):
  function parse_math_answer (line 347) | def parse_math_answer(raw_string):
  function math_equivalence (line 418) | def math_equivalence(prediction, reference, **kwargs):
  function multi_choice_accuracy (line 436) | def multi_choice_accuracy(prediction, reference, **kwargs):
  function accuracy_by_options (line 460) | def accuracy_by_options(question, prediction, reference):
  function combined_single_choice_accuracy (line 474) | def combined_single_choice_accuracy(prediction, reference, **kwargs):
  function single_choice_accuracy (line 478) | def single_choice_accuracy(prediction, reference, **kwargs):
  function normalize_answer (line 500) | def normalize_answer(s):
  function normalize_zh_answer (line 519) | def normalize_zh_answer(s):
  function count_score (line 536) | def count_score(prediction, reference, **kwargs):
  function retrieval_score (line 546) | def retrieval_score(prediction, reference, **kwargs):
  function retrieval_zh_score (line 559) | def retrieval_zh_score(prediction, reference, **kwargs):
  function code_sim_score (line 572) | def code_sim_score(prediction, reference, **kwargs):
  function classification_score (line 582) | def classification_score(prediction, reference, **kwargs):
  function rouge_score (line 608) | def rouge_score(prediction, reference, **kwargs):
  function rouge_zh_score (line 617) | def rouge_zh_score(prediction, reference, **kwargs):
  function _f1_score (line 624) | def _f1_score(prediction, reference, **kwargs):
  function f1_score (line 635) | def f1_score(prediction, reference, **kwargs):
  function f1_zh_score (line 644) | def f1_zh_score(prediction, reference, **kwargs):
  function extract_answer_hf (line 654) | def extract_answer_hf(completion):
  function get_match_str (line 664) | def get_match_str(match, idx):
  function extract_answer (line 676) | def extract_answer(completion):
  function is_correct (line 697) | def is_correct(completion, answer):
  function gsm_accuracy (line 704) | def gsm_accuracy(prediction, reference, **kwargs):

FILE: applications/ColossalEval/colossal_eval/evaluate/evaluator.py
  class Evaluator (line 9) | class Evaluator(object):
    method __init__ (line 15) | def __init__(
    method battle (line 33) | def battle(self, answers1: List[Dict], answers2: List[Dict]) -> None:
    method evaluate (line 40) | def evaluate(self, answers: List[Dict], targets: List[Dict], save_path...
    method save (line 81) | def save(self, path: str, model_name_list: List[str]) -> None:

FILE: applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py
  function get_battle_result (line 32) | def get_battle_result(sys_prompt: str, user_prompt: str, id: int, max_to...
  function parse_battle_score (line 70) | def parse_battle_score(evaluation: str) -> List[float]:
  function battle (line 108) | def battle(answer1: List[Dict], answer2: List[Dict], prompt_dict: Dict[s...
  function save_battle_results (line 164) | def save_battle_results(evaluations: List[Dict], name1: str, name2: str,...
  function reference_template (line 248) | def reference_template(metric: str, language: str, reference: Dict[str, ...
  function fill_in_message (line 289) | def fill_in_message(role: str, content: str) -> Dict[str, str]:
  function multiturn_chat_completion (line 304) | def multiturn_chat_completion(user_messages: List[str], model: str, max_...
  function get_gpt_evaluation_without_logprobs (line 355) | def get_gpt_evaluation_without_logprobs(
  function get_gpt_evaluation_with_logprobs (line 432) | def get_gpt_evaluation_with_logprobs(
  function evaluate (line 496) | def evaluate(
  function calculate_scores_form_logprobs (line 634) | def calculate_scores_form_logprobs(logprobs: Dict[str, Any]) -> float:
  function calculate_scores_form_response (line 670) | def calculate_scores_form_response(response: str, evaluation: Dict[str, ...
  function save_gpt_evaluation_results (line 694) | def save_gpt_evaluation_results(
  function save_gpt_evaluation_statistics (line 716) | def save_gpt_evaluation_statistics(model_name: str, evaluations: List[Di...
  function analyze_gpt_evaluation_statistics (line 771) | def analyze_gpt_evaluation_statistics(statistics_path: str, save_path: s...

FILE: applications/ColossalEval/colossal_eval/evaluate/utils.py
  function get_data_per_category (line 1) | def get_data_per_category(data, categories):

FILE: applications/ColossalEval/colossal_eval/models/base.py
  class BaseModel (line 9) | class BaseModel:
    method __init__ (line 21) | def __init__(
    method inference (line 41) | def inference(self, data: List[Dict]) -> None:
    method generate (line 51) | def generate(self, inputs: List[str], max_new_tokens: int) -> List[str]:
    method get_loss (line 64) | def get_loss(self, batch: List[str], batch_target: List[str]) -> List[...
    method to (line 77) | def to(self, device):

FILE: applications/ColossalEval/colossal_eval/models/chatglm.py
  class ChatGLMModel (line 13) | class ChatGLMModel(HuggingFaceModel):
    method _get_truncated_prompts (line 14) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in...
    method get_loss (line 30) | def get_loss(
    method _calculate_loss (line 114) | def _calculate_loss(self, input_ids_list: List[torch.LongTensor], labe...
  class ChatGLM2Model (line 150) | class ChatGLM2Model(ChatGLMModel):
    method _get_truncated_prompts (line 151) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in...
    method generate (line 167) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -...
    method get_loss (line 227) | def get_loss(

FILE: applications/ColossalEval/colossal_eval/models/huggingface.py
  class HuggingFaceModel (line 21) | class HuggingFaceModel(BaseModel):
    method __init__ (line 39) | def __init__(
    method _get_choices_indices (line 63) | def _get_choices_indices(self, language: str):
    method _load_tokenizer (line 84) | def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], to...
    method _load_model (line 115) | def _load_model(
    method _calculate_loss (line 150) | def _calculate_loss(self, input_ids_list: List[torch.LongTensor], labe...
    method _get_truncated_prompts (line 186) | def _get_truncated_prompts(self, inputs: List[str], max_new_tokens: in...
    method _get_input_ids_and_labels_pretrain (line 212) | def _get_input_ids_and_labels_pretrain(self, batch_prompt: List[str]) ...
    method _get_input_ids_and_labels (line 253) | def _get_input_ids_and_labels(
    method inference (line 334) | def inference(self, data_loader: DataLoader, inference_kwargs: Dict[st...
    method generate (line 447) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -...
    method get_loss (line 505) | def get_loss(
  class HuggingFaceCausalLM (line 569) | class HuggingFaceCausalLM(HuggingFaceModel):
    method _load_model (line 587) | def _load_model(

FILE: applications/ColossalEval/colossal_eval/models/vllm.py
  class vLLMModel (line 18) | class vLLMModel(HuggingFaceModel):
    method __init__ (line 43) | def __init__(
    method _load_model (line 90) | def _load_model(
    method _calculate_loss (line 177) | def _calculate_loss(self, inputs: List[str], labels: List[str]) -> Tup...
    method inference (line 217) | def inference(self, data_loader: DataLoader, inference_kwargs: Dict[st...
    method generate (line 330) | def generate(self, inputs: List[str], max_new_tokens: int, **kwargs) -...
    method get_loss (line 366) | def get_loss(
  class GetTokenLogitsProcessor (line 469) | class GetTokenLogitsProcessor:
    method __init__ (line 478) | def __init__(
    method __call__ (line 485) | def __call__(self, input_ids: torch.Tensor, logits: torch.Tensor) -> t...
    method get_target_logits (line 497) | def get_target_logits(self) -> torch.Tensor:

FILE: applications/ColossalEval/colossal_eval/utils/conversation.py
  class SeparatorStyle (line 8) | class SeparatorStyle(Enum):
  class Conversation (line 16) | class Conversation:
    method clear (line 24) | def clear(self):
    method get_prompt (line 27) | def get_prompt(self):
    method get_prompt_with_target (line 63) | def get_prompt_with_target(self, target):
    method save_prompt (line 90) | def save_prompt(self):
    method append_message (line 102) | def append_message(self, role, message):
    method copy (line 105) | def copy(self):
    method dict (line 115) | def dict(self):
  function get_few_shot_prefix (line 126) | def get_few_shot_prefix(few_shot_data: List[str], tokenizer: Optional[Au...
  function get_batch_prompt (line 153) | def get_batch_prompt(

FILE: applications/ColossalEval/colossal_eval/utils/utilities.py
  function is_rank_0 (line 8) | def is_rank_0() -> bool:
  function _make_w_io_base (line 12) | def _make_w_io_base(f, mode: str):
  function _make_r_io_base (line 21) | def _make_r_io_base(f, mode: str):
  function jdump (line 27) | def jdump(obj, f, mode="w", indent=4, default=str):
  function jload (line 49) | def jload(f, mode="r"):
  function get_json_list (line 57) | def get_json_list(file_path):

FILE: applications/ColossalEval/examples/dataset_evaluation/eval_dataset.py
  function main (line 9) | def main(args):

FILE: applications/ColossalEval/examples/dataset_evaluation/inference.py
  function rm_and_merge (line 21) | def rm_and_merge(
  function main (line 87) | def main(args):

FILE: applications/ColossalEval/examples/gpt_evaluation/eval.py
  function main (line 9) | def main(args):

FILE: applications/ColossalEval/examples/gpt_evaluation/inference.py
  function rm_and_merge (line 18) | def rm_and_merge(
  function main (line 83) | def main(args):

FILE: applications/ColossalEval/setup.py
  function fetch_requirements (line 4) | def fetch_requirements(path):
  function fetch_readme (line 9) | def fetch_readme():

FILE: applications/ColossalMoE/infer.py
  function parse_args (line 14) | def parse_args():
  function main (line 54) | def main():

FILE: applications/ColossalMoE/setup.py
  function fetch_requirements (line 4) | def fetch_requirements(path):
  function fetch_readme (line 9) | def fetch_readme():
  function fetch_version (line 14) | def fetch_version():

FILE: applications/ColossalMoE/train.py
  function get_global_loss (line 21) | def get_global_loss(loss, booster):
  class RandomDataset (line 28) | class RandomDataset(Dataset):
    method __init__ (line 29) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo...
    method __len__ (line 35) | def __len__(self):
    method __getitem__ (line 38) | def __getitem__(self, idx):
  function parse_args (line 46) | def parse_args():
  function main (line 142) | def main():

FILE: applications/ColossalMoE/utils.py
  function move_to_cuda (line 13) | def move_to_cuda(batch, device):
  function load_json (line 17) | def load_json(file_path: Union[str, os.PathLike]) -> Dict[str, Any]:
  function save_json (line 25) | def save_json(data: Dict[str, Any], file_path: Union[str, os.PathLike]) ...
  function save_checkpoint (line 33) | def save_checkpoint(
  function load_checkpoint (line 63) | def load_checkpoint(

FILE: applications/ColossalQA/colossalqa/chain/memory/summary.py
  class SummarizerMixin (line 24) | class SummarizerMixin(BaseModel):
    method predict_new_summary (line 36) | def predict_new_summary(self, messages: List[BaseMessage], existing_su...
  class ConversationSummaryMemory (line 51) | class ConversationSummaryMemory(BaseChatMemory, SummarizerMixin):
    method from_messages (line 58) | def from_messages(
    method memory_variables (line 71) | def memory_variables(self) -> List[str]:
    method load_memory_variables (line 75) | def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, A...
    method validate_prompt_input_variables (line 84) | def validate_prompt_input_variables(cls, values: Dict) -> Dict:
    method save_context (line 95) | def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]...
    method clear (line 100) | def clear(self) -> None:

FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py
  class CustomBaseRetrievalQA (line 29) | class CustomBaseRetrievalQA(BaseRetrievalQA):
    method from_llm (line 33) | def from_llm(
    method from_chain_type (line 61) | def from_chain_type(
    method _call (line 74) | def _call(
    method _acall (line 133) | async def _acall(
  class RetrievalQA (line 181) | class RetrievalQA(CustomBaseRetrievalQA):
    method _get_docs (line 198) | def _get_docs(
    method _aget_docs (line 207) | async def _aget_docs(
    method _chain_type (line 217) | def _chain_type(self) -> str:

FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py
  class LoadingCallable (line 25) | class LoadingCallable(Protocol):
    method __call__ (line 28) | def __call__(self, llm: BaseLanguageModel, **kwargs: Any) -> BaseCombi...
  function _load_stuff_chain (line 32) | def _load_stuff_chain(
  function load_qa_chain (line 65) | def load_qa_chain(

FILE: applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py
  class CustomStuffDocumentsChain (line 19) | class CustomStuffDocumentsChain(StuffDocumentsChain):
    method _get_inputs (line 57) | def _get_inputs(self, docs: List[Document], **kwargs: Any) -> dict:

FILE: applications/ColossalQA/colossalqa/data_loader/document_loader.py
  class DocumentLoader (line 23) | class DocumentLoader:
    method __init__ (line 28) | def __init__(self, files: List, **kwargs) -> None:
    method load_data (line 52) | def load_data(self, path: str) -> None:
    method clear (line 130) | def clear(self):

FILE: applications/ColossalQA/colossalqa/data_loader/table_dataloader.py
  class TableLoader (line 18) | class TableLoader:
    method __init__ (line 23) | def __init__(self, files: str, sql_path: str = "sqlite:///mydatabase.d...
    method load_data (line 51) | def load_data(self, path):
    method to_sql (line 99) | def to_sql(self, path, table_name):
    method get_sql_path (line 107) | def get_sql_path(self):
    method __del__ (line 110) | def __del__(self):

FILE: applications/ColossalQA/colossalqa/local/colossalcloud_llm.py
  class ColossalCloudLLM (line 32) | class ColossalCloudLLM(LLM):
    method __init__ (line 43) | def __init__(self, gen_config=None, **kwargs):
    method _identifying_params (line 61) | def _identifying_params(self) -> Mapping[str, Any]:
    method _llm_type (line 66) | def _llm_type(self) -> str:
    method set_auth_config (line 69) | def set_auth_config(self, **kwargs):
    method _call (line 78) | def _call(self, prompt: str, stop=None, **kwargs: Any) -> str:
    method text_completion (line 104) | def text_completion(self, prompt, gen_config, auth_config):

FILE: applications/ColossalQA/colossalqa/local/llm.py
  class ColossalAPI (line 28) | class ColossalAPI:
    method __init__ (line 35) | def __init__(self, model_type: str, model_path: str, ckpt_path: str = ...
    method get_api (line 57) | def get_api(model_type: str, model_path: str, ckpt_path: str = None):
    method generate (line 63) | def generate(self, input: str, **kwargs) -> str:
  class VllmAPI (line 89) | class VllmAPI:
    method __init__ (line 90) | def __init__(self, host: str = "localhost", port: int = 8077) -> None:
    method generate (line 96) | def generate(self, input: str, **kwargs):
  class ColossalLLM (line 101) | class ColossalLLM(LLM):
    method _llm_type (line 111) | def _llm_type(self) -> str:
    method _call (line 114) | def _call(
    method _identifying_params (line 136) | def _identifying_params(self) -> Mapping[str, int]:
    method get_token_ids (line 140) | def get_token_ids(self, text: str) -> List[int]:
  class VllmLLM (line 154) | class VllmLLM(LLM):
    method _llm_type (line 164) | def _llm_type(self) -> str:
    method _call (line 167) | def _call(
    method set_host_port (line 187) | def set_host_port(self, host: str = "localhost", port: int = 8077, **k...
    method _identifying_params (line 194) | def _identifying_params(self) -> Mapping[str, int]:

FILE: applications/ColossalQA/colossalqa/local/pangu_llm.py
  class Pangu (line 31) | class Pangu(LLM):
    method __init__ (line 41) | def __init__(self, gen_config=None, **kwargs):
    method _identifying_params (line 49) | def _identifying_params(self) -> Mapping[str, Any]:
    method _llm_type (line 54) | def _llm_type(self) -> str:
    method _call (line 57) | def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwarg...
    method set_auth_config (line 79) | def set_auth_config(self, **kwargs):
    method get_latest_auth_token (line 92) | def get_latest_auth_token(self, region, username, password, domain_name):
    method text_completion (line 110) | def text_completion(self, text, gen_config, auth_config):
    method chat_model (line 131) | def chat_model(self, messages, gen_config, auth_config):

FILE: applications/ColossalQA/colossalqa/local/utils.py
  function post_http_request (line 11) | def post_http_request(
  function get_response (line 27) | def get_response(response: requests.Response) -> List[str]:

FILE: applications/ColossalQA/colossalqa/memory.py
  class ConversationBufferWithSummary (line 18) | class ConversationBufferWithSummary(ConversationSummaryMemory):
    method buffer (line 39) | def buffer(self) -> Any:
    method buffer_as_str (line 44) | def buffer_as_str(self) -> str:
    method buffer_as_messages (line 50) | def buffer_as_messages(self) -> List[BaseMessage]:
    method clear (line 54) | def clear(self):
    method initiate_document_retrieval_chain (line 59) | def initiate_document_retrieval_chain(
    method memory_variables (line 80) | def memory_variables(self) -> List[str]:
    method format_dialogue (line 84) | def format_dialogue(self, lang: str = "en") -> str:
    method get_conversation_length (line 119) | def get_conversation_length(self):
    method load_memory_variables (line 125) | def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, s...
    method save_context (line 165) | def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]...

FILE: applications/ColossalQA/colossalqa/mylogging.py
  class ColossalQALogger (line 8) | class ColossalQALogger:
    method __init__ (line 20) | def __init__(self, name):
    method get_instance (line 30) | def get_instance(name: str):
    method info (line 45) | def info(self, message: str, verbose: bool = False) -> None:
    method warning (line 56) | def warning(self, message: str, verbose: bool = False) -> None:
    method debug (line 66) | def debug(self, message: str, verbose: bool = False) -> None:
    method error (line 76) | def error(self, message: str) -> None:
  function get_logger (line 85) | def get_logger(name: str = None, level=logging.INFO) -> ColossalQALogger:

FILE: applications/ColossalQA/colossalqa/retrieval_conversation_en.py
  class EnglishRetrievalConversation (line 18) | class EnglishRetrievalConversation:
    method __init__ (line 23) | def __init__(self, retriever: CustomRetriever, model_path: str, model_...
    method disambiguity (line 65) | def disambiguity(self, input: str):
    method from_retriever (line 70) | def from_retriever(
    method run (line 75) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ...

FILE: applications/ColossalQA/colossalqa/retrieval_conversation_universal.py
  class UniversalRetrievalConversation (line 20) | class UniversalRetrievalConversation:
    method __init__ (line 25) | def __init__(
    method load_supporting_docs (line 90) | def load_supporting_docs(self, files: List[List[str]] = None, text_spl...
    method start_test_session (line 117) | def start_test_session(self):
    method run (line 130) | def run(self, user_input: str, which_language=str):

FILE: applications/ColossalQA/colossalqa/retrieval_conversation_zh.py
  class ChineseRetrievalConversation (line 18) | class ChineseRetrievalConversation:
    method __init__ (line 23) | def __init__(self, retriever: CustomRetriever, model_path: str, model_...
    method disambiguity (line 71) | def disambiguity(self, input: str):
    method from_retriever (line 76) | def from_retriever(
    method run (line 81) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ...

FILE: applications/ColossalQA/colossalqa/retriever.py
  class CustomRetriever (line 22) | class CustomRetriever(BaseRetriever):
    method from_documents (line 39) | def from_documents(
    method add_documents (line 52) | def add_documents(
    method clear_documents (line 100) | def clear_documents(self):
    method __del__ (line 108) | def __del__(self):
    method set_sql_database_chain (line 113) | def set_sql_database_chain(self, db_chains) -> None:
    method set_rephrase_handler (line 120) | def set_rephrase_handler(self, handler: Callable = None) -> None:
    method _get_relevant_documents (line 126) | def _get_relevant_documents(

FILE: applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py
  class ChineseTextSplitter (line 11) | class ChineseTextSplitter(RecursiveCharacterTextSplitter):
    method __init__ (line 12) | def __init__(self, separators: Optional[List[str]] = None, is_separato...
    method split_text (line 21) | def split_text(self, text: str) -> List[str]:

FILE: applications/ColossalQA/colossalqa/text_splitter/utils.py
  function remove_format (line 4) | def remove_format(text: str) -> str:
  function get_cleaned_paragraph (line 13) | def get_cleaned_paragraph(s: str) -> str:

FILE: applications/ColossalQA/colossalqa/utils.py
  function drop_table (line 12) | def drop_table(engine: Engine) -> None:
  function create_empty_sql_database (line 25) | def create_empty_sql_database(database_uri):
  function destroy_sql_database (line 39) | def destroy_sql_database(sql_engine: Union[Engine, str]) -> None:
  function detect_lang_naive (line 50) | def detect_lang_naive(s):

FILE: applications/ColossalQA/examples/retrieval_conversation_chatgpt.py
  function disambiguity (line 118) | def disambiguity(input):

FILE: applications/ColossalQA/examples/retrieval_conversation_en.py
  function disambiguity (line 58) | def disambiguity(input):

FILE: applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py
  function disambiguity (line 60) | def disambiguity(input):
  function metadata_func (line 85) | def metadata_func(data_sample, additional_fields):

FILE: applications/ColossalQA/examples/retrieval_conversation_zh.py
  function disambiguity (line 64) | def disambiguity(input: str):

FILE: applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py
  function metadata_func (line 47) | def metadata_func(data_sample, additional_fields):

FILE: applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py
  class RAG_ChatBot (line 16) | class RAG_ChatBot:
    method __init__ (line 17) | def __init__(
    method set_embed_model (line 35) | def set_embed_model(self, **kwargs):
    method set_text_splitter (line 42) | def set_text_splitter(self, **kwargs):
    method set_memory (line 46) | def set_memory(self, **kwargs):
    method set_info_retriever (line 58) | def set_info_retriever(self, **kwargs):
    method set_rag_chain (line 63) | def set_rag_chain(self, **kwargs):
    method set_disambig_retriv (line 74) | def set_disambig_retriv(self, **kwargs):
    method load_doc_from_console (line 84) | def load_doc_from_console(self, json_parse_args: Dict = {}):
    method load_doc_from_files (line 96) | def load_doc_from_files(self, files, data_name="default_kb", json_pars...
    method split_docs_and_add_to_mem (line 103) | def split_docs_and_add_to_mem(self, **kwargs):
    method split_docs (line 110) | def split_docs(self, documents):
    method clear_docs (line 114) | def clear_docs(self, **kwargs):
    method reset_config (line 120) | def reset_config(self, rag_config):
    method run (line 130) | def run(self, user_input: str, memory: ConversationBufferWithSummary) ...
    method start_test_session (line 142) | def start_test_session(self):

FILE: applications/ColossalQA/examples/webui_demo/server.py
  function parseArgs (line 16) | def parseArgs():
  class DocUpdateReq (line 26) | class DocUpdateReq(BaseModel):
  class GenerationTaskReq (line 31) | class GenerationTaskReq(BaseModel):
  function update_docs (line 36) | def update_docs(data: DocUpdateReq, request: Request):
  function generate (line 51) | def generate(data: GenerationTaskReq, request: Request):

FILE: applications/ColossalQA/examples/webui_demo/utils.py
  class DocAction (line 4) | class DocAction(str, Enum):

FILE: applications/ColossalQA/examples/webui_demo/webui.py
  function parseArgs (line 10) | def parseArgs():
  function get_response (line 17) | def get_response(data, url):
  function add_text (line 24) | def add_text(history, text):
  function add_file (line 29) | def add_file(history, files):
  function bot (line 39) | def bot(history):
  function restart (line 50) | def restart(chatbot, txt):

FILE: applications/ColossalQA/setup.py
  function fetch_requirements (line 4) | def fetch_requirements(path):
  function fetch_readme (line 9) | def fetch_readme():
  function fetch_version (line 14) | def fetch_version():

FILE: applications/ColossalQA/tests/test_document_loader.py
  function test_add_document (line 6) | def test_add_document():

FILE: applications/ColossalQA/tests/test_memory.py
  function test_memory_long (line 12) | def test_memory_long():
  function test_memory_short (line 66) | def test_memory_short():

FILE: applications/ColossalQA/tests/test_retrieval_qa.py
  function test_en_retrievalQA (line 6) | def test_en_retrievalQA():
  function test_zh_retrievalQA (line 27) | def test_zh_retrievalQA():

FILE: applications/ColossalQA/tests/test_text_splitter.py
  function test_text_splitter (line 4) | def test_text_splitter():

FILE: colossalai/_analyzer/_subclasses/_meta_registration.py
  function new (line 26) | def new(*args, **kwargs):
  function new_strided (line 30) | def new_strided(*args, **kwargs):
  function new_like (line 34) | def new_like(*args, **kwargs):
  function register_meta (line 38) | def register_meta(op, register_dispatcher=True):
  function meta_conv (line 59) | def meta_conv(
  function meta__conv (line 185) | def meta__conv(
  function meta_conv_backward (line 201) | def meta_conv_backward(
  function meta_adaptive_avg_pool2d_backward (line 218) | def meta_adaptive_avg_pool2d_backward(
  function meta_cuda_rnn (line 227) | def meta_cuda_rnn(
  function meta_cudnn_rnn_backward (line 280) | def meta_cudnn_rnn_backward(
  function meta_unregistered_ewise (line 313) | def meta_unregistered_ewise(input: torch.Tensor, *args):
  function meta_bn (line 319) | def meta_bn(input: torch.Tensor, weight, bias, running_mean, running_var...
  function meta_bn_backward (line 325) | def meta_bn_backward(
  function meta_cudnn_bn (line 341) | def meta_cudnn_bn(input: torch.Tensor, weight, bias, running_mean, runni...
  function meta_cudnn_bn_backward (line 355) | def meta_cudnn_bn_backward(
  function meta_ln (line 370) | def meta_ln(input: torch.Tensor, normalized_shape, weight, bias, eps):
  function meta_ln_backward (line 376) | def meta_ln_backward(
  function meta_im2col (line 385) | def meta_im2col(input: torch.Tensor, kernel_size, dilation, padding, str...
  function meta_roll (line 390) | def meta_roll(input: torch.Tensor, shifts, dims):
  function meta_local_scalar_dense (line 395) | def meta_local_scalar_dense(self: torch.Tensor):
  function meta_where_self (line 400) | def meta_where_self(condition: torch.Tensor, self: torch.Tensor, other: ...
  function meta_embedding_dense_backward (line 408) | def meta_embedding_dense_backward(
  function meta_native_dropout_default (line 416) | def meta_native_dropout_default(input: torch.Tensor, p: float, train: bo...
  function meta_native_dropout_backward_default (line 422) | def meta_native_dropout_backward_default(grad: torch.Tensor, mask: torch...
  function meta_eye (line 428) | def meta_eye(n: int, m: int, out: torch.Tensor):
  function meta_index_Tensor (line 432) | def meta_index_Tensor(self, indices):

FILE: colossalai/_analyzer/_subclasses/flop_tensor.py
  class Phase (line 22) | class Phase(Enum):
  function normalize_tuple (line 27) | def normalize_tuple(x):
  function _format_flops (line 33) | def _format_flops(flop):
  function flop_count (line 50) | def flop_count(module: Union[torch.nn.Module, Callable] = None, *args, v...
  function matmul_flop_jit (line 225) | def matmul_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function addmm_flop_jit (line 259) | def addmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function linear_flop_jit (line 276) | def linear_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function bmm_flop_jit (line 290) | def bmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function conv_flop_count (line 304) | def conv_flop_count(
  function conv_flop_jit (line 329) | def conv_flop_jit(inputs: List[Any], outputs: List[Any]):
  function transpose_shape (line 340) | def transpose_shape(shape):
  function conv_backward_flop_jit (line 344) | def conv_backward_flop_jit(inputs: List[Any], outputs: List[Any]):
  function norm_flop_counter (line 360) | def norm_flop_counter(affine_arg_index: int, input_arg_index: int) -> Ca...
  function batchnorm_flop_jit (line 386) | def batchnorm_flop_jit(inputs: List[Any], outputs: List[Any], training: ...
  function ewise_flop_counter (line 397) | def ewise_flop_counter(input_scale: float = 1, output_scale: float = 0) ...
  function zero_flop_jit (line 419) | def zero_flop_jit(*args):

FILE: colossalai/_analyzer/_subclasses/meta_tensor.py
  function register_storage (line 14) | def register_storage(r, data_ptr_fn=None):
  function _normalize_tuple (line 23) | def _normalize_tuple(x):
  function _assert_alias (line 30) | def _assert_alias(func):
  class MetaTensor (line 34) | class MetaTensor(torch.Tensor):
    method __new__ (line 50) | def __new__(cls, elem, device=None, data_ptr_fn=None):
    method __repr__ (line 83) | def __repr__(self):
    method __torch_dispatch__ (line 90) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
    method to (line 126) | def to(self, *args, **kwargs) -> torch.Tensor:
    method cpu (line 152) | def cpu(self, *args, **kwargs):
    method cuda (line 157) | def cuda(self, device=None, non_blocking=False):
    method data_ptr (line 162) | def data_ptr(self):
  class MetaTensorMode (line 166) | class MetaTensorMode(object):
    method __init__ (line 179) | def __init__(self):
    method __enter__ (line 183) | def __enter__(self):
    method __exit__ (line 200) | def __exit__(self, exc_type, exc_value, traceback):

FILE: colossalai/_analyzer/envs.py
  class MeshConfig (line 5) | class MeshConfig:

FILE: colossalai/_analyzer/fx/codegen.py
  function _gen_ckpt_fn_def (line 28) | def _gen_ckpt_fn_def(label, free_vars: List[str]) -> str:
  function _gen_ckpt_output (line 35) | def _gen_ckpt_output(output_vars: List[str]) -> str:
  function _gen_ckpt_usage (line 42) | def _gen_ckpt_usage(label, input_vars, output_vars, use_reentrant=True):
  function _end_of_ckpt (line 51) | def _end_of_ckpt(node: Node, ckpt_level: int) -> bool:
  function _find_input_and_output_nodes (line 60) | def _find_input_and_output_nodes(nodes: List[Node]):
  function _find_nested_ckpt_regions (line 86) | def _find_nested_ckpt_regions(node_list: List[Node], ckpt_level: int = 0):
  function emit_ckpt_func (line 134) | def emit_ckpt_func(
  function emit_code_with_activation_checkpoint (line 210) | def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_no...
  class ActivationCheckpointCodeGen (line 248) | class ActivationCheckpointCodeGen(CodeGen):
    method _gen_python_code (line 249) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp...

FILE: colossalai/_analyzer/fx/graph_module.py
  class _WrappedCall (line 27) | class _WrappedCall:
    method __init__ (line 28) | def __init__(self, cls, cls_call):
    method _generate_error_message (line 42) | def _generate_error_message(frame_summary: traceback.FrameSummary) -> ...
    method __call__ (line 65) | def __call__(self, obj, *args, **kwargs):
  class ColoGraphModule (line 85) | class ColoGraphModule(torch.fx.GraphModule):
    method __init__ (line 107) | def __init__(
    method bind (line 112) | def bind(self, ckpt_def, globals):
    method recompile (line 132) | def recompile(self) -> PythonCode:
    method to_folder (line 176) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ...

FILE: colossalai/_analyzer/fx/node_util.py
  function intersect (line 11) | def intersect(a, b):
  function subtract (line 15) | def subtract(a, b):
  function union (line 19) | def union(a, b):
  function compute_size_in_bytes (line 23) | def compute_size_in_bytes(elem: Union[torch.Tensor, Dict, List, Tuple, i...
  class MetaInfo (line 48) | class MetaInfo:
    method __new__ (line 119) | def __new__(cls, node: Node, **kwargs):
    method __post_init__ (line 136) | def __post_init__(self):
    method fwd_time (line 140) | def fwd_time(self, tflops: float = MeshConfig.TFLOPS, bandwidth: float...
    method bwd_time (line 144) | def bwd_time(self, tflops: float = MeshConfig.TFLOPS, bandwidth: float...
    method param_size (line 148) | def param_size(self):
    method buffer_size (line 152) | def buffer_size(self):
    method output_size (line 156) | def output_size(self):
    method accumulate_size (line 166) | def accumulate_size(self):
    method temp_size (line 176) | def temp_size(self):
    method backward_size (line 186) | def backward_size(self):
    method __repr__ (line 190) | def __repr__(self):

FILE: colossalai/_analyzer/fx/passes/graph_profile.py
  function _format_flops (line 13) | def _format_flops(flops: float) -> str:
  function _denormalize_tuple (line 26) | def _denormalize_tuple(t: Tuple[int, ...]) -> Tuple[int, ...]:
  function _normalize_tuple (line 30) | def _normalize_tuple(x):
  function _current_device (line 36) | def _current_device(module):
  class GraphProfiler (line 40) | class GraphProfiler(torch.fx.Interpreter):
    method __init__ (line 52) | def __init__(self, module: GraphModule, garbage_collect_values: bool =...
    method run (line 55) | def run(self, *args, initial_env: Optional[Dict[Node, Any]] = None, en...
    method fetch_initial_env (line 91) | def fetch_initial_env(self, device=None) -> Dict[Node, Any]:
    method propagate (line 107) | def propagate(self, *args, device=None):
    method summary (line 123) | def summary(self) -> str:
  class CommunicationProfiler (line 184) | class CommunicationProfiler(GraphProfiler):
    method __init__ (line 189) | def __init__(self, module: GraphModule, garbage_collect_values: bool =...
  class FlopProfiler (line 193) | class FlopProfiler(GraphProfiler):
    method run_node (line 232) | def run_node(self, n: torch.fx.Node) -> Any:
    method call_function (line 269) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ...
    method call_method (line 293) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw...
    method call_module (line 311) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw...
  function graph_profile_pass (line 333) | def graph_profile_pass(module: GraphModule, *args, verbose=False) -> Gra...

FILE: colossalai/_analyzer/fx/passes/shape_prop.py
  class sim_env (line 17) | class sim_env(saved_tensors_hooks):
    method __init__ (line 32) | def __init__(self, module: Optional[torch.nn.Module] = None):
    method pack_hook (line 38) | def pack_hook(self, tensor: torch.Tensor):
    method unpack_hook (line 43) | def unpack_hook(self, tensor):
  function _normalize_tuple (line 47) | def _normalize_tuple(x):
  function _current_device (line 53) | def _current_device(module):
  class ShapeProp (line 61) | class ShapeProp(torch.fx.Interpreter):
    method __init__ (line 97) | def __init__(self, module: torch.fx.GraphModule, garbage_collect_value...
    method run_node (line 101) | def run_node(self, n: torch.fx.Node) -> Any:
    method call_function (line 174) | def call_function(self, target: "Target", args: Tuple[Any, ...], kwarg...
    method call_method (line 203) | def call_method(self, target: "Target", args: Tuple[Any, ...], kwargs:...
    method propagate (line 235) | def propagate(self, *args, device=None):
  function shape_prop_pass (line 256) | def shape_prop_pass(module: torch.fx.GraphModule, *args) -> torch.fx.Gra...

FILE: colossalai/_analyzer/fx/symbolic_profile.py
  function register_flop_count_impl (line 7) | def register_flop_count_impl(func):
  function register_shape_impl (line 15) | def register_shape_impl(func):
  function symbolic_profile (line 23) | def symbolic_profile(module: GraphModule, *args, verbose=False) -> Graph...

FILE: colossalai/_analyzer/fx/tracer/bias_addition.py
  function linear_impl (line 16) | def linear_impl(input, weight, bias=None):
  function conv1d_impl (line 24) | def conv1d_impl(input, weight, bias=None, stride=_single(1), padding=_si...
  function conv2d_impl (line 34) | def conv2d_impl(input, weight, bias=None, stride=_pair(1), padding=_pair...
  function conv3d_impl (line 44) | def conv3d_impl(input, weight, bias=None, stride=_triple(1), padding=_tr...
  function conv_transpose1d_impl (line 54) | def conv_transpose1d_impl(
  function conv_transpose2d_impl (line 87) | def conv_transpose2d_impl(
  function conv_transpose3d_impl (line 113) | def conv_transpose3d_impl(
  function addmm_impl (line 147) | def addmm_impl(input, mat1, mat2, beta=1, alpha=1):
  function addbmm_impl (line 160) | def addbmm_impl(input, batch1, batch2, beta=1, alpha=1):

FILE: colossalai/_analyzer/fx/tracer/custom_leaf_module.py
  function torch_nn_normalize (line 17) | def torch_nn_normalize(self, input: torch.Tensor):

FILE: colossalai/_analyzer/fx/tracer/proxy.py
  class ColoProxy (line 13) | class ColoProxy(Proxy):
    method __init__ (line 16) | def __init__(self, *args, data=None, **kwargs):
    method meta_data (line 21) | def meta_data(self):
    method meta_data (line 25) | def meta_data(self, args):
    method __torch_function__ (line 30) | def __torch_function__(cls, orig_method, types, args=(), kwargs=None):
    method from_torch_proxy (line 45) | def from_torch_proxy(cls, proxy: Proxy):
    method __repr__ (line 48) | def __repr__(self):
    method __len__ (line 51) | def __len__(self):
    method __int__ (line 54) | def __int__(self):
    method __index__ (line 57) | def __index__(self):
    method __float__ (line 63) | def __float__(self):
    method __bool__ (line 66) | def __bool__(self):
    method __getattr__ (line 69) | def __getattr__(self, k):
    method __setitem__ (line 72) | def __setitem__(self, key, value):
    method __contains__ (line 77) | def __contains__(self, key):
    method __isinstancecheck__ (line 85) | def __isinstancecheck__(self, type):
  class ColoAttribute (line 89) | class ColoAttribute(ColoProxy):
    method __init__ (line 90) | def __init__(self, root, attr: str, data=None):
    method node (line 98) | def node(self):
    method __call__ (line 105) | def __call__(self, *args, **kwargs):
    method __repr__ (line 108) | def __repr__(self):

FILE: colossalai/_analyzer/fx/tracer/symbolic_trace.py
  function _default_device (line 19) | def _default_device():
  function _current_device (line 23) | def _current_device(module: torch.nn.Module):
  function symbolic_trace (line 30) | def symbolic_trace(

FILE: colossalai/_analyzer/fx/tracer/tracer.py
  function _truncate_suffix (line 19) | def _truncate_suffix(s: str):
  function register_tracer_impl (line 26) | def register_tracer_impl(func: Callable[..., Any], name: Optional[str] =...
  function register_leaf_module_impl (line 35) | def register_leaf_module_impl(module: nn.Module):
  function register_leaf_module (line 43) | def register_leaf_module(module: nn.Module):
  function register_non_leaf_module (line 47) | def register_non_leaf_module(module: nn.Module):
  class ColoTracer (line 51) | class ColoTracer(Tracer):
    method __init__ (line 67) | def __init__(self, trace_act_ckpt: bool = False, bias_addition_split: ...
    method is_leaf_module (line 82) | def is_leaf_module(self, m: nn.Module, module_qualified_name: str) -> ...
    method call_module (line 92) | def call_module(
    method proxy (line 101) | def proxy(self, node: Node) -> "ColoProxy":
    method create_proxy (line 104) | def create_proxy(
    method create_node (line 161) | def create_node(self, *args, **kwargs) -> Node:
    method trace (line 166) | def trace(
    method _tracer_override (line 236) | def _tracer_override(self):
    method _torch_factory_override (line 269) | def _torch_factory_override(self):
    method _post_check (line 306) | def _post_check(self, non_concrete_arg_names: Set[str]):
    method getattr (line 336) | def getattr(self, attr, attr_val, parameter_proxy_cache):
    method _module_getattr (line 339) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache):

FILE: colossalai/accelerator/api.py
  function set_accelerator (line 22) | def set_accelerator(accelerator: Union[str, BaseAccelerator]) -> None:
  function auto_set_accelerator (line 40) | def auto_set_accelerator() -> None:
  function get_accelerator (line 60) | def get_accelerator() -> BaseAccelerator:

FILE: colossalai/accelerator/base_accelerator.py
  class BaseAccelerator (line 11) | class BaseAccelerator(ABC):
    method __init__ (line 14) | def __init__(self, name: str, communication_backend: str, is_synchrono...
    method name (line 24) | def name(self) -> str:
    method communication_backend (line 31) | def communication_backend(self) -> str:
    method is_synchronous (line 38) | def is_synchronous(self) -> bool:
    method __repr__ (line 44) | def __repr__(self) -> str:
    method get_version (line 52) | def get_version(self) -> str:
    method get_current_device (line 58) | def get_current_device(self) -> torch.device:
    method current_device (line 64) | def current_device(self) -> int:
    method set_device (line 70) | def set_device(self, device: Optional[Union[torch.device, int]] = None...
    method get_device_name (line 76) | def get_device_name(self, device: Union[torch.device, int]) -> str:
    method synchronize (line 82) | def synchronize(self, device: Union[torch.device, int] = None):
    method is_available (line 88) | def is_available(self):
    method device_count (line 94) | def device_count(self):
    method set_to_device (line 99) | def set_to_device(self, models: Any) -> Any:
    method get_device_capability (line 116) | def get_device_capability(self, device=None) -> Tuple[int, int]:
    method get_device_name (line 122) | def get_device_name(self, device=None) -> str:
    method get_device_properties (line 128) | def get_device_properties(self, device):
    method utilization (line 134) | def utilization(self, device=None) -> int:
    method get_rng_state (line 143) | def get_rng_state(self, device="cuda") -> torch.Tensor:
    method get_rng_state_all (line 149) | def get_rng_state_all(self) -> List[torch.Tensor]:
    method set_rng_state (line 155) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "cu...
    method set_rng_state_all (line 161) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None:
    method manual_seed (line 167) | def manual_seed(self, seed: int) -> None:
    method manual_seed_all (line 173) | def manual_seed_all(self, seed: int) -> None:
    method seed (line 179) | def seed(self) -> None:
    method seed_all (line 185) | def seed_all(self) -> None:
    method initial_seed (line 191) | def initial_seed(self) -> int:
    method empty_cache (line 200) | def empty_cache(self) -> None:
    method memory_stats (line 206) | def memory_stats(self, device=None) -> Dict[str, Any]:
    method memory_summary (line 212) | def memory_summary(self, device=None, abbreviated=False) -> str:
    method memory_snapshot (line 218) | def memory_snapshot(self):
    method memory_allocated (line 224) | def memory_allocated(self, device=None) -> int:
    method max_memory_allocated (line 230) | def max_memory_allocated(self, device=None) -> int:
    method reset_max_memory_allocated (line 236) | def reset_max_memory_allocated(self, device=None) -> None:
    method reset_max_memory_cached (line 242) | def reset_max_memory_cached(self, device=None) -> None:
    method memory_reserved (line 248) | def memory_reserved(self, device=None) -> int:
    method max_memory_reserved (line 254) | def max_memory_reserved(self, device=None) -> int:
    method set_per_process_memory_fraction (line 260) | def set_per_process_memory_fraction(self, fraction: float, device=None...
    method reset_peak_memory_stats (line 266) | def reset_peak_memory_stats(self, device=None) -> None:
    method Stream (line 276) | def Stream(self, device=None, priority=0, **kwargs):
    method Event (line 282) | def Event(self, enable_timing: bool = False, blocking: bool = False, i...
    method current_stream (line 288) | def current_stream(self, device=None):
    method default_stream (line 294) | def default_stream(self, device=None):
    method set_stream (line 300) | def set_stream(self, stream_):
    method stream (line 306) | def stream(self, stream_):
    method autocast (line 315) | def autocast(

FILE: colossalai/accelerator/cpu_accelerator.py
  class CpuAccelerator (line 15) | class CpuAccelerator(BaseAccelerator):
    method __init__ (line 21) | def __init__(self):
    method get_version (line 27) | def get_version(self) -> str:
    method get_current_device (line 33) | def get_current_device(self) -> torch.device:
    method current_device (line 39) | def current_device(self) -> int:
    method set_device (line 45) | def set_device(self, device: Optional[Union[torch.device, int]] = None...
    method get_device_name (line 51) | def get_device_name(self, device: Union[torch.device, int]) -> str:
    method synchronize (line 57) | def synchronize(self, device: Union[torch.device, int] = None):
    method is_available (line 63) | def is_available(self):
    method device_count (line 69) | def device_count(self):
    method get_device_capability (line 75) | def get_device_capability(self, device=None) -> Tuple[int, int]:
    method get_device_name (line 81) | def get_device_name(self, device=None) -> str:
    method get_device_properties (line 87) | def get_device_properties(self, device):
    method utilization (line 93) | def utilization(self, device=None) -> int:
    method get_rng_state (line 102) | def get_rng_state(self, device=None) -> torch.Tensor:
    method get_rng_state_all (line 108) | def get_rng_state_all(self) -> List[torch.Tensor]:
    method set_rng_state (line 114) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = Non...
    method set_rng_state_all (line 120) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None:
    method manual_seed (line 126) | def manual_seed(self, seed: int) -> None:
    method manual_seed_all (line 132) | def manual_seed_all(self, seed: int) -> None:
    method seed (line 138) | def seed(self) -> None:
    method seed_all (line 144) | def seed_all(self) -> None:
    method initial_seed (line 150) | def initial_seed(self) -> int:
    method empty_cache (line 160) | def empty_cache(self) -> None:
    method memory_stats (line 166) | def memory_stats(self, device=None) -> Dict[str, Any]:
    method memory_summary (line 172) | def memory_summary(self, device=None, abbreviated=False) -> str:
    method memory_snapshot (line 178) | def memory_snapshot(self):
    method memory_allocated (line 184) | def memory_allocated(self, device=None) -> int:
    method max_memory_allocated (line 190) | def max_memory_allocated(self, device=None) -> int:
    method reset_max_memory_allocated (line 196) | def reset_max_memory_allocated(self, device=None) -> None:
    method reset_max_memory_cached (line 202) | def reset_max_memory_cached(self, device=None) -> None:
    method memory_reserved (line 208) | def memory_reserved(self, device=None) -> int:
    method max_memory_reserved (line 214) | def max_memory_reserved(self, device=None) -> int:
    method set_per_process_memory_fraction (line 220) | def set_per_process_memory_fraction(self, fraction: float, device=None...
    method reset_peak_memory_stats (line 228) | def reset_peak_memory_stats(self, device=None) -> None:
    method Stream (line 238) | def Stream(self, device=None, priority=0, **kwargs):
    method Event (line 244) | def Event(self, enable_timing: bool = False, blocking: bool = False, i...
    method current_stream (line 250) | def current_stream(self, device=None):
    method default_stream (line 256) | def default_stream(self, device=None):
    method set_stream (line 262) | def set_stream(self, stream_):
    method stream (line 268) | def stream(self, stream_):
    method autocast (line 277) | def autocast(

FILE: colossalai/accelerator/cuda_accelerator.py
  class CudaAccelerator (line 13) | class CudaAccelerator(BaseAccelerator):
    method __init__ (line 18) | def __init__(self):
    method get_version (line 24) | def get_version(self) -> str:
    method get_current_device (line 30) | def get_current_device(self) -> torch.device:
    method current_device (line 36) | def current_device(self) -> int:
    method set_device (line 42) | def set_device(self, device: Optional[Union[torch.device, int]] = None...
    method get_device_name (line 52) | def get_device_name(self, device: Union[torch.device, int]) -> str:
    method synchronize (line 58) | def synchronize(self, device: Union[torch.device, int] = None):
    method is_available (line 64) | def is_available(self):
    method device_count (line 70) | def device_count(self):
    method get_device_capability (line 76) | def get_device_capability(self, device=None) -> Tuple[int, int]:
    method get_device_name (line 82) | def get_device_name(self, device=None) -> str:
    method get_device_properties (line 88) | def get_device_properties(self, device):
    method utilization (line 94) | def utilization(self, device=None) -> int:
    method get_rng_state (line 103) | def get_rng_state(self, device="cuda") -> torch.Tensor:
    method get_rng_state_all (line 109) | def get_rng_state_all(self) -> List[torch.Tensor]:
    method set_rng_state (line 115) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "cu...
    method set_rng_state_all (line 121) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None:
    method manual_seed (line 127) | def manual_seed(self, seed: int) -> None:
    method manual_seed_all (line 133) | def manual_seed_all(self, seed: int) -> None:
    method seed (line 139) | def seed(self) -> None:
    method seed_all (line 145) | def seed_all(self) -> None:
    method initial_seed (line 151) | def initial_seed(self) -> int:
    method empty_cache (line 161) | def empty_cache(self) -> None:
    method memory_stats (line 167) | def memory_stats(self, device=None) -> Dict[str, Any]:
    method memory_summary (line 173) | def memory_summary(self, device=None, abbreviated=False) -> str:
    method memory_snapshot (line 179) | def memory_snapshot(self):
    method memory_allocated (line 185) | def memory_allocated(self, device=None) -> int:
    method max_memory_allocated (line 191) | def max_memory_allocated(self, device=None) -> int:
    method reset_max_memory_allocated (line 197) | def reset_max_memory_allocated(self, device=None) -> None:
    method reset_max_memory_cached (line 203) | def reset_max_memory_cached(self, device=None) -> None:
    method memory_reserved (line 209) | def memory_reserved(self, device=None) -> int:
    method max_memory_reserved (line 215) | def max_memory_reserved(self, device=None) -> int:
    method set_per_process_memory_fraction (line 221) | def set_per_process_memory_fraction(self, fraction: float, device=None...
    method reset_peak_memory_stats (line 227) | def reset_peak_memory_stats(self, device=None) -> None:
    method Stream (line 237) | def Stream(self, device=None, priority=0, **kwargs):
    method Event (line 243) | def Event(self, enable_timing: bool = False, blocking: bool = False, i...
    method current_stream (line 249) | def current_stream(self, device=None):
    method default_stream (line 255) | def default_stream(self, device=None):
    method set_stream (line 261) | def set_stream(self, stream_):
    method stream (line 267) | def stream(self, stream_):
    method autocast (line 276) | def autocast(

FILE: colossalai/accelerator/npu_accelerator.py
  class NpuAccelerator (line 19) | class NpuAccelerator(BaseAccelerator):
    method __init__ (line 24) | def __init__(self):
    method get_version (line 30) | def get_version(self) -> str:
    method get_current_device (line 36) | def get_current_device(self) -> torch.device:
    method current_device (line 42) | def current_device(self) -> int:
    method set_device (line 48) | def set_device(self, device: Optional[Union[torch.device, int]] = None...
    method get_device_name (line 58) | def get_device_name(self, device: Union[torch.device, int]) -> str:
    method synchronize (line 64) | def synchronize(self, device: Union[torch.device, int] = None):
    method is_available (line 70) | def is_available(self):
    method device_count (line 76) | def device_count(self):
    method get_device_capability (line 82) | def get_device_capability(self, device=None) -> Tuple[int, int]:
    method get_device_name (line 88) | def get_device_name(self, device=None) -> str:
    method get_device_properties (line 94) | def get_device_properties(self, device):
    method utilization (line 100) | def utilization(self, device=None) -> int:
    method get_rng_state (line 109) | def get_rng_state(self, device="npu") -> torch.Tensor:
    method get_rng_state_all (line 115) | def get_rng_state_all(self) -> List[torch.Tensor]:
    method set_rng_state (line 121) | def set_rng_state(self, new_state: torch.ByteTensor, device: str = "np...
    method set_rng_state_all (line 127) | def set_rng_state_all(self, new_states: List[torch.ByteTensor]) -> None:
    method manual_seed (line 133) | def manual_seed(self, seed: int) -> None:
    method manual_seed_all (line 139) | def manual_seed_all(self, seed: int) -> None:
    method seed (line 145) | def seed(self) -> None:
    method seed_all (line 151) | def seed_all(self) -> None:
    method initial_seed (line 157) | def initial_seed(self) -> int:
    method empty_cache (line 167) | def empty_cache(self) -> None:
    method memory_stats (line 173) | def memory_stats(self, device=None) -> Dict[str, Any]:
    method memory_summary (line 179) | def memory_summary(self, device=None, abbreviated=False) -> str:
    method memory_snapshot (line 185) | def memory_snapshot(self):
    method memory_allocated (line 191) | def memory_allocated(self, device=None) -> int:
    method max_memory_allocated (line 197) | def max_memory_allocated(self, device=None) -> int:
    method reset_max_memory_allocated (line 203) | def reset_max_memory_allocated(self, device=None) -> None:
    method reset_max_memory_cached (line 209) | def reset_max_memory_cached(self, device=None) -> None:
    method memory_reserved (line 215) | def memory_reserved(self, device=None) -> int:
    method max_memory_reserved (line 221) | def max_memory_reserved(self, device=None) -> int:
    method set_per_process_memory_fraction (line 227) | def set_per_process_memory_fraction(self, fraction: float, device=None...
    method reset_peak_memory_stats (line 233) | def reset_peak_memory_stats(self, device=None) -> None:
    method Stream (line 243) | def Stream(self, device=None, priority=0, **kwargs):
    method Event (line 249) | def Event(self, enable_timing: bool = False, blocking: bool = False, i...
    method current_stream (line 255) | def current_stream(self, device=None):
    method default_stream (line 261) | def default_stream(self, device=None):
    method set_stream (line 267) | def set_stream(self, stream_):
    method stream (line 273) | def stream(self, stream_):
    method autocast (line 282) | def autocast(

FILE: colossalai/amp/naive_amp/grad_scaler/base_grad_scaler.py
  class BaseGradScaler (line 16) | class BaseGradScaler(ABC):
    method __init__ (line 24) | def __init__(self, initial_scale: float, verbose: bool):
    method scale (line 33) | def scale(self) -> Tensor:
    method inv_scale (line 39) | def inv_scale(self) -> Tensor:
    method state_dict (line 44) | def state_dict(self) -> Dict:
    method load_state_dict (line 51) | def load_state_dict(self, state_dict: Dict) -> None:
    method update (line 61) | def update(self, overflow: bool) -> None:
    method log (line 68) | def log(self, message, *args, **kwargs):

FILE: colossalai/amp/naive_amp/grad_scaler/constant_grad_scaler.py
  class ConstantGradScaler (line 8) | class ConstantGradScaler(BaseGradScaler):
    method __init__ (line 16) | def __init__(self, initial_scale: int, verbose: bool):
    method update (line 20) | def update(self, overflow: bool) -> None:

FILE: colossalai/amp/naive_amp/grad_scaler/dynamic_grad_scaler.py
  class DynamicGradScaler (line 15) | class DynamicGradScaler(BaseGradScaler):
    method __init__ (line 29) | def __init__(
    method _sanity_checks (line 65) | def _sanity_checks(self) -> None:
    method update (line 78) | def update(self, overflow: bool) -> None:
    method _backoff_scale (line 103) | def _backoff_scale(self) -> None:
    method _grow_scale (line 110) | def _grow_scale(self) -> None:
    method state_dict (line 117) | def state_dict(self):
    method load_state_dict (line 125) | def load_state_dict(self, state_dict):

FILE: colossalai/amp/naive_amp/mixed_precision_mixin/base.py
  class MixedPrecisionMixin (line 7) | class MixedPrecisionMixin(ABC):
    method pre_backward (line 46) | def pre_backward(self, loss: Tensor, *args, **kwargs) -> Tensor:
    method pre_backward_by_grad (line 57) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor:
    method should_skip_step (line 69) | def should_skip_step(self) -> bool:
    method pre_zero_grad (line 77) | def pre_zero_grad(self) -> None:
    method get_grad_div_scale (line 81) | def get_grad_div_scale(self) -> float:

FILE: colossalai/amp/naive_amp/mixed_precision_mixin/bf16.py
  class BF16MixedPrecisionMixin (line 7) | class BF16MixedPrecisionMixin(MixedPrecisionMixin):
    method pre_backward (line 10) | def pre_backward(self, loss: Tensor) -> Tensor:
    method pre_backward_by_grad (line 13) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor:
    method should_skip_step (line 16) | def should_skip_step(self) -> bool:
    method pre_zero_grad (line 19) | def pre_zero_grad(self) -> None:
    method get_grad_div_scale (line 22) | def get_grad_div_scale(self) -> float:

FILE: colossalai/amp/naive_amp/mixed_precision_mixin/fp16.py
  class OptimState (line 14) | class OptimState(Enum):
  class FP16MixedPrecisionMixin (line 19) | class FP16MixedPrecisionMixin(MixedPrecisionMixin):
    method __init__ (line 22) | def __init__(
    method loss_scale (line 46) | def loss_scale(self) -> float:
    method check_local_overflow (line 50) | def check_local_overflow(self) -> bool:
    method check_overflow (line 57) | def check_overflow(self) -> bool:
    method pre_backward (line 65) | def pre_backward(self, loss: Tensor) -> Tensor:
    method pre_backward_by_grad (line 70) | def pre_backward_by_grad(self, tensor: Tensor, grad: Tensor) -> Tensor:
    method should_skip_step (line 74) | def should_skip_step(self) -> bool:
    method pre_zero_grad (line 81) | def pre_zero_grad(self) -> None:
    method get_grad_div_scale (line 84) | def get_grad_div_scale(self) -> float:

FILE: colossalai/amp/naive_amp/mixed_precision_optimizer.py
  class NaiveFP16MixedPrecisionMixin (line 13) | class NaiveFP16MixedPrecisionMixin(FP16MixedPrecisionMixin):
    method __init__ (line 14) | def __init__(
    method check_local_overflow (line 30) | def check_local_overflow(self) -> bool:
  class MixedPrecisionOptimizer (line 37) | class MixedPrecisionOptimizer(OptimizerWrapper):
    method __init__ (line 38) | def __init__(
    method backward (line 89) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw...
    method backward_by_grad (line 93) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso...
    method zero_grad (line 102) | def zero_grad(self, *args, **kwargs):
    method _unscale_and_clip_grads (line 108) | def _unscale_and_clip_grads(self, total_norm: float) -> None:
    method _compute_grad_norm (line 140) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]...
    method step (line 169) | def step(self, *args, **kwargs):
    method update_master_params (line 208) | def update_master_params(self, model: Module):
    method get_working_to_master_map (line 217) | def get_working_to_master_map(self) -> Dict[int, torch.Tensor]:
    method get_master_to_working_map (line 220) | def get_master_to_working_map(self) -> Dict[int, torch.Tensor]:
    method get_grad_norm (line 223) | def get_grad_norm(self, norm_type=2, **kwargs):

FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_base.py
  function _copy_output (line 18) | def _copy_output(src: Graph, dst: Graph):
  function _get_param_size (line 25) | def _get_param_size(module: torch.nn.Module):
  class CheckpointSolverBase (line 30) | class CheckpointSolverBase(ABC):
    method __init__ (line 31) | def __init__(
    method solve (line 82) | def solve(self):
    method get_node_list (line 85) | def get_node_list(self):
    method _linearize_graph (line 89) | def _linearize_graph(self) -> List[List[Node]]:

FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_chen.py
  class CheckpointSolverChen (line 14) | class CheckpointSolverChen(CheckpointSolverBase):
    method __init__ (line 15) | def __init__(self, graph: Graph, cnode: List[str] = None, num_grids: i...
    method solve (line 36) | def solve(self) -> Graph:
    method run_chen_greedy (line 52) | def run_chen_greedy(self, b: int = 0) -> Tuple[Set, int]:
    method grid_search (line 73) | def grid_search(self) -> Set:

FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.c
  function PyObject (line 50) | static PyObject* computeTable(PyObject* self, PyObject* args) {
  type PyModuleDef (line 199) | struct PyModuleDef
  function PyMODINIT_FUNC (line 209) | PyMODINIT_FUNC PyInit_rotorc(void) { return PyModule_Create(&rotorModule...

FILE: colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py
  class CheckpointSolverRotor (line 24) | class CheckpointSolverRotor(CheckpointSolverBase):
    method __init__ (line 25) | def __init__(
    method solve (line 66) | def solve(self, force_python: bool = False, verbose: bool = False) -> ...
    method print_chain (line 104) | def print_chain(self):
    method print_sequence (line 116) | def print_sequence(self):
    method _construct_chain (line 120) | def _construct_chain(cls, graph: Graph, node_list: List[List[Node]]) -...
    method _extract_node_info (line 141) | def _extract_node_info(cls, node: List[Node]) -> Tuple[int, ...]:
    method _extract_input (line 168) | def _extract_input(graph: Graph) -> Tuple[Tensor, ...]:
    method _extract_unused_output (line 177) | def _extract_unused_output(node: Node) -> int:
    method _extract_btmp (line 182) | def _extract_btmp(node: List[Node]) -> int:
    method _compute_table (line 209) | def _compute_table(chain: Chain, mmax: int) -> Tuple:
    method _compute_table_c (line 276) | def _compute_table_c(chain: Chain, mmax: int) -> Tuple:
    method _backtrack (line 308) | def _backtrack(
    method _annotate_from_sequence (line 361) | def _annotate_from_sequence(sequence: Sequence, node_list: List[List[N...

FILE: colossalai/auto_parallel/checkpoint/operation.py
  class Chain (line 8) | class Chain:
    method __init__ (line 9) | def __init__(
    method check_lengths (line 40) | def check_lengths(self):
    method __repr__ (line 50) | def __repr__(self):
    method __len__ (line 58) | def __len__(self):
    method discretize_all (line 61) | def discretize_all(self, unit: int):
  class Operation (line 70) | class Operation(ABC):
    method __repr__ (line 73) | def __repr__(self) -> str:
    method shift (line 76) | def shift(self, value):
  class Forward (line 83) | class Forward(Operation):
    method __init__ (line 86) | def __init__(self, index):
    method cost (line 89) | def cost(self, chain: Chain):
  class ForwardEnable (line 96) | class ForwardEnable(Forward):
  class ForwardNograd (line 100) | class ForwardNograd(Forward):
  class ForwardCheck (line 104) | class ForwardCheck(Forward):
  class Forwards (line 108) | class Forwards(Operation):
    method __init__ (line 109) | def __init__(self, start, end):
    method __repr__ (line 112) | def __repr__(self):
    method cost (line 115) | def cost(self, chain: Chain):
  function isForward (line 122) | def isForward(op):
  class Backward (line 126) | class Backward(Operation):
    method __init__ (line 129) | def __init__(self, index):
    method cost (line 132) | def cost(self, chain: Chain):
  class Loss (line 139) | class Loss(Operation):
    method __init__ (line 140) | def __init__(self):
    method __repr__ (line 143) | def __repr__(self):
    method cost (line 146) | def cost(self, chain):
  class MemoryAccess (line 150) | class MemoryAccess(Operation):
    method __init__ (line 153) | def __init__(self, index):
    method cost (line 156) | def cost(self, chain: Chain):
  class WriteMemory (line 160) | class WriteMemory(MemoryAccess):
  class ReadMemory (line 164) | class ReadMemory(MemoryAccess):
  class DiscardMemory (line 168) | class DiscardMemory(MemoryAccess):
  class Sequence (line 172) | class Sequence(list):
    method __init__ (line 173) | def __init__(self):
    method __repr__ (line 176) | def __repr__(self):
    method list_operations (line 179) | def list_operations(self):

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/activation.py
  function elementwise_meta_info (line 14) | def elementwise_meta_info(temp_mem_scale: float = 0, buffer_mem_scale: f...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/binary_elementwise_ops.py
  function binary_elementwise_meta_info (line 16) | def binary_elementwise_meta_info(*args, **kwargs) -> Tuple[TrainCycleIte...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
  function convnd_meta_info (line 20) | def convnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/embedding.py
  function embedding_meta_info (line 15) | def embedding_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainC...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/linear.py
  function linear_meta_info (line 17) | def linear_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl...
  function matmul_meta_info (line 190) | def matmul_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycl...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/non_spmd.py
  function non_spmd_meta_info (line 17) | def non_spmd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCy...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
  function batchnormnd_meta_info (line 17) | def batchnormnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, Trai...
  function layernorm_meta_info (line 113) | def layernorm_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainC...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py
  function avgpool_meta_info (line 17) | def avgpool_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCyc...
  function maxpool_meta_info (line 74) | def maxpool_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCyc...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/tensor.py
  function tensor_related_metainfo (line 13) | def tensor_related_metainfo(bwd_mem_out_factor: float = 1, bwd_mem_tmp_f...

FILE: colossalai/auto_parallel/meta_profiler/meta_registry/where.py
  function where_meta_info (line 15) | def where_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycle...

FILE: colossalai/auto_parallel/meta_profiler/registry.py
  class Registry (line 4) | class Registry:
    method __init__ (line 5) | def __init__(self, name):
    method register (line 9) | def register(self, source):
    method get (line 21) | def get(self, source):
    method has (line 26) | def has(self, source):

FILE: colossalai/auto_parallel/meta_profiler/shard_metainfo.py
  class ShardMetaInfo (line 14) | class ShardMetaInfo:
    method __init__ (line 20) | def __init__(self, strategy: ShardingStrategy = None, target: Callable...
    method strategy (line 47) | def strategy(self) -> ShardingStrategy:
    method target (line 51) | def target(self) -> Callable:
    method strategy (line 55) | def strategy(self, strategy: ShardingStrategy) -> None:
    method target (line 61) | def target(self, target: Callable) -> None:
    method compute_sharded_opdata (line 66) | def compute_sharded_opdata(self, operation_data: OperationData, shardi...
    method compute_shard_metainfo (line 91) | def compute_shard_metainfo(self):

FILE: colossalai/auto_parallel/offload/amp_optimizer.py
  class OptimState (line 17) | class OptimState(Enum):
  class AMPOptimizer (line 22) | class AMPOptimizer(OptimizerWrapper):
    method __init__ (line 40) | def __init__(
    method _set_grad_ptr (line 87) | def _set_grad_ptr(self):
    method _update_fp16_params (line 97) | def _update_fp16_params(self):
    method _check_overflow (line 105) | def _check_overflow(self):
    method _get_combined_scale (line 110) | def _get_combined_scale(self):
    method loss_scale (line 125) | def loss_scale(self):
    method zero_grad (line 128) | def zero_grad(self, *args, **kwargs):
    method step (line 132) | def step(self, *args, **kwargs):
    method clip_grad_norm (line 155) | def clip_grad_norm(self, model: torch.nn.Module, max_norm: float, norm...
    method backward (line 158) | def backward(self, loss: torch.Tensor):
    method __init__optimizer (line 163) | def __init__optimizer(self):

FILE: colossalai/auto_parallel/offload/base_offload_module.py
  class BaseOffloadModule (line 14) | class BaseOffloadModule:
    method __init__ (line 24) | def __init__(self, model: nn.Module, region_manager: RegionManager, is...
    method register_grad_hook (line 34) | def register_grad_hook(self):
    method remove_grad_hook (line 39) | def remove_grad_hook(self):
    method __call__ (line 43) | def __call__(self, *args, **kwargs):
    method _pre_forward (line 46) | def _pre_forward(self):
    method forward (line 51) | def forward(self, *args, **kwargs):
    method backward (line 58) | def backward(self, loss):
    method _post_backward (line 62) | def _post_backward(self):
    method grad_handle (line 72) | def grad_handle(self, p, grad):
    method _cast_buffers (line 86) | def _cast_buffers(self):
    method parameters (line 90) | def parameters(self, recurse: bool = True):
    method named_parameters (line 93) | def named_parameters(self, prefix: str = "", recurse: bool = True):
    method named_buffers (line 96) | def named_buffers(self, prefix: str = "", recurse: bool = True):
    method named_children (line 99) | def named_children(self):
    method named_modules (line 102) | def named_modules(

FILE: colossalai/auto_parallel/offload/mem_optimize.py
  function memory_optimize (line 17) | def memory_optimize(

FILE: colossalai/auto_parallel/offload/region.py
  class Region (line 10) | class Region:
    method __init__ (line 18) | def __init__(self, r_id: int = 0) -> None:
    method can_release (line 41) | def can_release(self) -> bool:
    method has_inf_or_nan (line 48) | def has_inf_or_nan(self) -> bool:
    method init_param_data (line 54) | def init_param_data(self, pre_alloc_tensor: torch.Tensor = None):
    method move_param_to_cuda (line 74) | def move_param_to_cuda(self):
    method move_grad_to_cpu (line 92) | def move_grad_to_cpu(self):
    method free_cuda_data (line 105) | def free_cuda_data(self):
    method copy_grad_to_region_slice (line 110) | def copy_grad_to_region_slice(self, param: torch.nn.Parameter, data_sl...
    method split (line 125) | def split(self, cut_node_idx: int, cut_param_idx: int):
    method __update_params_ptr (line 143) | def __update_params_ptr(self) -> None:

FILE: colossalai/auto_parallel/offload/region_manager.py
  class RegionManager (line 12) | class RegionManager:
    method __init__ (line 23) | def __init__(self, graph: Graph, solver_name: str = "asyn", memory_bud...
    method _build_regions (line 42) | def _build_regions(self):
    method _pre_process (line 59) | def _pre_process(self):
    method _post_process (line 99) | def _post_process(self, ts: TrainingSimulator = None):
    method _early_region_placement (line 104) | def _early_region_placement(self, ts: TrainingSimulator):
    method _merge_small_regions (line 144) | def _merge_small_regions(self, orig_reg_list: List[Region]) -> List[Re...
    method _search_block_size (line 173) | def _search_block_size(
    method _init_region_data (line 217) | def _init_region_data(self):
    method _process_shared_region (line 241) | def _process_shared_region(self):
    method _linearize_graph (line 271) | def _linearize_graph(self) -> List[Region]:
    method _set_node_and_region_info (line 466) | def _set_node_and_region_info(self, node_id: int, cur_n: Node, cur_reg...
    method get_region (line 502) | def get_region(self, param: torch.nn.Parameter) -> Region:
    method __update_param_region_map (line 511) | def __update_param_region_map(self, params: List[torch.nn.Parameter], ...

FILE: colossalai/auto_parallel/offload/runtime.py
  class SynPreFwdPostBwdOP (line 10) | class SynPreFwdPostBwdOP(torch.autograd.Function):
    method forward (line 23) | def forward(ctx, input_, fwd_info, bwd_info):
    method backward (line 40) | def backward(ctx, grad_output):
  class AsynPreFwdPostBwdOP (line 50) | class AsynPreFwdPostBwdOP(torch.autograd.Function):
    method forward (line 63) | def forward(ctx, input_, fwd_info, bwd_info):
    method backward (line 88) | def backward(ctx, grad_output):
  function convert_fwd_upload_bwd_offload_to_action (line 114) | def convert_fwd_upload_bwd_offload_to_action(tensor, fwd_info, bwd_info):
  function convert_fwd_prefetch_bwd_offload_to_action (line 130) | def convert_fwd_prefetch_bwd_offload_to_action(tensor, fwd_info, bwd_info):
  function replace_node_users (line 146) | def replace_node_users(orig_node: Node, inserted_node: Node, rep_user_no...
  function runtime_syn_offload_apply_pass (line 166) | def runtime_syn_offload_apply_pass(gm: torch.fx.GraphModule, region_list...
  function runtime_asyn_offload_apply_pass (line 200) | def runtime_asyn_offload_apply_pass(gm: torch.fx.GraphModule, region_lis...

FILE: colossalai/auto_parallel/offload/solver.py
  function benchmark_func (line 21) | def benchmark_func(func, number=1, repeat=1, warmup=3):
  class Solver (line 42) | class Solver(ABC):
    method __init__ (line 53) | def __init__(self, region_list: List[Region], memory_budget: float = -...
    method _call_solver (line 69) | def _call_solver(self):
    method _try_to_offload (line 73) | def _try_to_offload(self, *args):
    method _eval_one_choice (line 77) | def _eval_one_choice(self, *args):
    method _compute_offload_profit (line 80) | def _compute_offload_profit(self, total_mem_saving: float, peak_mem_sa...
    method _compare_profit (line 99) | def _compare_profit(self, profit_a: tuple, profit_b: tuple) -> bool:
    method _update_state (line 116) | def _update_state(self, best_ts: TrainingSimulator):
    method _update_node_mem_info (line 124) | def _update_node_mem_info(self, fwd_mem_info: Dict[Node, float], bwd_m...
    method _extract_computing_power (line 140) | def _extract_computing_power(self):
    method _profile_bandwidth (line 164) | def _profile_bandwidth(self):
  class SynGreedySolver (line 203) | class SynGreedySolver(Solver):
    method __init__ (line 204) | def __init__(self, region_list: List[Region], memory_budget: float = -...
    method _init_state (line 210) | def _init_state(self):
    method _call_solver (line 219) | def _call_solver(self):
    method _call_solver_l2l (line 254) | def _call_solver_l2l(self):
    method _try_to_offload (line 263) | def _try_to_offload(self, offload_region: Region):
    method _eval_one_choice (line 275) | def _eval_one_choice(self, offload_region: Region):
  class AsynGreedySolver (line 299) | class AsynGreedySolver(Solver):
    method __init__ (line 300) | def __init__(self, region_list: List[Region], memory_budget: float = -...
    method _init_state (line 310) | def _init_state(self):
    method _call_solver (line 320) | def _call_solver(self):
    method _try_to_offload (line 383) | def _try_to_offload(self, host_region: Region, offload_region: Region):
    method _try_convert_to_syn_upload (line 408) | def _try_convert_to_syn_upload(self, host_region: Region, offload_regi...
    method _repair_strategy (line 429) | def _repair_strategy(self):
    method _eval_one_choice (line 472) | def _eval_one_choice(self):
  class SolverFactory (line 490) | class SolverFactory:
    method create (line 494) | def create(solver_name: str) -> Type[Solver]:
    method get_solver_names (line 500) | def get_solver_names():

FILE: colossalai/auto_parallel/offload/training_simulator.py
  class ExecutionPeriod (line 13) | class ExecutionPeriod:
  class TrainingSimulator (line 18) | class TrainingSimulator(ABC):
    method __init__ (line 29) | def __init__(self, region_list: List[Region], comp_power: float, link_...
    method execute (line 47) | def execute(self):
    method _eval_fwd_mem_per_region (line 51) | def _eval_fwd_mem_per_region(self, region: Region):
    method _eval_bwd_mem_per_region (line 55) | def _eval_bwd_mem_per_region(self, region: Region):
    method _get_bandwidth (line 58) | def _get_bandwidth(self, link: str, comm_volumn: float) -> float:
    method _get_communication_overhead (line 79) | def _get_communication_overhead(self, link: str, comm_volumn: float) -...
    method _get_computing_overhead (line 82) | def _get_computing_overhead(self, flop: float) -> float:
  class SynTrainingSimulator (line 86) | class SynTrainingSimulator(TrainingSimulator):
    method __init__ (line 87) | def __init__(self, region_list: List[Region], comp_power: float, link_...
    method execute (line 90) | def execute(self):
    method _eval_fwd_mem_per_region (line 101) | def _eval_fwd_mem_per_region(self, region: Region):
    method _eval_bwd_mem_per_region (line 119) | def _eval_bwd_mem_per_region(self, region: Region):
  class AsynTrainingSimulator (line 170) | class AsynTrainingSimulator(TrainingSimulator):
    method __init__ (line 171) | def __init__(self, region_list: List[Region], comp_power: float, link_...
    method execute (line 205) | def execute(self):
    method _insert_h2d_exec (line 234) | def _insert_h2d_exec(self, region: Region, is_fwd: bool = True):
    method _insert_comp_exec (line 248) | def _insert_comp_exec(self, region: Region, is_fwd: bool = True):
    method _insert_d2h_exec (line 269) | def _insert_d2h_exec(self, region: Region):
    method _eval_fwd_cost_per_region (line 280) | def _eval_fwd_cost_per_region(self, region: Region):
    method _eval_fwd_mem_per_region (line 297) | def _eval_fwd_mem_per_region(self, region: Region):
    method _eval_bwd_cost_per_region (line 330) | def _eval_bwd_cost_per_region(self, region: Region):
    method _eval_bwd_mem_per_region (line 361) | def _eval_bwd_mem_per_region(self, region: Region):

FILE: colossalai/auto_parallel/offload/util.py
  class NodeInfo (line 13) | class NodeInfo:
  class NvDevicePower (line 19) | class NvDevicePower:
  class GlobalRuntimeInfo (line 37) | class GlobalRuntimeInfo(metaclass=SingletonMeta):
    method __init__ (line 38) | def __init__(self):
  function compute_act_peak_mem (line 46) | def compute_act_peak_mem(region_list: List[Region]) -> float:
  function compute_max_param_mem (line 76) | def compute_max_param_mem(region_list: List[Region]) -> float:
  function compute_total_param_mem (line 80) | def compute_total_param_mem(region_list: List[Region]) -> float:
  function requires_upload_p_in_fwd (line 84) | def requires_upload_p_in_fwd(shared_reg: Region):
  function requires_release_p_in_bwd (line 90) | def requires_release_p_in_bwd(shared_reg: Region):
  function requires_offload_g_in_bwd (line 96) | def requires_offload_g_in_bwd(region: Region):

FILE: colossalai/auto_parallel/passes/comm_metainfo_pass.py
  function _construct_shard_meta_info (line 17) | def _construct_shard_meta_info(
  function _runtime_apply_meta_info (line 61) | def _runtime_apply_meta_info(node: Node, origin_spec_dict, sharding_spec...
  function _runtime_comm_spec_apply_meta_info (line 77) | def _runtime_comm_spec_apply_meta_info(node: Node, comm_actions_dict: Di...
  function comm_metainfo_pass (line 111) | def comm_metainfo_pass(

FILE: colossalai/auto_parallel/passes/meta_info_prop.py
  function _normalize_tuple (line 16) | def _normalize_tuple(x):
  class MetaInfoProp (line 23) | class MetaInfoProp:
    method __init__ (line 24) | def __init__(self, module: GraphModule) -> None:
    method _set_data_ptr (line 35) | def _set_data_ptr(self, x):
    method _is_inplace (line 44) | def _is_inplace(self, node: Node):
    method run (line 54) | def run(self) -> GraphModule:
    method placeholder_handler (line 63) | def placeholder_handler(self, node: Node) -> None:
    method get_attr_handler (line 73) | def get_attr_handler(self, node: Node) -> None:
    method output_handler (line 81) | def output_handler(self, node: Node) -> None:
    method node_handler (line 94) | def node_handler(self, node: Node) -> None:

FILE: colossalai/auto_parallel/passes/runtime_apply_pass.py
  function runtime_apply (line 15) | def runtime_apply(node: Node, origin_dict: Dict, input_dict: Dict, node_...
  function runtime_apply_for_iterable_object (line 25) | def runtime_apply_for_iterable_object(
  function runtime_comm_spec_apply (line 45) | def runtime_comm_spec_apply(tensor: torch.Tensor, comm_actions_dict: Dic...
  function _preprocess_graph (line 59) | def _preprocess_graph(nodes: List[Node]):
  function _shape_consistency_apply (line 85) | def _shape_consistency_apply(gm: torch.fx.GraphModule):
  function _comm_spec_apply (line 151) | def _comm_spec_apply(gm: torch.fx.GraphModule):
  function _act_annotation_pass (line 225) | def _act_annotation_pass(gm: torch.fx.GraphModule):
  function runtime_apply_pass (line 252) | def runtime_apply_pass(gm: torch.fx.GraphModule):

FILE: colossalai/auto_parallel/passes/runtime_preparation_pass.py
  function size_processing (line 21) | def size_processing(
  function solution_annotation_pass (line 52) | def solution_annotation_pass(
  function size_value_converting_pass (line 131) | def size_value_converting_pass(gm: torch.fx.GraphModule, device_mesh: De...
  function node_args_converting_pass (line 280) | def node_args_converting_pass(gm: torch.fx.GraphModule, device_mesh: Dev...
  function module_params_sharding_pass (line 384) | def module_params_sharding_pass(gm: torch.fx.GraphModule, device_mesh: D...
  function implicit_comm_action_apply (line 496) | def implicit_comm_action_apply(gm: torch.fx.GraphModule):
  function runtime_preparation_pass (line 502) | def runtime_preparation_pass(

FILE: colossalai/auto_parallel/tensor_shard/initialize.py
  class ModuleWrapper (line 22) | class ModuleWrapper(nn.Module):
    method __init__ (line 28) | def __init__(
    method forward (line 48) | def forward(self, *args, **kwargs):
  function extract_meta_args_from_dataloader (line 58) | def extract_meta_args_from_dataloader(data_loader: torch.utils.data.Data...
  function extract_alpha_beta_for_device_mesh (line 65) | def extract_alpha_beta_for_device_mesh(alpha_beta_dict: Dict[Tuple[int],...
  function build_strategy_constructor (line 73) | def build_strategy_constructor(
  function solve_solution (line 117) | def solve_solution(gm: ColoGraphModule, strategy_constructor: Strategies...
  function transform_to_sharded_model (line 135) | def transform_to_sharded_model(
  function initialize_device_mesh (line 160) | def initialize_device_mesh(
  function initialize_model (line 221) | def initialize_model(
  function autoparallelize (line 300) | def autoparallelize(

FILE: colossalai/auto_parallel/tensor_shard/node_handler/addmm_handler.py
  class ADDMMFunctionHandler (line 16) | class ADDMMFunctionHandler(NodeHandler):
    method _infer_op_data_type (line 23) | def _infer_op_data_type(self, tensor: torch.Tensor) -> OperationDataType:
    method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method get_strategy_generator (line 64) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method post_process (line 72) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...

FILE: colossalai/auto_parallel/tensor_shard/node_handler/batch_norm_handler.py
  class BatchNormModuleHandler (line 16) | class BatchNormModuleHandler(MetaInfoModuleHandler):
    method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/binary_elementwise_handler.py
  class BinaryElementwiseHandler (line 18) | class BinaryElementwiseHandler(MetaInfoNodeHandler):
    method get_operation_data_mapping (line 24) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method get_strategy_generator (line 83) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method post_process (line 89) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...

FILE: colossalai/auto_parallel/tensor_shard/node_handler/bmm_handler.py
  function _get_data_mapping_for_bmm_op (line 14) | def _get_data_mapping_for_bmm_op(node, input_idx, other_idx, bias_idx=No...
  class BMMFunctionHandler (line 48) | class BMMFunctionHandler(NodeHandler):
    method get_operation_data_mapping (line 55) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method get_strategy_generator (line 59) | def get_strategy_generator(self) -> List[StrategyGenerator]:
  class AddBMMFunctionHandler (line 68) | class AddBMMFunctionHandler(NodeHandler):
    method get_operation_data_mapping (line 77) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method get_strategy_generator (line 81) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method post_process (line 90) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...

FILE: colossalai/auto_parallel/tensor_shard/node_handler/conv_handler.py
  class ConvModuleHandler (line 18) | class ConvModuleHandler(MetaInfoModuleHandler):
    method get_strategy_generator (line 23) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 29) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 57) | def post_process(self, strategy: ShardingStrategy):
  class ConvFunctionHandler (line 70) | class ConvFunctionHandler(MetaInfoNodeHandler):
    method get_strategy_generator (line 75) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 81) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 121) | def post_process(self, strategy: ShardingStrategy):

FILE: colossalai/auto_parallel/tensor_shard/node_handler/default_reshape_handler.py
  class DefaultReshapeHandler (line 16) | class DefaultReshapeHandler(MetaInfoNodeHandler):
    method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method infer_logical_shape (line 27) | def infer_logical_shape(self, data):
    method get_operation_data_mapping (line 45) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/embedding_handler.py
  function _convert_logical_sharding_to_physical_sharding_spec_for_embedding (line 18) | def _convert_logical_sharding_to_physical_sharding_spec_for_embedding(
  class EmbeddingModuleHandler (line 116) | class EmbeddingModuleHandler(ModuleHandler):
    method get_strategy_generator (line 121) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 127) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 163) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...
  class EmbeddingFunctionHandler (line 177) | class EmbeddingFunctionHandler(NodeHandler):
    method get_strategy_generator (line 182) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 188) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 230) | def post_process(self, strategy: ShardingStrategy):

FILE: colossalai/auto_parallel/tensor_shard/node_handler/getattr_handler.py
  class GetattrHandler (line 10) | class GetattrHandler(NodeHandler):
    method get_strategy_generator (line 15) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 21) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/getitem_handler.py
  class GetItemHandler (line 15) | class GetItemHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/layer_norm_handler.py
  class LayerNormModuleHandler (line 14) | class LayerNormModuleHandler(MetaInfoModuleHandler):
    method get_strategy_generator (line 19) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 25) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/linear_handler.py
  function _update_sharding_spec_for_transposed_weight_for_linear (line 18) | def _update_sharding_spec_for_transposed_weight_for_linear(
  function _convert_logical_sharding_to_physical_sharding_spec_for_linear (line 40) | def _convert_logical_sharding_to_physical_sharding_spec_for_linear(
  class LinearModuleHandler (line 152) | class LinearModuleHandler(MetaInfoModuleHandler):
    method get_strategy_generator (line 157) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 170) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 205) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...
  class LinearFunctionHandler (line 224) | class LinearFunctionHandler(MetaInfoNodeHandler):
    method get_strategy_generator (line 229) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 237) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method post_process (line 285) | def post_process(self, strategy: ShardingStrategy):

FILE: colossalai/auto_parallel/tensor_shard/node_handler/matmul_handler.py
  class MatMulType (line 30) | class MatMulType(Enum):
  function get_matmul_type (line 47) | def get_matmul_type(input_dim: int, other_dim: int):
  class BmmTransform (line 70) | class BmmTransform(ABC):
    method apply (line 77) | def apply(self, shape_mapping: Dict[str, List[int]]):
    method recover (line 81) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:...
  class Padder (line 85) | class Padder(BmmTransform):
    method __init__ (line 90) | def __init__(self) -> None:
    method apply (line 94) | def apply(self, shape_mapping: Dict[str, List[int]]):
    method recover (line 113) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:...
  class Broadcaster (line 159) | class Broadcaster(BmmTransform):
    method __init__ (line 164) | def __init__(self) -> None:
    method apply (line 167) | def apply(self, shape_mapping: Dict[str, List[int]]):
    method recover (line 196) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:...
  class Viewer (line 236) | class Viewer(BmmTransform):
    method __init__ (line 241) | def __init__(self) -> None:
    method apply (line 244) | def apply(self, shape_mapping: Dict[str, List[int]]):
    method recover (line 262) | def recover(self, op_data_mapping: Dict[str, OperationData], strategy:...
  function _get_bmm_logical_shape (line 305) | def _get_bmm_logical_shape(input_shape, other_shape, transforms):
  class MatMulHandler (line 331) | class MatMulHandler(MetaInfoNodeHandler):
    method __init__ (line 338) | def __init__(self, *args, **kwargs) -> None:
    method get_strategy_generator (line 358) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 373) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method _get_op_data_mapping (line 384) | def _get_op_data_mapping(self, input_logical_shape, other_logical_shap...
    method _get_logical_shape_for_dot (line 418) | def _get_logical_shape_for_dot(self):
    method _get_logical_shape_for_mm (line 424) | def _get_logical_shape_for_mm(self):
    method _get_logical_shape_for_mv (line 437) | def _get_logical_shape_for_mv(self):
    method _get_logical_shape_for_bmm (line 443) | def _get_logical_shape_for_bmm(self):
    method post_process (line 448) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...

FILE: colossalai/auto_parallel/tensor_shard/node_handler/node_handler.py
  class NodeHandler (line 24) | class NodeHandler(ABC):
    method __init__ (line 34) | def __init__(
    method update_resharding_cost (line 50) | def update_resharding_cost(self, strategy: ShardingStrategy) -> None:
    method get_target_function (line 143) | def get_target_function(self) -> callable:
    method register_strategy (line 162) | def register_strategy(self, compute_resharding_cost: bool = True) -> S...
    method post_process (line 221) | def post_process(self, strategy: ShardingStrategy) -> Union[ShardingSt...
    method get_strategy_generator (line 227) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 233) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
  class MetaInfoNodeHandler (line 255) | class MetaInfoNodeHandler(NodeHandler):
    method register_strategy (line 263) | def register_strategy(self, compute_resharding_cost: bool = True) -> S...
  class ModuleHandler (line 291) | class ModuleHandler(NodeHandler):
    method __init__ (line 292) | def __init__(self, *args, **kwargs) -> None:
  class MetaInfoModuleHandler (line 310) | class MetaInfoModuleHandler(ModuleHandler):
    method register_strategy (line 318) | def register_strategy(self, compute_resharding_cost: bool = True) -> S...

FILE: colossalai/auto_parallel/tensor_shard/node_handler/normal_pooling_handler.py
  class NormPoolingHandler (line 19) | class NormPoolingHandler(MetaInfoModuleHandler):
    method get_strategy_generator (line 24) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 30) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/output_handler.py
  class OutputHandler (line 14) | class OutputHandler(NodeHandler):
    method __init__ (line 19) | def __init__(
    method get_strategy_generator (line 25) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 31) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/permute_handler.py
  class PermuteHandler (line 15) | class PermuteHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/placeholder_handler.py
  class PlaceholderHandler (line 14) | class PlaceholderHandler(NodeHandler):
    method __init__ (line 19) | def __init__(
    method get_strategy_generator (line 25) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 33) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/registry.py
  class Registry (line 1) | class Registry:
    method __init__ (line 2) | def __init__(self, name):
    method register (line 6) | def register(self, source):
    method get (line 18) | def get(self, source):
    method has (line 23) | def has(self, source):

FILE: colossalai/auto_parallel/tensor_shard/node_handler/softmax_handler.py
  class SoftmaxHandler (line 15) | class SoftmaxHandler(NodeHandler):
    method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/split_handler.py
  class SplitHandler (line 15) | class SplitHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/batch_norm_generator.py
  class BatchNormStrategyGenerator (line 20) | class BatchNormStrategyGenerator(StrategyGenerator):
    method validate (line 32) | def validate(self) -> bool:
    method update_compute_cost (line 46) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 73) | def update_memory_cost(self, strategy: ShardingStrategy):
    method split_input_channel (line 115) | def split_input_channel(self, mesh_dim_0):
    method split_input_channel_1d (line 139) | def split_input_channel_1d(self, mesh_dim_0, mesh_dim_1):
    method non_split (line 163) | def non_split(self):
    method split_input_batch (line 187) | def split_input_batch(self, mesh_dim_0):
    method split_input_batch_1d (line 224) | def split_input_batch_1d(self, mesh_dim_0, mesh_dim_1):
    method split_input_both_dim (line 261) | def split_input_both_dim(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 311) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/binary_elementwise_generator.py
  class BinaryElementwiseStrategyGenerator (line 20) | class BinaryElementwiseStrategyGenerator(StrategyGenerator):
    method validate (line 28) | def validate(self) -> bool:
    method update_compute_cost (line 36) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method update_memory_cost (line 49) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt...
    method enumerate_all_possible_output (line 67) | def enumerate_all_possible_output(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 111) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/conv_strategy_generator.py
  class ConvStrategyGenerator (line 18) | class ConvStrategyGenerator(StrategyGenerator):
    method validate (line 24) | def validate(self) -> bool:
    method update_compute_cost (line 38) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 78) | def update_memory_cost(self, strategy: ShardingStrategy):
    method split_input_batch_weight_out_channel (line 111) | def split_input_batch_weight_out_channel(self, mesh_dim_0, mesh_dim_1):
    method split_input_batch (line 178) | def split_input_batch(self, mesh_dim_0):
    method split_input_both_dim_weight_in_channel (line 238) | def split_input_both_dim_weight_in_channel(self, mesh_dim_0, mesh_dim_1):
    method split_input_in_channel_weight_both_channel (line 308) | def split_input_in_channel_weight_both_channel(self, mesh_dim_0, mesh_...
    method split_input_in_channel_weight_in_channel (line 355) | def split_input_in_channel_weight_in_channel(self, mesh_dim_0):
    method split_weight_out_channel (line 390) | def split_weight_out_channel(self, mesh_dim_0):
    method non_split (line 428) | def non_split(self):
    method split_1d_parallel_on_input_batch (line 447) | def split_1d_parallel_on_input_batch(self, mesh_dim_0, mesh_dim_1):
    method split_1d_parallel_on_in_channel (line 509) | def split_1d_parallel_on_in_channel(self, mesh_dim_0, mesh_dim_1):
    method split_1d_parallel_on_out_channel (line 543) | def split_1d_parallel_on_out_channel(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 579) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/embedding_generator.py
  class EmbeddingStrategyGenerator (line 18) | class EmbeddingStrategyGenerator(StrategyGenerator):
    method validate (line 24) | def validate(self) -> bool:
    method update_compute_cost (line 27) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 54) | def update_memory_cost(self, strategy: ShardingStrategy):
    method non_split (line 83) | def non_split(self):
    method split_input (line 99) | def split_input(self, mesh_dim_0):
    method split_input_and_embedding_dim (line 139) | def split_input_and_embedding_dim(self, mesh_dim_0, mesh_dim_1):
    method split_1d_parallel_on_input (line 193) | def split_1d_parallel_on_input(self, mesh_dim_0, mesh_dim_1):
    method split_embedding_dim (line 235) | def split_embedding_dim(self, mesh_dim_0):
    method split_1d_parallel_on_embedding_dim (line 268) | def split_1d_parallel_on_embedding_dim(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 300) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/getattr_generator.py
  class GetattrGenerator (line 16) | class GetattrGenerator(StrategyGenerator):
    method validate (line 21) | def validate(self) -> bool:
    method update_compute_cost (line 24) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 28) | def update_memory_cost(self, strategy: ShardingStrategy):
    method enumerate_all_possible_output (line 47) | def enumerate_all_possible_output(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 89) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/getitem_generator.py
  class GetItemStrategyGenerator (line 13) | class GetItemStrategyGenerator(FollowingStrategyGenerator):
    method validate (line 24) | def validate(self) -> bool:
    method update_compute_cost (line 27) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 31) | def update_memory_cost(self, strategy: ShardingStrategy):
  class TensorStrategyGenerator (line 62) | class TensorStrategyGenerator(GetItemStrategyGenerator):
    method collate_strategies (line 67) | def collate_strategies(self) -> List[ShardingStrategy]:
  class TensorTupleStrategyGenerator (line 137) | class TensorTupleStrategyGenerator(GetItemStrategyGenerator):
    method collate_strategies (line 142) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/layer_norm_generator.py
  class LayerNormGenerator (line 24) | class LayerNormGenerator(StrategyGenerator):
    method validate (line 30) | def validate(self) -> bool:
    method update_compute_cost (line 33) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 64) | def update_memory_cost(self, strategy: ShardingStrategy):
    method _generate_strategy_with_dim_partition (line 100) | def _generate_strategy_with_dim_partition(self, dim_partition):
    method split_input_batch_single_mesh_dim (line 145) | def split_input_batch_single_mesh_dim(self, mesh_dim_0, batch_dimensio...
    method split_input_batch_both_mesh_dim (line 153) | def split_input_batch_both_mesh_dim(self, mesh_dim_0, mesh_dim_1, batc...
    method non_split (line 162) | def non_split(self):
    method collate_strategies (line 182) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/matmul_strategy_generator.py
  class MatMulStrategyGenerator (line 18) | class MatMulStrategyGenerator(StrategyGenerator):
    method update_memory_cost (line 24) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt...
  class DotProductStrategyGenerator (line 54) | class DotProductStrategyGenerator(MatMulStrategyGenerator):
    method validate (line 55) | def validate(self) -> bool:
    method update_compute_cost (line 60) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method no_split (line 70) | def no_split(self):
    method split_one_dim (line 82) | def split_one_dim(self, mesh_dim):
    method collate_strategies (line 103) | def collate_strategies(self) -> List[ShardingStrategy]:
  class MatVecStrategyGenerator (line 118) | class MatVecStrategyGenerator(MatMulStrategyGenerator):
    method validate (line 119) | def validate(self) -> bool:
    method update_compute_cost (line 124) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method no_split (line 134) | def no_split(self):
    method split_input_batch (line 146) | def split_input_batch(self, mesh_dim):
    method collate_strategies (line 203) | def collate_strategies(self) -> List[ShardingStrategy]:
  class LinearProjectionStrategyGenerator (line 216) | class LinearProjectionStrategyGenerator(MatMulStrategyGenerator):
    method __init__ (line 217) | def __init__(
    method update_compute_cost (line 228) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method dp_strategies (line 246) | def dp_strategies(self) -> List[ShardingStrategy]:
    method tp_strategies (line 254) | def tp_strategies(self) -> List[ShardingStrategy]:
    method mix_strategies (line 277) | def mix_strategies(self) -> List[ShardingStrategy]:
    method collate_strategies (line 293) | def collate_strategies(self) -> List[ShardingStrategy]:
    method split_lhs_space_rhs_space (line 308) | def split_lhs_space_rhs_space(self, mesh_dim_0, mesh_dim_1):
    method split_lhs_space_both_contract (line 384) | def split_lhs_space_both_contract(self, mesh_dim_0, mesh_dim_1):
    method split_rhs_space_both_contract (line 463) | def split_rhs_space_both_contract(self, mesh_dim_0, mesh_dim_1):
    method recompute_split_both_contract (line 503) | def recompute_split_both_contract(self, mesh_dim):
    method split_rhs_space_only (line 534) | def split_rhs_space_only(self, mesh_dim):
    method split_lhs_1st_dim_1d (line 566) | def split_lhs_1st_dim_1d(self, mesh_dim_0, mesh_dim_1):
    method split_lhs_2nd_dim_1d (line 632) | def split_lhs_2nd_dim_1d(self, mesh_dim_0, mesh_dim_1):
    method split_rhs_2nd_dim_1d (line 664) | def split_rhs_2nd_dim_1d(self, mesh_dim_0, mesh_dim_1):
    method non_split (line 697) | def non_split(self):
    method validate (line 721) | def validate(self) -> bool:
  class BatchedMatMulStrategyGenerator (line 736) | class BatchedMatMulStrategyGenerator(MatMulStrategyGenerator):
    method __init__ (line 751) | def __init__(self, *args, **kwargs):
    method _pop_batch_dim_sharding_for_output (line 755) | def _pop_batch_dim_sharding_for_output(self, dim_partition_dict):
    method validate (line 767) | def validate(self) -> bool:
    method update_compute_cost (line 776) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method split_one_batch_dim (line 787) | def split_one_batch_dim(self, mesh_dim):
    method split_two_batch_dim (line 814) | def split_two_batch_dim(self, mesh_dim_0, mesh_dim_1):
    method split_batch_dim_lhs_space (line 845) | def split_batch_dim_lhs_space(self, mesh_dim_0, mesh_dim_1):
    method split_batch_dim_rhs_space (line 887) | def split_batch_dim_rhs_space(self, mesh_dim_0, mesh_dim_1):
    method split_batch_dim_both_contract (line 928) | def split_batch_dim_both_contract(self, mesh_dim_0, mesh_dim_1):
    method collate_strategies (line 968) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/normal_pooling_generator.py
  class NormalPoolStrategyGenerator (line 16) | class NormalPoolStrategyGenerator(StrategyGenerator):
    method validate (line 23) | def validate(self) -> bool:
    method update_compute_cost (line 37) | def update_compute_cost(self, strategy: ShardingStrategy) -> TrainCycl...
    method update_memory_cost (line 65) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt...
    method _generate_strategy_with_dim_partition (line 89) | def _generate_strategy_with_dim_partition(self, dim_partition):
    method enumerate_all_possible_batch_dimensions_dim_partition (line 107) | def enumerate_all_possible_batch_dimensions_dim_partition(self, mesh_d...
    method collate_strategies (line 117) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/output_generator.py
  class OutputGenerator (line 18) | class OutputGenerator(OutputStrategyGenerator):
    method __init__ (line 23) | def __init__(
    method validate (line 33) | def validate(self) -> bool:
    method update_compute_cost (line 36) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 40) | def update_memory_cost(self, strategy: ShardingStrategy):
    method replica_strategy (line 53) | def replica_strategy(self) -> List[ShardingStrategy]:
    method distributed_strategy (line 87) | def distributed_strategy(self, mesh_list: List[List[int]] = None) -> L...
    method collate_strategies (line 118) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/placeholder_generator.py
  class PlaceholderGenerator (line 16) | class PlaceholderGenerator(StrategyGenerator):
    method __init__ (line 21) | def __init__(
    method validate (line 27) | def validate(self) -> bool:
    method update_compute_cost (line 30) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 34) | def update_memory_cost(self, strategy: ShardingStrategy):
    method replica_placeholder (line 52) | def replica_placeholder(self) -> ShardingStrategy:
    method distributed_placeholder (line 72) | def distributed_placeholder(self, mesh_list) -> ShardingStrategy:
    method collate_strategies (line 92) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/reshape_generator.py
  class ReshapeGenerator (line 23) | class ReshapeGenerator(FollowingStrategyGenerator):
    method validate (line 28) | def validate(self) -> bool:
    method update_compute_cost (line 31) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 35) | def update_memory_cost(self, strategy: ShardingStrategy):
    method collate_strategies (line 65) | def collate_strategies(self) -> List[ShardingStrategy]:
  class ViewGenerator (line 69) | class ViewGenerator(ReshapeGenerator):
    method collate_strategies (line 74) | def collate_strategies(self) -> List[ShardingStrategy]:
  class PermuteGenerator (line 155) | class PermuteGenerator(ReshapeGenerator):
    method collate_strategies (line 160) | def collate_strategies(self) -> List[ShardingStrategy]:
  class TransposeGenerator (line 195) | class TransposeGenerator(ReshapeGenerator):
    method collate_strategies (line 200) | def collate_strategies(self) -> List[ShardingStrategy]:
  class SplitGenerator (line 241) | class SplitGenerator(ReshapeGenerator):
    method collate_strategies (line 246) | def collate_strategies(self) -> List[ShardingStrategy]:
  class DefaultReshapeGenerator (line 314) | class DefaultReshapeGenerator(ReshapeGenerator):
    method collate_strategies (line 320) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/softmax_generator.py
  class SoftmaxGenerator (line 12) | class SoftmaxGenerator(FollowingStrategyGenerator):
    method validate (line 17) | def validate(self) -> bool:
    method update_compute_cost (line 20) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 35) | def update_memory_cost(self, strategy: ShardingStrategy):
    method collate_strategies (line 65) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/strategy_generator.py
  class StrategyGenerator (line 23) | class StrategyGenerator(ABC):
    method __init__ (line 30) | def __init__(self, operation_data_mapping: Dict[str, OperationData], d...
    method has_bias (line 38) | def has_bias(self):
    method is_param (line 44) | def is_param(self, op_data_name):
    method is_buffer (line 48) | def is_buffer(self, op_data_name):
    method get_sharding_strategy (line 52) | def get_sharding_strategy(
    method to_sharding_spec_mapping (line 69) | def to_sharding_spec_mapping(self, mapping: Dict[str, Dict[int, List[i...
    method replace_op_name_with_op_data (line 117) | def replace_op_name_with_op_data(self, mapping: Dict[str, Any]):
    method get_communication_spec (line 127) | def get_communication_spec(
    method get_communication_action (line 140) | def get_communication_action(
    method update_communication_cost (line 163) | def update_communication_cost(self, strategy: ShardingStrategy) -> Sha...
    method update_compute_cost (line 204) | def update_compute_cost(self, strategy: ShardingStrategy) -> ShardingS...
    method update_memory_cost (line 210) | def update_memory_cost(self, strategy: ShardingStrategy) -> ShardingSt...
    method _compute_size_in_bytes (line 215) | def _compute_size_in_bytes(self, strategy: ShardingStrategy, key: str):
    method generate (line 258) | def generate(self) -> List[ShardingStrategy]:
    method collate_strategies (line 281) | def collate_strategies(self) -> List[ShardingStrategy]:
    method validate (line 285) | def validate(self) -> bool:
  class FollowingStrategyGenerator (line 292) | class FollowingStrategyGenerator(StrategyGenerator):
    method __init__ (line 299) | def __init__(
  class OutputStrategyGenerator (line 307) | class OutputStrategyGenerator(StrategyGenerator):
    method __init__ (line 312) | def __init__(

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/sum_generator.py
  class SumGenerator (line 12) | class SumGenerator(FollowingStrategyGenerator):
    method validate (line 17) | def validate(self) -> bool:
    method update_compute_cost (line 20) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 32) | def update_memory_cost(self, strategy: ShardingStrategy):
    method collate_strategies (line 62) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/tensor_constructor_generator.py
  class TensorConstructorGenerator (line 10) | class TensorConstructorGenerator(StrategyGenerator):
    method validate (line 16) | def validate(self) -> bool:
    method update_compute_cost (line 19) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 23) | def update_memory_cost(self, strategy: ShardingStrategy):
    method collate_strategies (line 43) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/unary_elementwise_generator.py
  class UnaryElementwiseGenerator (line 11) | class UnaryElementwiseGenerator(FollowingStrategyGenerator):
    method validate (line 16) | def validate(self) -> bool:
    method update_compute_cost (line 19) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 23) | def update_memory_cost(self, strategy: ShardingStrategy):
    method collate_strategies (line 53) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/strategy/where_generator.py
  class WhereGenerator (line 16) | class WhereGenerator(StrategyGenerator):
    method validate (line 21) | def validate(self) -> bool:
    method update_compute_cost (line 24) | def update_compute_cost(self, strategy: ShardingStrategy):
    method update_memory_cost (line 28) | def update_memory_cost(self, strategy: ShardingStrategy):
    method _generate_strategy_with_dim_partition (line 57) | def _generate_strategy_with_dim_partition(self, dim_partition):
    method enumerate_all_possible_output_spec (line 78) | def enumerate_all_possible_output_spec(self, mesh_dim_0, mesh_dim_1, d...
    method collate_strategies (line 88) | def collate_strategies(self) -> List[ShardingStrategy]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/sum_handler.py
  class SumHandler (line 15) | class SumHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/tensor_constructor_handler.py
  class TensorConstructorHandler (line 15) | class TensorConstructorHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/transpose_handler.py
  class TransposeHandler (line 15) | class TransposeHandler(NodeHandler):
    method get_strategy_generator (line 20) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 26) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/unary_elementwise_handler.py
  class UnaryElementwiseHandler (line 22) | class UnaryElementwiseHandler(MetaInfoNodeHandler):
    method get_strategy_generator (line 27) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 33) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/view_handler.py
  class ViewHandler (line 16) | class ViewHandler(NodeHandler):
    method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:

FILE: colossalai/auto_parallel/tensor_shard/node_handler/where_handler.py
  class WhereHandler (line 16) | class WhereHandler(NodeHandler):
    method get_strategy_generator (line 21) | def get_strategy_generator(self) -> List[StrategyGenerator]:
    method get_operation_data_mapping (line 27) | def get_operation_data_mapping(self) -> Dict[str, OperationData]:
    method convert_physical_operand_to_logical_operand (line 55) | def convert_physical_operand_to_logical_operand(self, physical_operand...
    method post_process (line 60) | def post_process(self, strategy: ShardingStrategy):

FILE: colossalai/auto_parallel/tensor_shard/options.py
  class SolverPerference (line 7) | class SolverPerference(Enum):
  class ShardOption (line 17) | class ShardOption(Enum):
  class DataloaderOption (line 36) | class DataloaderOption(Enum):
  class SolverOptions (line 46) | class SolverOptions:

FILE: colossalai/auto_parallel/tensor_shard/sharding_strategy.py
  class OperationDataType (line 23) | class OperationDataType(Enum):
  class OperationData (line 36) | class OperationData:
    method __post_init__ (line 52) | def __post_init__(self):
    method __repr__ (line 72) | def __repr__(self) -> str:
    method __eq__ (line 75) | def __eq__(self, other) -> bool:
    method __hash__ (line 78) | def __hash__(self) -> int:
  class TrainCycleItem (line 83) | class TrainCycleItem:
  class MemoryCost (line 99) | class MemoryCost:
  class CommType (line 116) | class CommType(Enum):
  class CommAction (line 134) | class CommAction:
  class ShardingStrategy (line 152) | class ShardingStrategy:
    method input_sharding_specs (line 174) | def input_sharding_specs(self) -> Dict[OperationData, ShardingSpec]:
    method argument_sharding_specs (line 181) | def argument_sharding_specs(self) -> Dict[OperationData, ShardingSpec]:
    method param_sharding_specs (line 185) | def param_sharding_specs(self) -> Dict[OperationData, ShardingSpec]:
    method output_sharding_specs (line 189) | def output_sharding_specs(self) -> Dict[OperationData, ShardingSpec]:
    method _get_sharding_spec (line 192) | def _get_sharding_spec(self, operation_data_type: OperationDataType):
    method get_op_data_by_name (line 196) | def get_op_data_by_name(self, name: str):
    method get_sharding_spec_by_name (line 202) | def get_sharding_spec_by_name(self, name: str):
    method clone (line 208) | def clone(self):
  class StrategiesVector (line 237) | class StrategiesVector(list):
    method __init__ (line 246) | def __init__(self, node: Node):
    method check_merge (line 254) | def check_merge(self):

FILE: colossalai/auto_parallel/tensor_shard/solver/cost_graph.py
  class CostGraph (line 6) | class CostGraph:
    method __init__ (line 20) | def __init__(self, leaf_strategies, simplify=True, forward_only=False):
    method _remove_invalid_node (line 32) | def _remove_invalid_node(self, node, attr_name):
    method _build_cost_graph (line 41) | def _build_cost_graph(self):
    method get_edge_cost (line 98) | def get_edge_cost(self, src_node, dst_node):
    method merge_node (line 101) | def merge_node(self, src_node, dst_node):
    method _reindexing_src (line 190) | def _reindexing_src(self, src):
    method simplify_graph (line 195) | def simplify_graph(self):

FILE: colossalai/auto_parallel/tensor_shard/solver/graph_analysis.py
  class LiveVariable (line 14) | class LiveVariable:
  class LiveVariableVector (line 24) | class LiveVariableVector(list):
    method exists (line 29) | def exists(self, name) -> bool:
    method get (line 38) | def get(self, name) -> LiveVariable:
    method copy (line 44) | def copy(self) -> "LiveVariableVector":
  class LiveStage (line 55) | class LiveStage:
  class GraphAnalyser (line 66) | class GraphAnalyser:
    method __init__ (line 67) | def __init__(self, gm: GraphModule):
    method gm (line 72) | def gm(self) -> GraphModule:
    method graph (line 79) | def graph(self) -> Graph:
    method liveness_analysis (line 85) | def liveness_analysis(self) -> List[LiveStage]:
    method get_alias_set (line 166) | def get_alias_set(self):

FILE: colossalai/auto_parallel/tensor_shard/solver/solver.py
  class Solver (line 29) | class Solver:
    method __init__ (line 30) | def __init__(
    method _recover_merged_node_strategy (line 78) | def _recover_merged_node_strategy(self):
    method _generate_node_index_dict (line 95) | def _generate_node_index_dict(self) -> Dict[Node, int]:
    method _prepare_data_for_solver (line 101) | def _prepare_data_for_solver(self):
    method _call_solver_serialized_args (line 211) | def _call_solver_serialized_args(
    method call_solver_serialized_args (line 497) | def call_solver_serialized_args(self):

FILE: colossalai/auto_parallel/tensor_shard/solver/strategies_constructor.py
  class StrategiesConstructor (line 19) | class StrategiesConstructor:
    method __init__ (line 29) | def __init__(self, graph: Graph, device_mesh: DeviceMesh, solver_optio...
    method remove_duplicated_strategy (line 41) | def remove_duplicated_strategy(self, strategies_vector):
    method generate_alias_set (line 57) | def generate_alias_set(self):
    method build_strategies_and_cost (line 72) | def build_strategies_and_cost(self):

FILE: colossalai/auto_parallel/tensor_shard/utils/broadcast.py
  class BroadcastType (line 25) | class BroadcastType(Enum):
  function is_broadcastable (line 31) | def is_broadcastable(shape1: torch.Size, shape2: torch.Size) -> bool:
  function get_broadcast_shape (line 43) | def get_broadcast_shape(shape1: torch.Size, shape2: torch.Size) -> List[...
  function get_broadcast_dim_info (line 61) | def get_broadcast_dim_info(logical_shape, physical_shape):
  function recover_sharding_spec_for_broadcast_shape (line 92) | def recover_sharding_spec_for_broadcast_shape(
  function comm_actions_for_oprands (line 141) | def comm_actions_for_oprands(

FILE: colossalai/auto_parallel/tensor_shard/utils/factory.py
  function generate_sharding_spec (line 20) | def generate_sharding_spec(
  function generate_resharding_costs (line 55) | def generate_resharding_costs(
  function find_repeat_blocks (line 101) | def find_repeat_blocks(node_list: List[torch.fx.Node], root_module, comm...

FILE: colossalai/auto_parallel/tensor_shard/utils/misc.py
  function ignore_sharding_exception (line 12) | def ignore_sharding_exception(func):
  function check_sharding_spec_validity (line 37) | def check_sharding_spec_validity(sharding_spec: ShardingSpec, tensor: to...
  function pytree_map (line 79) | def pytree_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[...

FILE: colossalai/auto_parallel/tensor_shard/utils/reshape.py
  class PreviousStatus (line 7) | class PreviousStatus(Enum):
  function detect_reshape_mapping (line 19) | def detect_reshape_mapping(origin_shape: torch.Size, tgt_shape: torch.Si...
  function check_keep_sharding_status (line 134) | def check_keep_sharding_status(
  function infer_output_dim_partition_dict (line 177) | def infer_output_dim_partition_dict(

FILE: colossalai/auto_parallel/tensor_shard/utils/sharding.py
  function transpose_partition_dim (line 19) | def transpose_partition_dim(sharding_spec: ShardingSpec, dim1: int, dim2...
  function update_partition_dim (line 50) | def update_partition_dim(
  function enumerate_all_possible_2d_sharding (line 88) | def enumerate_all_possible_2d_sharding(mesh_dim_0, mesh_dim_1, dim_size):
  function enumerate_all_possible_1d_sharding (line 104) | def enumerate_all_possible_1d_sharding(mesh_dim_0, dim_size):
  function generate_sharding_size (line 114) | def generate_sharding_size(dim_partition_dict, device_mesh):

FILE: colossalai/autochunk/autochunk_codegen.py
  function _gen_chunk_slice_dim (line 31) | def _gen_chunk_slice_dim(chunk_dim: int, chunk_indice_name: str, shape: ...
  function _gen_loop_start (line 54) | def _gen_loop_start(chunk_input: List[Node], chunk_output: List[Node], c...
  function _gen_loop_end (line 100) | def _gen_loop_end(
  function _replace_name (line 135) | def _replace_name(context: str, name_from: str, name_to: str) -> str:
  function _replace_reshape_size (line 149) | def _replace_reshape_size(context: str, node_name: str, reshape_size_dic...
  function _replace_new_tensor_like_shape (line 159) | def _replace_new_tensor_like_shape(
  function _replace_new_tensor_shape (line 184) | def _replace_new_tensor_shape(
  function _add_node_slice (line 211) | def _add_node_slice(
  function emit_code_with_chunk (line 246) | def emit_code_with_chunk(
  class AutoChunkCodeGen (line 358) | class AutoChunkCodeGen(CodeGen):
    method __init__ (line 359) | def __init__(
    method _gen_python_code (line 375) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp...

FILE: colossalai/autochunk/estimate_memory.py
  class EstimateMemory (line 9) | class EstimateMemory(object):
    method __init__ (line 14) | def __init__(self) -> None:
    method _get_node_size (line 17) | def _get_node_size(self, x: Node) -> float:
    method _add_active_node (line 29) | def _add_active_node(self, n: Node, active_nodes: Dict, chunk_ratio: f...
    method _build_delete_node_dict (line 41) | def _build_delete_node_dict(self, node_mgr: NodeMgr) -> Dict:
    method _remove_deactive_node (line 62) | def _remove_deactive_node(
    method _get_tmp_memory (line 81) | def _get_tmp_memory(self, node, not_contiguous_list, delete=False):
    method _get_chunk_ratio (line 101) | def _get_chunk_ratio(self, node, chunk_node_dim, chunk_size):
    method _print_compute_op_mem_log (line 111) | def _print_compute_op_mem_log(self, log, nodes, title=None):
    method _add_active_nodes_from_list (line 124) | def _add_active_nodes_from_list(self, active_nodes: List, nodes: List)...
    method _get_memory_from_active_nodes (line 131) | def _get_memory_from_active_nodes(self, active_nodes: Dict) -> float:
    method estimate_chunk_inference_mem (line 139) | def estimate_chunk_inference_mem(self, node_list: List, chunk_infos: D...

FILE: colossalai/autochunk/reorder_graph.py
  class ReorderGraph (line 5) | class ReorderGraph(object):
    method __init__ (line 10) | def __init__(self, trace_indice: TraceIndice, node_mgr: NodeMgr) -> None:
    method _get_reorder_map (line 15) | def _get_reorder_map(self, chunk_info):
    method _reorder_chunk_info (line 36) | def _reorder_chunk_info(self, chunk_info, reorder_map):
    method _update_all_reorder_map (line 51) | def _update_all_reorder_map(self, reorder_map):
    method _reorder_self_node_list (line 55) | def _reorder_self_node_list(self, reorder_map):
    method _reorder_idx_trace (line 61) | def _reorder_idx_trace(self, reorder_map):
    method reorder_all (line 82) | def reorder_all(self, chunk_info):
    method reorder_node_list (line 94) | def reorder_node_list(self, node_list):
    method tmp_reorder (line 100) | def tmp_reorder(self, node_list, chunk_info):

FILE: colossalai/autochunk/search_chunk.py
  class SearchChunk (line 14) | class SearchChunk(object):
    method __init__ (line 43) | def __init__(self, gm, max_memory=None, print_mem=False, print_progres...
    method _init_trace (line 61) | def _init_trace(self) -> None:
    method _find_peak_region (line 74) | def _find_peak_region(self, mem_peak: List) -> int:
    method _search_max_chunk_region (line 106) | def _search_max_chunk_region(self, active_node: List, peak_region: int...
    method _find_chunk_info (line 157) | def _find_chunk_info(self, input_trace, output_trace, start_idx, end_i...
    method _search_possible_chunk_regions (line 196) | def _search_possible_chunk_regions(self, max_chunk_region: Tuple, peak...
    method _step_search (line 230) | def _step_search(
    method search_region (line 262) | def search_region(self) -> Dict:

FILE: colossalai/autochunk/select_chunk.py
  class SelectChunk (line 7) | class SelectChunk(object):
    method __init__ (line 8) | def __init__(
    method _select_best_chunk_region (line 26) | def _select_best_chunk_region(self, possible_chunk_regions, chunk_info...
    method _select_fit_memory_chunk_region (line 35) | def _select_fit_memory_chunk_region(self, possible_chunk_regions, chun...
    method _get_fit_chunk_size (line 84) | def _get_fit_chunk_size(self, chunk_region_dict, chunk_infos):
    method _chunk_size_binary_search (line 105) | def _chunk_size_binary_search(self, left, right, chunk_region_dict, ch...
    method _get_compute_node_num (line 125) | def _get_compute_node_num(self, start, end):
    method _select_min_memory_chunk_region (line 132) | def _select_min_memory_chunk_region(self, possible_chunk_regions, chun...
    method _is_legal_region (line 178) | def _is_legal_region(self, cur_chunk_info, chunk_infos):

FILE: colossalai/autochunk/trace_flow.py
  class TraceFlow (line 18) | class TraceFlow(object):
    method __init__ (line 19) | def __init__(self, trace_indice: TraceIndice, node_mgr: NodeMgr) -> None:
    method check_index_source (line 23) | def check_index_source(self, start_dim, start_node, start_idx, end_dim...
    method check_index_compute (line 48) | def check_index_compute(self, start_idx, end_dim, end_node, end_idx):
    method _assign_single_node_flow (line 66) | def _assign_single_node_flow(
    method _get_all_node_info (line 152) | def _get_all_node_info(self, end_dim, start_idx, end_idx):
    method _get_input_nodes_dim (line 197) | def _get_input_nodes_dim(self, inputs: List[Node], start_idx: int, end...
    method _get_prepose_nodes (line 245) | def _get_prepose_nodes(self, all_node_info: Dict, start_idx: int, end_...
    method _get_non_chunk_inputs (line 316) | def _get_non_chunk_inputs(self, chunk_info, start_idx, end_idx):
    method flow_search (line 328) | def flow_search(self, start_idx, start_dim, end_idx, end_dim):
    method _get_other_output_info (line 374) | def _get_other_output_info(
    method _update_chunk_info (line 407) | def _update_chunk_info(self, chunk_info: Dict, new_all_node_info: Dict...
    method _reassign_reshape_size (line 433) | def _reassign_reshape_size(self, chunk_info):
    method check_region_start_end (line 470) | def check_region_start_end(

FILE: colossalai/autochunk/trace_indice.py
  class TraceIndice (line 9) | class TraceIndice(object):
    method __init__ (line 31) | def __init__(self, node_mgr: NodeMgr) -> None:
    method _init_indice_trace_list (line 38) | def _init_indice_trace_list(self) -> List:
    method set_active_nodes (line 52) | def set_active_nodes(self, active_node_list: List) -> None:
    method _add_indice (line 55) | def _add_indice(self) -> int:
    method _del_dim (line 65) | def _del_dim(self, idx: int, dim_idx: int) -> None:
    method _add_dim (line 73) | def _add_dim(self, node_idx: int, dim_idx: int) -> None:
    method _add_source (line 84) | def _add_source(
    method _transform_indice (line 114) | def _transform_indice(self, node: Node, node_dim: int) -> int:
    method _inherit_indice (line 119) | def _inherit_indice(
    method _inherit_all_indice (line 143) | def _inherit_all_indice(self, node_from: Node, node_to: Node) -> None:
    method _inherit_more_indice_from_node_with_exclude (line 154) | def _inherit_more_indice_from_node_with_exclude(self, node_from: Node,...
    method _mark_computation (line 170) | def _mark_computation(self, node: Node, idx: int, dim: int) -> None:
    method _find_trace_from_node (line 187) | def _find_trace_from_node(self, node: Node) -> Dict:
    method _find_source_trace_from_node (line 201) | def _find_source_trace_from_node(self, node: Node) -> List:
    method _find_indice_trace_from_node (line 215) | def _find_indice_trace_from_node(self, node) -> List:
    method _find_compute_trace_from_node (line 227) | def _find_compute_trace_from_node(self, node: Node) -> List:
    method _assign_indice_as_input (line 239) | def _assign_indice_as_input(self, node: Node, node_idx: int, input_nod...
    method _assign_all_indice (line 251) | def _assign_all_indice(self, node: Node, node_idx: int) -> None:
    method _assign_transpose_indice (line 267) | def _assign_transpose_indice(self, node: Node, node_idx: int) -> None:
    method _assign_permute_indice (line 284) | def _assign_permute_indice(self, node: Node, node_idx: int) -> None:
    method _assign_linear_indice (line 301) | def _assign_linear_indice(self, node: Node, node_idx: int) -> None:
    method _assign_addmm_indice (line 322) | def _assign_addmm_indice(self, node: Node, node_idx: int) -> None:
    method _assign_baddbmm_indice (line 338) | def _assign_baddbmm_indice(self, node: Node, node_idx: int) -> None:
    method _assign_matmul_indice (line 360) | def _assign_matmul_indice(self, node: Node, node_idx: int) -> None:
    method _assign_conv2d_indice (line 380) | def _assign_conv2d_indice(self, node: Node, node_idx: int) -> None:
    method _assign_interpolate_indice (line 406) | def _assign_interpolate_indice(self, node: Node, node_idx: int) -> None:
    method _assign_layernorm_indice (line 422) | def _assign_layernorm_indice(self, node, idx):
    method _assign_groupnorm_indice (line 435) | def _assign_groupnorm_indice(self, node, idx):
    method _assign_elementwise_indice (line 447) | def _assign_elementwise_indice(self, node, idx):
    method _assign_no_change_indice (line 464) | def _assign_no_change_indice(self, node, idx):
    method _assign_einsum_indice (line 470) | def _assign_einsum_indice(self, node, idx):
    method _assign_softmax_indice (line 506) | def _assign_softmax_indice(self, node, idx):
    method _assign_split_indice (line 519) | def _assign_split_indice(self, node: Node, node_idx: int) -> None:
    method _assign_unsqueeze_indice (line 532) | def _assign_unsqueeze_indice(self, node: Node, node_idx: int) -> None:
    method _assign_cat_indice (line 549) | def _assign_cat_indice(self, node: Node, node_idx: int) -> None:
    method _assign_sum_indice (line 565) | def _assign_sum_indice(self, node: Node, node_idx: int) -> None:
    method _assign_flatten_indice (line 581) | def _assign_flatten_indice(self, node: Node, node_idx: int) -> None:
    method _assign_expand_indice (line 601) | def _assign_expand_indice(self, node: Node, node_idx: int) -> None:
    method _assign_unbind_indice (line 622) | def _assign_unbind_indice(self, node: Node, node_idx: int) -> None:
    method _assign_embedding_indice (line 635) | def _assign_embedding_indice(self, node: Node, node_idx: int) -> None:
    method _assign_getitem_indice (line 647) | def _assign_getitem_indice(self, node: Node, node_idx: int) -> None:
    method _assign_view_reshape_indice (line 718) | def _assign_view_reshape_indice(self, node: Node, node_idx: int) -> None:
    method _clear_trace (line 817) | def _clear_trace(self, node_idx: int) -> None:
    method trace_indice (line 838) | def trace_indice(self) -> None:

FILE: colossalai/autochunk/utils.py
  class NodeMgr (line 12) | class NodeMgr(object):
    method __init__ (line 13) | def __init__(self, nodes_list: List[Node]) -> None:
    method _set_node_dict (line 18) | def _set_node_dict(self) -> None:
    method find_node_idx (line 26) | def find_node_idx(self, node: Node) -> int:
    method find_node_idx_by_name (line 32) | def find_node_idx_by_name(self, node_name: str) -> int:
    method get_node_by_idx (line 38) | def get_node_by_idx(self, idx: int) -> Node:
    method get_node_slice_by_idx (line 44) | def get_node_slice_by_idx(self, start: int, end: int) -> List[Node]:
    method get_node_list (line 50) | def get_node_list(self) -> List:
    method update_node_list (line 56) | def update_node_list(self, node_list: List) -> None:
  function get_logger (line 64) | def get_logger() -> Any:
  function flat_list (line 68) | def flat_list(inputs: Any) -> List:
  function find_first_tensor_arg (line 85) | def find_first_tensor_arg(node: Node) -> Node:
  function is_non_compute_node (line 95) | def is_non_compute_node(node: Node) -> bool:
  function get_node_shape (line 111) | def get_node_shape(node: Node) -> Any:
  function is_non_memory_node (line 122) | def is_non_memory_node(node: Node) -> bool:
  function is_non_compute_node_except_placeholder (line 130) | def is_non_compute_node_except_placeholder(node: Node) -> bool:
  function is_non_compute_node_except_placeholder_output (line 136) | def is_non_compute_node_except_placeholder_output(node: Node) -> bool:
  function delete_free_var_from_last_use (line 142) | def delete_free_var_from_last_use(user_to_last_uses: Dict) -> None:
  function find_chunk_all_input_nodes (line 149) | def find_chunk_all_input_nodes(nodes: List[Node]) -> List:
  function find_chunk_compute_input_and_output_nodes (line 163) | def find_chunk_compute_input_and_output_nodes(nodes: List[Node]) -> Unio...
  function get_module_node_name (line 197) | def get_module_node_name(node: Node) -> str:
  function get_node_name (line 210) | def get_node_name(node: Node) -> str:
  function find_tensor_node (line 227) | def find_tensor_node(node_list: List[Node]) -> List[Node]:
  function find_tensor_shape_node (line 238) | def find_tensor_shape_node(node_list: List[Node]) -> List[Node]:

FILE: colossalai/booster/accelerator.py
  class Accelerator (line 16) | class Accelerator:
    method __init__ (line 24) | def __init__(self, device: str):
    method bind (line 31) | def bind(self):
    method configure_model (line 46) | def configure_model(self, model: nn.Module) -> nn.Module:

FILE: colossalai/booster/booster.py
  class Booster (line 33) | class Booster:
    method __init__ (line 74) | def __init__(
    method boost (line 126) | def boost(
    method backward (line 175) | def backward(self, loss: torch.Tensor, optimizer: Optimizer) -> None:
    method execute_pipeline (line 185) | def execute_pipeline(
    method no_sync (line 223) | def no_sync(self, model: nn.Module = None, optimizer: OptimizerWrapper...
    method enable_lora (line 240) | def enable_lora(
    method load_model (line 291) | def load_model(
    method save_model (line 315) | def save_model(
    method load_optimizer (line 352) | def load_optimizer(
    method save_optimizer (line 372) | def save_optimizer(
    method save_lr_scheduler (line 400) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str...
    method load_lr_scheduler (line 409) | def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str...
    method save_lora_as_pretrained (line 418) | def save_lora_as_pretrained(

FILE: colossalai/booster/mixed_precision/__init__.py
  function mixed_precision_factory (line 28) | def mixed_precision_factory(mixed_precision_type: str) -> MixedPrecision:

FILE: colossalai/booster/mixed_precision/bf16.py
  class BF16MixedPrecision (line 4) | class BF16MixedPrecision(MixedPrecision):

FILE: colossalai/booster/mixed_precision/fp16_apex.py
  class FP16ApexMixedPrecision (line 8) | class FP16ApexMixedPrecision(MixedPrecision):
    method __init__ (line 26) | def __init__(

FILE: colossalai/booster/mixed_precision/fp16_naive.py
  class FP16NaiveMixedPrecision (line 4) | class FP16NaiveMixedPrecision(MixedPrecision):
    method __init__ (line 18) | def __init__(

FILE: colossalai/booster/mixed_precision/fp16_torch.py
  class TorchAMPOptimizer (line 16) | class TorchAMPOptimizer(OptimizerWrapper):
    method __init__ (line 33) | def __init__(
    method backward (line 49) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw...
    method step (line 53) | def step(self, *args, **kwargs) -> Optional[float]:
    method scale_loss (line 58) | def scale_loss(self, loss: Tensor) -> Tensor:
    method unscale_grad (line 61) | def unscale_grad(self) -> None:
    method clip_grad_by_value (line 64) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None:
    method clip_grad_by_norm (line 68) | def clip_grad_by_norm(
  class TorchAMPModule (line 80) | class TorchAMPModule(ModelWrapper):
    method __init__ (line 88) | def __init__(self, module: nn.Module):
    method forward (line 91) | def forward(self, *args, **kwargs):
  class FP16TorchMixedPrecision (line 96) | class FP16TorchMixedPrecision(MixedPrecision):
    method __init__ (line 112) | def __init__(
    method configure (line 127) | def configure(

FILE: colossalai/booster/mixed_precision/fp8.py
  class FP8MixedPrecision (line 4) | class FP8MixedPrecision(MixedPrecision):

FILE: colossalai/booster/mixed_precision/mixed_precision_base.py
  class MixedPrecision (line 10) | class MixedPrecision(ABC):
    method configure (line 16) | def configure(

FILE: colossalai/booster/plugin/dp_plugin_base.py
  class DPPluginBase (line 12) | class DPPluginBase(Plugin):
    method __init__ (line 15) | def __init__(self) -> None:
    method prepare_dataloader (line 23) | def prepare_dataloader(

FILE: colossalai/booster/plugin/gemini_plugin.py
  function get_param_info (line 45) | def get_param_info(optim: Optimizer):
  class GeminiCheckpointIO (line 63) | class GeminiCheckpointIO(GeneralCheckpointIO):
    method __init__ (line 64) | def __init__(self) -> None:
    method save_unsharded_model (line 69) | def save_unsharded_model(
    method load_unsharded_model (line 98) | def load_unsharded_model(
    method save_unsharded_optimizer (line 115) | def save_unsharded_optimizer(
    method load_unsharded_optimizer (line 141) | def load_unsharded_optimizer(
    method save_sharded_model (line 153) | def save_sharded_model(
    method load_sharded_model (line 219) | def load_sharded_model(
    method save_sharded_optimizer (line 242) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 317) | def load_sharded_optimizer(
    method save_lr_scheduler (line 361) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
  class GeminiPlugin (line 369) | class GeminiPlugin(DPPluginBase):
    method __init__ (line 442) | def __init__(
    method __del__ (line 569) | def __del__(self):
    method support_no_sync (line 573) | def support_no_sync(self) -> bool:
    method support_lora (line 576) | def support_lora(self) -> bool:
    method control_precision (line 579) | def control_precision(self) -> bool:
    method supported_precisions (line 582) | def supported_precisions(self) -> List[str]:
    method control_device (line 585) | def control_device(self) -> bool:
    method supported_devices (line 588) | def supported_devices(self) -> List[str]:
    method prepare_dataloader (line 591) | def prepare_dataloader(
    method configure (line 655) | def configure(
    method control_checkpoint_io (line 700) | def control_checkpoint_io(self) -> bool:
    method get_checkpoint_io (line 703) | def get_checkpoint_io(self) -> CheckpointIO:
    method no_sync (line 706) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It...
    method enable_lora (line 709) | def enable_lora(

FILE: colossalai/booster/plugin/hybrid_parallel_plugin.py
  function _convert_floating_point (line 53) | def _convert_floating_point(x, dtype: torch.dtype = torch.float16):
  class HybridParallelModule (line 59) | class HybridParallelModule(ModelWrapper, AMPModelMixin):
    method __init__ (line 60) | def __init__(
    method sync_shared_params (line 131) | def sync_shared_params(self):
    method no_sync (line 139) | def no_sync(self):
    method sync_dp_grads (line 161) | def sync_dp_grads(self):
    method sync_sp_grads (line 185) | def sync_sp_grads(self, grads: Optional[List[torch.Tensor]] = None):
    method forward (line 217) | def forward(self, *args, **kwargs):
    method unwrap (line 224) | def unwrap(self, unwrap_peft: bool = True):
    method _force_wait_all_gather (line 232) | def _force_wait_all_gather(self):
    method _hook_context (line 236) | def _hook_context(self):
  function get_param_info (line 240) | def get_param_info(optim: Optimizer):
  function reinitialize_optimizer (line 269) | def reinitialize_optimizer(optim: Optimizer, model: Module):
  class HybridParallelNaiveOptimizer (line 278) | class HybridParallelNaiveOptimizer(OptimizerWrapper):
    method __init__ (line 279) | def __init__(
    method backward (line 303) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw...
    method backward_by_grad (line 331) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso...
    method step (line 357) | def step(self, *args, **kwargs):
    method _compute_grad_norm (line 380) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]...
    method _clip_grad_norm (line 458) | def _clip_grad_norm(self, total_norm: float) -> None:
    method update_master_params (line 477) | def update_master_params(self, model: Module):
    method get_working_to_master_map (line 480) | def get_working_to_master_map(self):
    method get_master_to_working_map (line 483) | def get_master_to_working_map(self):
    method get_grad_norm (line 486) | def get_grad_norm(self, norm_type=2, **kwargs):
  class HybridParallelAMPOptimizer (line 490) | class HybridParallelAMPOptimizer(MixedPrecisionOptimizer):
    method __init__ (line 491) | def __init__(
    method backward (line 532) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw...
    method backward_by_grad (line 559) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso...
    method _compute_grad_norm (line 584) | def _compute_grad_norm(self, param_gradient_pairs: List[Tuple[Tensor]]...
  class HybridParallelZeroOptimizer (line 666) | class HybridParallelZeroOptimizer(LowLevelZeroOptimizer):
    method __init__ (line 667) | def __init__(
    method sync_dp_grads (line 727) | def sync_dp_grads(self):
    method _sync_sp_grads (line 745) | def _sync_sp_grads(self):
    method backward (line 792) | def backward(self, loss, inputs=None, retain_graph=False):
    method backward_by_grad (line 817) | def backward_by_grad(self, tensor, grad, inputs: Tensor = None, retain...
    method _compute_grad_norm (line 842) | def _compute_grad_norm(self, dp_pg, gradients: List[Tensor], norm_type...
  class HybridParallelPlugin (line 928) | class HybridParallelPlugin(PipelinePluginBase):
    method __init__ (line 1000) | def __init__(
    method __del__ (line 1256) | def __del__(self):
    method enable_pipeline_parallelism (line 1261) | def enable_pipeline_parallelism(self) -> bool:
    method supported_devices (line 1264) | def supported_devices(self) -> List[str]:
    method supported_precisions (line 1267) | def supported_precisions(self) -> List[str]:
    method control_device (line 1270) | def control_device(self) -> bool:
    method control_precision (line 1273) | def control_precision(self) -> bool:
    method support_no_sync (line 1276) | def support_no_sync(self) -> bool:
    method support_lora (line 1279) | def support_lora(self) -> bool:
    method control_checkpoint_io (line 1282) | def control_checkpoint_io(self) -> bool:
    method configure (line 1285) | def configure(
    method execute_pipeline (line 1387) | def execute_pipeline(
    method prepare_dataloader (line 1437) | def prepare_dataloader(
    method get_checkpoint_io (line 1497) | def get_checkpoint_io(self) -> CheckpointIO:
    method no_sync (line 1502) | def no_sync(self, model: Module, optimizer: OptimizerWrapper) -> Itera...
    method enable_lora (line 1508) | def enable_lora(

FILE: colossalai/booster/plugin/low_level_zero_plugin.py
  function _convert_floating_point (line 52) | def _convert_floating_point(x, dtype: torch.dtype = torch.float16):
  class OptimizerParamCheckState (line 61) | class OptimizerParamCheckState(enum.Enum):
  class LowLevelZeroModel (line 67) | class LowLevelZeroModel(ModelWrapper, AMPModelMixin):
    method __init__ (line 68) | def __init__(
    method forward (line 102) | def forward(self, *args, **kwargs):
    method _force_wait_all_gather (line 109) | def _force_wait_all_gather(self):
    method _hook_context (line 113) | def _hook_context(self):
  class LowLevelZeroCheckpointIO (line 117) | class LowLevelZeroCheckpointIO(TorchDDPCheckpointIO):
    method save_unsharded_optimizer (line 118) | def save_unsharded_optimizer(
    method load_unsharded_optimizer (line 149) | def load_unsharded_optimizer(
    method save_sharded_optimizer (line 163) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 247) | def load_sharded_optimizer(
    method load_unsharded_model (line 301) | def load_unsharded_model(
    method load_sharded_model (line 316) | def load_sharded_model(
    method save_unsharded_model (line 339) | def save_unsharded_model(
    method save_sharded_model (line 346) | def save_sharded_model(
    method save_lora_as_pretrained (line 362) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ...
  class LowLevelZeroPlugin (line 368) | class LowLevelZeroPlugin(DPPluginBase):
    method __init__ (line 407) | def __init__(
    method support_no_sync (line 472) | def support_no_sync(self) -> bool:
    method support_lora (line 475) | def support_lora(self) -> bool:
    method control_precision (line 478) | def control_precision(self) -> bool:
    method supported_precisions (line 481) | def supported_precisions(self) -> List[str]:
    method control_device (line 484) | def control_device(self) -> bool:
    method supported_devices (line 487) | def supported_devices(self) -> List[str]:
    method support_lora (line 490) | def support_lora(self) -> bool:
    method enable_lora (line 493) | def enable_lora(
    method get_param_group_id (line 515) | def get_param_group_id(self, optimizer: Optimizer, origin_param: Param...
    method get_param_group_id (line 523) | def get_param_group_id(self, optimizer: Optimizer, origin_param: Param...
    method add_lora_params_to_optimizer (line 539) | def add_lora_params_to_optimizer(self, model, optimizer):
    method configure (line 564) | def configure(
    method control_checkpoint_io (line 624) | def control_checkpoint_io(self) -> bool:
    method get_checkpoint_io (line 627) | def get_checkpoint_io(self) -> CheckpointIO:
    method no_sync (line 630) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It...

FILE: colossalai/booster/plugin/moe_hybrid_parallel_plugin.py
  class MoeHybridParallelZeroOptimizer (line 39) | class MoeHybridParallelZeroOptimizer(HybridParallelZeroOptimizer):
    method __init__ (line 40) | def __init__(
  class MoeHybridParallelPlugin (line 107) | class MoeHybridParallelPlugin(HybridParallelPlugin):
    method __init__ (line 177) | def __init__(
    method get_checkpoint_io (line 412) | def get_checkpoint_io(self) -> MoECheckpointIO:
    method configure (line 423) | def configure(

FILE: colossalai/booster/plugin/plugin_base.py
  class Plugin (line 15) | class Plugin(ABC):
    method supported_devices (line 17) | def supported_devices(self) -> List[str]:
    method supported_precisions (line 21) | def supported_precisions(self) -> List[str]:
    method control_precision (line 25) | def control_precision(self) -> bool:
    method control_device (line 29) | def control_device(self) -> bool:
    method support_no_sync (line 33) | def support_no_sync(self) -> bool:
    method support_lora (line 37) | def support_lora(self) -> bool:
    method configure (line 41) | def configure(
    method control_checkpoint_io (line 53) | def control_checkpoint_io(self) -> bool:
    method get_checkpoint_io (line 59) | def get_checkpoint_io(self) -> CheckpointIO:
    method no_sync (line 65) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It...
    method enable_lora (line 71) | def enable_lora(self, model: nn.Module, pretrained_dir: str, lora_conf...
    method prepare_dataloader (line 77) | def prepare_dataloader(

FILE: colossalai/booster/plugin/pp_plugin_base.py
  class PipelinePluginBase (line 11) | class PipelinePluginBase(Plugin):
    method execute_pipeline (line 13) | def execute_pipeline(

FILE: colossalai/booster/plugin/torch_ddp_plugin.py
  class TorchDDPCheckpointIO (line 25) | class TorchDDPCheckpointIO(GeneralCheckpointIO):
    method __init__ (line 26) | def __init__(self) -> None:
    method load_unsharded_model (line 31) | def load_unsharded_model(
    method save_unsharded_model (line 47) | def save_unsharded_model(
    method load_unsharded_optimizer (line 59) | def load_unsharded_optimizer(
    method save_unsharded_optimizer (line 70) | def save_unsharded_optimizer(
    method save_lr_scheduler (line 80) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
    method save_sharded_model (line 87) | def save_sharded_model(
    method load_sharded_model (line 112) | def load_sharded_model(
    method save_sharded_optimizer (line 136) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 154) | def load_sharded_optimizer(
    method save_lora_as_pretrained (line 170) | def save_lora_as_pretrained(
  class TorchDDPModel (line 197) | class TorchDDPModel(ModelWrapper):
    method __init__ (line 198) | def __init__(self, module: nn.Module, *args, **kwargs) -> None:
    method unwrap (line 202) | def unwrap(self, unwrap_peft: bool = True) -> nn.Module:
  class TorchDDPPlugin (line 209) | class TorchDDPPlugin(DPPluginBase):
    method __init__ (line 235) | def __init__(
    method support_no_sync (line 256) | def support_no_sync(self) -> bool:
    method support_lora (line 259) | def support_lora(self) -> bool:
    method control_precision (line 262) | def control_precision(self) -> bool:
    method supported_precisions (line 265) | def supported_precisions(self) -> List[str]:
    method control_device (line 268) | def control_device(self) -> bool:
    method supported_devices (line 271) | def supported_devices(self) -> List[str]:
    method configure (line 274) | def configure(
    method control_checkpoint_io (line 301) | def control_checkpoint_io(self) -> bool:
    method get_checkpoint_io (line 304) | def get_checkpoint_io(self) -> CheckpointIO:
    method no_sync (line 307) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It...
    method enable_lora (line 311) | def enable_lora(

FILE: colossalai/booster/plugin/torch_fsdp_plugin.py
  class TorchFSDPCheckpointIO (line 40) | class TorchFSDPCheckpointIO(GeneralCheckpointIO):
    method __init__ (line 41) | def __init__(self) -> None:
    method load_unsharded_model (line 46) | def load_unsharded_model(
    method load_unsharded_optimizer (line 54) | def load_unsharded_optimizer(
    method save_unsharded_model (line 91) | def save_unsharded_model(
    method save_unsharded_optimizer (line 118) | def save_unsharded_optimizer(
    method save_sharded_model (line 164) | def save_sharded_model(
    method load_sharded_model (line 232) | def load_sharded_model(
    method save_sharded_optimizer (line 264) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 363) | def load_sharded_optimizer(
    method save_lr_scheduler (line 427) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
  class TorchFSDPModel (line 435) | class TorchFSDPModel(ModelWrapper):
    method __init__ (line 436) | def __init__(self, module: nn.Module, *args, **kwargs) -> None:
  class FSDPOptimizerWrapper (line 441) | class FSDPOptimizerWrapper(OptimizerWrapper):
    method __init__ (line 442) | def __init__(self, optimizer: Optimizer, model: nn.Module):
    method unwrap_model (line 446) | def unwrap_model(self) -> nn.Module:
  class TorchFSDPPlugin (line 450) | class TorchFSDPPlugin(DPPluginBase):
    method __init__ (line 472) | def __init__(
    method support_no_sync (line 503) | def support_no_sync(self) -> bool:
    method support_lora (line 506) | def support_lora(self) -> bool:
    method no_sync (line 509) | def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> It...
    method control_precision (line 512) | def control_precision(self) -> bool:
    method supported_precisions (line 515) | def supported_precisions(self) -> List[str]:
    method control_device (line 518) | def control_device(self) -> bool:
    method supported_devices (line 521) | def supported_devices(self) -> List[str]:
    method configure (line 524) | def configure(
    method control_checkpoint_io (line 560) | def control_checkpoint_io(self) -> bool:
    method get_checkpoint_io (line 563) | def get_checkpoint_io(self) -> CheckpointIO:
    method enable_lora (line 566) | def enable_lora(

FILE: colossalai/checkpoint_io/checkpoint_io_base.py
  class CheckpointIO (line 18) | class CheckpointIO(ABC):
    method __init__ (line 65) | def __init__(self):
    method _sync_io (line 70) | def _sync_io(self):
    method _sync_d2h (line 75) | def _sync_d2h(self):
    method synchronize (line 79) | def synchronize(self):
    method __del__ (line 83) | def __del__(self):
    method load_model (line 87) | def load_model(
    method save_model (line 143) | def save_model(
    method load_optimizer (line 196) | def load_optimizer(
    method save_optimizer (line 232) | def save_optimizer(
    method load_sharded_model (line 268) | def load_sharded_model(
    method load_unsharded_model (line 284) | def load_unsharded_model(
    method save_sharded_model (line 300) | def save_sharded_model(
    method save_unsharded_model (line 323) | def save_unsharded_model(
    method load_sharded_optimizer (line 341) | def load_sharded_optimizer(
    method load_unsharded_optimizer (line 361) | def load_unsharded_optimizer(
    method save_sharded_optimizer (line 375) | def save_sharded_optimizer(
    method save_unsharded_optimizer (line 396) | def save_unsharded_optimizer(
    method save_lr_scheduler (line 413) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
    method load_lr_scheduler (line 423) | def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
    method save_lora_as_pretrained (line 439) | def save_lora_as_pretrained(

FILE: colossalai/checkpoint_io/general_checkpoint_io.py
  class GeneralCheckpointIO (line 37) | class GeneralCheckpointIO(CheckpointIO):
    method load_unsharded_model (line 42) | def load_unsharded_model(
    method save_unsharded_model (line 55) | def save_unsharded_model(
    method load_sharded_optimizer (line 71) | def load_sharded_optimizer(
    method save_sharded_optimizer (line 104) | def save_sharded_optimizer(
    method load_unsharded_optimizer (line 174) | def load_unsharded_optimizer(
    method save_unsharded_optimizer (line 185) | def save_unsharded_optimizer(
    method save_sharded_model (line 210) | def save_sharded_model(
    method load_sharded_model (line 269) | def load_sharded_model(
    method save_lora_as_pretrained (line 311) | def save_lora_as_pretrained(

FILE: colossalai/checkpoint_io/hybrid_parallel_checkpoint_io.py
  class HybridParallelCheckpointIO (line 59) | class HybridParallelCheckpointIO(GeneralCheckpointIO):
    method __init__ (line 71) | def __init__(
    method _model_sharder (line 97) | def _model_sharder(
    method _optimizer_sharder (line 159) | def _optimizer_sharder(
    method save_sharded_model (line 205) | def save_sharded_model(
    method load_sharded_model (line 361) | def load_sharded_model(
    method save_sharded_optimizer (line 469) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 647) | def load_sharded_optimizer(
    method load_states_into_optimizer (line 737) | def load_states_into_optimizer(self, optimizer: Optimizer, state_dict:...
    method save_unsharded_model (line 761) | def save_unsharded_model(
    method load_unsharded_model (line 824) | def load_unsharded_model(
    method save_unsharded_optimizer (line 861) | def save_unsharded_optimizer(
    method load_unsharded_optimizer (line 956) | def load_unsharded_optimizer(
    method save_lr_scheduler (line 1009) | def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
    method gather_from_sharded_optimizer_state (line 1017) | def gather_from_sharded_optimizer_state(
    method shard_from_complete_optimizer_state (line 1082) | def shard_from_complete_optimizer_state(
    method save_lora_as_pretrained (line 1142) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ...

FILE: colossalai/checkpoint_io/index_file.py
  class CheckpointIndexFile (line 12) | class CheckpointIndexFile:
    method __init__ (line 23) | def __init__(self, root_path=None) -> None:
    method from_file (line 31) | def from_file(index_path: Union[str, Path]):
    method load (line 45) | def load(self, json_path: str):
    method export (line 65) | def export(self, json_path: str):
    method append_weight_map (line 81) | def append_weight_map(self, param_name: str, shard_file: str):
    method append_meta_data (line 91) | def append_meta_data(self, name: str, val: Any):
    method contains_dtensor (line 101) | def contains_dtensor(self):
    method get_checkpoint_filenames (line 114) | def get_checkpoint_filenames(self) -> List[str]:
    method assert_no_dtensor_checkpoint (line 138) | def assert_no_dtensor_checkpoint(self):
    method get_checkpoint_file (line 143) | def get_checkpoint_file(self, param_name: str) -> str:
    method get_all_param_names (line 156) | def get_all_param_names(self):
    method get_param_group_filename (line 162) | def get_param_group_filename(self) -> Union[str, None]:
    method write_index_file (line 174) | def write_index_file(self, save_index_file):

FILE: colossalai/checkpoint_io/moe_checkpoint.py
  class MoECheckpointIO (line 44) | class MoECheckpointIO(HybridParallelCheckpointIO):
    method __init__ (line 45) | def __init__(
    method _model_sharder (line 71) | def _model_sharder(
    method save_sharded_model (line 116) | def save_sharded_model(
    method gather_from_sharded_optimizer_state (line 249) | def gather_from_sharded_optimizer_state(
    method _optimizer_sharder (line 323) | def _optimizer_sharder(
    method save_sharded_optimizer (line 369) | def save_sharded_optimizer(
    method load_sharded_optimizer (line 516) | def load_sharded_optimizer(
    method shard_from_complete_optimizer_state (line 624) | def shard_from_complete_optimizer_state(
    method pre_save_model (line 686) | def pre_save_model(self, model: nn.Module) -> dict:
    method save_unsharded_model (line 724) | def save_unsharded_model(
    method save_unsharded_optimizer (line 747) | def save_unsharded_optimizer(
    method load_unsharded_optimizer (line 811) | def load_unsharded_optimizer(
    method save_lora_as_pretrained (line 899) | def save_lora_as_pretrained(self, model, checkpoint, use_safetensors, ...

FILE: colossalai/checkpoint_io/utils.py
  function calculate_tensor_size (line 48) | def calculate_tensor_size(tensor: torch.Tensor) -> float:
  function is_safetensors_available (line 62) | def is_safetensors_available() -> bool:
  function is_dtensor_checkpoint (line 75) | def is_dtensor_checkpoint(checkpoint_file_path: str) -> bool:
  function is_safetensor_checkpoint (line 91) | def is_safetensor_checkpoint(checkpoint_file_path: str) -> bool:
  function search_tp_partition_dim (line 107) | def search_tp_partition_dim(current_shape: torch.Size, original_shape: t...
  function search_padding_dim (line 135) | def search_padding_dim(global_shape: torch.Size, original_shape: torch.S...
  class StateDictSharder (line 149) | class StateDictSharder:
    method __init__ (line 150) | def __init__(self, size_per_shard: int) -> None:
    method append_param (line 155) | def append_param(self, name: str, tensor: torch.Tensor) -> Tuple[Optio...
    method append_optim_state (line 172) | def append_optim_state(self, param_id: int, state: OrderedDict) -> Tup...
  function gather_distributed_param (line 209) | def gather_distributed_param(param: torch.Tensor, keep_vars: bool = Fals...
  function save_state_dict_shards (line 229) | def save_state_dict_shards(
  function async_save_state_dict_shards (line 278) | def async_save_state_dict_shards(
  function async_move_save_state_dict_shards (line 336) | def async_move_save_state_dict_shards(
  function shard_model_checkpoint (line 405) | def shard_model_checkpoint(
  function shard_optimizer_checkpoint (line 432) | def shard_optimizer_checkpoint(
  function save_state_dict (line 467) | def save_state_dict(
  function save_param_groups (line 495) | def save_param_groups(state_dict: dict, group_file_path: str) -> None:
  function clean_folder (line 507) | def clean_folder(
  function save_config_file (line 543) | def save_config_file(model: nn.Module, checkpoint_path: str, is_master: ...
  function save_dtensor (line 579) | def save_dtensor(name: str, tensor: torch.Tensor, index_file: "Checkpoin...
  function get_checkpoint_file_suffix (line 614) | def get_checkpoint_file_suffix(use_safetensors: bool) -> str:
  function generate_checkpoint_shard_file_name (line 630) | def generate_checkpoint_shard_file_name(
  function generate_dtensor_file_name (line 653) | def generate_dtensor_file_name(param_name: str, index: int, use_safetens...
  function load_shard_state_dict (line 674) | def load_shard_state_dict(checkpoint_file: Path, use_safetensors: bool =...
  function load_state_dict_into_model (line 688) | def load_state_dict_into_model(
  function load_param_groups_into_optimizer (line 741) | def load_param_groups_into_optimizer(optimizer: Optimizer, param_group_p...
  function load_states_into_optimizer (line 786) | def load_states_into_optimizer(optimizer: Optimizer, state_dict: dict, i...
  function sharded_optimizer_loading_epilogue (line 834) | def sharded_optimizer_loading_epilogue(optimizer: Optimizer):
  function has_index_file (line 849) | def has_index_file(checkpoint_path: str) -> Tuple[bool, Optional[Path]]:
  function load_state_dict (line 885) | def load_state_dict(checkpoint_file_path: Path):
  function add_prefix (line 918) | def add_prefix(weights_name: str, prefix: Optional[str] = None) -> str:
  function get_model_base_filenames (line 927) | def get_model_base_filenames(prefix: str = None, use_safetensors: bool =...
  function get_optimizer_base_filenames (line 940) | def get_optimizer_base_filenames(prefix: str = None, use_safetensors: bo...
  function get_shard_filename (line 956) | def get_shard_filename(weights_name: str, idx: int):
  function _pin_tensor (line 965) | def _pin_tensor(tensor: torch.Tensor, empty: bool = True) -> torch.Tensor:
  function create_pinned_state_dict (line 971) | def create_pinned_state_dict(
  function load_optim_or_model_shard (line 991) | def load_optim_or_model_shard(path: str, is_optim: bool, use_safetensors...
  function load_state_dict_shards (line 1002) | def load_state_dict_shards(
  function get_lora_state_dict (line 1024) | def get_lora_state_dict(
  function gather_state_dict_fast (line 1120) | def gather_state_dict_fast(

FILE: colossalai/cli/check/__init__.py
  function check (line 10) | def check(installation):

FILE: colossalai/cli/check/check_installation.py
  function to_click_output (line 10) | def to_click_output(val):
  function check_installation (line 20) | def check_installation():
  function _is_compatible (line 101) | def _is_compatible(versions):
  function _parse_colossalai_version (line 127) | def _parse_colossalai_version():
  function _check_aot_built_cuda_extension_installed (line 151) | def _check_aot_built_cuda_extension_installed():
  function _check_torch_version (line 165) | def _check_torch_version():
  function _check_cuda_version (line 191) | def _check_cuda_version():

FILE: colossalai/cli/cli.py
  class Arguments (line 7) | class Arguments:
    method __init__ (line 8) | def __init__(self, arg_dict):
  function cli (line 14) | def cli():

FILE: colossalai/cli/launcher/__init__.py
  function run (line 70) | def run(

FILE: colossalai/cli/launcher/hostinfo.py
  class HostInfo (line 4) | class HostInfo:
    method __init__ (line 13) | def __init__(
    method is_host_localhost (line 23) | def is_host_localhost(hostname: str, port: str = None) -> None:
    method __str__ (line 51) | def __str__(self):
    method __repr__ (line 54) | def __repr__(self):
  class HostInfoList (line 58) | class HostInfoList:
    method __init__ (line 63) | def __init__(self):
    method append (line 66) | def append(self, hostinfo: HostInfo) -> None:
    method remove (line 76) | def remove(self, hostname: str) -> None:
    method get_hostinfo (line 87) | def get_hostinfo(self, hostname: str) -> HostInfo:
    method has (line 104) | def has(self, hostname: str) -> bool:
    method __iter__ (line 119) | def __iter__(self):
    method __len__ (line 122) | def __len__(self):

FILE: colossalai/cli/launcher/multinode_runner.py
  function run_on_host (line 10) | def run_on_host(
  class MultiNodeRunner (line 66) | class MultiNodeRunner:
    method __init__ (line 72) | def __init__(self):
    method connect (line 77) | def connect(self, host_info_list: HostInfoList, workdir: str, env: dic...
    method send (line 95) | def send(self, hostinfo: HostInfo, cmd: str) -> None:
    method stop_all (line 108) | def stop_all(self) -> None:
    method recv_from_all (line 116) | def recv_from_all(self) -> dict:

FILE: colossalai/cli/launcher/run.py
  function fetch_hostfile (line 18) | def fetch_hostfile(hostfile_path: str, ssh_port: int) -> HostInfoList:
  function parse_device_filter (line 58) | def parse_device_filter(device_pool: HostInfoList, include_str=None, exc...
  function get_launch_command (line 108) | def get_launch_command(
  function launch_multi_processes (line 212) | def launch_multi_processes(args: Config) -> None:

FILE: colossalai/cluster/device_mesh_manager.py
  class DeviceMeshInfo (line 12) | class DeviceMeshInfo:
    method __post_init__ (line 24) | def __post_init__(self):
  function initialize_device_mesh (line 33) | def initialize_device_mesh(device_mesh_info: DeviceMeshInfo):
  class DeviceMeshManager (line 58) | class DeviceMeshManager:
    method __init__ (line 63) | def __init__(self):
    method create_device_mesh (line 66) | def create_device_mesh(self, name, device_mesh_info: DeviceMeshInfo) -...
    method get (line 81) | def get(self, name: str) -> DeviceMesh:
    method destroy (line 96) | def destroy(self, name: str) -> None:
    method destroy_all (line 111) | def destroy_all(self):

FILE: colossalai/cluster/dist_coordinator.py
  class DistCoordinator (line 11) | class DistCoordinator(metaclass=SingletonMeta):
    method __init__ (line 40) | def __init__(self):
    method rank (line 50) | def rank(self) -> int:
    method world_size (line 54) | def world_size(self) -> int:
    method local_rank (line 58) | def local_rank(self) -> int:
    method _assert_local_rank_set (line 61) | def _assert_local_rank_set(self):
    method is_master (line 69) | def is_master(self, process_group: ProcessGroup = None) -> bool:
    method is_node_master (line 82) | def is_node_master(self) -> bool:
    method is_last_process (line 92) | def is_last_process(self, process_group: ProcessGroup = None) -> bool:
    method print_on_master (line 106) | def print_on_master(self, msg: str, process_group: ProcessGroup = None):
    method print_on_node_master (line 118) | def print_on_node_master(self, msg: str):
    method priority_execution (line 130) | def priority_execution(self, executor_rank: int = 0, process_group: Pr...
    method destroy (line 159) | def destroy(self, process_group: ProcessGroup = None):
    method block_all (line 168) | def block_all(self, process_group: ProcessGroup = None):
    method on_master_only (line 177) | def on_master_only(self, process_group: ProcessGroup = None):

FILE: colossalai/cluster/process_group_manager.py
  class ProcessGroupManager (line 7) | class ProcessGroupManager:
    method __init__ (line 19) | def __init__(self):
    method create_process_group (line 22) | def create_process_group(self, name: str, ranks: List[int], backend: s...
    method get (line 41) | def get(self, name: str) -> ProcessGroup:
    method destroy (line 56) | def destroy(self, name: str) -> None:
    method destroy_all (line 69) | def destroy_all(self) -> None:

FILE: colossalai/cluster/process_group_mesh.py
  function prod (line 13) | def prod(nums: List[int]) -> int:
  class ProcessGroupMesh (line 25) | class ProcessGroupMesh:
    method __init__ (line 40) | def __init__(self, *size: int) -> None:
    method destroy_mesh_process_groups (line 54) | def destroy_mesh_process_groups(self):
    method shape (line 76) | def shape(self) -> Tuple[int, ...]:
    method rank (line 80) | def rank(self) -> int:
    method size (line 83) | def size(self, dim: Optional[int] = None) -> Union[int, Tuple[int, ...]]:
    method coordinate (line 97) | def coordinate(self, dim: Optional[int] = None) -> Union[int, Tuple[in...
    method unravel (line 112) | def unravel(rank: int, shape: Tuple[int, ...]) -> Tuple[int, ...]:
    method ravel (line 125) | def ravel(coord: Tuple[int, ...], shape: Tuple[int, ...], mode: str = ...
    method _get_group (line 143) | def _get_group(self, ranks_in_group: List[int], backend: Optional[str]...
    method get_ranks_in_group (line 161) | def get_ranks_in_group(self, group: ProcessGroup) -> List[int]:
    method get_coords_along_axis (line 173) | def get_coords_along_axis(
    method create_group_along_axis (line 210) | def create_group_along_axis(
    method get_group_along_axis (line 251) | def get_group_along_axis(

FILE: colossalai/context/config.py
  class Config (line 12) | class Config(dict):
    method __init__ (line 20) | def __init__(self, config: dict = None):
    method __missing__ (line 25) | def __missing__(self, key):
    method __getattr__ (line 28) | def __getattr__(self, key):
    method __setattr__ (line 35) | def __setattr__(self, key, value):
    method _add_item (line 38) | def _add_item(self, key, value):
    method update (line 44) | def update(self, config):
    method from_file (line 51) | def from_file(filename: str):
  class ConfigException (line 106) | class ConfigException(Exception):

FILE: colossalai/context/singleton_meta.py
  class SingletonMeta (line 4) | class SingletonMeta(type):
    method __call__ (line 13) | def __call__(cls, *args, **kwargs):

FILE: colossalai/device/alpha_beta_profiler.py
  class AlphaBetaProfiler (line 15) | class AlphaBetaProfiler:
    method __init__ (line 32) | def __init__(
    method _init_profiling (line 65) | def _init_profiling(self):
    method _profile (line 80) | def _profile(self, process_group, pg_handler, nbytes):
    method profile_latency (line 127) | def profile_latency(self, process_group, pg_handler):
    method profile_bandwidth (line 152) | def profile_bandwidth(self, process_group, pg_handler, maxbytes=(1 * G...
    method profile_ab (line 163) | def profile_ab(self):
    method search_best_logical_mesh (line 212) | def search_best_logical_mesh(self):
    method extract_alpha_beta_for_device_mesh (line 355) | def extract_alpha_beta_for_device_mesh(self):

FILE: colossalai/device/calc_pipeline_strategy.py
  function get_submesh_choices (line 6) | def get_submesh_choices(num_hosts, num_devices_per_host, mode="new"):
  function alpa_dp_impl (line 29) | def alpa_dp_impl(
  function alpa_dp (line 92) | def alpa_dp(

FILE: colossalai/device/device_mesh.py
  class ProcessGroupContainer (line 16) | class ProcessGroupContainer:
  class DeviceMesh (line 22) | class DeviceMesh:
    method __init__ (line 43) | def __init__(
    method shape (line 143) | def shape(self) -> torch.Size:
    method num_devices (line 150) | def num_devices(self) -> int:
    method logical_mesh_id (line 157) | def logical_mesh_id(self) -> torch.Tensor:
    method is_initialized (line 164) | def is_initialized(self) -> bool:
    method from_process_group (line 171) | def from_process_group(process_group: Union[ProcessGroup, List[Process...
    method get_process_group (line 230) | def get_process_group(self, axis: int, global_rank: int = None) -> Pro...
    method get_process_group_for_all_axes (line 246) | def get_process_group_for_all_axes(self, global_rank: int = None) -> D...
    method get_ranks_in_process_group (line 261) | def get_ranks_in_process_group(self, axis: int, global_rank: int = Non...
    method __deepcopy__ (line 277) | def __deepcopy__(self, memo) -> "DeviceMesh":
    method _init_global_to_logical_rank_mapping (line 290) | def _init_global_to_logical_rank_mapping(
    method init_logical_process_group (line 320) | def init_logical_process_group(self):
    method _init_ranks_in_the_same_group (line 364) | def _init_ranks_in_the_same_group(self):
    method global_rank_to_local_rank (line 383) | def global_rank_to_local_rank(self, rank: int, axis: int = None) -> Un...
    method _collate_global_ranks_in_same_process_group (line 402) | def _collate_global_ranks_in_same_process_group(self, global_rank):
    method flatten (line 481) | def flatten(self):
    method all_gather_cost (line 500) | def all_gather_cost(self, num_bytes, mesh_dim):
    method all_reduce_cost (line 504) | def all_reduce_cost(self, num_bytes, mesh_dim):
    method reduce_scatter_cost (line 512) | def reduce_scatter_cost(self, num_bytes, mesh_dim):
    method all_to_all_cost (line 518) | def all_to_all_cost(self, num_bytes, mesh_dim):

FILE: colossalai/fx/_compatibility.py
  function compatibility (line 18) | def compatibility(is_backward_compatible: bool = False) -> Callable:
  function is_compatible_with_meta (line 44) | def is_compatible_with_meta() -> bool:

FILE: colossalai/fx/_meta_regist_12.py
  function register_meta (line 18) | def register_meta(op, register_dispatcher=True):
  function meta_conv (line 38) | def meta_conv(
  function meta_conv_1 (line 165) | def meta_conv_1(
  function meta_conv_backward (line 182) | def meta_conv_backward(
  function meta_adaptive_avg_pool2d_backward (line 200) | def meta_adaptive_avg_pool2d_backward(
  function meta_cuda_rnn (line 211) | def meta_cuda_rnn(
  function meta_cudnn_rnn_backward (line 265) | def meta_cudnn_rnn_backward(
  function meta_relu (line 285) | def meta_relu(input: torch.Tensor):
  function meta_prelu (line 290) | def meta_prelu(input: torch.Tensor, weight: torch.Tensor):
  function meta_hardswish (line 295) | def meta_hardswish(input: torch.Tensor):
  function meta_hardtanh (line 300) | def meta_hardtanh(input: torch.Tensor, min, max):
  function meta_hardswish_backward (line 305) | def meta_hardswish_backward(grad_out: torch.Tensor, input: torch.Tensor):
  function meta_hardtanh_backward (line 311) | def meta_hardtanh_backward(grad_out: torch.Tensor, input: torch.Tensor, ...
  function meta_bn (line 319) | def meta_bn(input: torch.Tensor, weight, bias, running_mean, running_var...
  function meta_bn_backward (line 330) | def meta_bn_backward(
  function meta_cudnn_bn (line 350) | def meta_cudnn_bn(input: torch.Tensor, weight, bias, running_mean, runni...
  function meta_cudnn_bn_backward (line 365) | def meta_cudnn_bn_backward(
  function meta_ln (line 384) | def meta_ln(input: torch.Tensor, normalized_shape, weight, bias, eps):
  function meta_ln_backward (line 396) | def meta_ln_backward(
  function meta_gn_backward (line 407) | def meta_gn_backward(dY: torch.Tensor, input: torch.Tensor, mean, rstd, ...
  function meta_roll (line 417) | def meta_roll(input: torch.Tensor, shifts, dims):
  function meta_local_scalar_dense (line 423) | def meta_local_scalar_dense(self: torch.Tensor):
  function meta_where_self (line 429) | def meta_where_self(condition: torch.Tensor, self: torch.Tensor, other: ...
  function meta_index_Tensor (line 435) | def meta_index_Tensor(self, indices):
  function meta_embedding_dense_backward (line 530) | def meta_embedding_dense_backward(
  function meta_native_dropout_default (line 544) | def meta_native_dropout_default(input: torch.Tensor, p: float, train: bo...
  function meta_native_dropout_backward_default (line 553) | def meta_native_dropout_backward_default(grad: torch.Tensor, mask: torch...

FILE: colossalai/fx/_meta_regist_13.py
  function meta_convolution_backward (line 11) | def meta_convolution_backward(
  function meta__adaptive_avg_pool2d_backward (line 41) | def meta__adaptive_avg_pool2d_backward(grad_out, self):

FILE: colossalai/fx/codegen/activation_checkpoint_codegen.py
  function _gen_saved_tensors_hooks (line 45) | def _gen_saved_tensors_hooks():
  function _gen_save_tensors_hooks_context (line 74) | def _gen_save_tensors_hooks_context(offload_input=True) -> str:
  function _gen_save_on_cpu_context (line 90) | def _gen_save_on_cpu_context():
  function _find_input_and_output_nodes (line 99) | def _find_input_and_output_nodes(nodes: List[Node]):
  function _find_ckpt_regions (line 125) | def _find_ckpt_regions(nodes: List[Node]):
  function _find_offload_regions (line 167) | def _find_offload_regions(nodes: List[Node]):
  function _gen_ckpt_fn_def (line 212) | def _gen_ckpt_fn_def(label, free_vars: List[str]) -> str:
  function _gen_ckpt_output (line 219) | def _gen_ckpt_output(output_vars: List[str]) -> str:
  function _gen_ckpt_usage (line 226) | def _gen_ckpt_usage(label, activation_offload, input_vars, output_vars, ...
  function _end_of_ckpt (line 235) | def _end_of_ckpt(node: Node, check_idx: int) -> bool:
  function _find_nested_ckpt_regions (line 253) | def _find_nested_ckpt_regions(nodes, check_idx=0):
  function emit_ckpt_func (line 302) | def emit_ckpt_func(
  function emit_code_with_nested_activation_checkpoint (line 400) | def emit_code_with_nested_activation_checkpoint(body, ckpt_func, nodes, ...
  function emit_code_with_activation_checkpoint (line 490) | def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_no...
  class ActivationCheckpointCodeGen (line 627) | class ActivationCheckpointCodeGen(CodeGen):
    method _gen_python_code (line 628) | def _gen_python_code(self, nodes, root_module: str, namespace: _Namesp...
  function python_code_with_activation_checkpoint (line 866) | def python_code_with_activation_checkpoint(self, root_module: str, names...

FILE: colossalai/fx/graph_module.py
  class ColoGraphModule (line 25) | class ColoGraphModule(GraphModule):
    method __init__ (line 26) | def __init__(
    method bind (line 37) | def bind(self, ckpt_def, globals):
    method recompile (line 57) | def recompile(self) -> PythonCode:
    method to_folder (line 101) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ...
    method __init__ (line 183) | def __init__(self, root: Union[torch.nn.Module, Dict[str, Any]], graph...
  class ColoGraphModule (line 182) | class ColoGraphModule(GraphModule):
    method __init__ (line 26) | def __init__(
    method bind (line 37) | def bind(self, ckpt_def, globals):
    method recompile (line 57) | def recompile(self) -> PythonCode:
    method to_folder (line 101) | def to_folder(self, folder: Union[str, os.PathLike], module_name: str ...
    method __init__ (line 183) | def __init__(self, root: Union[torch.nn.Module, Dict[str, Any]], graph...

FILE: colossalai/fx/passes/adding_split_node_pass.py
  function pipe_split (line 8) | def pipe_split():
  function block_split (line 12) | def block_split():
  function construct_blocks (line 17) | def construct_blocks(gm: torch.fx.GraphModule, limit=0.01):
  function remove_blocks (line 46) | def remove_blocks(gm: torch.fx.GraphModule):
  function get_compute_costs (line 52) | def get_compute_costs(node_list):
  function do_dp_split_gpipe_impl (line 64) | def do_dp_split_gpipe_impl(num_nodes, num_stages, num_microbatches, comp...
  function do_dp_split_gpipe (line 110) | def do_dp_split_gpipe(node_list, compute_costs, num_stages: int, num_mic...
  function gpipe_dp_split_pass (line 141) | def gpipe_dp_split_pass(gm: torch.fx.GraphModule, pp_size: int, num_micr...
  function avgcompute_split_pass (line 174) | def avgcompute_split_pass(gm: torch.fx.GraphModule, pp_size: int):
  function avgnode_split_pass (line 208) | def avgnode_split_pass(gm: torch.fx.GraphModule, pp_size: int):
  function balanced_split_pass (line 232) | def balanced_split_pass(gm: torch.fx.GraphModule, pp_size: int):
  function balanced_split_pass_v2 (line 279) | def balanced_split_pass_v2(gm: torch.fx.GraphModule, pp_size: int):
  function uniform_split_pass (line 313) | def uniform_split_pass(gm: torch.fx.GraphModule, pp_size: int):
  function split_with_split_nodes_pass (line 342) | def split_with_split_nodes_pass(annotated_gm: torch.fx.GraphModule, merg...

FILE: colossalai/fx/passes/concrete_info_prop.py
  class ConcreteInfoProp (line 14) | class ConcreteInfoProp(torch.fx.Interpreter):
    method run (line 50) | def run(self, *args, initial_env: Optional[Dict[Node, Any]] = None, en...
    method run_node (line 72) | def run_node(self, n: Node) -> Any:
    method placeholder (line 101) | def placeholder(self, target: "Target", args: Tuple[Argument, ...], kw...
    method get_attr (line 122) | def get_attr(self, target: "Target", args: Tuple[Argument, ...], kwarg...
    method call_function (line 141) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ...
    method call_method (line 160) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw...
    method call_module (line 178) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw...
    method output (line 200) | def output(self, target: "Target", args: Tuple[Argument, ...], kwargs:...
    method propagate (line 218) | def propagate(self, *args):
    method summary (line 231) | def summary(self, unit: str = "MB") -> str:

FILE: colossalai/fx/passes/experimental/adding_shape_consistency_pass.py
  function apply (line 11) | def apply(*args, **kwargs):
  function solution_annotation_pass (line 16) | def solution_annotation_pass(gm: torch.fx.GraphModule, solution: List[in...
  function shape_consistency_pass (line 60) | def shape_consistency_pass(gm: torch.fx.GraphModule):

FILE: colossalai/fx/passes/meta_info_prop.py
  class TensorMetadata (line 23) | class TensorMetadata(NamedTuple):
  function _extract_tensor_metadata (line 37) | def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata:
  class MetaInfoProp (line 52) | class MetaInfoProp(torch.fx.Interpreter):
    method run_node (line 89) | def run_node(self, n: Node) -> Any:
    method placeholder (line 128) | def placeholder(self, target: "Target", args: Tuple[Argument, ...], kw...
    method get_attr (line 149) | def get_attr(self, target: "Target", args: Tuple[Argument, ...], kwarg...
    method call_function (line 168) | def call_function(self, target: "Target", args: Tuple[Argument, ...], ...
    method call_method (line 187) | def call_method(self, target: "Target", args: Tuple[Argument, ...], kw...
    method call_module (line 205) | def call_module(self, target: "Target", args: Tuple[Argument, ...], kw...
    method output (line 227) | def output(self, target: "Target", args: Tuple[Argument, ...], kwargs:...
    method propagate (line 247) | def propagate(self, *args):
    method summary (line 260) | def summary(self, unit: str = "MB") -> str:
  function metainfo_trace (line 330) | def metainfo_trace(gm: torch.fx.GraphModule, *args, verbose: bool = Fals...

FILE: colossalai/fx/passes/passes_for_gpt2_test.py
  function customized_split_pass_for_gpt2 (line 14) | def customized_split_pass_for_gpt2(gm: torch.fx.GraphModule, pp_size: in...
  function split_with_split_nodes_pass_for_gp2_test (line 47) | def split_with_split_nodes_pass_for_gp2_test(annotated_gm: torch.fx.Grap...
  function split_module_for_gpt2_test (line 161) | def split_module_for_gpt2_test(

FILE: colossalai/fx/passes/shard_1d_pass.py
  function weight_split (line 28) | def weight_split(weight: torch.nn.parameter.Parameter, dim: int, col_nor...
  function column_shard_linear_pass (line 46) | def column_shard_linear_pass(gm: torch.fx.GraphModule):
  function row_shard_linear_pass (line 61) | def row_shard_linear_pass(gm: torch.fx.GraphModule):
  function transformer_mlp_pass (line 74) | def transformer_mlp_pass(graph_module: torch.fx.GraphModule, process_gro...

FILE: colossalai/fx/passes/split_module.py
  class Partition (line 11) | class Partition:
    method __init__ (line 16) | def __init__(self, name: str):
    method __repr__ (line 27) | def __repr__(self) -> str:
  function split_module (line 40) | def split_module(

FILE: colossalai/fx/passes/utils.py
  function get_comm_size (line 8) | def get_comm_size(prev_partition, next_partition):
  function get_leaf (line 33) | def get_leaf(graph: Graph):
  function is_leaf (line 53) | def is_leaf(graph: Graph, node: Node):
  function get_top (line 57) | def get_top(graph: Graph):
  function is_top (line 81) | def is_top(graph: Graph, node: Node):
  function get_all_consumers (line 85) | def get_all_consumers(graph: Graph, node: Node):
  function assign_bfs_level_to_nodes (line 99) | def assign_bfs_level_to_nodes(graph: Graph):
  function get_node_module (line 161) | def get_node_module(node) -> torch.nn.Module:

FILE: colossalai/fx/profiler/dataflow.py
  class Phase (line 11) | class Phase(Enum):
  class GraphInfo (line 19) | class GraphInfo:
  function is_phase (line 70) | def is_phase(n: Node, phase: Phase) -> bool:
  function autograd_graph_analysis (line 76) | def autograd_graph_analysis(graph: Graph) -> GraphInfo:

FILE: colossalai/fx/profiler/experimental/profiler.py
  class GraphInfo (line 18) | class GraphInfo:
  function profile_function (line 76) | def profile_function(target: "Target") -> Callable:
  function profile_method (line 115) | def profile_method(target: "Target") -> Callable:
  function profile_module (line 144) | def profile_module(module: torch.nn.Module) -> Callable:

FILE: colossalai/fx/profiler/experimental/profiler_function/activation_function.py
  function torch_nn_func_non_linear_act (line 32) | def torch_nn_func_non_linear_act(input: torch.Tensor, inplace: bool = Fa...

FILE: colossalai/fx/profiler/experimental/profiler_function/arithmetic.py
  function _elementwise_flops_compute (line 13) | def _elementwise_flops_compute(input, other):
  function torch_add_like_ops (line 53) | def torch_add_like_ops(input: Any, other: Any, *, out: Optional[torch.Te...
  function torch_elementwise_op (line 58) | def torch_elementwise_op(input: torch.Tensor, *, out: Optional[torch.Ten...
  function torch_matmul (line 67) | def torch_matmul(input: torch.Tensor, other: torch.Tensor, *, out: Optio...
  function torch_bmm (line 74) | def torch_bmm(input: torch.Tensor, other: torch.Tensor, *, out: Optional...
  function torch_var_mean (line 81) | def torch_var_mean(

FILE: colossalai/fx/profiler/experimental/profiler_function/embedding.py
  function torch_nn_functional_embedding (line 9) | def torch_nn_functional_embedding(

FILE: colossalai/fx/profiler/experimental/profiler_function/linear.py
  function torch_nn_linear (line 9) | def torch_nn_linear(input: torch.Tensor, weight: torch.Tensor, bias: tor...

FILE: colossalai/fx/profiler/experimental/profiler_function/normalization.py
  function torch_nn_func_instancenorm (line 9) | def torch_nn_func_instancenorm(
  function torch_nn_func_groupnorm (line 26) | def torch_nn_func_groupnorm(
  function torch_nn_func_layernorm (line 40) | def torch_nn_func_layernorm(
  function torch_nn_func_batchnorm (line 54) | def torch_nn_func_batchnorm(

FILE: colossalai/fx/profiler/experimental/profiler_function/pooling.py
  function torch_nn_func_pooling (line 20) | def torch_nn_func_pooling(input: torch.Tensor, *args, **kwargs) -> Tuple...

FILE: colossalai/fx/profiler/experimental/profiler_function/python_ops.py
  function operator_getitem (line 8) | def operator_getitem(a: Any, b: Any) -> Tuple[int, int]:
  function python_getattr (line 15) | def python_getattr(a: Any, b: Any) -> Tuple[int, int]:

FILE: colossalai/fx/profiler/experimental/profiler_function/torch_ops.py
  function torch_zero_flops_op (line 32) | def torch_zero_flops_op(*args, **kwargs) -> Tuple[int, int]:
  function torch_where (line 39) | def torch_where(condition: torch.Tensor, x: Any, y: Any) -> Tuple[int, i...
  function torch_max (line 48) | def torch_max(

FILE: colossalai/fx/profiler/experimental/profiler_module/activation_function.py
  function torch_nn_non_linear_act (line 32) | def torch_nn_non_linear_act(self: torch.nn.Module, input: torch.Tensor) ...

FILE: colossalai/fx/profiler/experimental/profiler_module/attention.py
  function torch_nn_msa (line 10) | def torch_nn_msa(

FILE: colossalai/fx/profiler/experimental/profiler_module/convolution.py
  function torch_nn_conv1d (line 15) | def torch_nn_conv1d(self: torch.nn.Conv1d, input: torch.Tensor) -> Tuple...
  function torch_nn_conv2d (line 37) | def torch_nn_conv2d(self: torch.nn.Conv2d, input: torch.Tensor) -> Tuple...
  function torch_nn_conv3d (line 63) | def torch_nn_conv3d(self: torch.nn.Conv3d, input: torch.Tensor) -> Tuple...
  function torch_nn_convtranspose1d (line 93) | def torch_nn_convtranspose1d(self: torch.nn.ConvTranspose1d, input: torc...
  function torch_nn_convtranspose2d (line 121) | def torch_nn_convtranspose2d(self: torch.nn.ConvTranspose2d, input: torc...
  function torch_nn_convtranspose3d (line 155) | def torch_nn_convtranspose3d(self: torch.nn.ConvTranspose3d, input: torc...

FILE: colossalai/fx/profiler/experimental/profiler_module/dropout.py
  function torch_nn_dropout (line 9) | def torch_nn_dropout(self: torch.nn.Module, input: torch.Tensor) -> Tupl...

FILE: colossalai/fx/profiler/experimental/profiler_module/embedding.py
  function torch_nn_embedding (line 9) | def torch_nn_embedding(self: torch.nn.Embedding, input: torch.Tensor) ->...

FILE: colossalai/fx/profiler/experimental/profiler_module/linear.py
  function torch_nn_linear (line 10) | def torch_nn_linear(self: torch.nn.Linear, input: torch.Tensor) -> Tuple...

FILE: colossalai/fx/profiler/experimental/profiler_module/normalization.py
  function torch_nn_normalize (line 19) | def torch_nn_normalize(

FILE: colossalai/fx/profiler/experimental/profiler_module/pooling.py
  function torch_nn_pooling (line 20) | def torch_nn_pooling(self: torch.nn.Module, input: torch.Tensor) -> Tupl...

FILE: colossalai/fx/profiler/experimental/profiler_module/rnn.py
  function _rnn_flops (line 10) | def _rnn_flops(
  function torch_nn_rnn (line 44) | def torch_nn_rnn(self: torch.nn.RNNBase, input: torch.Tensor, hx: Option...
  function torch_nn_rnn (line 66) | def torch_nn_rnn(self: torch.nn.RNNCellBase, input: torch.Tensor, hx: Op...

FILE: colossalai/fx/profiler/experimental/profiler_module/torch_op.py
  function torch_nn_flatten (line 9) | def torch_nn_flatten(self: torch.nn.Flatten, input: torch.Tensor) -> Tup...

FILE: colossalai/fx/profiler/experimental/registry.py
  class ProfilerRegistry (line 1) | class ProfilerRegistry:
    method __init__ (line 2) | def __init__(self, name):
    method register (line 6) | def register(self, source):
    method get (line 13) | def get(self, source):
    method has (line 18) | def has(self, source):

FILE: colossalai/fx/profiler/experimental/shard_utils.py
  function calculate_fwd_in (line 11) | def calculate_fwd_in(n: Node) -> bool:
  function calculate_fwd_tmp (line 24) | def calculate_fwd_tmp(n: Node) -> int:
  function calculate_fwd_out (line 37) | def calculate_fwd_out(n: Node) -> int:

FILE: colossalai/fx/profiler/memory_utils.py
  function activation_size (line 12) | def activation_size(out: Union[torch.Tensor, Dict, List, Tuple, int]) ->...
  function parameter_size (line 37) | def parameter_size(mod: torch.nn.Module) -> int:
  function is_inplace (line 52) | def is_inplace(n: Node):

FILE: colossalai/fx/profiler/opcount.py
  function matmul_flop_jit (line 15) | def matmul_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function addmm_flop_jit (line 49) | def addmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function linear_flop_jit (line 66) | def linear_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function bmm_flop_jit (line 80) | def bmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function baddbmm_flop_jit (line 94) | def baddbmm_flop_jit(inputs: List[Any], outputs: List[Any]) -> Number:
  function conv_flop_count (line 107) | def conv_flop_count(
  function conv_flop_jit (line 132) | def conv_flop_jit(inputs: List[Any], outputs: List[Any]):
  function transpose_shape (line 143) | def transpose_shape(shape):
  function conv_backward_flop_jit (line 147) | def conv_backward_flop_jit(inputs: List[Any], outputs: List[Any]):
  function norm_flop_counter (line 163) | def norm_flop_counter(affine_arg_index: int, input_arg_index: int) -> Ca...
  function batchnorm_flop_jit (line 189) | def batchnorm_flop_jit(inputs: List[Any], outputs: List[Any], training: ...
  function elementwise_flop_counter (line 200) | def elementwise_flop_counter(input_scale: float = 1, output_scale: float...
  function zero_flop_jit (line 222) | def zero_flop_jit(*args):

FILE: colossalai/fx/profiler/profiler.py
  function normalize_tuple (line 28) | def normalize_tuple(x):
  function is_autogradable (line 34) | def is_autogradable(x):
  function detach_variables (line 38) | def detach_variables(x):
  function _profile_concrete (line 48) | def _profile_concrete(target: Callable, *args, **kwargs) -> Tuple[Tuple[...
  function _profile_meta (line 148) | def _profile_meta(target: Callable, *args, **kwargs) -> Tuple[Tuple[Any,...
  function profile_function (line 289) | def profile_function(target: "Target", device: str = "meta") -> Callable:
  function profile_method (line 345) | def profile_method(target: "Target", device: str = "meta") -> Callable:
  function profile_module (line 364) | def profile_module(module: torch.nn.Module, device: str = "meta") -> Cal...

FILE: colossalai/fx/profiler/shard_utils.py
  function calculate_fwd_in (line 14) | def calculate_fwd_in(n: Node) -> int:
  function calculate_fwd_tmp (line 28) | def calculate_fwd_tmp(n: Node) -> int:
  function calculate_fwd_out (line 74) | def calculate_fwd_out(n: Node) -> int:
  function calculate_fwd_time (line 95) | def calculate_fwd_time(n: Node) -> float:
  function calculate_bwd_time (line 106) | def calculate_bwd_time(n: Node) -> float:

FILE: colossalai/fx/profiler/tensor.py
  function set_data_ptr (line 13) | def set_data_ptr(x):
  class MetaTensor (line 21) | class MetaTensor(torch.Tensor):
    method __new__ (line 30) | def __new__(cls, elem, fake_device=None):
    method __repr__ (line 57) | def __repr__(self):
    method __torch_dispatch__ (line 63) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
    method to (line 102) | def to(self, *args, **kwargs) -> torch.Tensor:
    method cpu (line 130) | def cpu(self, *args, **kwargs):
    method cuda (line 135) | def cuda(self, device=None, non_blocking=False):

FILE: colossalai/fx/proxy.py
  class ColoProxy (line 11) | class ColoProxy(Proxy):
    method __init__ (line 24) | def __init__(self, *args, **kwargs):
    method meta_data (line 29) | def meta_data(self):
    method meta_data (line 33) | def meta_data(self, data: Any):
    method has_meta_data (line 37) | def has_meta_data(self):
    method _assert_meta_data_is_tensor (line 40) | def _assert_meta_data_is_tensor(self):
    method _assert_has_meta_data (line 45) | def _assert_has_meta_data(self):
    method __len__ (line 48) | def __len__(self):
    method __int__ (line 52) | def __int__(self):
    method __float__ (line 56) | def __float__(self):
    method __bool__ (line 60) | def __bool__(self):
    method __getattr__ (line 64) | def __getattr__(self, k):
    method __contains__ (line 67) | def __contains__(self, key):
  function extract_meta (line 76) | def extract_meta(*args, **kwargs):
  class ColoAttribute (line 93) | class ColoAttribute(ColoProxy):
    method __init__ (line 94) | def __init__(self, root, attr: str):
    method node (line 101) | def node(self):
    method __call__ (line 113) | def __call__(self, *args, **kwargs):

FILE: colossalai/fx/tracer/_meta_trace.py
  function normalize_tuple (line 6) | def normalize_tuple(x):
  function is_autogradable (line 12) | def is_autogradable(x):
  function meta_trace (line 16) | def meta_trace(module: torch.nn.Module, fake_device=None, *args, **kwarg...

FILE: colossalai/fx/tracer/_symbolic_trace.py
  function symbolic_trace (line 12) | def symbolic_trace(

FILE: colossalai/fx/tracer/_tracer_utils.py
  function is_element_in_list (line 11) | def is_element_in_list(elements: Union[List[Any], Any], list_: List[Any]):
  function extract_meta (line 23) | def extract_meta(*args, **kwargs):
  function compute_meta_data_for_functions_proxy (line 37) | def compute_meta_data_for_functions_proxy(target, args, kwargs):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addbmm.py
  class Addbmm (line 9) | class Addbmm(LinearBasedBiasFunc):
    method extract_kwargs_from_origin_func (line 10) | def extract_kwargs_from_origin_func(self):
    method create_non_bias_func_proxy (line 18) | def create_non_bias_func_proxy(self, input_proxy, other_proxy):
    method insert_sum_node (line 33) | def insert_sum_node(self, input_proxy, sum_dims=0):
    method generate (line 44) | def generate(self):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addmm.py
  class Addmm (line 9) | class Addmm(LinearBasedBiasFunc):
    method extract_kwargs_from_origin_func (line 10) | def extract_kwargs_from_origin_func(self):
    method transpose_other_operand_for_linear (line 18) | def transpose_other_operand_for_linear(self, other_proxy):
    method generate (line 37) | def generate(self):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/bias_addition_function.py
  class BiasAdditionFunc (line 8) | class BiasAdditionFunc(ABC):
    method __init__ (line 14) | def __init__(self, tracer, target, args, kwargs, substitute_func):
    method extract_kwargs_from_origin_func (line 22) | def extract_kwargs_from_origin_func(self):
    method generate (line 34) | def generate(self):
    method create_mul_node (line 53) | def create_mul_node(self, input_proxy, coefficent):
  class LinearBasedBiasFunc (line 71) | class LinearBasedBiasFunc(BiasAdditionFunc):
    method create_non_bias_func_proxy (line 77) | def create_non_bias_func_proxy(self, input_proxy, other_proxy):
    method create_bias_addition_proxy (line 92) | def create_bias_addition_proxy(self, non_bias_func_proxy, bias_proxy):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/linear.py
  class Linear (line 8) | class Linear(LinearBasedBiasFunc):
    method extract_kwargs_from_origin_func (line 9) | def extract_kwargs_from_origin_func(self):
    method generate (line 16) | def generate(self):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/bias_addition_module.py
  class BiasAdditionModule (line 8) | class BiasAdditionModule(ABC):
    method __init__ (line 14) | def __init__(self, tracer, target, args, kwargs, substitute_func):
    method _create_weight_proxy (line 23) | def _create_weight_proxy(self):
    method _create_bias_proxy (line 35) | def _create_bias_proxy(self):
    method extract_kwargs_from_mod (line 48) | def extract_kwargs_from_mod(self):
    method create_non_bias_func_proxy (line 58) | def create_non_bias_func_proxy(self, input_proxy=None):
    method create_bias_addition_proxy (line 72) | def create_bias_addition_proxy(self, non_bias_func_proxy, bias_proxy):
    method generate (line 84) | def generate(self):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/conv.py
  class BiasAdditionConv (line 11) | class BiasAdditionConv(BiasAdditionModule):
    method extract_kwargs_from_mod (line 12) | def extract_kwargs_from_mod(self):
    method create_bias_reshape_proxy (line 35) | def create_bias_reshape_proxy(self, dimensions):
    method generate (line 50) | def generate(self):

FILE: colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/linear.py
  class BiasAdditionLinear (line 8) | class BiasAdditionLinear(BiasAdditionModule):
    method extract_kwargs_from_mod (line 9) | def extract_kwargs_from_mod(self):
    method generate (line 12) | def generate(self):

FILE: colossalai/fx/tracer/experimental.py
  function _truncate_suffix (line 53) | def _truncate_suffix(s: str):
  function default_device (line 59) | def default_device():
  class ColoProxy (line 64) | class ColoProxy(Proxy):
    method __init__ (line 65) | def __init__(self, *args, data=None, **kwargs):
    method meta_data (line 70) | def meta_data(self):
    method meta_data (line 74) | def meta_data(self, args):
    method __torch_function__ (line 79) | def __torch_function__(cls, orig_method, types, args=(), kwargs=None):
    method from_torch_proxy (line 88) | def from_torch_proxy(cls, proxy: Proxy):
    method __repr__ (line 91) | def __repr__(self):
    method __len__ (line 94) | def __len__(self):
    method __int__ (line 97) | def __int__(self):
    method __index__ (line 100) | def __index__(self):
    method __float__ (line 106) | def __float__(self):
    method __bool__ (line 109) | def __bool__(self):
    method __getattr__ (line 112) | def __getattr__(self, k):
    method __setitem__ (line 115) | def __setitem__(self, key, value):
    method __contains__ (line 120) | def __contains__(self, key):
    method __isinstancecheck__ (line 128) | def __isinstancecheck__(self, type):
    method shape (line 132) | def shape(self):
    method ndim (line 136) | def ndim(self):
    method device (line 140) | def device(self):
    method dtype (line 146) | def dtype(self):
    method to (line 151) | def to(self, *args, **kwargs):
    method cpu (line 154) | def cpu(self, *args, **kwargs):
    method cuda (line 157) | def cuda(self, *args, **kwargs):
  class ColoAttribute (line 162) | class ColoAttribute(ColoProxy):
    method __init__ (line 163) | def __init__(self, root, attr: str, data=None):
    method node (line 171) | def node(self):
    method __call__ (line 178) | def __call__(self, *args, **kwargs):
    method __repr__ (line 181) | def __repr__(self):
  class ColoTracer (line 186) | class ColoTracer(Tracer):
    method __init__ (line 187) | def __init__(self, trace_act_ckpt: bool = False, *args, **kwargs):
    method proxy (line 198) | def proxy(self, node: Node) -> "ColoProxy":
    method create_proxy (line 201) | def create_proxy(
    method create_node (line 252) | def create_node(self, *args, **kwargs) -> Node:
    method trace (line 260) | def trace(
    method trace_activation_checkpoint (line 306) | def trace_activation_checkpoint(self, enabled: bool):
    method _post_check (line 334) | def _post_check(self, non_concrete_arg_names: Set[str]):
    method _module_getattr (line 366) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache):
  function symbolic_trace (line 402) | def symbolic_trace(
  class _TorchTensorOverride (line 429) | class _TorchTensorOverride(object):
    method __init__ (line 430) | def __init__(self, tracer: Tracer):
    method __enter__ (line 434) | def __enter__(self):
    method __exit__ (line 463) | def __exit__(self, exc_type, exc_val, exc_tb):
  function meta_prop_pass (line 468) | def meta_prop_pass(
  function _meta_data_computing (line 497) | def _meta_data_computing(meta_args, concrete_args, root, kind, target, a...
  function _meta_data_computing_v0 (line 525) | def _meta_data_computing_v0(meta_args, root, kind, target, args, kwargs):
  function bias_addition_pass (line 595) | def bias_addition_pass(gm: ColoGraphModule, root_model: torch.nn.Module,...

FILE: colossalai/fx/tracer/meta_patch/patched_function/activation_function.py
  function torch_nn_func_relu (line 7) | def torch_nn_func_relu(input, inplace=False):

FILE: colossalai/fx/tracer/meta_patch/patched_function/arithmetic.py
  function torch_matmul (line 8) | def torch_matmul(input, other, *, out=None):
  function torch_abs (line 46) | def torch_abs(input, *, out=None):
  function torch_bmm (line 52) | def torch_bmm(input, mat2, *, out=None):
  function torch_linear (line 61) | def torch_linear(input, mat2, bias=None, *, out=None):
  function torch_addbmm (line 72) | def torch_addbmm(input, mat1, mat2, *, beta=1, alpha=1, out=None):
  function torch_addmm (line 82) | def torch_addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None):
  function torch_var_mean (line 91) | def torch_var_mean(input, dim, unbiased=True, keepdim=False, *, out=None):

FILE: colossalai/fx/tracer/meta_patch/patched_function/convolution.py
  function _ntuple (line 10) | def _ntuple(n, name="parse"):
  function _extract_kwargs (line 25) | def _extract_kwargs(kwargs):
  function torch_nn_functional_conv1d (line 48) | def torch_nn_functional_conv1d(input, weight, **kwargs):
  function torch_nn_functional_conv2d (line 67) | def torch_nn_functional_conv2d(input, weight, **kwargs):
  function torch_nn_functional_conv3d (line 88) | def torch_nn_functional_conv3d(input, weight, **kwargs):
  function torch_nn_functional_convtranspose1d (line 111) | def torch_nn_functional_convtranspose1d(input, weight, **kwargs):
  function torch_nn_functional_convtranspose2d (line 133) | def torch_nn_functional_convtranspose2d(input, weight, **kwargs):
  function torch_nn_functional_convtranspose3d (line 159) | def torch_nn_functional_convtranspose3d(input, weight, **kwargs):

FILE: colossalai/fx/tracer/meta_patch/patched_function/embedding.py
  function torch_nn_functional_embedding (line 7) | def torch_nn_functional_embedding(

FILE: colossalai/fx/tracer/meta_patch/patched_function/normalization.py
  function torch_nn_func_layernorm (line 7) | def torch_nn_func_layernorm(input, normalized_shape, weight=None, bias=N...
  function torch_nn_func_batchnorm (line 12) | def torch_nn_func_batchnorm(

FILE: colossalai/fx/tracer/meta_patch/patched_function/python_ops.py
  function operator_getitem (line 11) | def operator_getitem(a, b):

FILE: colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py
  function torch_arange (line 7) | def torch_arange(*args, **kwargs):
  function torch_finfo (line 29) | def torch_finfo(*args):
  function torch_where (line 34) | def torch_where(condition, x, y):
  function torch_tensor_repeat (line 41) | def torch_tensor_repeat(self, *sizes):
  function torch_index_select (line 49) | def torch_index_select(input, dim, index, *, out=None):
  function torch_tensor_index_select (line 56) | def torch_tensor_index_select(self, dim, index):
  function torch_squeeze (line 61) | def torch_squeeze(input, dim=None):
  function torch_tensor_squeeze (line 79) | def torch_tensor_squeeze(self, dim=None):
  function torch_unsqueeze (line 84) | def torch_unsqueeze(input, dim):
  function torch_tensor_unsqueeze (line 93) | def torch_tensor_unsqueeze(self, dim):
  function torch_cat (line 98) | def torch_cat(tensors, dim=None, axis=None, *, out=None):
  function torch_repeat_interleave (line 113) | def torch_repeat_interleave(input, repeats, dim=None, output_size=None):
  function torch_tensor_repeat_interleave (line 130) | def torch_tensor_repeat_interleave(self, repeats, dim=None, *, output_si...
  function torch_roll (line 135) | def torch_roll(input, shifts, dims=None):
  function torch_full (line 140) | def torch_full(size, fill_value, *, out=None, dtype=None, layout=torch.s...
  function torch_max (line 146) | def torch_max(input, dim=None, keepdim=False, *, out=None):
  function torch_tensor_cpu (line 169) | def torch_tensor_cpu(input):
  function torch_tensor_cuda (line 174) | def torch_tensor_cuda(input, *args, **kwargs):

FILE: colossalai/fx/tracer/meta_patch/patched_module/activation_function.py
  function torch_nn_non_linear_act (line 12) | def torch_nn_non_linear_act(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/convolution.py
  function torch_nn_conv1d (line 9) | def torch_nn_conv1d(self, input):
  function torch_nn_conv2d (line 25) | def torch_nn_conv2d(self, input):
  function torch_nn_conv3d (line 45) | def torch_nn_conv3d(self, input):
  function torch_nn_convtranspose1d (line 69) | def torch_nn_convtranspose1d(self, input):
  function torch_nn_convtranspose2d (line 89) | def torch_nn_convtranspose2d(self, input):
  function torch_nn_convtranspose3d (line 117) | def torch_nn_convtranspose3d(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/embedding.py
  function torch_nn_embedding (line 7) | def torch_nn_embedding(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/linear.py
  function torch_nn_linear (line 7) | def torch_nn_linear(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/normalization.py
  function torch_nn_normalize (line 11) | def torch_nn_normalize(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/pooling.py
  function torch_nn_avgpool1d (line 9) | def torch_nn_avgpool1d(self, input):
  function torch_nn_avgpool2d (line 32) | def torch_nn_avgpool2d(self, input):
  function torch_nn_avgpool3d (line 59) | def torch_nn_avgpool3d(self, input):
  function torch_nn_maxpool1d (line 88) | def torch_nn_maxpool1d(self, input):
  function torch_nn_maxpool2d (line 112) | def torch_nn_maxpool2d(self, input):
  function torch_nn_maxpool3d (line 140) | def torch_nn_maxpool3d(self, input):
  function torch_nn_adapative_pooling_1d (line 171) | def torch_nn_adapative_pooling_1d(self, input):
  function torch_nn_adapative_pooling_2d (line 183) | def torch_nn_adapative_pooling_2d(self, input):
  function torch_nn_adapative_pooling_3d (line 195) | def torch_nn_adapative_pooling_3d(self, input):

FILE: colossalai/fx/tracer/meta_patch/patched_module/rnn.py
  function torch_nn_rnn (line 8) | def torch_nn_rnn(self, input, hx):

FILE: colossalai/fx/tracer/registry.py
  class PatchRegistry (line 1) | class PatchRegistry:
    method __init__ (line 2) | def __init__(self, name):
    method register (line 6) | def register(self, source):
    method get (line 13) | def get(self, source):
    method has (line 18) | def has(self, source):

FILE: colossalai/fx/tracer/tracer.py
  class TracerType (line 35) | class TracerType(enum.Enum):
  class ColoTracer (line 40) | class ColoTracer(Tracer):
    method __init__ (line 67) | def __init__(self, trace_act_ckpt: bool = False, *args, **kwargs):
    method create_proxy (line 83) | def create_proxy(self, kind, target, args, kwargs, name=None, type_exp...
    method _module_getattr (line 152) | def _module_getattr(self, attr, attr_val, parameter_proxy_cache):
    method call_module (line 189) | def call_module(self, m, forward, args, kwargs):
    method proxy (line 202) | def proxy(self, node) -> Proxy:
    method _configure_tracer_type (line 208) | def _configure_tracer_type(self, tracer_type: TracerType):
    method _meta_data_computing (line 218) | def _meta_data_computing(self, kind, target, args, kwargs):
    method trace (line 317) | def trace(
    method trace_activation_checkpoint (line 448) | def trace_activation_checkpoint(self, enabled: bool):
    method create_node (line 476) | def create_node(self, *args, **kwargs) -> Node:
  function wrap_tensor_constructor_method (line 485) | def wrap_tensor_constructor_method(target):
  function _scope (line 527) | def _scope(method):
  function _define_reflectable (line 545) | def _define_reflectable(orig_method_name):

FILE: colossalai/inference/batch_bucket.py
  class BatchBucket (line 9) | class BatchBucket:
    method __init__ (line 23) | def __init__(
    method is_empty (line 68) | def is_empty(self):
    method current_batch_size (line 72) | def current_batch_size(self):
    method __len__ (line 75) | def __len__(self):
    method available_batch_size (line 79) | def available_batch_size(self):
    method block_tables (line 83) | def block_tables(self):
    method seq_lengths (line 87) | def seq_lengths(self):
    method seqs_ids (line 91) | def seqs_ids(self):
    method seqs_li (line 95) | def seqs_li(self):
    method is_compact (line 99) | def is_compact(self):
    method use_spec_dec (line 108) | def use_spec_dec(self) -> bool:
    method num_tokens_to_verify (line 112) | def num_tokens_to_verify(self) -> int:
    method batch_token_ids (line 116) | def batch_token_ids(self) -> List[List[int]]:
    method streamingllm_update_batch (line 122) | def streamingllm_update_batch(self, start_token_size: int, generated_t...
    method set_use_spec_dec (line 149) | def set_use_spec_dec(self, num_tokens_to_verify: int = 5) -> None:
    method reset_use_spec_dec (line 157) | def reset_use_spec_dec(self) -> None:
    method _make_compact (line 162) | def _make_compact(self) -> None:
    method add_seq (line 184) | def add_seq(
    method add_seqs (line 219) | def add_seqs(
    method pop_seq_update_batch (line 271) | def pop_seq_update_batch(
    method pop_seqs (line 324) | def pop_seqs(
    method pop_n_seqs (line 347) | def pop_n_seqs(
    method pop_finished (line 378) | def pop_finished(
    method append_batch_tokens (line 407) | def append_batch_tokens(self, tokens: torch.Tensor) -> None:
    method revoke_batch_tokens (line 422) | def revoke_batch_tokens(self, n_tokens: int, n_seqs: int = 1) -> None:
    method clear (line 440) | def clear(self, free_block_tables_fn: Optional[Callable[[torch.Tensor]...
    method merge (line 455) | def merge(self, other: "BatchBucket") -> List[int]:
    method is_prompts (line 488) | def is_prompts(self) -> bool:
    method get_1D_inputs_spec_dec (line 495) | def get_1D_inputs_spec_dec(self, n: int) -> torch.Tensor:
    method get_1D_inputs (line 511) | def get_1D_inputs(self) -> torch.Tensor:
    method get_block_table_tensor (line 542) | def get_block_table_tensor(self) -> torch.Tensor:
    method get_sequence_lengths (line 548) | def get_sequence_lengths(self) -> torch.Tensor:
    method fd_inter_tensor (line 555) | def fd_inter_tensor(self) -> None:
    method __repr__ (line 559) | def __repr__(self) -> str:

FILE: colossalai/inference/config.py
  class RPC_PARAM (line 35) | class RPC_PARAM(ABC):
    method to_rpc_param (line 43) | def to_rpc_param(self):
    method from_rpc_param (line 48) | def from_rpc_param():
  class InputMetaData (line 53) | class InputMetaData(RPC_PARAM):
    method to_rpc_param (line 90) | def to_rpc_param(self) -> Dict[str, any]:
    method from_rpc_param (line 108) | def from_rpc_param(rpc_dict: Dict[str, any]) -> "InputMetaData":
    method __repr__ (line 135) | def __repr__(self) -> str:
  class InferenceConfig (line 151) | class InferenceConfig(RPC_PARAM):
    method __post_init__ (line 254) | def __post_init__(self):
    method _verify_config (line 258) | def _verify_config(self) -> None:
    method to_generation_config (line 317) | def to_generation_config(self, model_config) -> GenerationConfig:
    method to_model_shard_inference_config (line 334) | def to_model_shard_inference_config(self) -> "ModelShardInferenceConfig":
    method to_rpc_param (line 345) | def to_rpc_param(self) -> dict:
    method from_rpc_param (line 363) | def from_rpc_param(rpc_dict: dict) -> "InferenceConfig":
    method from_dict (line 383) | def from_dict(cls, config_dict: Dict[str, Any]) -> "InferenceConfig":
  class ModelShardInferenceConfig (line 399) | class ModelShardInferenceConfig:
  class DiffusionGenerationConfig (line 418) | class DiffusionGenerationConfig:
    method to_dict (line 449) | def to_dict(self) -> Dict[str, Any]:
    method from_kwargs (line 459) | def from_kwargs(cls, **kwargs) -> "DiffusionGenerationConfig":

FILE: colossalai/inference/core/async_engine.py
  function _raise_exception_on_finish (line 14) | def _raise_exception_on_finish(task: asyncio.Task, request_tracker: "Tra...
  class RequstStream (line 29) | class RequstStream:
    method __init__ (line 39) | def __init__(self, request_id: int) -> None:
    method set_result (line 43) | def set_result(self, result) -> None:
    method get_result (line 48) | async def get_result(self):
    method finished (line 53) | def finished(self) -> bool:
  class Tracer (line 58) | class Tracer:
    method __init__ (line 67) | def __init__(self) -> None:
    method __contains__ (line 73) | def __contains__(self, item):
    method init_event (line 76) | def init_event(self):
    method propagate_exception (line 79) | def propagate_exception(self, exc: Exception, request_id: Optional[int...
    method process_finished_request (line 89) | def process_finished_request(self, finished_request) -> None:
    method add_request (line 98) | def add_request(self, request_id: int, **engine_add_request_kwargs) ->...
    method abort_request (line 113) | def abort_request(self, request_id: int, *, verbose: bool = False) -> ...
    method get_new_requests (line 127) | def get_new_requests(self):
    method wait_for_new_requests (line 151) | async def wait_for_new_requests(self):
  class _AsyncInferenceEngine (line 155) | class _AsyncInferenceEngine(InferenceEngine):
    method async_step (line 161) | async def async_step(self) -> List[str]:
    method add_single_request (line 206) | def add_single_request(self, request_id: int, prompt: str, prompt_toke...
  class AsyncInferenceEngine (line 212) | class AsyncInferenceEngine:
    method __init__ (line 225) | def __init__(self, start_engine_loop: bool = True, **kwargs):
    method background_loop_status (line 234) | def background_loop_status(self):
    method start_background_loop (line 237) | def start_background_loop(self):
    method _init_engine (line 249) | def _init_engine(self, **kwargs):
    method step (line 252) | async def step(self):
    method _engine_abort (line 267) | async def _engine_abort(self, request_ids: Iterable[int]):
    method abort (line 270) | async def abort(self, request_id: int):
    method _abort (line 278) | def _abort(self, request_id: int):
    method run_engine_loop (line 281) | async def run_engine_loop(self):
    method add_request (line 289) | async def add_request(
    method generate (line 312) | async def generate(

FILE: colossalai/inference/core/base_engine.py
  class BaseEngine (line 13) | class BaseEngine(ABC):
    method __init__ (line 15) | def __init__(self, model_or_path, inference_config=None, verbose=False...
    method init_model (line 19) | def init_model(self, model_or_path, model_policy=None, model_shard_inf...
    method generate (line 25) | def generate(self, request_ids=None, prompts=None, generation_config=N...
    method add_request (line 31) | def add_request(self, prompts, request_ids=None, **kwargs):
    method step (line 37) | def step(self):
    method _verify_args (line 43) | def _verify_args(self):
    method capture_model (line 49) | def capture_model(self):
    method _shardformer (line 55) | def _shardformer(

FILE: colossalai/inference/core/diffusion_engine.py
  class DiffusionEngine (line 27) | class DiffusionEngine(BaseEngine):
    method __init__ (line 28) | def __init__(
    method _verify_args (line 53) | def _verify_args(self) -> None:
    method init_model (line 56) | def init_model(
    method generate (line 128) | def generate(
    method add_request (line 161) | def add_request(
    method step (line 189) | def step(self) -> List[PIL.Image.Image]:

FILE: colossalai/inference/core/engine.py
  class InferenceEngine (line 16) | class InferenceEngine:
    method __init__ (line 28) | def __init__(
    method _verify_args (line 64) | def _verify_args(self) -> None:
    method generate (line 69) | def generate(
    method add_request (line 87) | def add_request(
    method step (line 107) | def step(self):
    method __getattr__ (line 111) | def __getattr__(self, name):
    method __setattr__ (line 126) | def __setattr__(self, name, value):

FILE: colossalai/inference/core/llm_engine.py
  class LLMEngine (line 46) | class LLMEngine(BaseEngine):
    method __init__ (line 58) | def __init__(
    method init_model (line 107) | def init_model(
    method capture_model (line 213) | def capture_model(self, k_cache: List[torch.Tensor], v_cache: List[tor...
    method _verify_args (line 285) | def _verify_args(self) -> None:
    method enable_spec_dec (line 301) | def enable_spec_dec(
    method disable_spec_dec (line 369) | def disable_spec_dec(self) -> None:
    method clear_spec_dec (line 377) | def clear_spec_dec(self) -> None:
    method steps_spec_dec (line 388) | def steps_spec_dec(self) -> List[Sequence]:
    method generate (line 496) | def generate(
    method has_prompt_template (line 561) | def has_prompt_template(self) -> bool:
    method format_prompt (line 565) | def format_prompt(self, prompts: Union[List[str], str]) -> Union[List[...
    method add_request (line 580) | def add_request(
    method prepare_input (line 671) | def prepare_input(self, batch: BatchBucket) -> Tuple[torch.Tensor, tor...
    method step (line 719) | def step(self) -> List[str]:

FILE: colossalai/inference/core/plugin.py
  class InferCheckpoint_io (line 21) | class InferCheckpoint_io(GeneralCheckpointIO):
    method __init__ (line 27) | def __init__(
    method load_sharded_model (line 35) | def load_sharded_model(self, model: ModelWrapper, checkpoint_index_fil...
    method save_sharded_model (line 131) | def save_sharded_model(

FILE: colossalai/inference/core/request_handler.py
  class RunningList (line 19) | class RunningList:
    method __init__ (line 30) | def __init__(self, prefill_ratio: int, prefill: List[Sequence] = None)...
    method decoding (line 38) | def decoding(self):
    method prefill (line 42) | def prefill(self):
    method prefill_seq_num (line 46) | def prefill_seq_num(self):
    method decoding_seq_num (line 50) | def decoding_seq_num(self):
    method total_seq_num (line 54) | def total_seq_num(self):
    method append (line 57) | def append(self, seq: Sequence):
    method extend (line 63) | def extend(self, seqs: List[Sequence]):
    method find_seq (line 67) | def find_seq(self, request_id) -> Union[Sequence, None]:
    method remove (line 75) | def remove(self, seq: Sequence) -> None:
    method ready_for_prefill (line 83) | def ready_for_prefill(self):
    method is_empty (line 88) | def is_empty(self):
    method mark_prefill_running (line 91) | def mark_prefill_running(self) -> None:
    method move_prefill_to_decoding (line 95) | def move_prefill_to_decoding(self, seq_ids: List[int]) -> None:
  class NaiveRequestHandler (line 101) | class NaiveRequestHandler:
    method __init__ (line 102) | def __init__(self) -> None:
    method _has_waiting (line 106) | def _has_waiting(self) -> bool:
    method _has_running (line 109) | def _has_running(self) -> bool:
    method check_unfinished_reqs (line 112) | def check_unfinished_reqs(self):
    method add_sequence (line 115) | def add_sequence(self, seq: DiffusionSequence):
    method _find_sequence (line 122) | def _find_sequence(self, request_id: int) -> DiffusionSequence:
    method schedule (line 132) | def schedule(self):
  class RequestHandler (line 140) | class RequestHandler(NaiveRequestHandler):
    method __init__ (line 151) | def __init__(self, inference_config: InferenceConfig, model_config: Pr...
    method _has_running (line 218) | def _has_running(self) -> bool:
    method _init_cache (line 221) | def _init_cache(self, model_config):
    method get_kvcache (line 224) | def get_kvcache(self):
    method set_spec_dec_mode (line 227) | def set_spec_dec_mode(self, n_spec_tokens: int):
    method unset_spec_dec_mode (line 231) | def unset_spec_dec_mode(self):
    method schedule (line 235) | def schedule(self):
    method allocate_batch_spec_dec (line 293) | def allocate_batch_spec_dec(self, batch: BatchBucket, n: int):
    method add_sequence (line 300) | def add_sequence(self, req: Sequence):
    method abort_sequence (line 310) | def abort_sequence(self, request_id: int):
    method _find_sequence (line 330) | def _find_sequence(self, request_id: int) -> Sequence:
    method update_seq_finished (line 344) | def update_seq_finished(self, sequence: Sequence, generation_config: G...
    method update_batch_finished (line 351) | def update_batch_finished(self, batch: BatchBucket, generation_config:...
    method check_unfinished_reqs (line 360) | def check_unfinished_reqs(self) -> bool:
    method total_requests_in_batch_bucket (line 363) | def total_requests_in_batch_bucket(self) -> int:
    method append_next_tokens (line 366) | def append_next_tokens(self, sample_tokens: torch.Tensor):
    method update (line 380) | def update(self):
    method streamingllm_free_block_tables (line 398) | def streamingllm_free_block_tables(self, updated_block_ids: List[int]):
  class RPCRequestHandler (line 405) | class RPCRequestHandler(RequestHandler):
    method __init__ (line 410) | def __init__(self, inference_config: InferenceConfig, model_config: Pr...
    method _init_cache (line 451) | def _init_cache(self, model_config):

FILE: colossalai/inference/core/rpc_engine.py
  function run_server (line 27) | def run_server(host, port, event: mp.Event = None):
  class RPCInferenceEngine (line 36) | class RPCInferenceEngine(InferenceEngine):
    method __init__ (line 51) | def __init__(
    method _verify_args (line 125) | def _verify_args(self) -> None:
    method init_workers (line 134) | def init_workers(self):
    method async_parallel_wrapper (line 167) | async def async_parallel_wrapper(self, f, *args, **kwargs):
    method init_worker_env (line 173) | async def init_worker_env(self):
    method init_model (line 186) | async def init_model(self, model_or_path: Union[nn.Module, str], model...
    method init_scheduler (line 200) | def init_scheduler(self, inference_config: InferenceConfig, model_conf...
    method _init_device_cache (line 203) | async def _init_device_cache(self, alloc_shape: Tuple[int, int, int, i...
    method init_device_cache (line 210) | def init_device_cache(self, alloc_shape: Tuple[Tuple[int, ...], Tuple[...
    method prepare_input (line 213) | def prepare_input(self, batch: BatchBucket) -> Tuple[List[int], InputM...
    method step_ (line 257) | async def step_(self, input_token_ids, input_meta_data: InputMetaData):
    method step (line 273) | def step(self) -> List[str]:
    method kill_workers (line 286) | def kill_workers(self):
    method __del__ (line 296) | def __del__(self):

FILE: colossalai/inference/executor/rpc_worker.py
  class rpcWorkerService (line 44) | class rpcWorkerService(rpyc.Service):
    method exposed_init_dist_env (line 51) | def exposed_init_dist_env(self, rank, world_size, master_address, mast...
    method exposed_init_model (line 56) | def exposed_init_model(
    method exposed_init_cache (line 72) | def exposed_init_cache(self, alloc_shape: Tuple[Tuple[int, ...], Tuple...
    method exposed_execute_model_forward (line 100) | def exposed_execute_model_forward(
    method _init_output_tensor (line 134) | def _init_output_tensor(self):
    method _init_fd_tensor (line 142) | def _init_fd_tensor(self):
    method _init_model (line 169) | def _init_model(self, model_or_path: Union[nn.Module, str], model_poli...
    method _shardformer (line 261) | def _shardformer(
    method exposed_compute_only_for_test (line 295) | def exposed_compute_only_for_test(self):

FILE: colossalai/inference/flash_decoding_utils.py
  class FDIntermTensors (line 7) | class FDIntermTensors(metaclass=SingletonMeta):
    method __init__ (line 12) | def __init__(self):
    method _reset (line 15) | def _reset(self):
    method is_initialized (line 23) | def is_initialized(self):
    method mid_output (line 27) | def mid_output(self):
    method mid_output_lse (line 32) | def mid_output_lse(self):
    method exp_sums (line 37) | def exp_sums(self):
    method max_logits (line 42) | def max_logits(self):
    method initialize (line 46) | def initialize(

FILE: colossalai/inference/graph_runner.py
  class CUDAGraphRunner (line 10) | class CUDAGraphRunner:
    method __init__ (line 11) | def __init__(self, model: nn.Module):
    method capture (line 18) | def capture(
    method forward (line 66) | def forward(
    method __call__ (line 99) | def __call__(self, *args, **kwargs):

FILE: colossalai/inference/kv_cache/block_cache.py
  class CacheBlock (line 6) | class CacheBlock:
    method __init__ (line 9) | def __init__(self, block_id: int, block_size: int, elem_size: int, k_p...
    method available_space (line 32) | def available_space(self) -> int:
    method add_ref (line 36) | def add_ref(self) -> None:
    method remove_ref (line 39) | def remove_ref(self) -> None:
    method has_ref (line 43) | def has_ref(self) -> bool:
    method allocate (line 46) | def allocate(self, size: int) -> None:
    method is_empty (line 50) | def is_empty(self):
    method clear (line 53) | def clear(self) -> None:
    method __repr__ (line 57) | def __repr__(self):

FILE: colossalai/inference/kv_cache/kvcache_manager.py
  class KVCacheManager (line 18) | class KVCacheManager:
    method __init__ (line 48) | def __init__(self, config: InferenceConfig, model_config: PretrainedCo...
    method total_num_blocks (line 127) | def total_num_blocks(self) -> int:
    method num_available_blocks (line 132) | def num_available_blocks(self) -> int:
    method get_head_size (line 136) | def get_head_size(self):
    method get_kv_cache (line 139) | def get_kv_cache(self):
    method get_max_blocks_per_sequence (line 143) | def get_max_blocks_per_sequence(self) -> int:
    method check_allocation (line 150) | def check_allocation(self, seq: Sequence) -> bool:
    method get_block_kv_ptrs (line 154) | def get_block_kv_ptrs(self, block_id: int, layer_id: int) -> Tuple[Lis...
    method get_block_table_kv_ptrs (line 159) | def get_block_table_kv_ptrs(self, block_table: torch.Tensor, layer_id:...
    method allocate_context_from_block_table (line 170) | def allocate_context_from_block_table(self, block_table: torch.Tensor,...
    method allocate_context_from_block_tables (line 220) | def allocate_context_from_block_tables(self, block_tables: torch.Tenso...
    method allocate_token_from_block_table (line 295) | def allocate_token_from_block_table(self, block_table: torch.Tensor, c...
    method allocate_tokens_from_block_tables (line 309) | def allocate_tokens_from_block_tables(
    method allocate_n_tokens_from_block_tables (line 376) | def allocate_n_tokens_from_block_tables(
    method allocate_single_block (line 396) | def allocate_single_block(self, block_table: torch.Tensor, block_local...
    method free_block_table (line 426) | def free_block_table(self, block_table: torch.Tensor) -> None:
    method free_block_tables (line 441) | def free_block_tables(self, block_tables: torch.Tensor, first_n: int =...
    method clear_all (line 450) | def clear_all(self) -> None:
    method streamingllm_free_block_tables (line 457) | def streamingllm_free_block_tables(self, updated_block_ids: List[int]):
    method get_physical_cache (line 471) | def get_physical_cache(self, layer_id: int, block_idx: int) -> Tuple[t...
    method _allocate_on_block (line 475) | def _allocate_on_block(self, block: CacheBlock, space_asked: int) -> int:
    method _init_logical_caches (line 486) | def _init_logical_caches(self):
    method _init_device_caches (line 508) | def _init_device_caches(
  class RPCKVCacheManager (line 524) | class RPCKVCacheManager(KVCacheManager):
    method __init__ (line 525) | def __init__(self, config: InferenceConfig, model_config: PretrainedCo...
    method get_physical_cache_shape (line 578) | def get_physical_cache_shape(self) -> Tuple[Tuple[int, ...], Tuple[int...
    method get_kv_cache (line 594) | def get_kv_cache(self):
    method _init_logical_caches (line 598) | def _init_logical_caches(self):

FILE: colossalai/inference/logit_processors.py
  function register_logits_processor (line 11) | def register_logits_processor(process_type):
  function apply_no_repeat_ngram_size (line 25) | def apply_no_repeat_ngram_size(logits, ngram_size: int, batch_token_ids:...
  function apply_repetition_penalty (line 57) | def apply_repetition_penalty(logits, penalty: float, batch_token_ids: Li...
  function apply_temperature (line 83) | def apply_temperature(logits, temperature: float):
  function apply_top_k (line 98) | def apply_top_k(logits, top_k: int):
  function apply_top_p (line 112) | def apply_top_p(logits, top_p: float):
  function apply_forced_eos_token_id (line 134) | def apply_forced_eos_token_id(
  function get_logits_processor (line 172) | def get_logits_processor(processor: str, logits, *args, **kwargs):

FILE: colossalai/inference/modeling/backends/attention_backend.py
  class AttentionMetaData (line 12) | class AttentionMetaData:
  class AttentionBackend (line 30) | class AttentionBackend(ABC):
    method prefill (line 32) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 36) | def decode(self, attn_metadatas: AttentionMetaData, **kwargs):
  class CudaAttentionBackend (line 40) | class CudaAttentionBackend(AttentionBackend):
    method __init__ (line 46) | def __init__(self, use_flash_attn: bool = False):
    method prefill (line 51) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 89) | def decode(self, attn_metadata: AttentionMetaData, **kwargs):
  class TritonAttentionBackend (line 110) | class TritonAttentionBackend(AttentionBackend):
    method prefill (line 115) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 131) | def decode(self, attn_metadata: AttentionMetaData, **kwargs):
  function get_attention_backend (line 151) | def get_attention_backend(

FILE: colossalai/inference/modeling/backends/pre_attention_backend.py
  class PreAttentionBackend (line 9) | class PreAttentionBackend(ABC):
    method prefill (line 11) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 15) | def decode(self, attn_metadata: AttentionMetaData, **kwargs):
  class CudaPreAttentionBackend (line 19) | class CudaPreAttentionBackend(PreAttentionBackend):
    method __init__ (line 24) | def __init__(self, use_flash_attn: bool):
    method prefill (line 29) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 57) | def decode(self, attn_metadata: AttentionMetaData, **kwargs):
  class TritonPreAttentionBackend (line 82) | class TritonPreAttentionBackend(PreAttentionBackend):
    method prefill (line 87) | def prefill(self, attn_metadata: AttentionMetaData, **kwargs):
    method decode (line 96) | def decode(self, attn_metadata: AttentionMetaData, **kwargs):
  function get_pre_attention_backend (line 133) | def get_pre_attention_backend(

FILE: colossalai/inference/modeling/layers/attention.py
  function copy_to_cache (line 9) | def copy_to_cache(source, cache, lengths, block_tables, type: str = "pre...
  function convert_kvcache (line 43) | def convert_kvcache(cache, lengths, block_tables, pad_id=0):
  class PagedAttention (line 75) | class PagedAttention:
    method pad_and_reshape (line 82) | def pad_and_reshape(tensor, seq_lengths, max_seq_len, num_heads, head_...
    method generate_padding_mask (line 97) | def generate_padding_mask(lengths, max_seq_len):
    method repeat_kv (line 103) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int = 1) -> torch.Te...
    method nopad_context_forward (line 120) | def nopad_context_forward(
    method pad_context_forward (line 188) | def pad_context_forward(
    method pad_decoding_forward (line 245) | def pad_decoding_forward(
    method no_pad_decoding_forward (line 301) | def no_pad_decoding_forward(

FILE: colossalai/inference/modeling/layers/baichuan_tp_linear.py
  class BaichuanLMHeadLinear1D_Col (line 12) | class BaichuanLMHeadLinear1D_Col(Linear1D_Col):
    method from_native_module (line 14) | def from_native_module(
    method _load_from_state_dict (line 58) | def _load_from_state_dict(

FILE: colossalai/inference/modeling/layers/diffusion.py
  class DiffusionPipe (line 8) | class DiffusionPipe(nn.Module):
    method __init__ (line 13) | def __init__(self, source_obj) -> None:
    method _execution_device (line 40) | def _execution_device(self):
    method device (line 50) | def device(self):
    method forward (line 53) | def forward(self, *args, **kwargs):

FILE: colossalai/inference/modeling/layers/distrifusion.py
  function PixArtAlphaTransformer2DModel_forward (line 36) | def PixArtAlphaTransformer2DModel_forward(
  function SD3Transformer2DModel_forward (line 154) | def SD3Transformer2DModel_forward(
  class DistrifusionPatchEmbed (line 212) | class DistrifusionPatchEmbed(ParallelModule):
    method __init__ (line 213) | def __init__(
    method from_native_module (line 225) | def from_native_module(module: PatchEmbed, process_group: Union[Proces...
    method forward (line 232) | def forward(self, latent):
  class DistrifusionConv2D (line 268) | class DistrifusionConv2D(ParallelModule):
    method __init__ (line 270) | def __init__(
    method from_native_module (line 282) | def from_native_module(module: nn.Conv2d, process_group: Union[Process...
    method sliced_forward (line 287) | def sliced_forward(self, x: torch.Tensor) -> torch.Tensor:
    method forward (line 315) | def forward(self, input: torch.Tensor) -> Tuple[torch.Tensor, torch.Te...
  class DistrifusionFusedAttention (line 321) | class DistrifusionFusedAttention(ParallelModule):
    method __init__ (line 323) | def __init__(
    method from_native_module (line 340) | def from_native_module(
    method _forward (line 350) | def _forward(
    method forward (line 444) | def forward(
  class DistriSelfAttention (line 492) | class DistriSelfAttention(ParallelModule):
    method __init__ (line 493) | def __init__(
    method from_native_module (line 510) | def from_native_module(
    method _forward (line 520) | def _forward(self, hidden_states: torch.FloatTensor, scale: float = 1.0):
    method forward (line 598) | def forward(

FILE: colossalai/inference/modeling/models/glide_llama.py
  function rotate_half (line 28) | def rotate_half(x):
  function apply_single_rotary_pos_emb (line 35) | def apply_single_rotary_pos_emb(q, cos, sin, position_ids):
  function glide_llama_causal_lm_forward (line 45) | def glide_llama_causal_lm_forward(
  function glide_llama_model_forward (line 121) | def glide_llama_model_forward(
  class GlideLlamaConfig (line 217) | class GlideLlamaConfig(LlamaConfig):
    method __init__ (line 220) | def __init__(
  class LlamaCrossAttention (line 231) | class LlamaCrossAttention(nn.Module):
    method __init__ (line 234) | def __init__(self, config: GlideLlamaConfig):
    method _shape (line 258) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 261) | def forward(
  class GlideLlamaDecoderLayer (line 309) | class GlideLlamaDecoderLayer(nn.Module):
    method __init__ (line 310) | def __init__(self, config: GlideLlamaConfig, layer_idx: Optional[int] ...
    method from_native_module (line 320) | def from_native_module(module: LlamaDecoderLayer, *args, **kwargs) -> ...
    method forward (line 329) | def forward(
  class GlideLlamaForCausalLM (line 411) | class GlideLlamaForCausalLM(LlamaForCausalLM):
    method __init__ (line 412) | def __init__(self, config: GlideLlamaConfig):

FILE: colossalai/inference/modeling/models/nopadding_baichuan.py
  function baichuan_rmsnorm_forward (line 25) | def baichuan_rmsnorm_forward(
  class NopadBaichuanAttention (line 54) | class NopadBaichuanAttention(ParallelModule):
    method __init__ (line 55) | def __init__(
    method from_native_module (line 97) | def from_native_module(
    method forward (line 123) | def forward(
  class NopadBaichuanMLP (line 218) | class NopadBaichuanMLP(NopadLlamaMLP):
    method from_native_module (line 220) | def from_native_module(

FILE: colossalai/inference/modeling/models/nopadding_llama.py
  function llama_causal_lm_forward (line 35) | def llama_causal_lm_forward(
  function llama_model_forward (line 68) | def llama_model_forward(
  function llama_decoder_layer_forward (line 168) | def llama_decoder_layer_forward(
  function llama_rmsnorm_forward (line 239) | def llama_rmsnorm_forward(
  class NopadLlamaMLP (line 259) | class NopadLlamaMLP(LlamaMLP, ParallelModule):
    method __init__ (line 260) | def __init__(
    method from_native_module (line 295) | def from_native_module(
    method _load_from_state_dict (line 323) | def _load_from_state_dict(
    method forward (line 373) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method extra_repr (line 384) | def extra_repr(self) -> str:
  class NopadLlamaAttention (line 388) | class NopadLlamaAttention(LlamaAttention, ParallelModule):
    method __init__ (line 389) | def __init__(
    method from_native_module (line 453) | def from_native_module(
    method forward (line 489) | def forward(
    method _load_from_state_dict (line 592) | def _load_from_state_dict(
    method extra_repr (line 676) | def extra_repr(self) -> str:

FILE: colossalai/inference/modeling/models/pixart_alpha.py
  function pixart_alpha_forward (line 23) | def pixart_alpha_forward(

FILE: colossalai/inference/modeling/models/stablediffusion3.py
  function sd3_forward (line 12) | def sd3_forward(

FILE: colossalai/inference/modeling/policy/glide_llama.py
  class GlideLlamaModelPolicy (line 13) | class GlideLlamaModelPolicy(LlamaForCausalLMPolicy):
    method module_policy (line 14) | def module_policy(self):
    method postprocess (line 42) | def postprocess(self):

FILE: colossalai/inference/modeling/policy/nopadding_baichuan.py
  class NoPaddingBaichuanModelInferPolicy (line 19) | class NoPaddingBaichuanModelInferPolicy(LlamaForCausalLMPolicy, RPC_PARAM):
    method __init__ (line 20) | def __init__(self) -> None:
    method module_policy (line 23) | def module_policy(self):
    method postprocess (line 102) | def postprocess(self):
    method to_rpc_param (line 106) | def to_rpc_param(self) -> str:
    method from_rpc_param (line 110) | def from_rpc_param() -> "NoPaddingBaichuanModelInferPolicy":

FILE: colossalai/inference/modeling/policy/nopadding_llama.py
  class NoPaddingLlamaModelInferPolicy (line 18) | class NoPaddingLlamaModelInferPolicy(LlamaForCausalLMPolicy, RPC_PARAM):
    method __init__ (line 19) | def __init__(self) -> None:
    method module_policy (line 22) | def module_policy(self):
    method postprocess (line 106) | def postprocess(self):
    method to_rpc_param (line 110) | def to_rpc_param(self) -> str:
    method from_rpc_param (line 114) | def from_rpc_param() -> "NoPaddingLlamaModelInferPolicy":

FILE: colossalai/inference/modeling/policy/pixart_alpha.py
  class PixArtAlphaInferPolicy (line 17) | class PixArtAlphaInferPolicy(Policy, RPC_PARAM):
    method __init__ (line 18) | def __init__(self) -> None:
    method module_policy (line 21) | def module_policy(self):
    method preprocess (line 65) | def preprocess(self) -> nn.Module:
    method postprocess (line 68) | def postprocess(self):
    method config_sanity_check (line 71) | def config_sanity_check(self):
    method to_rpc_param (line 74) | def to_rpc_param(self) -> str:
    method from_rpc_param (line 78) | def from_rpc_param() -> "PixArtAlphaInferPolicy":

FILE: colossalai/inference/modeling/policy/stablediffusion3.py
  class StableDiffusion3InferPolicy (line 17) | class StableDiffusion3InferPolicy(Policy, RPC_PARAM):
    method __init__ (line 18) | def __init__(self) -> None:
    method module_policy (line 21) | def module_policy(self):
    method preprocess (line 64) | def preprocess(self) -> nn.Module:
    method postprocess (line 67) | def postprocess(self):
    method config_sanity_check (line 70) | def config_sanity_check(self):
    method to_rpc_param (line 73) | def to_rpc_param(self) -> str:
    method from_rpc_param (line 77) | def from_rpc_param() -> "StableDiffusion3InferPolicy":

FILE: colossalai/inference/sampler.py
  function greedy_sample (line 9) | def greedy_sample(
  function multinomial_sample (line 19) | def multinomial_sample(
  function beam_search_sample (line 29) | def beam_search_sample(
  function search_tokens (line 64) | def search_tokens(

FILE: colossalai/inference/server/api_server.py
  function health_check (line 42) | def health_check() -> JSONResponse:
  function engine_check (line 48) | def engine_check() -> bool:
  function generate (line 57) | async def generate(request: Request) -> Response:
  function create_completion (line 99) | async def create_completion(request: Request):
  function create_chat (line 115) | async def create_chat(request: Request):
  function get_generation_config (line 130) | def get_generation_config(request):
  function add_engine_config (line 138) | def add_engine_config(parser):
  function parse_args (line 167) | def parse_args():

FILE: colossalai/inference/server/chat_service.py
  class ChatServing (line 14) | class ChatServing:
    method __init__ (line 15) | def __init__(
    method create_chat (line 28) | async def create_chat(self, request: Request, generation_config):
    method chat_completion_stream_generator (line 52) | async def chat_completion_stream_generator(self, request, request_dict...
    method chat_completion_full_generator (line 88) | async def chat_completion_full_generator(
    method get_chat_request_role (line 121) | def get_chat_request_role(self, request: Request, request_dict: dict) ...
    method _load_chat_template (line 128) | def _load_chat_template(self, chat_template):

FILE: colossalai/inference/server/completion_service.py
  class CompletionServing (line 8) | class CompletionServing:
    method __init__ (line 9) | def __init__(self, engine: AsyncInferenceEngine, served_model: str):
    method create_completion (line 18) | async def create_completion(self, request, generation_config):

FILE: colossalai/inference/server/utils.py
  class NumericIDGenerator (line 7) | class NumericIDGenerator:
    method __new__ (line 10) | def __new__(cls):
    method __call__ (line 16) | def __call__(self):
  class ChatMessage (line 24) | class ChatMessage(BaseModel):
  class DeltaMessage (line 29) | class DeltaMessage(BaseModel):
  class ChatCompletionResponseStreamChoice (line 34) | class ChatCompletionResponseStreamChoice(BaseModel):

FILE: colossalai/inference/spec/drafter.py
  class Drafter (line 13) | class Drafter:
    method __init__ (line 22) | def __init__(
    method get_model (line 36) | def get_model(self) -> nn.Module:
    method trim_kv_cache (line 40) | def trim_kv_cache(
    method speculate (line 65) | def speculate(

FILE: colossalai/inference/spec/struct.py
  class DrafterOutput (line 8) | class DrafterOutput:
    method __post_init__ (line 25) | def __post_init__(self):
  class GlideInput (line 33) | class GlideInput:
    method glimpse_ready (line 52) | def glimpse_ready(self):

FILE: colossalai/inference/struct.py
  class RequestStatus (line 15) | class RequestStatus(enum.Enum):
    method is_finished (line 34) | def is_finished(status: "RequestStatus") -> bool:
    method is_running (line 42) | def is_running(status: "RequestStatus") -> bool:
    method is_waiting (line 46) | def is_waiting(status: "RequestStatus") -> bool:
  class DiffusionSequence (line 51) | class DiffusionSequence:
  class Sequence (line 62) | class Sequence:
    method __post_init__ (line 91) | def __post_init__(self):
    method sentence_len (line 96) | def sentence_len(self) -> int:
    method input_len (line 103) | def input_len(self) -> int:
    method output_len (line 110) | def output_len(self) -> int:
    method check_finish (line 116) | def check_finish(self) -> bool:
    method revoke_finished_status (line 135) | def revoke_finished_status(self) -> None:
    method __hash__ (line 143) | def __hash__(self):
    method mark_running (line 146) | def mark_running(self) -> None:
    method mark_finished (line 155) | def mark_finished(self) -> None:
    method mark_aborted (line 161) | def mark_aborted(self) -> None:
    method recycle (line 167) | def recycle(self) -> None:
    method __repr__ (line 177) | def __repr__(self) -> str:
  function _pad_to_max (line 190) | def _pad_to_max(x: List[int], max_len: int, pad: int) -> List[int]:

FILE: colossalai/inference/utils.py
  function init_to_get_rotary (line 22) | def init_to_get_rotary(self, base=10000, use_elem=False):
  function has_index_file (line 67) | def has_index_file(checkpoint_path: str) -> Tuple[bool, Optional[Path]]:
  function get_model_size (line 103) | def get_model_size(model: nn.Module):
  function find_available_ports (line 116) | def find_available_ports(num: int):
  function get_alibi_slopes (line 125) | def get_alibi_slopes(num_heads: int, device: torch.device) -> torch.Tensor:
  function can_use_flash_attn2 (line 150) | def can_use_flash_attn2(dtype: torch.dtype) -> bool:
  class ModelType (line 166) | class ModelType(Enum):
  function get_model_type (line 172) | def get_model_type(model_or_path: Union[nn.Module, str, DiffusionPipelin...

FILE: colossalai/initialize.py
  function launch (line 20) | def launch(
  function launch_from_slurm (line 78) | def launch_from_slurm(
  function launch_from_openmpi (line 115) | def launch_from_openmpi(
  function launch_from_torch (line 154) | def launch_from_torch(backend: str = "nccl", seed: int = 1024, verbose: ...

FILE: colossalai/interface/model.py
  function extract_lora_layers (line 9) | def extract_lora_layers(model: PeftModel, names: Set[str], adapter_name:...
  class PeftUnwrapMixin (line 49) | class PeftUnwrapMixin:
    method __init__ (line 50) | def __init__(self, peft_model: PeftModel):
    method named_parameters (line 61) | def named_parameters(self):
    method named_buffers (line 67) | def named_buffers(self):
    method _modules (line 71) | def _modules(self):
    method _non_persistent_buffers_set (line 75) | def _non_persistent_buffers_set(self):
    method patch_state_dict (line 78) | def patch_state_dict(self, state_dict: Dict[str, torch.Tensor]):
    method state_dict (line 86) | def state_dict(self):
    method load_state_dict (line 94) | def load_state_dict(self, state_dict, strict: bool = True, assign: boo...
    method __hash__ (line 98) | def __hash__(self):
  class ModelWrapper (line 102) | class ModelWrapper(nn.Module):
    method __init__ (line 110) | def __init__(self, module: nn.Module) -> None:
    method unwrap (line 114) | def unwrap(self, unwrap_peft: bool = True):
    method forward (line 126) | def forward(self, *args, **kwargs):
  class AMPModelMixin (line 130) | class AMPModelMixin:
    method update_master_params (line 133) | def update_master_params(self):

FILE: colossalai/interface/optimizer.py
  class OptimizerWrapper (line 10) | class OptimizerWrapper:
    method __init__ (line 18) | def __init__(self, optim: Optimizer):
    method parameters (line 22) | def parameters(self):
    method param_groups (line 30) | def param_groups(self):
    method defaults (line 34) | def defaults(self):
    method add_param_group (line 37) | def add_param_group(self, *args, **kwargs):
    method step (line 40) | def step(self, *args, **kwargs):
    method zero_grad (line 46) | def zero_grad(self, *args, **kwargs):
    method backward (line 52) | def backward(self, loss: Tensor, inputs=None, retain_graph=False, **kw...
    method backward_by_grad (line 58) | def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tenso...
    method state_dict (line 78) | def state_dict(self):
    method load_state_dict (line 84) | def load_state_dict(self, *args, **kwargs):
    method clip_grad_by_value (line 90) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None:
    method clip_grad_by_norm (line 103) | def clip_grad_by_norm(
    method scale_loss (line 126) | def scale_loss(self, loss: Tensor):
    method unscale_grad (line 139) | def unscale_grad(self):
    method unwrap (line 149) | def unwrap(self):
    method get_grad_norm (line 155) | def get_grad_norm(self, norm_type: Union[float, int] = 2.0, **kwargs) ...
  class DistributedOptim (line 168) | class DistributedOptim(Optimizer):
    method setup_distributed (line 169) | def setup_distributed(

FILE: colossalai/interface/pretrained.py
  function get_pretrained_path (line 11) | def get_pretrained_path(model: Module) -> Optional[str]:
  function set_pretrained_path (line 15) | def set_pretrained_path(model: Module, path: str) -> None:

FILE: colossalai/kernel/jit/bias_dropout_add.py
  function bias_dropout_add (line 4) | def bias_dropout_add(x, bias, residual, prob, training):
  function bias_dropout_add_fused_train (line 12) | def bias_dropout_add_fused_train(
  function bias_dropout_add_fused_inference (line 19) | def bias_dropout_add_fused_inference(

FILE: colossalai/kernel/jit/bias_gelu.py
  function bias_gelu (line 14) | def bias_gelu(bias, y):
  function bias_gelu_back (line 23) | def bias_gelu_back(g, bias, y):
  class GeLUFunction (line 31) | class GeLUFunction(torch.autograd.Function):
    method forward (line 34) | def forward(ctx, input, bias):
    method backward (line 39) | def backward(ctx, grad_output):

FILE: colossalai/kernel/jit/option.py
  function set_jit_fusion_options (line 11) | def set_jit_fusion_options():
  function warmup_jit_fusion (line 39) | def warmup_jit_fusion(

FILE: colossalai/kernel/kernel_loader.py
  class KernelLoader (line 31) | class KernelLoader:
    method register_extension (line 43) | def register_extension(cls, extension: _Extension):
    method load (line 53) | def load(self, ext_name: str = None):
  class CPUAdamLoader (line 86) | class CPUAdamLoader(KernelLoader):
  class LayerNormLoader (line 90) | class LayerNormLoader(KernelLoader):
  class MoeLoader (line 94) | class MoeLoader(KernelLoader):
  class FusedOptimizerLoader (line 98) | class FusedOptimizerLoader(KernelLoader):
  class InferenceOpsLoader (line 102) | class InferenceOpsLoader(KernelLoader):
  class ScaledMaskedSoftmaxLoader (line 106) | class ScaledMaskedSoftmaxLoader(KernelLoader):
  class ScaledUpperTriangleMaskedSoftmaxLoader (line 110) | class ScaledUpperTriangleMaskedSoftmaxLoader(KernelLoader):
  class FlashAttentionLoader (line 114) | class FlashAttentionLoader(KernelLoader):
  class FlashAttentionDaoLoader (line 122) | class FlashAttentionDaoLoader(KernelLoader):
  class FlashAttentionWithCustomMaskLoader (line 126) | class FlashAttentionWithCustomMaskLoader(KernelLoader):
  class FlashAttentionForFloatAndCustomMaskLoader (line 130) | class FlashAttentionForFloatAndCustomMaskLoader(KernelLoader):

FILE: colossalai/kernel/triton/context_attn_unpad.py
  function _fwd_context_paged_attention_kernel (line 16) | def _fwd_context_paged_attention_kernel(
  function _fwd_context_paged_attention_kernel_v2 (line 193) | def _fwd_context_paged_attention_kernel_v2(
  function _alibi_fwd_context_paged_attention_kernel (line 368) | def _alibi_fwd_context_paged_attention_kernel(
  function context_attention_unpadded (line 552) | def context_attention_unpadded(

FILE: colossalai/kernel/triton/flash_decoding.py
  function _flash_decoding_fwd_kernel (line 11) | def _flash_decoding_fwd_kernel(
  function _alibi_flash_decoding_fwd_kernel (line 135) | def _alibi_flash_decoding_fwd_kernel(
  function _flash_decoding_fwd_reduce_kernel (line 258) | def _flash_decoding_fwd_reduce_kernel(
  function flash_decoding_attention (line 318) | def flash_decoding_attention(

FILE: colossalai/kernel/triton/fused_rotary_embedding.py
  function fused_rotary_emb (line 7) | def fused_rotary_emb(
  function fused_rotary_embedding (line 120) | def fused_rotary_embedding(

FILE: colossalai/kernel/triton/kvcache_copy.py
  function _copy_to_kcache_seqlen_n_kernel (line 11) | def _copy_to_kcache_seqlen_n_kernel(
  function _copy_to_kvcache_seqlen1_kernel (line 65) | def _copy_to_kvcache_seqlen1_kernel(
  function copy_k_to_blocked_cache (line 130) | def copy_k_to_blocked_cache(
  function copy_kv_to_blocked_cache (line 208) | def copy_kv_to_blocked_cache(

FILE: colossalai/kernel/triton/llama_act_combine_kernel.py
  function _llama_act_combine_forward (line 25) | def _llama_act_combine_forward(
  function _llama_act_combine_backward (line 54) | def _llama_act_combine_backward(
  class LlamaActCombine (line 99) | class LlamaActCombine(torch.autograd.Function):
    method forward (line 112) | def forward(ctx: Any, x_gate: torch.Tensor, x_up: torch.Tensor, activa...
    method backward (line 158) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, Tensor,...

FILE: colossalai/kernel/triton/no_pad_rotary_embedding.py
  function rotary_embedding_kernel (line 26) | def rotary_embedding_kernel(
  function fused_rotary_embedding_kernel (line 127) | def fused_rotary_embedding_kernel(
  function fused_rotary_embedding_kernel_v2 (line 273) | def fused_rotary_embedding_kernel_v2(
  function decoding_fused_rotary_embedding_kernel (line 379) | def decoding_fused_rotary_embedding_kernel(
  function rotary_embedding (line 480) | def rotary_embedding(
  function decoding_fused_rotary_embedding (line 575) | def decoding_fused_rotary_embedding(

FILE: colossalai/kernel/triton/qkv_matmul_kernel.py
  function qkv_gemm_4d_kernel (line 17) | def qkv_gemm_4d_kernel(

FILE: colossalai/kernel/triton/rms_layernorm.py
  function _rmsnorm_kernel (line 15) | def _rmsnorm_kernel(
  function _rmsnorm_with_residual_kernel (line 51) | def _rmsnorm_with_residual_kernel(
  function rms_layernorm (line 93) | def rms_layernorm(x, weight, eps, norm_output=None, residual=None):

FILE: colossalai/kernel/triton/rotary_cache_copy.py
  function prefill_cache_kernel (line 7) | def prefill_cache_kernel(
  function decoding_cache_kernel (line 46) | def decoding_cache_kernel(
  function get_xine_cache (line 80) | def get_xine_cache(lengths: torch.Tensor, cos_cache: torch.Tensor, sin_c...

FILE: colossalai/kernel/triton/softmax.py
  function softmax_kernel (line 19) | def softmax_kernel(output_ptr, input_ptr, row_stride, n_cols, mask_ptr, ...
  function softmax (line 50) | def softmax(input: torch.Tensor, mask: torch.Tensor = None, dim=-1) -> t...

FILE: colossalai/lazy/construction.py
  class ConstructorManager (line 48) | class ConstructorManager:
    method apply (line 54) | def apply(overwrites: Dict[Callable, Callable]):
    method undo (line 60) | def undo():
    method redo (line 67) | def redo():
    method disable (line 75) | def disable():
    method clear (line 84) | def clear():

FILE: colossalai/lazy/lazy_init.py
  class _MyTensor (line 79) | class _MyTensor(Tensor):
    method __new__ (line 86) | def __new__(cls, func, *args, concrete_data=None, **kwargs) -> "_MyTen...
    method __torch_function__ (line 97) | def __torch_function__(cls, func, types, args=(), kwargs=None):
  function _data_tolist (line 102) | def _data_tolist(tensor: torch.Tensor) -> list:
  function _convert_cls (line 107) | def _convert_cls(tensor: "LazyTensor", target: torch.Tensor, requires_gr...
  class LazyTensor (line 134) | class LazyTensor(torch.Tensor):
    method __new__ (line 175) | def __new__(cls, func, *args, meta_data=None, concrete_data=None, **kw...
    method __init__ (line 195) | def __init__(self, func, *args, meta_data=None, concrete_data=None, **...
    method device (line 204) | def device(self) -> torch.device:
    method __repr__ (line 207) | def __repr__(self):
    method materialize (line 210) | def materialize(self) -> torch.Tensor:
    method clean (line 221) | def clean(self) -> None:
    method _replace_with_materialized (line 229) | def _replace_with_materialized(x):
    method _materialize_data (line 234) | def _materialize_data(self) -> torch.Tensor:
    method _rerun_ops (line 249) | def _rerun_ops(self, target=None) -> torch.Tensor:
    method __torch_function__ (line 283) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method to (line 361) | def to(self, *args, **kwargs) -> torch.Tensor:
    method cpu (line 384) | def cpu(self, memory_format: torch.memory_format = torch.preserve_form...
    method cuda (line 387) | def cuda(self, device=None, non_blocking=False, memory_format: torch.m...
    method clone (line 391) | def clone(self) -> "LazyTensor":
    method detach (line 400) | def detach(self) -> Tensor:
    method __deepcopy__ (line 403) | def __deepcopy__(self, memo):
    method data (line 431) | def data(self):
    method data (line 435) | def data(self, other: "LazyTensor"):
    method tolist (line 460) | def tolist(self) -> list:
    method __hash__ (line 466) | def __hash__(self):
    method __rpow__ (line 469) | def __rpow__(self, other):
  class LazyInitContext (line 474) | class LazyInitContext:
    method __init__ (line 486) | def __init__(
    method __enter__ (line 496) | def __enter__(self):
    method __exit__ (line 584) | def __exit__(self, exc_type, exc_val, exc_tb):
    method materialize (line 591) | def materialize(module: nn.Module, verbose: bool = False) -> nn.Module:
  function _apply_to_lazy_module (line 605) | def _apply_to_lazy_module(
  function _is_int_tuple (line 654) | def _is_int_tuple(args) -> bool:
  function _copy_tensor (line 663) | def _copy_tensor(tensor: Tensor, requires_grad: bool) -> Tensor:

FILE: colossalai/lazy/pretrained.py
  class PretrainedManager (line 11) | class PretrainedManager:
    method inject (line 15) | def inject() -> None:
    method recover (line 25) | def recover() -> None:
  function new_from_pretrained (line 36) | def new_from_pretrained(

FILE: colossalai/legacy/amp/__init__.py
  function convert_to_amp (line 18) | def convert_to_amp(model: nn.Module, optimizer: Optimizer, criterion: _L...

FILE: colossalai/legacy/amp/amp_type.py
  class AMP_TYPE (line 7) | class AMP_TYPE(Enum):

FILE: colossalai/legacy/amp/apex_amp/__init__.py
  function convert_to_apex_amp (line 7) | def convert_to_apex_amp(model: nn.Module, optimizer: Optimizer, amp_conf...

FILE: colossalai/legacy/amp/apex_amp/apex_amp.py
  class ApexAMPOptimizer (line 17) | class ApexAMPOptimizer(OptimizerWrapper):
    method backward (line 22) | def backward(self, loss: Tensor):
    method clip_grad_norm (line 31) | def clip_grad_norm(self, model: nn.Module, max_norm: float):

FILE: colossalai/legacy/amp/naive_amp/__init__.py
  function convert_to_naive_amp (line 13) | def convert_to_naive_amp(model: nn.Module, optimizer: Optimizer, amp_con...

FILE: colossalai/legacy/amp/naive_amp/_fp16_optimizer.py
  function load_fused_optim (line 27) | def load_fused_optim():
  function _multi_tensor_copy_this_to_that (line 34) | def _multi_tensor_copy_this_to_that(this, that, overflow_buf=None):
  class FP16Optimizer (line 54) | class FP16Optimizer(Optimizer):
    method __init__ (line 66) | def __init__(
    method max_norm (line 171) | def max_norm(self):
    method grad_scaler (line 176) | def grad_scaler(self):
    method loss_scale (line 186) | def loss_scale(self):
    method optimizer (line 195) | def optimizer(self):
    method defaults (line 204) | def defaults(self):
    method _check_overflow (line 212) | def _check_overflow(self):
    method zero_grad (line 233) | def zero_grad(self, set_to_none=True):
    method _get_fp32_param_groups_to_update (line 244) | def _get_fp32_param_groups_to_update(self):
    method _unscale_grads (line 247) | def _unscale_grads(self):
    method _assign_grad_to_fp32_master_param (line 253) | def _assign_grad_to_fp32_master_param(self):
    method _update_fp16_param_from_fp32_param (line 262) | def _update_fp16_param_from_fp32_param(self):
    method step (line 273) | def step(self):
    method backward (line 302) | def backward(self, loss):
    method state_dict (line 312) | def state_dict(self):
    method load_state_dict (line 322) | def load_state_dict(self, state_dict):
    method clip_grad_norm (line 344) | def clip_grad_norm(self, clip_grad):
    method _get_state (line 358) | def _get_state(self):
    method _set_state (line 361) | def _set_state(self, value):
    method _get_param_groups (line 369) | def _get_param_groups(self):
    method _set_param_groups (line 372) | def _set_param_groups(self, value):

FILE: colossalai/legacy/amp/naive_amp/_utils.py
  function has_inf_or_nan (line 6) | def has_inf_or_nan(tensor):
  function zero_gard_by_list (line 35) | def zero_gard_by_list(tensor_list: List[Tensor], set_to_none: bool = Tru...

FILE: colossalai/legacy/amp/naive_amp/naive_amp.py
  class NaiveAMPOptimizer (line 21) | class NaiveAMPOptimizer(OptimizerWrapper):
    method __init__ (line 35) | def __init__(self, optim: Optimizer, *args, **kwargs):
    method backward (line 39) | def backward(self, loss: Tensor):
    method step (line 42) | def step(self):
    method clip_grad_norm (line 45) | def clip_grad_norm(self, model: nn.Module, max_norm: float):
  class NaiveAMPModel (line 55) | class NaiveAMPModel(nn.Module):
    method __init__ (line 71) | def __init__(
    method sync_buffer (line 93) | def sync_buffer(self):
    method sync_buffer (line 97) | def sync_buffer(self, state: bool):
    method _convert_to_fp16 (line 100) | def _convert_to_fp16(self, input_: Any):
    method _convert_to_fp32 (line 105) | def _convert_to_fp32(self, input_: Any):
    method _reduce_module_buffer (line 110) | def _reduce_module_buffer(self):
    method eval (line 132) | def eval(self):
    method forward (line 139) | def forward(self, *args, **kwargs):

FILE: colossalai/legacy/amp/torch_amp/__init__.py
  function convert_to_torch_amp (line 12) | def convert_to_torch_amp(

FILE: colossalai/legacy/amp/torch_amp/_grad_scaler.py
  class _MultiDeviceReplicator (line 20) | class _MultiDeviceReplicator(object):
    method __init__ (line 25) | def __init__(self, master_tensor: torch.Tensor) -> None:
    method get (line 30) | def get(self, device) -> torch.Tensor:
  class OptState (line 43) | class OptState(Enum):
  function _refresh_per_optimizer_state (line 49) | def _refresh_per_optimizer_state():
  class GradScaler (line 53) | class GradScaler(object):
    method __init__ (line 121) | def __init__(self, init_scale=2.0**16, growth_factor=2.0, backoff_fact...
    method _check_scale_growth_tracker (line 151) | def _check_scale_growth_tracker(self, funcname) -> Tuple[torch.Tensor,...
    method _lazy_init_scale_growth_tracker (line 157) | def _lazy_init_scale_growth_tracker(self, dev):
    method scale (line 162) | def scale(self, outputs):
    method _unscale_grads_ (line 207) | def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16):
    method unscale_ (line 253) | def unscale_(self, optimizer):
    method _maybe_opt_step (line 305) | def _maybe_opt_step(self, optimizer, optimizer_state, *args, **kwargs):
    method step (line 311) | def step(self, optimizer, *args, **kwargs):
    method update (line 367) | def update(self, new_scale=None):
    method _get_scale_async (line 441) | def _get_scale_async(self):
    method get_scale (line 444) | def get_scale(self):
    method get_growth_factor (line 456) | def get_growth_factor(self):
    method set_growth_factor (line 462) | def set_growth_factor(self, new_factor):
    method get_backoff_factor (line 469) | def get_backoff_factor(self):
    method set_backoff_factor (line 475) | def set_backoff_factor(self, new_factor):
    method get_growth_interval (line 482) | def get_growth_interval(self):
    method set_growth_interval (line 488) | def set_growth_interval(self, new_interval):
    method _get_growth_tracker (line 495) | def _get_growth_tracker(self):
    method is_enabled (line 501) | def is_enabled(self):
    method state_dict (line 507) | def state_dict(self):
    method load_state_dict (line 535) | def load_state_dict(self, state_dict):
    method __getstate__ (line 561) | def __getstate__(self):
    method __setstate__ (line 577) | def __setstate__(self, state):
    method _check_inf_per_device (line 580) | def _check_inf_per_device(self, optimizer):
    method _found_inf_per_device (line 592) | def _found_inf_per_device(self, optimizer):

FILE: colossalai/legacy/amp/torch_amp/torch_amp.py
  class TorchAMPOptimizer (line 18) | class TorchAMPOptimizer(OptimizerWrapper):
    method __init__ (line 34) | def __init__(self, optim: Optimizer, *args, **kwargs):
    method backward (line 38) | def backward(self, loss: Tensor):
    method step (line 46) | def step(self):
    method clip_grad_norm (line 51) | def clip_grad_norm(self, model: nn.Module, max_norm: float):
  class TorchAMPModel (line 63) | class TorchAMPModel(nn.Module):
    method __init__ (line 71) | def __init__(self, model: nn.Module) -> None:
    method forward (line 76) | def forward(self, *args, **kwargs):
  class TorchAMPLoss (line 83) | class TorchAMPLoss(nn.Module):
    method __init__ (line 90) | def __init__(self, loss: _Loss):
    method forward (line 95) | def forward(self, *args, **kwargs):

FILE: colossalai/legacy/builder/builder.py
  function build_from_config (line 9) | def build_from_config(module, config: dict):
  function build_from_registry (line 26) | def build_from_registry(config, registry: Registry):
  function build_gradient_handler (line 62) | def build_gradient_handler(config, model, optimizer):

FILE: colossalai/legacy/communication/collective.py
  function all_gather (line 18) | def all_gather(tensor: Tensor, dim: int, parallel_mode: ParallelMode, as...
  function reduce_scatter (line 53) | def reduce_scatter(
  function all_reduce (line 94) | def all_reduce(
  function broadcast (line 130) | def broadcast(tensor: Tensor, src: int, parallel_mode: ParallelMode, asy...
  function reduce (line 162) | def reduce(tensor: Tensor, dst: int, parallel_mode: ParallelMode, op: Re...
  function scatter_object_list (line 194) | def scatter_object_list(scatter_object_output_list, scatter_object_input...

FILE: colossalai/legacy/communication/p2p.py
  function _get_tensor_shape (line 20) | def _get_tensor_shape(tensor_shape: TensorShape, chunk_tensor: bool = Fa...
  function create_recv_buffer_with_shapes (line 43) | def create_recv_buffer_with_shapes(recv_shapes, dtype, scatter_gather_te...
  function process_object_to_send (line 60) | def process_object_to_send(object_send, scatter_gather_tensors):
  function filling_ops_queue (line 79) | def filling_ops_queue(obj, comm_op, comm_rank, ops_queue):
  function _communicate (line 89) | def _communicate(
  function recv_forward (line 197) | def recv_forward(
  function recv_backward (line 222) | def recv_backward(
  function send_forward (line 247) | def send_forward(output_tensor, next_rank=None, scatter_gather_tensors=F...
  function send_backward (line 258) | def send_backward(input_tensor_grad, prev_rank=None, scatter_gather_tens...
  function send_forward_recv_backward (line 271) | def send_forward_recv_backward(
  function send_backward_recv_forward (line 299) | def send_backward_recv_forward(
  function send_forward_recv_forward (line 332) | def send_forward_recv_forward(
  function send_backward_recv_backward (line 364) | def send_backward_recv_backward(
  function send_forward_backward_recv_forward_backward (line 396) | def send_forward_backward_recv_forward_backward(

FILE: colossalai/legacy/communication/p2p_v2.py
  function init_process_group (line 21) | def init_process_group():
  function _acquire_pair_group_handle (line 35) | def _acquire_pair_group_handle(first_rank: int, second_rank: int) -> Pro...
  function _cuda_safe_tensor_to_object (line 53) | def _cuda_safe_tensor_to_object(tensor: torch.Tensor, tensor_size: torch...
  function _broadcast_object_list (line 78) | def _broadcast_object_list(object_list: List[Any], src: int, dst: int, d...
  function _send_object (line 159) | def _send_object(object: Any, dst: int) -> None:
  function _recv_object (line 182) | def _recv_object(src: int) -> Any:
  function recv_forward (line 207) | def recv_forward(prev_rank: int = None) -> Any:
  function recv_backward (line 227) | def recv_backward(next_rank: int = None) -> Any:
  function send_forward (line 247) | def send_forward(output_object: Any, next_rank: int = None) -> None:
  function send_backward (line 260) | def send_backward(input_object: Any, prev_rank: int = None) -> None:

FILE: colossalai/legacy/communication/ring.py
  function ring_forward (line 11) | def ring_forward(tensor_send_next: torch.Tensor, parallel_mode: Parallel...

FILE: colossalai/legacy/communication/utils.py
  function send_meta_helper (line 13) | def send_meta_helper(obj, next_rank, tensor_kwargs):
  function send_obj_meta (line 20) | def send_obj_meta(obj, need_meta=True, next_rank=None) -> bool:
  function recv_meta_helper (line 52) | def recv_meta_helper(prev_rank, tensor_kwargs):
  function recv_obj_meta (line 60) | def recv_obj_meta(obj_shape, prev_rank=None) -> torch.Size:
  function split_tensor_into_1d_equal_chunks (line 92) | def split_tensor_into_1d_equal_chunks(tensor: torch.Tensor, new_buffer=F...
  function gather_split_1d_tensor (line 113) | def gather_split_1d_tensor(tensor: torch.Tensor) -> torch.Tensor:

FILE: colossalai/legacy/context/parallel_context.py
  class ParallelContext (line 24) | class ParallelContext(metaclass=SingletonMeta):
    method __init__ (line 34) | def __init__(self):
    method config (line 60) | def config(self):
    method verbose (line 64) | def verbose(self):
    method verbose (line 68) | def verbose(self, verbose_: bool):
    method logger (line 72) | def logger(self):
    method load_config (line 77) | def load_config(self, config: Union[dict, str]):
    method detect_num_processes_on_current_node (line 94) | def detect_num_processes_on_current_node(self):
    method _check_parallel_mode (line 102) | def _check_parallel_mode(parallel_mode: ParallelMode):
    method get_global_rank (line 107) | def get_global_rank(self):
    method add_global_rank (line 115) | def add_global_rank(self, parallel_mode: ParallelMode, rank: int):
    method get_local_rank (line 129) | def get_local_rank(self, parallel_mode: ParallelMode):
    method _add_local_rank (line 145) | def _add_local_rank(self, parallel_mode: ParallelMode, rank: int):
    method get_next_global_rank (line 159) | def get_next_global_rank(self, parallel_mode: ParallelMode):
    method get_prev_global_rank (line 181) | def get_prev_global_rank(self, parallel_mode: ParallelMode):
    method is_first_rank (line 203) | def is_first_rank(self, parallel_mode: ParallelMode):
    method is_last_rank (line 221) | def is_last_rank(self, parallel_mode: ParallelMode):
    method is_pipeline_first_stage (line 240) | def is_pipeline_first_stage(self, ignore_virtual=False):
    method is_pipeline_last_stage (line 246) | def is_pipeline_last_stage(self, ignore_virtual=False):
    method get_world_size (line 255) | def get_world_size(self, parallel_mode: ParallelMode):
    method _add_world_size (line 271) | def _add_world_size(self, parallel_mode: ParallelMode, world_size: int):
    method get_group (line 285) | def get_group(self, parallel_mode: ParallelMode):
    method _add_group (line 301) | def _add_group(self, parallel_mode: ParallelMode, group: dist.ProcessG...
    method get_cpu_group (line 315) | def get_cpu_group(self, parallel_mode: ParallelMode):
    method _add_cpu_group (line 328) | def _add_cpu_group(self, parallel_mode: ParallelMode, group: dist.Proc...
    method get_ranks_in_group (line 341) | def get_ranks_in_group(self, parallel_mode: ParallelMode):
    method _add_ranks_in_group (line 357) | def _add_ranks_in_group(self, parallel_mode: ParallelMode, ranks: list):
    method init_global_dist (line 371) | def init_global_dist(self, rank: int, world_size: int, backend: str, h...
    method _register_dist (line 391) | def _register_dist(self, local_rank, world_size, process_group, cpu_gr...
    method check_sanity (line 398) | def check_sanity(self):
    method _set_parallel_size_from_config (line 415) | def _set_parallel_size_from_config(self, config: dict, key: str, attr_...
    method init_parallel_groups (line 427) | def init_parallel_groups(self):
    method is_initialized (line 502) | def is_initialized(self, parallel_mode: ParallelMode):
    method destroy (line 514) | def destroy(self):
    method set_device (line 523) | def set_device(self, device_ordinal: int = None):
    method set_seed (line 538) | def set_seed(self, seed: int):
    method set_virtual_pipeline_parallel_size (line 589) | def set_virtual_pipeline_parallel_size(self, size):
    method set_virtual_pipeline_parallel_rank (line 592) | def set_virtual_pipeline_parallel_rank(self, rank):

FILE: colossalai/legacy/context/parallel_mode.py
  class ParallelMode (line 8) | class ParallelMode(Enum):

FILE: colossalai/legacy/context/process_group_initializer/initializer_1d.py
  class Initializer_1D (line 14) | class Initializer_1D(ProcessGroupInitializer):
    method __init__ (line 26) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 30) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_2d.py
  function _check_summa_env_var (line 12) | def _check_summa_env_var(summa_dim):
  class Initializer_2D_Row (line 25) | class Initializer_2D_Row(ProcessGroupInitializer):
    method __init__ (line 39) | def __init__(self, num_group, summa_dim, *args, **kwargs):
    method init_dist_group (line 44) | def init_dist_group(self):
  class Initializer_2D_Col (line 73) | class Initializer_2D_Col(ProcessGroupInitializer):
    method __init__ (line 87) | def __init__(self, num_group, summa_dim, *args, **kwargs):
    method init_dist_group (line 92) | def init_dist_group(self):
  class Initializer_2D (line 123) | class Initializer_2D(ProcessGroupInitializer):
    method __init__ (line 136) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 147) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_2p5d.py
  function _check_tesseract_env_var (line 16) | def _check_tesseract_env_var(tesseract_dim: int, tesseract_dep: int):
  class Initializer_2p5D_ROW (line 36) | class Initializer_2p5D_ROW(ProcessGroupInitializer):
    method __init__ (line 50) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
    method init_dist_group (line 59) | def init_dist_group(self):
  class Initializer_2p5D_Col (line 93) | class Initializer_2p5D_Col(ProcessGroupInitializer):
    method __init__ (line 107) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
    method init_dist_group (line 113) | def init_dist_group(self):
  class Initializer_2p5D_Dep (line 147) | class Initializer_2p5D_Dep(ProcessGroupInitializer):
    method __init__ (line 161) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
    method init_dist_group (line 167) | def init_dist_group(self):
  class Initializer_2p5D_XZ (line 202) | class Initializer_2p5D_XZ(ProcessGroupInitializer):
    method __init__ (line 216) | def __init__(self, tesseract_dim: int, tesseract_dep: int, *args):
    method init_dist_group (line 222) | def init_dist_group(self):
  class Initializer_2p5D (line 257) | class Initializer_2p5D(ProcessGroupInitializer):
    method __init__ (line 271) | def __init__(
    method init_dist_group (line 297) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_3d.py
  function _check_depth_env_var (line 15) | def _check_depth_env_var(depth):
  class Initializer_3D_Input (line 28) | class Initializer_3D_Input(ProcessGroupInitializer):
    method __init__ (line 42) | def __init__(self, num_group: int, depth: int, *args):
    method init_dist_group (line 47) | def init_dist_group(self):
  class Initializer_3D_Weight (line 79) | class Initializer_3D_Weight(ProcessGroupInitializer):
    method __init__ (line 93) | def __init__(self, num_group: int, depth: int, *args):
    method init_dist_group (line 98) | def init_dist_group(self):
  class Initializer_3D_Output (line 130) | class Initializer_3D_Output(ProcessGroupInitializer):
    method __init__ (line 144) | def __init__(self, num_group: int, depth: int, *args):
    method init_dist_group (line 149) | def init_dist_group(self):
  class Initializer_3D_InputxWeight (line 181) | class Initializer_3D_InputxWeight(ProcessGroupInitializer):
    method __init__ (line 195) | def __init__(self, num_group: int, depth: int, *args):
    method init_dist_group (line 200) | def init_dist_group(self):
  class Initializer_3D_OutputxWeight (line 235) | class Initializer_3D_OutputxWeight(ProcessGroupInitializer):
    method __init__ (line 249) | def __init__(self, num_group: int, depth: int, *args):
    method init_dist_group (line 254) | def init_dist_group(self):
  class Initializer_3D (line 290) | class Initializer_3D(ProcessGroupInitializer):
    method __init__ (line 302) | def __init__(self, *args):
    method init_dist_group (line 317) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_data.py
  class Initializer_Data (line 13) | class Initializer_Data(ProcessGroupInitializer):
    method __init__ (line 25) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 29) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_model.py
  class Initializer_Model (line 13) | class Initializer_Model(ProcessGroupInitializer):
    method __init__ (line 26) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 31) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_pipeline.py
  class Initializer_Pipeline (line 13) | class Initializer_Pipeline(ProcessGroupInitializer):
    method __init__ (line 25) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 30) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_sequence.py
  class Initializer_Sequence_DP (line 13) | class Initializer_Sequence_DP(ProcessGroupInitializer):
    method __init__ (line 28) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 33) | def init_dist_group(self):
  class Initializer_Sequence (line 62) | class Initializer_Sequence(ProcessGroupInitializer):
    method __init__ (line 74) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 80) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/initializer_tensor.py
  class Initializer_Tensor (line 13) | class Initializer_Tensor(ProcessGroupInitializer):
    method __init__ (line 25) | def __init__(self, *args, **kwargs):
    method init_dist_group (line 29) | def init_dist_group(self):

FILE: colossalai/legacy/context/process_group_initializer/process_group_initializer.py
  class ProcessGroupInitializer (line 9) | class ProcessGroupInitializer(ABC):
    method __init__ (line 21) | def __init__(
    method init_dist_group (line 39) | def init_dist_group(self):

FILE: colossalai/legacy/context/random/_helper.py
  function get_seeds (line 16) | def get_seeds():
  function get_states (line 25) | def get_states(copy=False):
  function get_current_mode (line 43) | def get_current_mode():
  function add_seed (line 52) | def add_seed(parallel_mode: ParallelMode, seed: int, overwrite: bool = F...
  function set_mode (line 69) | def set_mode(parallel_mode: ParallelMode):
  function set_seed_states (line 82) | def set_seed_states(parallel_mode: ParallelMode, state: Tensor):
  function sync_states (line 95) | def sync_states():
  function seed (line 102) | def seed(parallel_mode: ParallelMode):
  function with_seed (line 123) | def with_seed(func, parallel_mode: ParallelMode):
  function moe_set_seed (line 162) | def moe_set_seed(seed):
  function reset_seeds (line 172) | def reset_seeds():

FILE: colossalai/legacy/context/random/seed_manager.py
  class SeedManager (line 10) | class SeedManager:
    method __init__ (line 18) | def __init__(self):
    method current_mode (line 24) | def current_mode(self):
    method seeds (line 28) | def seeds(self):
    method seed_states (line 32) | def seed_states(self):
    method set_state (line 35) | def set_state(self, parallel_mode: ParallelMode, state: Tensor):
    method set_mode (line 48) | def set_mode(self, parallel_mode: ParallelMode):
    method add_seed (line 62) | def add_seed(self, parallel_mode: ParallelMode, seed: int, overwrite: ...
    method reset (line 86) | def reset(self):

FILE: colossalai/legacy/engine/_base_engine.py
  class Engine (line 23) | class Engine:
    method __init__ (line 62) | def __init__(
    method ophooks (line 110) | def ophooks(self):
    method model (line 115) | def model(self):
    method optimizer (line 120) | def optimizer(self):
    method criterion (line 125) | def criterion(self):
    method schedule (line 130) | def schedule(self):
    method uses_pipeline (line 135) | def uses_pipeline(self):
    method add_hook (line 139) | def add_hook(self, ophook: Type[BaseOpHook]) -> None:
    method remove_hook (line 149) | def remove_hook(self, ophook: Type[BaseOpHook]) -> None:
    method zero_grad (line 154) | def zero_grad(self):
    method step (line 158) | def step(self):
    method backward (line 164) | def backward(self, loss: Tensor):
    method backward_by_grad (line 175) | def backward_by_grad(self, tensor, grad):
    method __call__ (line 187) | def __call__(self, *args, **kwargs):
    method _all_reduce_gradients (line 195) | def _all_reduce_gradients(self):
    method execute_schedule (line 200) | def execute_schedule(self, data_iter: Iterable, **kwargs):
    method train (line 210) | def train(self):
    method eval (line 215) | def eval(self):

FILE: colossalai/legacy/engine/gradient_accumulation/__init__.py
  function accumulate_gradient (line 25) | def accumulate_gradient(

FILE: colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py
  class GradAccumOptimizer (line 18) | class GradAccumOptimizer(OptimizerWrapper):
    method __init__ (line 29) | def __init__(self, optim: Optimizer, accumulate_size: int, model: nn.M...
    method zero_grad (line 38) | def zero_grad(self, *args, **kwargs) -> None:
    method step (line 50) | def step(self, *args, **kwargs) -> None:
    method clip_grad_norm (line 65) | def clip_grad_norm(self, model: nn.Module, max_norm: float) -> None:
    method backward (line 79) | def backward(self, loss: Tensor) -> None:
    method backward_by_grad (line 97) | def backward_by_grad(self, tensor: Tensor, grad: Tensor) -> None:
  class GradAccumDataloader (line 115) | class GradAccumDataloader:
    method __init__ (line 130) | def __init__(self, dataloader: Iterable, accumulate_size: int) -> None:
    method __getattr__ (line 135) | def __getattr__(self, __name: str) -> Any:
    method __len__ (line 138) | def __len__(self) -> int:
    method __iter__ (line 141) | def __iter__(self) -> Iterable:
    method __next__ (line 146) | def __next__(self) -> Union[Tensor, Tuple[Tensor]]:
  class GradAccumLrSchedulerByStep (line 164) | class GradAccumLrSchedulerByStep(_LRScheduler):
    method __init__ (line 174) | def __init__(self, lr_scheduler: _LRScheduler, accumulate_size: int) -...
    method compute_effective_steps_per_epoch (line 180) | def compute_effective_steps_per_epoch(dataloader: Iterable, accumulate...
    method __getattr__ (line 193) | def __getattr__(self, __name: str) -> Any:
    method step (line 196) | def step(self, *args, **kwargs) -> None:
    method get_lr (line 211) | def get_lr(self) -> Tensor:
    method get_last_lr (line 221) | def get_last_lr(self) -> Tensor:
    method print_lr (line 231) | def print_lr(self, *args, **kwargs) -> None:
    method state_dict (line 241) | def state_dict(self) -> dict:
    method load_state_dict (line 250) | def load_state_dict(self, state_dict: dict) -> None:
  class GradAccumGradientHandler (line 260) | class GradAccumGradientHandler:
    method __init__ (line 274) | def __init__(self, grad_handler: BaseGradientHandler, accumulate_size:...
    method handle_gradient (line 282) | def handle_gradient(self) -> None:

FILE: colossalai/legacy/engine/gradient_handler/_base_gradient_handler.py
  class BaseGradientHandler (line 7) | class BaseGradientHandler(ABC):
    method __init__ (line 16) | def __init__(self, model, optimizer):
    method handle_gradient (line 21) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/_data_parallel_gradient_handler.py
  class DataParallelGradientHandler (line 10) | class DataParallelGradientHandler(BaseGradientHandler):
    method handle_gradient (line 22) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/_moe_gradient_handler.py
  class MoeGradientHandler (line 12) | class MoeGradientHandler(BaseGradientHandler):
    method __init__ (line 24) | def __init__(self, model, optimizer=None):
    method handle_gradient (line 27) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/_pipeline_parallel_gradient_handler.py
  class PipelineSharedModuleGradientHandler (line 16) | class PipelineSharedModuleGradientHandler(BaseGradientHandler):
    method handle_gradient (line 28) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/_sequence_parallel_gradient_handler.py
  class SequenceParallelGradientHandler (line 10) | class SequenceParallelGradientHandler(BaseGradientHandler):
    method handle_gradient (line 22) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/_zero_gradient_handler.py
  class ZeROGradientHandler (line 7) | class ZeROGradientHandler(BaseGradientHandler):
    method handle_gradient (line 18) | def handle_gradient(self):

FILE: colossalai/legacy/engine/gradient_handler/utils.py
  function bucket_allreduce (line 8) | def bucket_allreduce(param_list: Iterable[nn.Parameter], group=None):

FILE: colossalai/legacy/engine/schedule/_base_schedule.py
  class BaseSchedule (line 13) | class BaseSchedule(ABC):
    method __init__ (line 24) | def __init__(self, data_process_func: Callable = None):
    method _move_tensor (line 29) | def _move_tensor(element):
    method _move_to_device (line 35) | def _move_to_device(self, data):
    method _get_batch_size (line 54) | def _get_batch_size(self, data):
    method load_batch (line 64) | def load_batch(self, data_iter, to_gpu=True):
    method pre_processing (line 84) | def pre_processing(self, engine):
    method forward_backward_step (line 88) | def forward_backward_step(
    method _call_engine (line 107) | def _call_engine(engine, inputs):
    method _call_engine_criterion (line 120) | def _call_engine_criterion(engine, outputs, labels):

FILE: colossalai/legacy/engine/schedule/_non_pipeline_schedule.py
  class NonPipelineSchedule (line 14) | class NonPipelineSchedule(BaseSchedule):
    method __init__ (line 34) | def __init__(self, data_process_func: Callable = None):
    method forward_backward_step (line 48) | def forward_backward_step(

FILE: colossalai/legacy/engine/schedule/_pipeline_schedule.py
  function get_tensor_shape (line 20) | def get_tensor_shape():
  function pack_return_tensors (line 52) | def pack_return_tensors(return_tensors):
  class PipelineSchedule (line 71) | class PipelineSchedule(BaseSchedule):
    method __init__ (line 98) | def __init__(
    method load_batch (line 137) | def load_batch(self, data_iter):
    method _get_data_slice (line 145) | def _get_data_slice(self, data, offset):
    method load_micro_batch (line 163) | def load_micro_batch(self):
    method pre_processing (line 168) | def pre_processing(self, engine):
    method _call_engine (line 184) | def _call_engine(model, data):
    method _get_actual_forward_func (line 207) | def _get_actual_forward_func(self, module):
    method _get_data_label_for_current_step (line 216) | def _get_data_label_for_current_step(self, stage_output, micro_batch_d...
    method _forward_step (line 242) | def _forward_step(self, engine, input_obj, return_tensors, return_outp...
    method _backward_step (line 279) | def _backward_step(self, engine, input_obj, output_obj, output_obj_grad):
    method forward_backward_step (line 321) | def forward_backward_step(self, engine, data_iter, forward_only=False,...
  class InterleavedPipelineSchedule (line 457) | class InterleavedPipelineSchedule(PipelineSchedule):
    method __init__ (line 458) | def __init__(
    method pre_processing (line 495) | def pre_processing(self, engine):
    method load_batch (line 509) | def load_batch(self, data_iter):
    method load_micro_batch (line 514) | def load_micro_batch(self, model_chunk_id):
    method _forward_step (line 519) | def _forward_step(
    method forward_backward_step (line 560) | def forward_backward_step(self, engine, data_iter, forward_only=False,...

FILE: colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
  function pack_return_tensors (line 17) | def pack_return_tensors(return_tensors):
  class PipelineScheduleV2 (line 36) | class PipelineScheduleV2(PipelineSchedule):
    method forward_backward_step (line 62) | def forward_backward_step(

FILE: colossalai/legacy/global_variables.py
  class TensorParallelEnv (line 4) | class TensorParallelEnv(object):
    method __new__ (line 7) | def __new__(cls, *args, **kwargs):
    method __init__ (line 12) | def __init__(self, *args, **kwargs):
    method load (line 15) | def load(
    method save (line 43) | def save(self):

FILE: colossalai/legacy/inference/async_engine.py
  class RequestTracker (line 9) | class RequestTracker:
    method __init__ (line 14) | def __init__(self) -> None:
    method __contains__ (line 19) | def __contains__(self, item):
    method init_event (line 22) | def init_event(self):
    method add_request (line 25) | def add_request(self, request_id: str):
    method add_stop (line 31) | def add_stop(self):
    method process_request_output (line 38) | def process_request_output(self, request_output: RequestOutput) -> None:
    method wait_for_new_requests (line 42) | async def wait_for_new_requests(self):
    method __aiter__ (line 45) | def __aiter__(self):
    method __anext__ (line 48) | async def __anext__(self) -> RequestOutput:
  class Async_Engine (line 56) | class Async_Engine:
    method __init__ (line 64) | def __init__(
    method _step (line 75) | def _step(self):
    method abort_request (line 85) | def abort_request(self, request_id: str):
    method _has_requests_in_progress (line 88) | def _has_requests_in_progress(self):
    method run_loop_fwd (line 91) | async def run_loop_fwd(self):
    method is_running (line 100) | def is_running(self):
    method start_background_loop (line 103) | def start_background_loop(self):
    method add_request (line 112) | async def add_request(self, request_id: str, prompt: str, sampling_par...
    method generate (line 116) | async def generate(self, request_id: str, prompt: str, sampling_params...

FILE: colossalai/legacy/inference/async_manager.py
  class Async_DynamicBatchManager (line 8) | class Async_DynamicBatchManager(DynamicBatchManager):
    method __init__ (line 9) | def __init__(
    method _step (line 47) | def _step(self):
    method _prefill_batch (line 88) | def _prefill_batch(self, batch):
    method _decode_batch (line 103) | def _decode_batch(self, batch: Batch):
    method _handle_finish_req (line 114) | def _handle_finish_req(self, batch: Batch, has_new_finished_req):
    method _output_process (line 124) | def _output_process(self, finished_reqs: List[Req]):
  function start_dynamic_batching (line 135) | def start_dynamic_batching(args, tp_engine, waiting_req_list):

FILE: colossalai/legacy/inference/dynamic_batching/get_tokenizer.py
  function get_tokenizer (line 12) | def get_tokenizer(

FILE: colossalai/legacy/inference/dynamic_batching/infer_batch.py
  class InferSamplingParams (line 14) | class InferSamplingParams:
    method __init__ (line 15) | def __init__(
  class InferBatch (line 37) | class InferBatch:
    method init_batch (line 60) | def init_batch(
    method free_self (line 131) | def free_self(self) -> None:
    method filter (line 148) | def filter(self, request_ids: List[int]) -> "InferBatch":
    method merge (line 229) | def merge(cls, batch1, batch2) -> "InferBatch":
    method __len__ (line 298) | def __len__(self):
    method get_post_sample_tensors (line 301) | def get_post_sample_tensors(self) -> Tuple[torch.Tensor, torch.Tensor,...

FILE: colossalai/legacy/inference/dynamic_batching/io_struct.py
  class Req (line 8) | class Req:
    method __init__ (line 9) | def __init__(self, request_id, prompt_ids, sample_params: SamplingPara...
    method to_rpc_obj (line 21) | def to_rpc_obj(self):
    method stop_sequences_matched (line 29) | def stop_sequences_matched(self):
    method __repr__ (line 42) | def __repr__(self):
  class Batch (line 46) | class Batch:
    method __init__ (line 47) | def __init__(self, batch_id, reqs: List[Req]):
    method input_tokens (line 52) | def input_tokens(self):
    method calcu_max_tokens (line 58) | def calcu_max_tokens(self):
    method calcu_used_tokens (line 64) | def calcu_used_tokens(self):
    method mark_finished_req (line 70) | def mark_finished_req(self, eos_id, engine_max_output_len):
    method filter_finished (line 87) | def filter_finished(self) -> List[Req]:
    method is_clear (line 103) | def is_clear(self):
    method merge (line 106) | def merge(self, mini_batch):
    method __repr__ (line 112) | def __repr__(self):
    method __len__ (line 115) | def __len__(self):
  class BatchTokenIdOut (line 119) | class BatchTokenIdOut:
    method __init__ (line 120) | def __init__(self):
  class BatchStrOut (line 126) | class BatchStrOut:
    method __init__ (line 127) | def __init__(self):
  class AbortReq (line 133) | class AbortReq:
    method __init__ (line 134) | def __init__(self, req_id):
  class RequestOutput (line 138) | class RequestOutput:
    method __init__ (line 148) | def __init__(
    method __repr__ (line 160) | def __repr__(self) -> str:

FILE: colossalai/legacy/inference/dynamic_batching/ray_dist_init.py
  function log_cuda_info (line 23) | def log_cuda_info(scope_name: str):
  class Worker (line 37) | class Worker:
    method __init__ (line 38) | def __init__(
    method setup (line 55) | def setup(self, world_size, rank, port):
    method add_input (line 87) | def add_input(self, request_id: str, prompt: str, sampling_params: Sam...
    method abort (line 90) | def abort(self, request_id: str):
    method step (line 93) | def step(self) -> List[RequestOutput]:
    method add_req (line 96) | def add_req(self, prompt_ids: List[int], sampling_params: SamplingPara...
    method is_running (line 99) | def is_running(self):
  class Driver (line 103) | class Driver:
    method __init__ (line 104) | def __init__(self, router_config: RooterArgsClass, engine_config: Engi...
    method add_input (line 138) | def add_input(self, request_id: str, prompt: str, sampling_params: Sam...
    method abort (line 141) | def abort(self, request_id: str):
    method step (line 144) | def step(self):
    method add_req (line 149) | def add_req(self, request_id: str, prompt_ids: List[int], sampling_par...
    method is_running (line 152) | def is_running(self):

FILE: colossalai/legacy/inference/dynamic_batching/ray_init_config.py
  class EngineArgsClass (line 9) | class EngineArgsClass(BaseModel):
  class RooterArgsClass (line 19) | class RooterArgsClass(BaseModel):
  class RayInitConfig (line 30) | class RayInitConfig(BaseModel):
    method from_yaml_path (line 37) | def from_yaml_path(cls, path: str):

FILE: colossalai/legacy/inference/dynamic_batching/req_queue.py
  class ReqQueue (line 11) | class ReqQueue:
    method __init__ (line 12) | def __init__(self, max_total_tokens, batch_max_tokens, running_max_req...
    method append (line 19) | def append(self, req):
    method _init_cache_list (line 23) | def _init_cache_list(self, current_batch: Batch):
    method _can_add_new_req (line 33) | def _can_add_new_req(self, req):
    method generate_new_batch (line 47) | def generate_new_batch(self, current_batch: Batch = None):
    method __len__ (line 72) | def __len__(self):

FILE: colossalai/legacy/inference/dynamic_batching/sampling_params.py
  class SamplingParams (line 9) | class SamplingParams:
    method __init__ (line 10) | def __init__(
    method verify (line 42) | def verify(self):
    method stop_sentences_to_token_ids (line 57) | def stop_sentences_to_token_ids(self, tokenizer):
    method to_dict (line 73) | def to_dict(self):

FILE: colossalai/legacy/inference/dynamic_batching/stats.py
  class Stats (line 6) | class Stats:
    method __init__ (line 7) | def __init__(self, log_status, log_stats_interval) -> None:
    method count_prompt_tokens (line 16) | def count_prompt_tokens(self, run_batch):
    method count_output_tokens (line 23) | def count_output_tokens(self, run_batch):
    method print_stats (line 30) | def print_stats(self):

FILE: colossalai/legacy/inference/hybridengine/engine.py
  class CaiInferEngine (line 22) | class CaiInferEngine:
    method __init__ (line 60) | def __init__(
    method inference (line 125) | def inference(self, input_list):
    method _shardformer (line 145) | def _shardformer(self, model, model_policy, stage_manager, tp_group):
    method _init_manager (line 160) | def _init_manager(self, model, max_batch_size: int, max_input_len: int...

FILE: colossalai/legacy/inference/hybridengine/modeling/_utils.py
  function copy_kv_to_mem_cache (line 12) | def copy_kv_to_mem_cache(layer_id, key_buffer, value_buffer, context_mem...
  function init_to_get_rotary (line 26) | def init_to_get_rotary(self, base=10000, use_elem=False):

FILE: colossalai/legacy/inference/hybridengine/modeling/llama.py
  function rotate_half (line 39) | def rotate_half(x):
  function apply_rotary_pos_emb (line 46) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
  function llama_triton_context_attention (line 58) | def llama_triton_context_attention(
  function llama_triton_token_attention (line 98) | def llama_triton_token_attention(query_states, attn_output, infer_state,...
  class LlamaInferenceForwards (line 127) | class LlamaInferenceForwards:
    method llama_causal_lm_forward (line 134) | def llama_causal_lm_forward(
    method llama_model_forward (line 196) | def llama_model_forward(
    method llama_decoder_layer_forward (line 342) | def llama_decoder_layer_forward(
    method llama_flash_attn_kvcache_forward (line 385) | def llama_flash_attn_kvcache_forward(

FILE: colossalai/legacy/inference/hybridengine/polices/llama.py
  function get_triton_rmsnorm_forward (line 31) | def get_triton_rmsnorm_forward():
  class LlamaModelInferPolicy (line 42) | class LlamaModelInferPolicy(LlamaForCausalLMPolicy):
    method __init__ (line 43) | def __init__(self) -> None:
    method module_policy (line 46) | def module_policy(self):
    method postprocess (line 132) | def postprocess(self):
    method get_held_layers (line 136) | def get_held_layers(self) -> List[Module]:

FILE: colossalai/legacy/inference/manager.py
  class DynamicBatchManager (line 15) | class DynamicBatchManager:
    method __init__ (line 16) | def __init__(
    method add_req (line 64) | def add_req(self, request_id: str, prompt_ids: List[int], sampling_par...
    method add_input (line 77) | def add_input(self, request_id, prompts, sampling_params):
    method abort (line 89) | def abort(self, request_id):
    method loop_for_fwd (line 101) | def loop_for_fwd(self):
    method _step (line 123) | def _step(self):
    method _init_batch (line 162) | def _init_batch(self, batch: Batch, dtype="fp16"):
    method _prefill_batch (line 184) | def _prefill_batch(self, batch):
    method _decode_batch (line 199) | def _decode_batch(self, batch: Batch):
    method _filter_batch (line 209) | def _filter_batch(self, batch: Batch):
    method _merge_batch (line 217) | def _merge_batch(self, batch1, batch2):
    method _remove_batch (line 229) | def _remove_batch(self, batch):
    method _handle_finish_req (line 237) | def _handle_finish_req(self, batch: Batch, has_new_finished_req):
    method _filter_running_batch (line 246) | def _filter_running_batch(self):
    method _add_token_id_to_req (line 250) | def _add_token_id_to_req(self, batch: Batch, req_ans):
    method _output_process (line 257) | def _output_process(self, finished_reqs: List[Req]):
    method clean_up (line 265) | def clean_up(self):
    method generate (line 269) | def generate(self, request_id, prompts, sampling_params):
    method is_running (line 276) | def is_running(self):
  function start_dynamic_batching (line 280) | def start_dynamic_batching(args, tp_engine, waiting_req_list):

FILE: colossalai/legacy/inference/pipeline/benchmark/benchmark.py
  function data_gen (line 18) | def data_gen(batch_size: int = 4, seq_len: int = 512):
  function print_details_info (line 30) | def print_details_info(timestamps, model_config, args, whole_end2end):

FILE: colossalai/legacy/inference/pipeline/microbatch_manager.py
  class Status (line 12) | class Status(Enum):
  class MicroBatchDescription (line 19) | class MicroBatchDescription:
    method __init__ (line 30) | def __init__(
    method update (line 44) | def update(self, *args, **kwargs):
    method state (line 48) | def state(self):
    method cur_length (line 63) | def cur_length(self):
  class HeadMicroBatchDescription (line 70) | class HeadMicroBatchDescription(MicroBatchDescription):
    method __init__ (line 82) | def __init__(
    method update (line 96) | def update(self, new_token: torch.Tensor = None):
    method _update_newtokens (line 102) | def _update_newtokens(self, new_token: torch.Tensor):
    method _update_attnmask (line 108) | def _update_attnmask(self):
    method cur_length (line 114) | def cur_length(self):
  class BodyMicroBatchDescription (line 125) | class BodyMicroBatchDescription(MicroBatchDescription):
    method __init__ (line 133) | def __init__(
    method cur_length (line 143) | def cur_length(self):
  class MicroBatchManager (line 151) | class MicroBatchManager:
    method __init__ (line 162) | def __init__(
    method add_description (line 181) | def add_description(self, inputs_dict: Dict[str, torch.Tensor]):
    method step (line 191) | def step(self, new_token: torch.Tensor = None):
    method export_new_tokens (line 206) | def export_new_tokens(self):
    method is_micro_batch_done (line 212) | def is_micro_batch_done(self):
    method clear (line 220) | def clear(self):
    method next (line 225) | def next(self):
    method _remove_description (line 228) | def _remove_description(self):
    method cur_description (line 232) | def cur_description(self) -> MicroBatchDescription:
    method cur_infer_state (line 236) | def cur_infer_state(self):
    method cur_state (line 242) | def cur_state(self):

FILE: colossalai/legacy/inference/quant/gptq/cai_gptq/cai_quant_linear.py
  class CaiQuantLinear (line 29) | class CaiQuantLinear(nn.Module):
    method __init__ (line 30) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_...
    method pack (line 73) | def pack(self, linear, scales, zeros, g_idx=None):
    method init_q4 (line 145) | def init_q4(self):
    method forward (line 174) | def forward(self, x):
  function split_column_copy (line 202) | def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, spl...
  function split_row_copy (line 231) | def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
  class RowCaiQuantLinear (line 258) | class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
    method __init__ (line 259) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_...
    method from_native_module (line 266) | def from_native_module(
    method forward (line 303) | def forward(self, x):
  class ColCaiQuantLinear (line 312) | class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
    method __init__ (line 313) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_...
    method from_native_module (line 320) | def from_native_module(

FILE: colossalai/legacy/inference/quant/gptq/cai_gptq/gptq_op.py
  class CaiGPTQLinearOp (line 6) | class CaiGPTQLinearOp(torch.nn.Module):
    method __init__ (line 7) | def __init__(self, gptq_group_size, gptq_quant_bits):
    method forward (line 14) | def forward(

FILE: colossalai/legacy/inference/quant/smoothquant/models/base_model.py
  class BaseSmoothForCausalLM (line 30) | class BaseSmoothForCausalLM(nn.Module, PushToHubMixin):
    method __init__ (line 33) | def __init__(self, model: PreTrainedModel, quantized: bool = False):
    method quantized (line 44) | def quantized(self):
    method init_cache_manager (line 47) | def init_cache_manager(self, max_total_token_num=2048):
    method init_batch_state (line 56) | def init_batch_state(self, max_output_len=256, **kwargs):
    method quantize (line 97) | def quantize(
    method forward (line 104) | def forward(self, *args, **kwargs):
    method generate (line 107) | def generate(self, **kwargs):
    method prepare_inputs_for_generation (line 117) | def prepare_inputs_for_generation(self, *args, **kwargs):
    method collect_act_scales (line 121) | def collect_act_scales(self, model, tokenizer, dataset, device, num_sa...
    method collect_act_dict (line 126) | def collect_act_dict(self, model, tokenizer, dataset, act_dict, device...
    method get_act_scales (line 135) | def get_act_scales(self, model, tokenizer, dataset, num_samples=512, s...
    method smooth_ln_fcs (line 168) | def smooth_ln_fcs(self, ln, fcs, act_scales, alpha=0.5):
    method create_quantized_model (line 190) | def create_quantized_model(model):
    method save_quantized (line 194) | def save_quantized(
    method save_pretrained (line 255) | def save_pretrained(
    method from_pretrained (line 268) | def from_pretrained(
    method from_quantized (line 363) | def from_quantized(
    method __getattr__ (line 480) | def __getattr__(self, item):

FILE: colossalai/legacy/inference/quant/smoothquant/models/linear.py
  class W8A8BFP32O32LinearSiLU (line 17) | class W8A8BFP32O32LinearSiLU(torch.nn.Module):
    method __init__ (line 18) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0):
    method to (line 39) | def to(self, *args, **kwargs):
    method forward (line 46) | def forward(self, x):
    method from_float (line 54) | def from_float(module: torch.nn.Linear, input_scale):
  class W8A8B8O8Linear (line 66) | class W8A8B8O8Linear(torch.nn.Module):
    method __init__ (line 68) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0):
    method to (line 90) | def to(self, *args, **kwargs):
    method forward (line 97) | def forward(self, x):
    method from_float (line 105) | def from_float(module: torch.nn.Linear, input_scale, output_scale):
  class W8A8BFP32OFP32Linear (line 122) | class W8A8BFP32OFP32Linear(torch.nn.Module):
    method __init__ (line 124) | def __init__(self, in_features, out_features, alpha=1.0, beta=1.0):
    method _apply (line 145) | def _apply(self, fn):
    method to (line 151) | def to(self, *args, **kwargs):
    method forward (line 159) | def forward(self, x):
    method from_float (line 167) | def from_float(module: torch.nn.Linear, input_scale):

FILE: colossalai/legacy/inference/quant/smoothquant/models/llama.py
  class LLamaSmoothquantAttention (line 38) | class LLamaSmoothquantAttention(nn.Module):
    method __init__ (line 39) | def __init__(
    method _init_rope (line 74) | def _init_rope(self):
    method pack (line 82) | def pack(
    method _shape (line 112) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 116) | def forward(
  class LlamaLayerNormQ (line 235) | class LlamaLayerNormQ(torch.nn.Module):
    method __init__ (line 236) | def __init__(self, dim, eps=1e-5):
    method forward (line 242) | def forward(self, x):
    method from_float (line 248) | def from_float(module: torch.nn.LayerNorm, output_scale: float):
  class LlamaSmoothquantMLP (line 255) | class LlamaSmoothquantMLP(nn.Module):
    method __init__ (line 256) | def __init__(self, intermediate_size, hidden_size):
    method pack (line 264) | def pack(
    method forward (line 281) | def forward(
  class LlamaSmoothquantDecoderLayer (line 295) | class LlamaSmoothquantDecoderLayer(nn.Module):
    method __init__ (line 296) | def __init__(self, config: LlamaConfig):
    method pack (line 307) | def pack(
    method forward (line 348) | def forward(
  class LlamaApplyRotary (line 401) | class LlamaApplyRotary(nn.Module):
    method __init__ (line 402) | def __init__(self):
    method forward (line 405) | def forward(self, x, cos, sin, position_ids):
  function llama_decoder_layer_forward (line 417) | def llama_decoder_layer_forward(
  function init_to_get_rotary (line 512) | def init_to_get_rotary(config, base=10000, use_elem=False):
  function llama_model_forward (line 559) | def llama_model_forward(
  class SmoothLlamaForCausalLM (line 717) | class SmoothLlamaForCausalLM(BaseSmoothForCausalLM):
    method __init__ (line 720) | def __init__(self, model: PreTrainedModel, quantized: bool = False):
    method get_act_dict (line 724) | def get_act_dict(
    method smooth_fn (line 771) | def smooth_fn(self, scales, alpha=0.5):
    method create_quantized_model (line 780) | def create_quantized_model(model):
    method quantized (line 790) | def quantized(

FILE: colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py
  class GenConfigArgs (line 22) | class GenConfigArgs(BaseModel):
  function log_cuda_info (line 32) | def log_cuda_info(scope_name: str):
  class Worker (line 46) | class Worker:
    method __init__ (line 47) | def __init__(self, model_path: str, tp_size: int, max_batch_size: int,...
    method setup (line 55) | def setup(self, world_size, rank, port):
    method generate (line 81) | def generate(self, text: Union[str, List[str]]) -> str:
  class Driver (line 106) | class Driver:
    method __init__ (line 107) | def __init__(self, config: GenConfigArgs):
    method batch_generate (line 138) | async def batch_generate(self, requests: List[str]):
    method __call__ (line 144) | async def __call__(self, request: starlette.requests.Request) -> Any:
  function app (line 148) | def app(args: GenConfigArgs) -> Application:

FILE: colossalai/legacy/inference/serving/ray_serve/send_request.py
  function send_query (line 6) | def send_query(text):

FILE: colossalai/legacy/inference/serving/ray_serve/send_requests.py
  function send_query (line 6) | def send_query(text):

FILE: colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py
  class ColossalInferenceHandler (line 21) | class ColossalInferenceHandler(BaseHandler, ABC):
    method __init__ (line 26) | def __init__(self):
    method initialize (line 35) | def initialize(self, ctx):
    method preprocess (line 114) | def preprocess(self, requests):
    method inference (line 156) | def inference(self, input_batch):
    method postprocess (line 188) | def postprocess(self, inference_output):

FILE: colossalai/legacy/inference/tensor_parallel/batch_infer_state.py
  class BatchInferState (line 12) | class BatchInferState:
    method total_token_num (line 39) | def total_token_num(self):
    method set_cache_manager (line 44) | def set_cache_manager(self, manager: MemoryManager):
    method init_block_loc (line 49) | def init_block_loc(
    method init_from_batch (line 63) | def init_from_batch(

FILE: colossalai/legacy/inference/tensor_parallel/engine.py
  class TPInferEngine (line 31) | class TPInferEngine:
    method __init__ (line 52) | def __init__(
    method _init_manager (line 108) | def _init_manager(self) -> None:
    method _post_init_gptq_buffer (line 130) | def _post_init_gptq_buffer(self, model: nn.Module) -> None:
    method _optimize_model (line 178) | def _optimize_model(self, model: nn.Module) -> None:
    method _prepare_with_shard_config (line 189) | def _prepare_with_shard_config(self, shard_config: Optional[ShardConfi...
    method _shard_model_by (line 217) | def _shard_model_by(self, shardformer: ShardFormer, model: nn.Module) ...
    method supported_models (line 234) | def supported_models(self) -> List[str]:
    method generate (line 237) | def generate(self, input_tokens: Union[BatchEncoding, dict, list, torc...
    method prepare_batch_state (line 258) | def prepare_batch_state(self, inputs) -> BatchInferState:
    method _generate_by_set_infer_state (line 329) | def _generate_by_set_infer_state(self, input_tokens, **generate_kwargs...
    method _generate_by_pass_infer_state (line 369) | def _generate_by_pass_infer_state(
    method _update_batch_state (line 383) | def _update_batch_state(self, infer_state: Optional[BatchInferState]) ...
    method forward (line 390) | def forward(self, batch_id, is_prefill):
    method _prefill_batch (line 461) | def _prefill_batch(self, batch_id):
    method _decode_batch (line 465) | def _decode_batch(self, batch_id):
    method add_request (line 477) | def add_request():

FILE: colossalai/legacy/inference/tensor_parallel/kvcache_manager.py
  class MemoryManager (line 12) | class MemoryManager:
    method __init__ (line 25) | def __init__(
    method _init_mem_states (line 40) | def _init_mem_states(self, size, device):
    method _init_kv_buffers (line 46) | def _init_kv_buffers(self, size, device, dtype, head_num, head_dim, la...
    method alloc (line 56) | def alloc(self, required_size):
    method alloc_contiguous (line 69) | def alloc_contiguous(self, required_size):
    method free (line 96) | def free(self, free_index):
    method free_all (line 102) | def free_all(self):

FILE: colossalai/legacy/inference/tensor_parallel/modeling/_utils.py
  function copy_kv_to_mem_cache (line 12) | def copy_kv_to_mem_cache(layer_id, key_buffer, value_buffer, context_mem...
  function init_to_get_rotary (line 26) | def init_to_get_rotary(self, base=10000, use_elem=False):

FILE: colossalai/legacy/inference/tensor_parallel/modeling/bloom.py
  function generate_alibi (line 32) | def generate_alibi(n_head, dtype=torch.float16):
  class BloomInferenceForwards (line 60) | class BloomInferenceForwards:
    method bloom_model_forward (line 70) | def bloom_model_forward(
    method bloom_for_causal_lm_forward (line 266) | def bloom_for_causal_lm_forward(
    method bloom_for_causal_lm_prepare_inputs_for_generation (line 345) | def bloom_for_causal_lm_prepare_inputs_for_generation(
    method bloom_block_forward (line 378) | def bloom_block_forward(
    method bloom_attention_forward (line 436) | def bloom_attention_forward(

FILE: colossalai/legacy/inference/tensor_parallel/modeling/chatglm2.py
  function _init_to_get_rotary (line 34) | def _init_to_get_rotary(self, base=10000):
  function get_masks (line 68) | def get_masks(self, input_ids, past_length, padding_mask=None):
  class ChatGLM2InferenceForwards (line 90) | class ChatGLM2InferenceForwards:
    method chatglm_for_conditional_generation_forward (line 97) | def chatglm_for_conditional_generation_forward(
    method chatglm_model_forward (line 218) | def chatglm_model_forward(
    method chatglm_encoder_forward (line 298) | def chatglm_encoder_forward(
    method chatglm_glmblock_forward (line 344) | def chatglm_glmblock_forward(
    method chatglm_flash_attn_kvcache_forward (line 387) | def chatglm_flash_attn_kvcache_forward(

FILE: colossalai/legacy/inference/tensor_parallel/modeling/llama.py
  function rotate_half (line 34) | def rotate_half(x):
  function apply_rotary_pos_emb (line 41) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
  function llama_triton_context_attention (line 53) | def llama_triton_context_attention(
  function llama_triton_token_attention (line 81) | def llama_triton_token_attention(query_states, attn_output, infer_state,...
  class LlamaInferenceForwards (line 111) | class LlamaInferenceForwards:
    method llama_model_forward (line 118) | def llama_model_forward(
    method llama_decoder_layer_forward (line 262) | def llama_decoder_layer_forward(
    method llama_flash_attn_kvcache_forward (line 305) | def llama_flash_attn_kvcache_forward(

FILE: colossalai/legacy/inference/tensor_parallel/policies/bloom.py
  function get_triton_layernorm_forward (line 21) | def get_triton_layernorm_forward():
  class BloomModelInferPolicy (line 32) | class BloomModelInferPolicy(BloomForCausalLMPolicy):
    method __init__ (line 33) | def __init__(self) -> None:
    method module_policy (line 36) | def module_policy(self):

FILE: colossalai/legacy/inference/tensor_parallel/policies/chatglm2.py
  class ChatGLM2InferPolicy (line 24) | class ChatGLM2InferPolicy(ChatGLMModelPolicy):
    method __init__ (line 25) | def __init__(self) -> None:
    method module_policy (line 28) | def module_policy(self):
    method postprocess (line 58) | def postprocess(self):
  class ChatGLM2ForConditionalGenerationInferPolicy (line 63) | class ChatGLM2ForConditionalGenerationInferPolicy(ChatGLM2InferPolicy):
    method __init__ (line 64) | def __init__(self) -> None:
    method module_policy (line 67) | def module_policy(self):
    method postprocess (line 76) | def postprocess(self):

FILE: colossalai/legacy/inference/tensor_parallel/policies/llama.py
  function get_triton_rmsnorm_forward (line 23) | def get_triton_rmsnorm_forward():
  class LlamaModelInferPolicy (line 34) | class LlamaModelInferPolicy(LlamaForCausalLMPolicy):
    method __init__ (line 35) | def __init__(self) -> None:
    method module_policy (line 38) | def module_policy(self):
    method postprocess (line 119) | def postprocess(self):

FILE: colossalai/legacy/initialize.py
  function get_default_parser (line 40) | def get_default_parser():
  function launch (line 58) | def launch(
  function launch_from_slurm (line 126) | def launch_from_slurm(
  function launch_from_openmpi (line 165) | def launch_from_openmpi(
  function launch_from_torch (line 206) | def launch_from_torch(
  function initialize (line 242) | def initialize(

FILE: colossalai/legacy/moe/layer/experts.py
  class MLPExperts (line 18) | class MLPExperts(nn.Module):
    method __init__ (line 33) | def __init__(
    method reset_parameters (line 92) | def reset_parameters(self):
    method forward (line 106) | def forward(

FILE: colossalai/legacy/moe/layer/layers.py
  class SparseMLP (line 17) | class SparseMLP(nn.Module):
    method __init__ (line 47) | def __init__(
    method reset_parameters (line 147) | def reset_parameters(self):
    method forward (line 150) | def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    method _local_process (line 219) | def _local_process(self, expert_in: torch.Tensor) -> torch.Tensor:
    method _ep_process (line 224) | def _ep_process(
    method _tp_process (line 303) | def _tp_process(
  function apply_load_balance (line 389) | def apply_load_balance(model: nn.Module, optim: Any) -> None:

FILE: colossalai/legacy/moe/layer/routers.py
  class MLPExperts (line 18) | class MLPExperts(nn.Module):
    method __init__ (line 33) | def __init__(
    method reset_parameters (line 92) | def reset_parameters(self):
    method forward (line 106) | def forward(

FILE: colossalai/legacy/moe/load_balance.py
  class LoadBalancer (line 15) | class LoadBalancer:
    method __init__ (line 16) | def __init__(
    method _clear_load (line 46) | def _clear_load(self) -> None:
    method _sync_load (line 49) | def _sync_load(self) -> Tensor:
    method _get_diff_from_avg (line 58) | def _get_diff_from_avg(data: List, group: int, avg: float) -> float:
    method _swap_data (line 62) | def _swap_data(data: List, group_i: int, index_i: int, group_j: int, i...
    method _normalize_data (line 69) | def _normalize_data(data: List) -> List:
    method _get_swap_loss (line 75) | def _get_swap_loss(
    method _check_convergence (line 103) | def _check_convergence(data: List, avg: float, tolerance: float):
    method _beam_search (line 112) | def _beam_search(
    method _load_to_list (line 182) | def _load_to_list(self, load: Tensor) -> List:
    method _search_balance (line 194) | def _search_balance(
    method _swap_expert_single_tensor (line 260) | def _swap_expert_single_tensor(
    method _swap_expert_param_and_optim (line 278) | def _swap_expert_param_and_optim(
    method _gather_global_dp_group (line 319) | def _gather_global_dp_group(self, data: Tensor) -> Tensor:
    method _swap_moe_param (line 325) | def _swap_moe_param(self, swap_list: List, optim: LowLevelZeroOptimize...
    method update_load (line 417) | def update_load(self, load: Tensor) -> None:
    method balance_load (line 428) | def balance_load(self, optim: LowLevelZeroOptimizer) -> None:

FILE: colossalai/legacy/moe/manager.py
  class MoEManager (line 11) | class MoEManager(metaclass=SingletonMeta):
    method __init__ (line 16) | def __init__(self):
    method parallel_info_dict (line 41) | def parallel_info_dict(self):
    method is_initialized (line 45) | def is_initialized(self):
    method setup (line 48) | def setup(
    method get_info (line 99) | def get_info(self, num_experts: int, use_tp: bool = False) -> Tuple[in...
    method reset_loss (line 148) | def reset_loss(self):
    method add_loss (line 151) | def add_loss(self, aux_loss: float = 0.0, z_loss: float = 0.0):
    method get_loss (line 155) | def get_loss(self):
    method get_parallel (line 159) | def get_parallel(self):

FILE: colossalai/legacy/moe/openmoe/benchmark/benchmark_cai.py
  function move_to_cuda (line 27) | def move_to_cuda(batch, device):
  function load_ckpt (line 31) | def load_ckpt(repo_name: str, model: OpenMoeForCausalLM, booster: Booster):
  class RandomDataset (line 44) | class RandomDataset(Dataset):
    method __init__ (line 45) | def __init__(
    method __len__ (line 78) | def __len__(self):
    method __getitem__ (line 81) | def __getitem__(self, idx):
  function parse_args (line 89) | def parse_args():
  function main (line 145) | def main():

FILE: colossalai/legacy/moe/openmoe/benchmark/benchmark_fsdp.py
  class RandomDataset (line 20) | class RandomDataset(Dataset):
    method __init__ (line 21) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo...
    method __len__ (line 27) | def __len__(self):
    method __getitem__ (line 30) | def __getitem__(self, idx):
  function fsdp_main (line 38) | def fsdp_main(rank, world_size, args):

FILE: colossalai/legacy/moe/openmoe/benchmark/utils.py
  function print_model_numel (line 12) | def print_model_numel(logger: DistributedLogger, model: nn.Module) -> None:
  function get_model_numel (line 29) | def get_model_numel(model: nn.Module) -> None:
  function divide (line 34) | def divide(x: float, y: float) -> float:
  function all_reduce_mean (line 43) | def all_reduce_mean(x: float, world_size: int) -> float:
  class Timer (line 52) | class Timer:
    method __init__ (line 53) | def __init__(self) -> None:
    method start (line 57) | def start(self) -> None:
    method end (line 60) | def end(self) -> None:
    method reset (line 65) | def reset(self) -> None:
  class PerformanceEvaluator (line 69) | class PerformanceEvaluator:
    method __init__ (line 81) | def __init__(
    method on_step_start (line 98) | def on_step_start(self, step: int) -> None:
    method on_step_end (line 105) | def on_step_end(self, input_ids: Tensor, **kwargs) -> None:
    method on_fit_end (line 116) | def on_fit_end(self) -> None:

FILE: colossalai/legacy/moe/openmoe/infer.py
  function parse_args (line 9) | def parse_args():
  function inference (line 15) | def inference(args):

FILE: colossalai/legacy/moe/openmoe/model/convert_openmoe_ckpt.py
  function t5x_attention_lookup (line 44) | def t5x_attention_lookup(params, i, prefix, layer_name="attention"):
  function t5x_mlp_lookup (line 53) | def t5x_mlp_lookup(params, i, prefix, split_mlp_wi=False):
  function t5x_extra_mlp_lookup (line 66) | def t5x_extra_mlp_lookup(params, i, prefix, split_mlp_wi=False):
  function t5x_experts_lookup (line 79) | def t5x_experts_lookup(params, i, prefix, split_mlp_wi=False):
  function t5x_gate_lookup (line 92) | def t5x_gate_lookup(params, i, prefix, split_mlp_wi=False):
  function t5x_layer_norm_lookup (line 97) | def t5x_layer_norm_lookup(params, i, prefix, layer_name):
  function convert_t5x_to_pytorch (line 102) | def convert_t5x_to_pytorch(variables: dict, *, num_layers: int, moe_inte...
  function make_state_dict (line 164) | def make_state_dict(converted_params):
  function load_t5x_weights_in_t5 (line 172) | def load_t5x_weights_in_t5(model, config, t5x_checkpoint_path):
  function convert_t5x_checkpoint_to_pytorch (line 182) | def convert_t5x_checkpoint_to_pytorch(t5x_checkpoint_path, config_file, ...

FILE: colossalai/legacy/moe/openmoe/model/modeling_openmoe.py
  function set_openmoe_args (line 65) | def set_openmoe_args(
  function _make_causal_mask (line 141) | def _make_causal_mask(
  function _expand_mask (line 159) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function generate_fixed_pos_embedding (line 173) | def generate_fixed_pos_embedding(features, length, min_timescale=1.0, ma...
  function apply_rotary_embedding (line 197) | def apply_rotary_embedding(q, k, cos, sin, decode=False, rotary_index=No...
  function rotate_half (line 236) | def rotate_half(x):
  function SwiGLU (line 243) | def SwiGLU(x):
  class OpenMoeMLP (line 255) | class OpenMoeMLP(nn.Module):
    method __init__ (line 256) | def __init__(self, config: LlamaConfig):
    method forward (line 268) | def forward(self, x):
  function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class OpenMoeAttention (line 302) | class OpenMoeAttention(nn.Module):
    method __init__ (line 305) | def __init__(self, config: LlamaConfig):
    method _shape (line 322) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 325) | def forward(
  class OpenMoeDecoderLayer (line 442) | class OpenMoeDecoderLayer(nn.Module):
    method __init__ (line 443) | def __init__(self, config: LlamaConfig, moe: bool):
    method forward (line 475) | def forward(
  class OpenMoePreTrainedModel (line 557) | class OpenMoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 564) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 575) | def _set_gradient_checkpointing(self, module, value=False):
  class OpenMoeModel (line 648) | class OpenMoeModel(OpenMoePreTrainedModel):
    method __init__ (line 656) | def __init__(self, config: LlamaConfig):
    method get_input_embeddings (line 674) | def get_input_embeddings(self):
    method set_input_embeddings (line 677) | def set_input_embeddings(self, value):
    method _prepare_decoder_attention_mask (line 681) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,...
    method forward (line 705) | def forward(
  class OpenMoeForCausalLM (line 833) | class OpenMoeForCausalLM(OpenMoePreTrainedModel):
    method __init__ (line 836) | def __init__(self, config):
    method get_input_embeddings (line 846) | def get_input_embeddings(self):
    method set_input_embeddings (line 849) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 852) | def get_output_embeddings(self):
    method set_output_embeddings (line 855) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 858) | def set_decoder(self, decoder):
    method get_decoder (line 861) | def get_decoder(self):
    method forward (line 866) | def forward(
    method prepare_inputs_for_generation (line 989) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1020) | def _reorder_cache(past_key_values, beam_idx):
    method _calculate_router_loss (line 1028) | def _calculate_router_loss(self, aux_loss: list = None, z_loss: list =...
    method _calculate_loss (line 1036) | def _calculate_loss(self, logits: torch.Tensor, targets: torch.Tensor)...
  class ZLossCrossEntropy (line 1073) | class ZLossCrossEntropy(torch.autograd.Function):
    method forward (line 1098) | def forward(ctx, logits, targets, z_loss):
    method backward (line 1115) | def backward(ctx, *grad_outputs):

FILE: colossalai/legacy/moe/openmoe/model/openmoe_policy.py
  class OpenMoePolicy (line 22) | class OpenMoePolicy(Policy):
    method config_sanity_check (line 23) | def config_sanity_check(self):
    method preprocess (line 26) | def preprocess(self):
    method module_policy (line 38) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 86) | def postprocess(self):
    method set_pipeline_forward (line 89) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 108) | def get_held_layers(self) -> List[Module]:
    method distribute_layers (line 129) | def distribute_layers(self, num_layers: int, num_stages: int) -> List[...
  class OpenMoeModelPolicy (line 144) | class OpenMoeModelPolicy(OpenMoePolicy):
    method __init__ (line 145) | def __init__(self) -> None:
    method module_policy (line 148) | def module_policy(self):
    method get_held_layers (line 159) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 164) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class OpenMoeForCausalLMPolicy (line 169) | class OpenMoeForCausalLMPolicy(OpenMoePolicy):
    method module_policy (line 170) | def module_policy(self):
    method get_held_layers (line 199) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 207) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class OpenMoePipelineForwards (line 224) | class OpenMoePipelineForwards:
    method openmoe_model_forward (line 231) | def openmoe_model_forward(
    method llama_for_causal_lm_forward (line 409) | def llama_for_causal_lm_forward(

FILE: colossalai/legacy/moe/openmoe/train.py
  function move_to_cuda (line 27) | def move_to_cuda(batch, device):
  function load_ckpt (line 31) | def load_ckpt(repo_name: str, model: OpenMoeForCausalLM, booster: Booster):
  function tokenize_data (line 44) | def tokenize_data(batch, tokenizer: T5Tokenizer, max_length: int) -> Dict:
  class RandomDataset (line 59) | class RandomDataset(Dataset):
    method __init__ (line 60) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo...
    method __len__ (line 68) | def __len__(self):
    method __getitem__ (line 71) | def __getitem__(self, idx):
  function parse_args (line 79) | def parse_args():
  function main (line 205) | def main():

FILE: colossalai/legacy/moe/utils.py
  class ForceFP32Parameter (line 16) | class ForceFP32Parameter(torch.nn.Parameter):
    method half (line 17) | def half(self, memory_format=None):
  class NormalNoiseGenerator (line 21) | class NormalNoiseGenerator:
    method __init__ (line 31) | def __init__(self, num_experts: int):
    method __call__ (line 37) | def __call__(self, inputs: torch.Tensor):
  class UniformNoiseGenerator (line 42) | class UniformNoiseGenerator:
    method __init__ (line 53) | def __init__(self, eps: float = 1e-2):
    method __call__ (line 59) | def __call__(self, inputs: torch.Tensor):
  function autocast_softmax (line 64) | def autocast_softmax(logit: torch.Tensor, dim: int):
  function get_noise_generator (line 68) | def get_noise_generator(noise_type: str, num_experts: int) -> Callable:
  function get_activation (line 80) | def get_activation(act: str) -> Callable:
  function SwiGLU (line 93) | def SwiGLU(x):
  function skip_init (line 106) | def skip_init():
  function get_moe_epsize_param_dict (line 136) | def get_moe_epsize_param_dict(model: nn.Module) -> Dict[int, List[nn.Par...
  function sync_moe_model_param (line 157) | def sync_moe_model_param(model: nn.Module):
  function set_moe_args (line 178) | def set_moe_args(config: Any, args: dict):
  function create_ep_hierarchical_group (line 183) | def create_ep_hierarchical_group(

FILE: colossalai/legacy/nn/_ops/_utils.py
  function convert_to_colo_tensor (line 15) | def convert_to_colo_tensor(tensor: Optional[GeneralTensor], pg: ProcessG...
  function set_parallel_input (line 21) | def set_parallel_input(input_parallel: bool):
  function get_parallel_input (line 25) | def get_parallel_input():
  function vocab_range_from_per_partition_vocab_size (line 29) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, ...
  function vocab_range_from_global_vocab_size (line 35) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_si...
  function _reduce (line 40) | def _reduce(input_, pg: ProcessGroup):
  function _split (line 51) | def _split(input_, pg: ProcessGroup, dim=-1):
  function _gather (line 71) | def _gather(input_, pg: ProcessGroup, dim=-1):
  class _ReduceGrad (line 91) | class _ReduceGrad(torch.autograd.Function):
    method symbolic (line 101) | def symbolic(graph, input_):
    method forward (line 105) | def forward(ctx, input_, process_group):
    method backward (line 110) | def backward(ctx, grad_output):
  class _ReduceInput (line 114) | class _ReduceInput(torch.autograd.Function):
    method symbolic (line 124) | def symbolic(graph, input_):
    method forward (line 128) | def forward(ctx, input_, process_group):
    method backward (line 132) | def backward(ctx, grad_output):
  class _SplitForwardGatherBackward (line 136) | class _SplitForwardGatherBackward(torch.autograd.Function):
    method symbolic (line 147) | def symbolic(graph, input_):
    method forward (line 151) | def forward(ctx, input_, process_group, dim):
    method backward (line 157) | def backward(ctx, grad_output):
  class _GatherForwardSplitBackward (line 161) | class _GatherForwardSplitBackward(torch.autograd.Function):
    method symbolic (line 171) | def symbolic(graph, input_):
    method forward (line 175) | def forward(ctx, input_, process_group, dim):
    method backward (line 181) | def backward(ctx, grad_output):
  function reduce_grad (line 185) | def reduce_grad(input_, process_group):
  function reduce_input (line 189) | def reduce_input(input_, process_group):
  function split_forward_gather_backward (line 193) | def split_forward_gather_backward(input_, process_group, dim):
  function gather_forward_split_backward (line 197) | def gather_forward_split_backward(input_, process_group, dim):
  function _all_to_all (line 201) | def _all_to_all(x: torch.Tensor, pg: ProcessGroup, scatter_dim: int, gat...
  class _DualAllToAll (line 219) | class _DualAllToAll(torch.autograd.Function):
    method forward (line 221) | def forward(ctx, x, pg, scatter_dim, gather_dim):
    method backward (line 228) | def backward(ctx, grad):
  function dual_all_to_all (line 232) | def dual_all_to_all(x, pg, scatter_dim: int, gather_dim: int):
  function _all_to_all_for_tablewise (line 239) | def _all_to_all_for_tablewise(
  class _DualAllToAllForTablewise (line 266) | class _DualAllToAllForTablewise(torch.autograd.Function):
    method forward (line 268) | def forward(ctx, x, pg, scatter_strides, gather_strides):
    method backward (line 275) | def backward(ctx, grad):
  function dual_all_to_all_tablewise (line 284) | def dual_all_to_all_tablewise(x, pg, scatter_strides, gather_strides):

FILE: colossalai/legacy/nn/layer/base_layer.py
  class ParallelLayer (line 12) | class ParallelLayer(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method _load_from_global_state_dict (line 38) | def _load_from_global_state_dict(
    method _save_to_global_state_dict (line 45) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method _load_from_state_dict (line 48) | def _load_from_state_dict(
    method _save_to_state_dict (line 62) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method use_local_state_dict (line 69) | def use_local_state_dict(cls):

FILE: colossalai/legacy/nn/layer/colossalai_layer/_utils.py
  function partition_batch (line 12) | def partition_batch(input_) -> Tensor:
  class ColossalaiModule (line 23) | class ColossalaiModule(nn.Module):
    method __init__ (line 24) | def __init__(self, module: nn.Module, **kwargs):
    method __getattr__ (line 30) | def __getattr__(self, name: str):
    method forward (line 39) | def forward(self, *args):

FILE: colossalai/legacy/nn/layer/colossalai_layer/dropout.py
  class Dropout (line 10) | class Dropout(ColossalaiModule):
    method __init__ (line 18) | def __init__(self, p: float = 0.5, inplace: bool = False) -> None:
    method forward (line 26) | def forward(self, *args):

FILE: colossalai/legacy/nn/layer/colossalai_layer/embedding.py
  class Embedding (line 40) | class Embedding(ColossalaiModule):
    method __init__ (line 70) | def __init__(
  class PatchEmbedding (line 112) | class PatchEmbedding(ColossalaiModule):
    method __init__ (line 133) | def __init__(

FILE: colossalai/legacy/nn/layer/colossalai_layer/linear.py
  class Linear (line 35) | class Linear(ColossalaiModule):
    method __init__ (line 67) | def __init__(
  class Classifier (line 94) | class Classifier(ColossalaiModule):
    method __init__ (line 112) | def __init__(

FILE: colossalai/legacy/nn/layer/colossalai_layer/normalization.py
  class LayerNorm (line 22) | class LayerNorm(ColossalaiModule):
    method __init__ (line 36) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=...

FILE: colossalai/legacy/nn/layer/parallel_1d/_operation.py
  class FusedLayerNormAffineFunction1D (line 12) | class FusedLayerNormAffineFunction1D(torch.autograd.Function):
    method forward (line 27) | def forward(ctx, input, weight, bias, normalized_shape, eps):
    method backward (line 40) | def backward(ctx, grad_output):
  class LinearWithAsyncCommunication (line 50) | class LinearWithAsyncCommunication(torch.autograd.Function):
    method forward (line 56) | def forward(ctx, input_, weight, bias, parallel_mode, async_grad_allre...
    method backward (line 68) | def backward(ctx, grad_output):
  function linear_with_async_comm (line 97) | def linear_with_async_comm(input_, weight, bias, parallel_mode, async_gr...

FILE: colossalai/legacy/nn/layer/parallel_1d/_utils.py
  function set_parallel_input (line 13) | def set_parallel_input(input_parallel: bool):
  function get_parallel_input (line 17) | def get_parallel_input():
  function vocab_range_from_per_partition_vocab_size (line 21) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, ...
  function vocab_range_from_global_vocab_size (line 27) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_si...
  function _reduce (line 32) | def _reduce(input_, parallel_mode):
  function _split (line 42) | def _split(input_, parallel_mode, dim=-1):
  function _gather (line 62) | def _gather(input_, parallel_mode, dim=-1):
  class _ReduceGrad (line 81) | class _ReduceGrad(torch.autograd.Function):
    method symbolic (line 91) | def symbolic(graph, input_):
    method forward (line 95) | def forward(ctx, input_, parallel_mode):
    method backward (line 100) | def backward(ctx, grad_output):
  class _ReduceInput (line 104) | class _ReduceInput(torch.autograd.Function):
    method symbolic (line 114) | def symbolic(graph, input_):
    method forward (line 118) | def forward(ctx, input_, parallel_mode):
    method backward (line 122) | def backward(ctx, grad_output):
  class _SplitForwardGatherBackward (line 126) | class _SplitForwardGatherBackward(torch.autograd.Function):
    method symbolic (line 137) | def symbolic(graph, input_):
    method forward (line 141) | def forward(ctx, input_, parallel_mode, dim):
    method backward (line 147) | def backward(ctx, grad_output):
  class _GatherForwardSplitBackward (line 151) | class _GatherForwardSplitBackward(torch.autograd.Function):
    method symbolic (line 161) | def symbolic(graph, input_):
    method forward (line 165) | def forward(ctx, input_, parallel_mode, dim):
    method backward (line 171) | def backward(ctx, grad_output):
  function reduce_grad (line 175) | def reduce_grad(input_, parallel_mode):
  function reduce_input (line 179) | def reduce_input(input_, parallel_mode):
  function split_forward_gather_backward (line 183) | def split_forward_gather_backward(input_, parallel_mode, dim):
  function gather_forward_split_backward (line 187) | def gather_forward_split_backward(input_, parallel_mode, dim):

FILE: colossalai/legacy/nn/layer/parallel_1d/layers.py
  class Linear1D (line 51) | class Linear1D(ColossalaiModule):
    method __init__ (line 71) | def __init__(
  class LayerNorm1D (line 108) | class LayerNorm1D(ColossalaiModule):
    method __init__ (line 150) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=...
    method _load_from_state_dict (line 163) | def _load_from_state_dict(self, state_dict, prefix, *args):
    method _save_to_state_dict (line 180) | def _save_to_state_dict(self, destination, prefix, keep_vars):
  class Classifier1D (line 186) | class Classifier1D(ParallelLayer):
    method __init__ (line 204) | def __init__(
    method reset_parameters (line 241) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _set_tensor_parallel_attributes (line 249) | def _set_tensor_parallel_attributes(self):
    method _load_from_global_state_dict (line 254) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 278) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 295) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelClassifier1D (line 320) | class VocabParallelClassifier1D(ParallelLayer):
    method __init__ (line 338) | def __init__(
    method reset_parameters (line 377) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _set_tensor_parallel_attributes (line 384) | def _set_tensor_parallel_attributes(self):
    method _load_from_global_state_dict (line 391) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 415) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 432) | def forward(self, input_: Tensor) -> Tensor:
  class Linear1D_Col (line 451) | class Linear1D_Col(ParallelLayer):
    method __init__ (line 476) | def __init__(
    method reset_parameters (line 515) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _set_tensor_parallel_attributes (line 521) | def _set_tensor_parallel_attributes(self):
    method _load_from_global_state_dict (line 527) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 550) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 565) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class Linear1D_Row (line 591) | class Linear1D_Row(ParallelLayer):
    method __init__ (line 611) | def __init__(
    method chunk_weight (line 656) | def chunk_weight(self):
    method reset_parameters (line 659) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _set_tensor_parallel_attributes (line 666) | def _set_tensor_parallel_attributes(self):
    method _load_from_global_state_dict (line 670) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 693) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 708) | def forward(self, input_: Tensor) -> Tensor:
  class Embedding1D (line 754) | class Embedding1D(ParallelLayer):
    method __init__ (line 784) | def __init__(
    method _set_tensor_parallel_attributes (line 814) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 817) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 823) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 828) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 842) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 854) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelEmbedding1D (line 863) | class VocabParallelEmbedding1D(ParallelLayer):
    method __init__ (line 893) | def __init__(
    method _set_tensor_parallel_attributes (line 929) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 932) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 938) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 947) | def _load_from_global_state_dict(self, state_dict, prefix, *args):
    method _save_to_global_state_dict (line 961) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 973) | def forward(self, input_: Tensor) -> Tensor:
  class Dropout1D (line 992) | class Dropout1D(ParallelLayer):
    method __init__ (line 1000) | def __init__(self, p: float = 0.5, inplace: bool = False):
    method forward (line 1006) | def forward(self, input_: Tensor) -> Tensor:
  class PatchEmbedding1D (line 1016) | class PatchEmbedding1D(ColossalaiModule):
    method __init__ (line 1040) | def __init__(
    method _load_from_state_dict (line 1065) | def _load_from_state_dict(self, state_dict, prefix, *args):
    method _save_to_state_dict (line 1077) | def _save_to_state_dict(self, destination, prefix, keep_vars):

FILE: colossalai/legacy/nn/layer/parallel_2d/_operation.py
  function matmul_2d (line 14) | def matmul_2d(
  class _Classifier2D (line 74) | class _Classifier2D(torch.autograd.Function):
    method forward (line 77) | def forward(
    method backward (line 129) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function classifier_2d (line 147) | def classifier_2d(
  class Matmul_AB_2D (line 200) | class Matmul_AB_2D(torch.autograd.Function):
    method forward (line 224) | def forward(
    method backward (line 318) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class Matmul_ABT_2D (line 352) | class Matmul_ABT_2D(torch.autograd.Function):
    method forward (line 377) | def forward(
    method backward (line 475) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class Matmul_ATB_2D (line 510) | class Matmul_ATB_2D(torch.autograd.Function):
    method forward (line 534) | def forward(
    method backward (line 632) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class _Add_Bias_2D (line 667) | class _Add_Bias_2D(torch.autograd.Function):
    method forward (line 670) | def forward(
    method backward (line 705) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function add_bias_2d (line 718) | def add_bias_2d(
  class _Layernorm_2D (line 769) | class _Layernorm_2D(torch.autograd.Function):
    method forward (line 772) | def forward(
    method backward (line 792) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function layernorm_2d (line 813) | def layernorm_2d(
  class _AllGatherTensor2D (line 838) | class _AllGatherTensor2D(torch.autograd.Function):
    method forward (line 841) | def forward(ctx: Any, inputs: Tensor, dim: int, parallel_mode: Paralle...
    method backward (line 850) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function all_gather_tensor_2d (line 855) | def all_gather_tensor_2d(tensor: Tensor, dim: int, parallel_mode: Parall...
  function split_batch_2d (line 870) | def split_batch_2d(input_: Tensor, dim: int = 0) -> Tensor:
  class _ReduceTensor2D (line 893) | class _ReduceTensor2D(torch.autograd.Function):
    method forward (line 895) | def forward(ctx, input_, parallel_mode):
    method backward (line 899) | def backward(ctx, output_grad):
  function reduce_tensor_2d (line 903) | def reduce_tensor_2d(input_: Tensor, parallel_mode: ParallelMode) -> Ten...
  class _ReduceScatterTensor2D (line 917) | class _ReduceScatterTensor2D(torch.autograd.Function):
    method forward (line 919) | def forward(ctx, input_, dim, parallel_mode):
    method backward (line 925) | def backward(ctx, output_grad):
  function reduce_scatter_tensor_2d (line 929) | def reduce_scatter_tensor_2d(tensor: Tensor, dim: int, parallel_mode: Pa...
  class _ReduceByBatch2D (line 948) | class _ReduceByBatch2D(torch.autograd.Function):
    method symbolic (line 950) | def symbolic(graph, input_, reduce_mean: bool = False):
    method forward (line 959) | def forward(ctx, input_, reduce_mean: bool = False):
    method backward (line 970) | def backward(ctx, output_grad):
  function reduce_by_batch_2d (line 977) | def reduce_by_batch_2d(input_, reduce_mean: bool = False) -> Tensor:

FILE: colossalai/legacy/nn/layer/parallel_2d/_utils.py
  function get_summa_dim_from_env (line 6) | def get_summa_dim_from_env() -> int:
  function assert_summa_initialization (line 19) | def assert_summa_initialization():

FILE: colossalai/legacy/nn/layer/parallel_2d/layers.py
  class Linear2D (line 39) | class Linear2D(ParallelLayer):
    method __init__ (line 58) | def __init__(
    method _set_tensor_parallel_attributes (line 101) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 106) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 112) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 145) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 173) | def forward(self, x: Tensor) -> Tensor:
  class LayerNorm2D (line 231) | class LayerNorm2D(ParallelLayer):
    method __init__ (line 245) | def __init__(self, normalized_shape: int, eps: float = 1e-05, bias=Tru...
    method _set_tensor_parallel_attributes (line 272) | def _set_tensor_parallel_attributes(self):
    method _load_from_global_state_dict (line 277) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 309) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 336) | def forward(self, x: Tensor) -> Tensor:
  class PatchEmbedding2D (line 390) | class PatchEmbedding2D(ParallelLayer):
    method __init__ (line 411) | def __init__(
    method _set_tensor_parallel_attribute (line 465) | def _set_tensor_parallel_attribute(self):
    method reset_parameters (line 471) | def reset_parameters(self, weight_initializer, bias_initializer, posit...
    method _load_from_global_state_dict (line 479) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 521) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 550) | def forward(self, input_: Tensor) -> Tensor:
  class Embedding2D (line 575) | class Embedding2D(ParallelLayer):
    method __init__ (line 605) | def __init__(
    method _set_tensor_parallel_attributes (line 636) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 639) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 645) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 650) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 677) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 701) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelEmbedding2D (line 711) | class VocabParallelEmbedding2D(ParallelLayer):
    method __init__ (line 741) | def __init__(
    method _set_tensor_parallel_attributes (line 778) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 781) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 787) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 796) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 823) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 847) | def forward(self, input_: Tensor) -> Tensor:
  class Classifier2D (line 862) | class Classifier2D(ParallelLayer):
    method __init__ (line 880) | def __init__(
    method _set_tensor_parallel_attributes (line 924) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 928) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 942) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 976) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 1005) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelClassifier2D (line 1026) | class VocabParallelClassifier2D(ParallelLayer):
    method __init__ (line 1044) | def __init__(
    method _set_tensor_parallel_attributes (line 1091) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 1097) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 1104) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 1138) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 1168) | def forward(self, x: Tensor) -> Tensor:

FILE: colossalai/legacy/nn/layer/parallel_2p5d/_operation.py
  function get_parallel_group (line 14) | def get_parallel_group(parallel_mode: ParallelMode):
  function get_global_rank (line 18) | def get_global_rank():
  function get_parallel_rank (line 22) | def get_parallel_rank(parallel_mode: ParallelMode):
  class _Classifier2p5D (line 26) | class _Classifier2p5D(torch.autograd.Function):
    method forward (line 29) | def forward(
    method backward (line 81) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function classifier_2p5d (line 100) | def classifier_2p5d(
  class Matmul_AB_2p5D (line 153) | class Matmul_AB_2p5D(torch.autograd.Function):
    method forward (line 178) | def forward(
    method backward (line 278) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class Matmul_ABT_2p5D (line 314) | class Matmul_ABT_2p5D(torch.autograd.Function):
    method forward (line 339) | def forward(
    method backward (line 443) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class Matmul_ATB_2p5D (line 479) | class Matmul_ATB_2p5D(torch.autograd.Function):
    method forward (line 504) | def forward(
    method backward (line 606) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  class _Add_Bias_2p5D (line 642) | class _Add_Bias_2p5D(torch.autograd.Function):
    method forward (line 645) | def forward(
    method backward (line 694) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function add_bias_2p5d (line 805) | def add_bias_2p5d(
  class _Layernorm2p5D (line 859) | class _Layernorm2p5D(torch.autograd.Function):
    method forward (line 876) | def forward(
    method backward (line 889) | def backward(ctx, output_grad):
  function layernorm_2p5d (line 910) | def layernorm_2p5d(
  class _AllGatherTensor2p5D (line 929) | class _AllGatherTensor2p5D(torch.autograd.Function):
    method forward (line 932) | def forward(ctx: Any, inputs: Tensor, dim: int, col_parallel_mode: Par...
    method backward (line 941) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function all_gather_tensor_2p5d (line 946) | def all_gather_tensor_2p5d(inputs: Tensor, dim: int, col_parallel_mode: ...
  class SplitFirst (line 961) | class SplitFirst(torch.autograd.Function):
    method forward (line 976) | def forward(ctx: Any, inputs: Tensor, tesseract_dim: int, col_parallel...
    method backward (line 987) | def backward(ctx: Any, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function split_batch_2p5d (line 996) | def split_batch_2p5d(input_: Tensor, dim: int = 0) -> Tensor:
  class _ReduceTensor2p5D (line 1021) | class _ReduceTensor2p5D(torch.autograd.Function):
    method forward (line 1023) | def forward(ctx, input_, parallel_mode):
    method backward (line 1027) | def backward(ctx, output_grad):
  function reduce_tensor_2p5d (line 1031) | def reduce_tensor_2p5d(input_: Tensor, parallel_mode: ParallelMode) -> T...
  class _ReduceScatterTensor2p5D (line 1045) | class _ReduceScatterTensor2p5D(torch.autograd.Function):
    method forward (line 1047) | def forward(ctx, input_, dim, parallel_mode):
    method backward (line 1053) | def backward(ctx, output_grad):
  function reduce_scatter_tensor_2p5d (line 1057) | def reduce_scatter_tensor_2p5d(input_: Tensor, dim: int, parallel_mode: ...
  class _RreduceByBatch2p5D (line 1078) | class _RreduceByBatch2p5D(torch.autograd.Function):
    method symbolic (line 1080) | def symbolic(graph, input_, reduce_mean: bool = False):
    method forward (line 1089) | def forward(ctx, input_, reduce_mean: bool = False):
    method backward (line 1100) | def backward(ctx, output_grad):
  function reduce_by_batch_2p5d (line 1107) | def reduce_by_batch_2p5d(input_, reduce_mean: bool = False) -> Tensor:

FILE: colossalai/legacy/nn/layer/parallel_2p5d/_utils.py
  function get_tesseract_dim_dep_from_env (line 6) | def get_tesseract_dim_dep_from_env():
  function assert_tesseract_initialization (line 21) | def assert_tesseract_initialization():

FILE: colossalai/legacy/nn/layer/parallel_2p5d/layers.py
  class Linear2p5D (line 40) | class Linear2p5D(ParallelLayer):
    method __init__ (line 59) | def __init__(
    method _set_tensor_parallel_attributes (line 103) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 108) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 114) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 153) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 182) | def forward(self, x: Tensor) -> Tensor:
  class LayerNorm2p5D (line 243) | class LayerNorm2p5D(ParallelLayer):
    method __init__ (line 257) | def __init__(self, normalized_shape: int, eps: float = 1e-05, bias=Tru...
    method _set_tensor_parallel_attribute (line 285) | def _set_tensor_parallel_attribute(self):
    method _load_from_global_state_dict (line 290) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 322) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 349) | def forward(self, x: Tensor) -> Tensor:
  class PatchEmbedding2p5D (line 403) | class PatchEmbedding2p5D(ParallelLayer):
    method __init__ (line 424) | def __init__(
    method _set_tensor_parallel_attribute (line 478) | def _set_tensor_parallel_attribute(self):
    method reset_parameters (line 484) | def reset_parameters(self, weight_initializer, bias_initializer, posit...
    method _load_from_global_state_dict (line 492) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 534) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 563) | def forward(self, input_: Tensor) -> Tensor:
  class Embedding2p5D (line 588) | class Embedding2p5D(ParallelLayer):
    method __init__ (line 618) | def __init__(
    method _set_tensor_parallel_attributes (line 649) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 652) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 658) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 663) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 690) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 714) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelEmbedding2p5D (line 725) | class VocabParallelEmbedding2p5D(ParallelLayer):
    method __init__ (line 755) | def __init__(
    method _set_tensor_parallel_attributes (line 792) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 795) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 801) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 806) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 833) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 857) | def forward(self, input_: Tensor) -> Tensor:
  class Classifier2p5D (line 876) | class Classifier2p5D(ParallelLayer):
    method __init__ (line 894) | def __init__(
    method _set_tensor_parallel_attributes (line 939) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 943) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 957) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 991) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 1020) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelClassifier2p5D (line 1041) | class VocabParallelClassifier2p5D(ParallelLayer):
    method __init__ (line 1059) | def __init__(
    method _set_tensor_parallel_attributes (line 1107) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 1113) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 1120) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method forward (line 1154) | def forward(self, x: Tensor) -> Tensor:

FILE: colossalai/legacy/nn/layer/parallel_3d/_operation.py
  class _Linear3D (line 18) | class _Linear3D(torch.autograd.Function):
    method forward (line 21) | def forward(
    method backward (line 46) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function linear_3d (line 64) | def linear_3d(
  class _Classifier3D (line 94) | class _Classifier3D(torch.autograd.Function):
    method forward (line 97) | def forward(
    method backward (line 130) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function classifier_3d (line 155) | def classifier_3d(
  class _VocabParallelClassifier3D (line 189) | class _VocabParallelClassifier3D(torch.autograd.Function):
    method forward (line 192) | def forward(
    method backward (line 224) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function vocab_parallel_classifier_3d (line 249) | def vocab_parallel_classifier_3d(
  function norm_forward (line 284) | def norm_forward(x: Tensor, mean: Tensor, sqr_mean: Tensor, weight: Tens...
  function norm_backward (line 295) | def norm_backward(grad: Tensor, mu: Tensor, sigma: Tensor, weight: Tensor):
  class _Layernorm3D (line 307) | class _Layernorm3D(torch.autograd.Function):
    method forward (line 310) | def forward(
    method backward (line 341) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function layernorm_3d (line 359) | def layernorm_3d(
  function split_tensor_3d (line 400) | def split_tensor_3d(tensor: Tensor, dim: int, parallel_mode: ParallelMod...
  function split_batch_3d (line 429) | def split_batch_3d(
  class _ReduceTensor3D (line 461) | class _ReduceTensor3D(torch.autograd.Function):
    method forward (line 463) | def forward(ctx, input_, parallel_mode):
    method backward (line 467) | def backward(ctx, output_grad):
  function reduce_tensor_3d (line 471) | def reduce_tensor_3d(tensor: Tensor, parallel_mode: ParallelMode) -> Ten...
  class _AllGatherTensor3D (line 485) | class _AllGatherTensor3D(torch.autograd.Function):
    method forward (line 487) | def forward(ctx, input_, dim, parallel_mode):
    method backward (line 494) | def backward(ctx, output_grad):
  function all_gather_tensor_3d (line 499) | def all_gather_tensor_3d(tensor: Tensor, dim: int, parallel_mode: Parall...
  class _ReduceScatterTensor3D (line 514) | class _ReduceScatterTensor3D(torch.autograd.Function):
    method forward (line 516) | def forward(ctx, input_, dim, parallel_mode):
    method backward (line 522) | def backward(ctx, output_grad):
  function reduce_scatter_tensor_3d (line 527) | def reduce_scatter_tensor_3d(tensor: Tensor, dim: int, parallel_mode: Pa...
  class _ReduceByBatch3D (line 548) | class _ReduceByBatch3D(torch.autograd.Function):
    method forward (line 551) | def forward(
    method backward (line 569) | def backward(ctx, output_grad: Tensor) -> Tuple[Tensor, ...]:
  function reduce_by_batch_3d (line 576) | def reduce_by_batch_3d(

FILE: colossalai/legacy/nn/layer/parallel_3d/_utils.py
  function get_depth_from_env (line 18) | def get_depth_from_env() -> int:
  function get_parallel_mode_from_env (line 31) | def get_parallel_mode_from_env(group):
  function swap_in_out_group (line 42) | def swap_in_out_group():
  function dbg_check_shape (line 50) | def dbg_check_shape(tensor: Tensor, shape: tuple):
  class AsyncGradientBucket (line 57) | class AsyncGradientBucket(object):
    method __init__ (line 58) | def __init__(self):
    method __len__ (line 61) | def __len__(self):
    method push (line 64) | def push(self, async_op, grad_tensor, param_id):
    method pop (line 68) | def pop(self, param_id):
    method synchronize (line 76) | def synchronize(self, params):
  function push_async_grad (line 89) | def push_async_grad(op, grad, param_id):
  function pop_async_grad (line 93) | def pop_async_grad(param_id):
  function _async_grad_hook (line 97) | def _async_grad_hook(grad, param_id):
  function register_async_grad_hook (line 102) | def register_async_grad_hook(param):
  function synchronize (line 106) | def synchronize(params=list()):

FILE: colossalai/legacy/nn/layer/parallel_3d/layers.py
  class LayerNorm3D (line 47) | class LayerNorm3D(ParallelLayer):
    method __init__ (line 61) | def __init__(self, normalized_shape: int, eps: float = 1e-12, bias=Tru...
    method _set_tensor_parallel_attributes (line 86) | def _set_tensor_parallel_attributes(self) -> None:
    method reset_parameters (line 91) | def reset_parameters(self) -> None:
    method _load_from_global_state_dict (line 98) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 131) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 150) | def forward(self, input_: Tensor) -> Tensor:
  class Linear3D (line 163) | class Linear3D(ParallelLayer):
    method __init__ (line 180) | def __init__(
    method _set_tensor_parallel_attributes (line 224) | def _set_tensor_parallel_attributes(self) -> None:
    method _sync_grad_hook (line 229) | def _sync_grad_hook(self, grad) -> Tensor:
    method reset_parameters (line 233) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 249) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 290) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 327) | def forward(self, input_: Tensor) -> Tensor:
  class Classifier3D (line 345) | class Classifier3D(ParallelLayer):
    method __init__ (line 363) | def __init__(
    method _set_tensor_parallel_attributes (line 405) | def _set_tensor_parallel_attributes(self) -> None:
    method reset_parameters (line 409) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 424) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 456) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 477) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelClassifier3D (line 489) | class VocabParallelClassifier3D(ParallelLayer):
    method __init__ (line 507) | def __init__(
    method _set_tensor_parallel_attributes (line 556) | def _set_tensor_parallel_attributes(self) -> None:
    method reset_parameters (line 562) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method _load_from_global_state_dict (line 580) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 622) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 658) | def forward(self, input_: Tensor) -> Tensor:
  class PatchEmbedding3D (line 670) | class PatchEmbedding3D(ParallelLayer):
    method __init__ (line 691) | def __init__(
    method _set_tensor_parallel_attributes (line 744) | def _set_tensor_parallel_attributes(self) -> None:
    method _sync_grad_hook (line 750) | def _sync_grad_hook(self, grad) -> Tensor:
    method reset_parameters (line 754) | def reset_parameters(self, weight_initializer, bias_initializer, posit...
    method _load_from_global_state_dict (line 772) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 812) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 833) | def forward(self, input_: Tensor) -> Tensor:
  class Embedding3D (line 849) | class Embedding3D(ParallelLayer):
    method __init__ (line 879) | def __init__(
    method _set_tensor_parallel_attributes (line 912) | def _set_tensor_parallel_attributes(self) -> None:
    method _sync_grad_hook (line 915) | def _sync_grad_hook(self, grad) -> Tensor:
    method reset_parameters (line 919) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 929) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 934) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 959) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 975) | def forward(self, input_: Tensor) -> Tensor:
  class VocabParallelEmbedding3D (line 985) | class VocabParallelEmbedding3D(ParallelLayer):
    method __init__ (line 1015) | def __init__(
    method _set_tensor_parallel_attributes (line 1054) | def _set_tensor_parallel_attributes(self):
    method reset_parameters (line 1057) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 1063) | def _fill_padding_idx_with_zero(self) -> None:
    method _load_from_global_state_dict (line 1072) | def _load_from_global_state_dict(self, state_dict, prefix, *args, **kw...
    method _save_to_global_state_dict (line 1107) | def _save_to_global_state_dict(self, destination, prefix, keep_vars):
    method forward (line 1140) | def forward(self, input_: Tensor) -> Tensor:

FILE: colossalai/legacy/nn/layer/parallel_sequence/_operation.py
  class RingQK (line 15) | class RingQK(torch.autograd.Function):
    method forward (line 22) | def forward(ctx, sub_q, sub_k, batch_size, num_attention_heads, sub_se...
    method backward (line 55) | def backward(ctx, grad_output):
  class RingAV (line 92) | class RingAV(torch.autograd.Function):
    method forward (line 99) | def forward(ctx, attention_score, sub_v, batch_size, num_attention_hea...
    method backward (line 132) | def backward(ctx, grad_output):

FILE: colossalai/legacy/nn/layer/parallel_sequence/_utils.py
  function _calc_incoming_device_range (line 5) | def _calc_incoming_device_range(i, rank, world_size, sub_seq_length):
  function _calc_current_device_range (line 12) | def _calc_current_device_range(rank, sub_seq_length):

FILE: colossalai/legacy/nn/layer/parallel_sequence/layers.py
  class TransformerSelfAttentionRing (line 20) | class TransformerSelfAttentionRing(nn.Module):
    method __init__ (line 34) | def __init__(
    method forward (line 98) | def forward(self, hidden_states, attention_mask):
    method __repr__ (line 198) | def __repr__(self):
  class _Linear (line 208) | class _Linear(nn.Module):
    method __init__ (line 227) | def __init__(self, input_size, output_size, bias=True, skip_bias_add=F...
    method forward (line 251) | def forward(self, input_):
    method __repr__ (line 261) | def __repr__(self):

FILE: colossalai/legacy/nn/layer/utils/common.py
  class CheckpointModule (line 16) | class CheckpointModule(nn.Module):
    method __init__ (line 17) | def __init__(self, checkpoint: bool = True, offload: bool = False):
    method _forward (line 23) | def _forward(self, *args, **kwargs):
    method forward (line 26) | def forward(self, *args, **kwargs):
    method train (line 32) | def train(self, mode: bool = True):
    method eval (line 36) | def eval(self):
  function divide (line 41) | def divide(numerator, denominator):
  function swish (line 56) | def swish(x: Tensor) -> Tensor:
  function set_tensor_parallel_attribute_by_size (line 63) | def set_tensor_parallel_attribute_by_size(param, size):
  function set_tensor_parallel_attribute_by_partition (line 68) | def set_tensor_parallel_attribute_by_partition(param, num_partitions):
  function get_tensor_parallel_mode (line 73) | def get_tensor_parallel_mode():
  function _ntuple (line 80) | def _ntuple(n):

FILE: colossalai/legacy/nn/layer/vanilla/layers.py
  function drop_path (line 18) | def drop_path(x, drop_prob: float = 0.0, training: bool = False):
  class DropPath (line 41) | class DropPath(nn.Module):
    method __init__ (line 50) | def __init__(self, drop_prob=None):
    method forward (line 54) | def forward(self, x):
  class WrappedDropout (line 58) | class WrappedDropout(nn.Module):
    method __init__ (line 74) | def __init__(self, p: float = 0.5, inplace: bool = False, mode=None):
    method nonefunc (line 86) | def nonefunc(self, inputs):
    method normalfunc (line 89) | def normalfunc(self, inputs):
    method forward (line 93) | def forward(self, inputs):
  class WrappedDropPath (line 97) | class WrappedDropPath(nn.Module):
    method __init__ (line 110) | def __init__(self, p: float = 0.0, mode=None):
    method nonefunc (line 120) | def nonefunc(self, inputs):
    method normalfunc (line 123) | def normalfunc(self, inputs):
    method forward (line 127) | def forward(self, inputs):
  class VanillaPatchEmbedding (line 132) | class VanillaPatchEmbedding(nn.Module):
    method __init__ (line 154) | def __init__(
    method reset_parameters (line 192) | def reset_parameters(self, weight_initializer, bias_initializer, posit...
    method forward (line 198) | def forward(self, input_: Tensor) -> Tensor:
  class VanillaClassifier (line 214) | class VanillaClassifier(nn.Module):
    method __init__ (line 232) | def __init__(
    method reset_parameters (line 265) | def reset_parameters(self, weight_initializer, bias_initializer):
    method forward (line 274) | def forward(self, input_: Tensor) -> Tensor:
  class VanillaLayerNorm (line 279) | class VanillaLayerNorm(nn.Module):
    method __init__ (line 294) | def __init__(self, normalized_shape: int, eps=1e-05, bias=True, dtype=...
    method forward (line 308) | def forward(self, x: Tensor) -> Tensor:
  class VanillaLinear (line 313) | class VanillaLinear(nn.Module):
    method __init__ (line 331) | def __init__(
    method forward (line 356) | def forward(self, input: Tensor) -> Tensor:

FILE: colossalai/legacy/nn/layer/wrapper/pipeline_wrapper.py
  class PipelineSharedModuleWrapper (line 10) | class PipelineSharedModuleWrapper:
    method __init__ (line 11) | def __init__(self, pipeline_ranks: Union[List[int], Tuple[int]]) -> None:
    method _init_group (line 18) | def _init_group(self):
    method register_module (line 34) | def register_module(self, module: nn.Module):
    method register_parameter (line 43) | def register_parameter(self, param: nn.Parameter):

FILE: colossalai/legacy/nn/loss/__init__.py
  class CrossEntropyLoss (line 27) | class CrossEntropyLoss(_Loss):
    method __init__ (line 28) | def __init__(self, reduction: bool = True, *args, **kwargs):
    method forward (line 39) | def forward(self, *args):

FILE: colossalai/legacy/nn/loss/loss_1d.py
  class _VocabParallelCrossEntropy1D (line 11) | class _VocabParallelCrossEntropy1D(torch.autograd.Function):
    method forward (line 14) | def forward(ctx, vocab_parallel_logits, targets, process_group):
    method backward (line 62) | def backward(ctx, grad_output):
  class VocabParallelCrossEntropyLoss1D (line 83) | class VocabParallelCrossEntropyLoss1D(_Loss):
    method __init__ (line 90) | def __init__(self, reduction=True):
    method forward (line 94) | def forward(self, logits, targets, process_group=None):

FILE: colossalai/legacy/nn/loss/loss_2d.py
  class CrossEntropyLoss2D (line 16) | class CrossEntropyLoss2D(_Loss):
    method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs):
    method forward (line 42) | def forward(self, logits, targets):
  class _VocabParallelCrossEntropy2D (line 60) | class _VocabParallelCrossEntropy2D(torch.autograd.Function):
    method forward (line 65) | def forward(ctx, logits, targets):
    method backward (line 109) | def backward(ctx, output_grad):
  class VocabParallelCrossEntropyLoss2D (line 131) | class VocabParallelCrossEntropyLoss2D(_Loss):
    method __init__ (line 138) | def __init__(self, reduction=True):
    method forward (line 142) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/loss/loss_2p5d.py
  class CrossEntropyLoss2p5D (line 16) | class CrossEntropyLoss2p5D(_Loss):
    method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs):
    method forward (line 42) | def forward(self, logits, targets):
  class _VocabParallelCrossEntropy2p5D (line 57) | class _VocabParallelCrossEntropy2p5D(torch.autograd.Function):
    method forward (line 62) | def forward(ctx, logits, targets):
    method backward (line 103) | def backward(ctx, output_grad):
  class VocabParallelCrossEntropyLoss2p5D (line 125) | class VocabParallelCrossEntropyLoss2p5D(_Loss):
    method __init__ (line 133) | def __init__(self, reduction=True):
    method forward (line 137) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/loss/loss_3d.py
  class CrossEntropyLoss3D (line 16) | class CrossEntropyLoss3D(_Loss):
    method __init__ (line 35) | def __init__(self, reduction=True, *args, **kwargs):
    method forward (line 43) | def forward(self, logits, targets):
  class _VocabParallelCrossEntropy3D (line 59) | class _VocabParallelCrossEntropy3D(torch.autograd.Function):
    method forward (line 65) | def forward(ctx, logits, targets, output_parallel_mode):
    method backward (line 102) | def backward(ctx, output_grad):
  class VocabParallelCrossEntropyLoss3D (line 121) | class VocabParallelCrossEntropyLoss3D(_Loss):
    method __init__ (line 128) | def __init__(self, reduction=True):
    method forward (line 135) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/metric/__init__.py
  class Accuracy (line 17) | class Accuracy(nn.Module):
    method __init__ (line 18) | def __init__(self):
    method forward (line 26) | def forward(self, *args):

FILE: colossalai/legacy/nn/metric/_utils.py
  function calc_acc (line 4) | def calc_acc(logits, targets):

FILE: colossalai/legacy/nn/metric/accuracy_2d.py
  class Accuracy2D (line 9) | class Accuracy2D(nn.Module):
    method __init__ (line 12) | def __init__(self):
    method forward (line 15) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/metric/accuracy_2p5d.py
  class Accuracy2p5D (line 9) | class Accuracy2p5D(nn.Module):
    method __init__ (line 12) | def __init__(self):
    method forward (line 15) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/metric/accuracy_3d.py
  class Accuracy3D (line 11) | class Accuracy3D(nn.Module):
    method __init__ (line 14) | def __init__(self):
    method forward (line 19) | def forward(self, logits, targets):

FILE: colossalai/legacy/nn/parallel/data_parallel.py
  function free_storage (line 14) | def free_storage(data: torch.Tensor) -> None:
  function _cast_float (line 23) | def _cast_float(args, dtype: torch.dtype):
  class ColoDDP (line 33) | class ColoDDP(torch.nn.Module):
    method __init__ (line 52) | def __init__(
    method parameters (line 76) | def parameters(self, recurse: bool = True):
    method named_parameters (line 79) | def named_parameters(self, prefix: str = "", recurse: bool = True):
    method named_buffers (line 82) | def named_buffers(self, prefix: str = "", recurse: bool = True):
    method named_children (line 85) | def named_children(self):
    method named_modules (line 88) | def named_modules(
    method forward (line 93) | def forward(self, *args, **kwargs):
    method backward (line 97) | def backward(self, loss: torch.Tensor):
    method grad_handle (line 110) | def grad_handle(self, p, grad):
    method _save_grad (line 133) | def _save_grad(p, grad):
    method zero_grad (line 139) | def zero_grad(self, set_to_none: bool = False) -> None:
    method set_params_to_ignore (line 153) | def set_params_to_ignore(params_to_ignore: Iterable[torch.Tensor]) -> ...
    method state_dict (line 171) | def state_dict(self, destination=None, prefix="", keep_vars=False):
    method load_state_dict (line 174) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"...

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/base_embedding.py
  class BaseEmbeddingBag (line 6) | class BaseEmbeddingBag(abc.ABC, nn.Module):
    method __init__ (line 7) | def __init__(

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/cache_mgr.py
  class EvictionStrategy (line 14) | class EvictionStrategy(Enum):
  function _wait_for_data (line 20) | def _wait_for_data(t, stream: Optional[torch.cuda.streams.Stream]) -> None:
  class CachedParamMgr (line 37) | class CachedParamMgr(torch.nn.Module):
    method __init__ (line 54) | def __init__(
    method _reset_comm_stats (line 100) | def _reset_comm_stats(self):
    method timer (line 111) | def timer(self, name):
    method _find_evict_gpu_idxs (line 120) | def _find_evict_gpu_idxs(self, evict_num: int) -> torch.Tensor:
    method _init_weight (line 142) | def _init_weight(self, weight):
    method cpu_weight_data (line 188) | def cpu_weight_data(self, row_idx: int) -> torch.Tensor:
    method cuda_available_row_num (line 204) | def cuda_available_row_num(self):
    method reorder (line 208) | def reorder(self, ids_freq_mapping: Optional[List[int]] = None, warmup...
    method flush (line 273) | def flush(self):
    method print_comm_stats (line 295) | def print_comm_stats(self):
    method _id_to_cached_cuda_id (line 315) | def _id_to_cached_cuda_id(self, ids: torch.Tensor) -> torch.Tensor:
    method prepare_ids (line 329) | def prepare_ids(self, ids: torch.Tensor) -> torch.Tensor:
    method _row_in_cuda (line 382) | def _row_in_cuda(self, row_id: int) -> bool:
    method _prepare_rows_on_cuda (line 386) | def _prepare_rows_on_cuda(self, cpu_row_idxs: torch.Tensor) -> None:
    method _find_free_cuda_row (line 529) | def _find_free_cuda_row(self) -> int:
    method _evict (line 535) | def _evict(self) -> int:
    method _admit (line 576) | def _admit(self, row_id: int):

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/cached_embedding.py
  class CachedEmbeddingBag (line 11) | class CachedEmbeddingBag(BaseEmbeddingBag):
    method __init__ (line 39) | def __init__(
    method set_cache_mgr_async_copy (line 81) | def set_cache_mgr_async_copy(self, flag):
    method _weight_alloc (line 84) | def _weight_alloc(self, dtype, device):
    method _preprocess (line 92) | def _preprocess(
    method forward (line 116) | def forward(self, input, offsets=None, per_sample_weights=None, shape_...
    method weight (line 139) | def weight(self):
    method named_parameters (line 142) | def named_parameters(self, prefix: str = "", recurse: bool = True) -> ...
    method parameters (line 145) | def parameters(self, recurse: bool = True) -> Iterator[Parameter]:
    method set_cache_op (line 148) | def set_cache_op(self, cache_op: bool = True):
    method num_hits_history (line 154) | def num_hits_history(self):
    method num_miss_history (line 158) | def num_miss_history(self):
    method num_write_back_history (line 162) | def num_write_back_history(self):
    method swap_in_bandwidth (line 166) | def swap_in_bandwidth(self):
    method swap_out_bandwidth (line 178) | def swap_out_bandwidth(self):

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/copyer.py
  class LimitBuffIndexCopyer (line 5) | class LimitBuffIndexCopyer(object):
    method __init__ (line 13) | def __init__(self, size: int) -> None:
    method index_copy (line 17) | def index_copy(self, dim: int, src_index: LongTensor, tgt_index: LongT...

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/embedding_config.py
  class TablewiseEmbeddingBagConfig (line 4) | class TablewiseEmbeddingBagConfig:
    method __init__ (line 13) | def __init__(

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding.py
  function get_partition (line 14) | def get_partition(embedding_dim, rank, world_size) -> Tuple[int, int, bo...
  class ParallelCachedEmbeddingBag (line 33) | class ParallelCachedEmbeddingBag(CachedEmbeddingBag):
    method __init__ (line 34) | def __init__(
    method _weight_alloc (line 85) | def _weight_alloc(self, dtype, device):
    method forward (line 98) | def forward(
    method set_cache_op (line 130) | def set_cache_op(self, cache_op: bool = True):
    method from_pretrained (line 134) | def from_pretrained(
    method print_comm_stats_ (line 170) | def print_comm_stats_(self):
    method element_size (line 173) | def element_size(self):

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise.py
  class ParallelCachedEmbeddingBagTablewise (line 15) | class ParallelCachedEmbeddingBagTablewise(CachedEmbeddingBag):
    method __init__ (line 21) | def __init__(
    method forward (line 104) | def forward(
    method split_along_rank (line 146) | def split_along_rank(
    method set_cache_op (line 222) | def set_cache_op(self, cache_op: bool = True):
    method print_comm_stats_ (line 225) | def print_comm_stats_(self):
    method element_size (line 228) | def element_size(self):

FILE: colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise_split_cache.py
  class ParallelCachedEmbeddingBagTablewiseSpiltCache (line 17) | class ParallelCachedEmbeddingBagTablewiseSpiltCache(abc.ABC, nn.Module):
    method __init__ (line 22) | def __init__(
    method forward (line 88) | def forward(self, indices: torch.Tensor, offsets: torch.Tensor = None,...
    method element_size (line 135) | def element_size(self):
    method print_comm_stats_ (line 140) | def print_comm_stats_(self):

FILE: colossalai/legacy/nn/parallel/layers/colo_module.py
  class ColoModule (line 7) | class ColoModule(object):
    method __init__ (line 8) | def __init__(self):
    method _register_shard_params (line 12) | def _register_shard_params(self, params: List[str]):
    method _register_allowed_patterns (line 15) | def _register_allowed_patterns(
    method _set_default (line 25) | def _set_default(self, compute_pattern: ComputePattern, target_mode):
    method has_compute_pattern (line 28) | def has_compute_pattern(self, compute_pattern: ComputePattern):
    method get_dist_specs (line 31) | def get_dist_specs(self, compute_pattern: ComputePattern):
    method has_compute_pattern_with_mode (line 35) | def has_compute_pattern_with_mode(self, compute_pattern: ComputePatter...
    method get_dist_specs_with_mode (line 38) | def get_dist_specs_with_mode(self, compute_pattern: ComputePattern, mo...
    method get_param_names (line 42) | def get_param_names(self):
    method register (line 45) | def register(self, compute_pattern, pg):

FILE: colossalai/legacy/nn/parallel/layers/embedding.py
  class ColoEmbedding (line 6) | class ColoEmbedding(ColoModule):
    method __init__ (line 7) | def __init__(self):
    method register (line 11) | def register(self, compute_pattern, pg: ProcessGroup):
    method _set_TP1D (line 16) | def _set_TP1D(self, pg: ProcessGroup):

FILE: colossalai/legacy/nn/parallel/layers/linear.py
  class ColoLinear (line 6) | class ColoLinear(ColoModule):
    method __init__ (line 7) | def __init__(self):
    method register (line 11) | def register(self, compute_pattern, pg: ProcessGroup):
    method _set_TP1D (line 16) | def _set_TP1D(self, pg):

FILE: colossalai/legacy/nn/parallel/layers/module_utils.py
  function register_colo_module (line 13) | def register_colo_module(module_type: type, colo_module: ColoModule):
  function is_colo_module (line 18) | def is_colo_module(module: torch.nn.Module):
  function get_colo_module (line 26) | def get_colo_module(module: torch.nn.Module):
  function check_colo_module (line 36) | def check_colo_module(module: torch.nn.Module, pg: ProcessGroup, recursi...
  function init_colo_module (line 88) | def init_colo_module(

FILE: colossalai/legacy/nn/parallel/reducer.py
  class Bucket (line 15) | class Bucket:
    method __init__ (line 16) | def __init__(self, size: int, dtype: torch.dtype, device: torch.device...
    method flush (line 22) | def flush(self) -> None:
    method alloc (line 38) | def alloc(self) -> None:
    method free (line 42) | def free(self) -> None:
    method append (line 46) | def append(self, tensor: Tensor, callback_fn: Callable):
    method avail_size (line 58) | def avail_size(self) -> int:
  class Reducer (line 62) | class Reducer:
    method __init__ (line 63) | def __init__(self, bucket_size_mb: int = 25):
    method all_reduce_async (line 68) | def all_reduce_async(
    method flush (line 89) | def flush(self) -> None:
    method free (line 94) | def free(self) -> None:
    method _get_bucket_size (line 99) | def _get_bucket_size(self, element_size: int) -> int:
    method _get_bucket (line 106) | def _get_bucket(self, tensor: Tensor, group: ProcessGroup) -> Bucket:

FILE: colossalai/legacy/pipeline/layer_spec.py
  class LayerSpec (line 6) | class LayerSpec:
    method __init__ (line 9) | def __init__(self, typename, *module_args, **module_kwargs):
    method __repr__ (line 19) | def __repr__(self):
    method param_count (line 23) | def param_count(self):
    method build (line 26) | def build(self):
    method set_children (line 44) | def set_children(self, children):
    method count_params (line 47) | def count_params(self):
    method reset_param_count (line 54) | def reset_param_count(self):

FILE: colossalai/legacy/pipeline/middleware/adaptor/fx.py
  function partition_name_to_id (line 7) | def partition_name_to_id(partition_name, is_input=False, is_output=False):
  function find_input_in_partition (line 28) | def find_input_in_partition(node, partitions, input_partitions=None):
  function find_output_in_partition (line 57) | def find_output_in_partition(node, partitions, output_partitions=None):
  function get_topology (line 94) | def get_topology(gm: GraphModule):

FILE: colossalai/legacy/pipeline/middleware/topo.py
  class ValPosition (line 8) | class ValPosition:
    method __str__ (line 12) | def __str__(self) -> str:
    method __repr__ (line 16) | def __repr__(self) -> str:
  class PartitionInputVal (line 20) | class PartitionInputVal(object):
    method __init__ (line 21) | def __init__(self, partition_id, offset) -> None:
    method get (line 26) | def get(self):
    method __str__ (line 29) | def __str__(self) -> str:
    method __repr__ (line 34) | def __repr__(self) -> str:
  class PartitionOutputVal (line 38) | class PartitionOutputVal(object):
    method __init__ (line 39) | def __init__(self) -> None:
    method add (line 43) | def add(self, partition_id, offset):
    method get (line 47) | def get(self):
    method __str__ (line 50) | def __str__(self) -> str:
    method __repr__ (line 58) | def __repr__(self) -> str:
  class Partition (line 62) | class Partition(object):
    method __init__ (line 63) | def __init__(self) -> None:
    method add_input_val (line 67) | def add_input_val(self, input_val: PartitionInputVal):
    method add_output_val (line 70) | def add_output_val(self, output_val: PartitionOutputVal):
    method get_input_vals (line 73) | def get_input_vals(self):
    method get_output_vals (line 76) | def get_output_vals(self):
    method get_output_offsets (line 80) | def get_output_offsets(self, dst_partition_id):
    method get_input_partition_ids (line 91) | def get_input_partition_ids(self):
    method get_output_partition_ids (line 100) | def get_output_partition_ids(self):
    method __str__ (line 109) | def __str__(self) -> str:
    method __repr__ (line 123) | def __repr__(self) -> str:
  class Topo (line 139) | class Topo(object):
    method __init__ (line 140) | def __init__(self, input_partition_id=None, output_partition_id=None) ...
    method set_input_partition_id (line 145) | def set_input_partition_id(self, partition_id: int):
    method set_output_partition_id (line 148) | def set_output_partition_id(self, partition_id: int):
    method get_input_partition_id (line 151) | def get_input_partition_id(self):
    method get_output_partition_id (line 154) | def get_output_partition_id(self):
    method set_partitions (line 157) | def set_partitions(self, partition_id: int, partition: Partition):
    method get_mid_partitions (line 160) | def get_mid_partitions(self):
    method get_mid_partition_ids (line 168) | def get_mid_partition_ids(self):
    method get_input_partition (line 171) | def get_input_partition(self):
    method get_output_partition (line 176) | def get_output_partition(self):
    method get_partition_by_id (line 181) | def get_partition_by_id(self, partition_id):
    method __str__ (line 184) | def __str__(self) -> str:
    method __repr__ (line 209) | def __repr__(self) -> str:

FILE: colossalai/legacy/pipeline/pipelinable.py
  class PipelinableContext (line 20) | class PipelinableContext(InsertPostInitMethodToModuleSubClasses):
    method __init__ (line 25) | def __init__(self, policy: str = "balanced"):
    method policy (line 35) | def policy(self):
    method policy (line 39) | def policy(self, policy: str):
    method layers_count (line 43) | def layers_count(self):
    method funcs_count (line 47) | def funcs_count(self):
    method _pre_context_exec (line 50) | def _pre_context_exec(self):
    method _post_context_exec (line 58) | def _post_context_exec(self):
    method _post_init_method (line 67) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
    method to_layer_list (line 122) | def to_layer_list(self, exec_seq=None):
    method partition (line 187) | def partition(self, num_chunks, pipeline_size, rank):
  class PipelinableModel (line 237) | class PipelinableModel(torch.nn.Module):
    method __init__ (line 238) | def __init__(self, module_list, front_func_dict, behind_func_dict):
    method forward (line 244) | def forward(self, *input_tensor, **kwargs):

FILE: colossalai/legacy/pipeline/pipeline_process_group.py
  class PipelineProcessGroup (line 10) | class PipelineProcessGroup:
    method __init__ (line 13) | def __init__(self) -> None:
    method set_global_info (line 16) | def set_global_info(
    method _initialize_process_group (line 56) | def _initialize_process_group(self):
    method _initialize_pp_process_group (line 66) | def _initialize_pp_process_group(self) -> None:
    method _initialize_tp_dp_process_group (line 78) | def _initialize_tp_dp_process_group(self) -> None:
    method get_global_rank (line 85) | def get_global_rank(self):
    method get_world_size (line 88) | def get_world_size(self):
    method get_dp_degree (line 91) | def get_dp_degree(self) -> int:
    method get_tp_degree (line 94) | def get_tp_degree(self) -> int:
    method get_local_device_mesh_size (line 97) | def get_local_device_mesh_size(self) -> int:
    method get_device_mesh_num (line 100) | def get_device_mesh_num(self) -> int:
    method get_stage_num (line 103) | def get_stage_num(self) -> int:
    method is_first_stage (line 106) | def is_first_stage(self) -> bool:
    method is_last_stage (line 109) | def is_last_stage(self) -> bool:
    method check_pp_rank_valid (line 112) | def check_pp_rank_valid(self, pp_rank: int) -> bool:
    method get_local_pp_rank (line 115) | def get_local_pp_rank(self) -> int:
    method get_prev_pp_rank (line 118) | def get_prev_pp_rank(self) -> int:
    method get_next_pp_rank (line 124) | def get_next_pp_rank(self) -> int:
    method get_local_stage_global_ranks (line 130) | def get_local_stage_global_ranks(self) -> List[int]:
    method local_dp_rank (line 133) | def local_dp_rank(self) -> int:
    method local_tp_rank (line 136) | def local_tp_rank(self) -> int:
    method get_pp_global_ranks (line 139) | def get_pp_global_ranks(self) -> int:
    method get_dp_global_ranks (line 142) | def get_dp_global_ranks(self):
    method get_tp_global_ranks (line 145) | def get_tp_global_ranks(self):
    method get_chimera_all_reduce_group (line 148) | def get_chimera_all_reduce_group(self, pp_rank: int):

FILE: colossalai/legacy/pipeline/rpc/_pipeline_base.py
  class Phase (line 20) | class Phase(Enum):
  class UniqueKey (line 27) | class UniqueKey:
    method __init__ (line 32) | def __init__(self, microbatch_id, phase) -> None:
    method __eq__ (line 36) | def __eq__(self, __o: object) -> bool:
    method __hash__ (line 39) | def __hash__(self) -> int:
    method __repr__ (line 42) | def __repr__(self) -> str:
  class WorkItem (line 46) | class WorkItem:
    method __init__ (line 71) | def __init__(
  class BackwardCache (line 78) | class BackwardCache:
    method __init__ (line 85) | def __init__(
  class WorkerBase (line 96) | class WorkerBase(ABC):
    method __init__ (line 97) | def __init__(
    method _get_future_by_device (line 153) | def _get_future_by_device(self):
    method _initialize_outstanding_range (line 156) | def _initialize_outstanding_range(self):
    method _initialize_context_container (line 164) | def _initialize_context_container(self):
    method _initialize_lock (line 170) | def _initialize_lock(self):
    method _initialize_partition (line 177) | def _initialize_partition(self):
    method _get_output_all (line 185) | def _get_output_all(self, key: UniqueKey, ref_use=False, rank=None):
    method sync_global_worker_rrefs (line 221) | def sync_global_worker_rrefs(self, pp_rank_to_worker_rref: Dict[int, P...
    method get_output_by_key (line 233) | def get_output_by_key(self, key: UniqueKey, ref_use=False, rank=None, ...
    method get_numels (line 241) | def get_numels(self) -> int:
    method get_parameters (line 245) | def get_parameters(self) -> List[torch.Tensor]:
    method get_parameter_gradients (line 248) | def get_parameter_gradients(self) -> List[torch.Tensor]:
    method get_partition (line 251) | def get_partition(self):
    method get_partition_state_dict (line 256) | def get_partition_state_dict(self):
    method _make_args_kwargs (line 261) | def _make_args_kwargs(self, microbatch, merge=False):
    method set_input (line 286) | def set_input(self, microbatch_id: int, microbatch: Tuple[Any], forwar...
    method set_labels (line 349) | def set_labels(self, microbatch_id: int, microlabels: Any):
    method _begin_backward (line 355) | def _begin_backward(self, microbatch_id: int):
    method _subscribe_producer (line 378) | def _subscribe_producer(self, microbatch_id: int, forward_only: bool):
    method subscribe_producer (line 451) | def subscribe_producer(self, microbatch_id: int, forward_only: bool):
    method _subscribe_consumer (line 463) | def _subscribe_consumer(self, microbatch_id: int):
    method subscribe_consumer (line 503) | def subscribe_consumer(self, microbatch_id: int):
    method get_producer_stage_ids (line 515) | def get_producer_stage_ids(self):
    method get_consumer_stage_ids (line 535) | def get_consumer_stage_ids(self):
    method _get_producer_consumer (line 553) | def _get_producer_consumer(self) -> None:
    method pp_rank_to_partition_id (line 562) | def pp_rank_to_partition_id(self, pp_rank: int, topo: Topo):
    method partition_id_to_pp_rank (line 566) | def partition_id_to_pp_rank(self, partition_id: int, topo: Topo):
    method get_topo (line 572) | def get_topo(self):
    method use_middleware (line 580) | def use_middleware(self):
    method _get_input_offsets_by_index (line 584) | def _get_input_offsets_by_index(self, target_index):
    method _get_output_offsets_by_index (line 625) | def _get_output_offsets_by_index(self, target_index):
    method _get_real_args_kwargs_fwd (line 653) | def _get_real_args_kwargs_fwd(self, args_or_kwargs):
    method _get_real_args_kwargs_bwd (line 715) | def _get_real_args_kwargs_bwd(self, args_or_kwargs):
    method _get_work_item_key (line 766) | def _get_work_item_key(self) -> UniqueKey:
    method is_first_stage (line 771) | def is_first_stage(self):
    method is_last_stage (line 774) | def is_last_stage(self):
    method need_model_input (line 777) | def need_model_input(self):
    method is_model_output (line 788) | def is_model_output(self):
    method is_model_input (line 791) | def is_model_input(self):
    method _default_data_process_func (line 794) | def _default_data_process_func(self, args_kwargs):
    method _consume_work_item_by_phase (line 804) | def _consume_work_item_by_phase(self, work_item: WorkItem):
    method _get_store_len (line 982) | def _get_store_len(self):
    method _get_parameter_grad_sum (line 985) | def _get_parameter_grad_sum(self):
    method _is_first_step (line 992) | def _is_first_step(self, work_item: WorkItem) -> bool:
    method _is_last_step (line 995) | def _is_last_step(self, work_item: WorkItem) -> bool:
    method _hook_before_step (line 1004) | def _hook_before_step(self):
    method _wait_for_reset (line 1008) | def _wait_for_reset(self):
    method _work_loop (line 1014) | def _work_loop(self):
    method reset_context (line 1043) | def reset_context(self):
    method initialize_optimizer (line 1058) | def initialize_optimizer(self, optimizer_class: type, **kwargs):
    method step (line 1061) | def step(self):
  class PipelineEngineBase (line 1067) | class PipelineEngineBase(ABC, nn.Module):
    method __init__ (line 1068) | def __init__(
    method _check_argument (line 1102) | def _check_argument(self) -> None:
    method _get_actual_stage_num (line 1118) | def _get_actual_stage_num(self) -> int:
    method _create_pp_rank_to_rpc_worker_id (line 1121) | def _create_pp_rank_to_rpc_worker_id(self) -> None:
    method _create_pp_rank_to_module_partition_id (line 1133) | def _create_pp_rank_to_module_partition_id(self) -> None:
    method _init_worker (line 1142) | def _init_worker(self) -> None:
    method remote_numels (line 1191) | def remote_numels(self) -> Dict[int, int]:
    method remote_parameters (line 1200) | def remote_parameters(self) -> Dict[int, List[torch.Tensor]]:
    method remote_grad (line 1210) | def remote_grad(self) -> Dict[int, List[torch.Tensor]]:
    method get_input_pp_ranks (line 1220) | def get_input_pp_ranks(self) -> List[int]:
    method get_output_pp_ranks (line 1223) | def get_output_pp_ranks(self) -> List[int]:
    method _consume_constraint (line 1226) | def _consume_constraint(
    method _create_ret_future (line 1246) | def _create_ret_future(self, output_pp_ranks: List[int]) -> Dict[int, ...
    method _set_input (line 1250) | def _set_input(self, input_pp_ranks: List[int], microbatch_id: int, mi...
    method _set_labels (line 1256) | def _set_labels(self, output_pp_ranks: List[int], microbatch_id: int, ...
    method _subscribe_forward (line 1263) | def _subscribe_forward(self, microbatch_id: int, output_pp_ranks: List...
    method _ensure_backward (line 1269) | def _ensure_backward(self, forward_only: bool, input_pp_ranks: List[in...
    method _collect_forward_result (line 1283) | def _collect_forward_result(self, output_pp_ranks: List[int], ret_futu...
    method _reset_worker (line 1298) | def _reset_worker(self):
    method forward_backward (line 1309) | def forward_backward(self, batch: torch.Tensor, labels: torch.Tensor =...
    method initialize_optimizer (line 1365) | def initialize_optimizer(self, optimizer_class: type, **kwargs):
    method step (line 1371) | def step(self):

FILE: colossalai/legacy/pipeline/rpc/_pipeline_schedule.py
  class FillDrainWorker (line 16) | class FillDrainWorker(WorkerBase):
    method _get_work_item_key (line 17) | def _get_work_item_key(self) -> UniqueKey:
  class FillDrainPipelineEngine (line 33) | class FillDrainPipelineEngine(PipelineEngineBase):
    method __init__ (line 34) | def __init__(
  class OneFOneBWorker (line 67) | class OneFOneBWorker(WorkerBase):
    method _get_work_item_key (line 68) | def _get_work_item_key(self) -> UniqueKey:
  class OneFOneBPipelineEngine (line 101) | class OneFOneBPipelineEngine(PipelineEngineBase):
    method __init__ (line 102) | def __init__(
  class ChimeraWorker (line 136) | class ChimeraWorker(WorkerBase):
    method _get_producer_consumer (line 137) | def _get_producer_consumer(self) -> None:
    method _get_work_item_key (line 157) | def _get_work_item_key(self) -> UniqueKey:
    method _initialize_partition (line 185) | def _initialize_partition(self):
    method _get_lock_gradient (line 212) | def _get_lock_gradient(self):
    method is_first_stage (line 218) | def is_first_stage(self):
    method is_last_stage (line 221) | def is_last_stage(self):
    method _is_last_step (line 224) | def _is_last_step(self, work_item: WorkItem) -> bool:
    method _get_step_order (line 236) | def _get_step_order(self) -> List[int]:
    method _hook_before_step (line 245) | def _hook_before_step(self):
  class ChimeraPipelineEngine (line 264) | class ChimeraPipelineEngine(PipelineEngineBase):
    method __init__ (line 265) | def __init__(
    method _consume_constraint (line 294) | def _consume_constraint(
    method _create_pp_rank_to_rpc_worker_id (line 299) | def _create_pp_rank_to_rpc_worker_id(self) -> None:
    method _create_pp_rank_to_module_partition_id (line 306) | def _create_pp_rank_to_module_partition_id(self) -> None:
    method _create_ret_future (line 313) | def _create_ret_future(self, output_pp_ranks: List[int]) -> Dict[int, ...
    method _set_input (line 321) | def _set_input(self, input_pp_ranks: List[int], microbatch_id: int, mi...
    method _set_labels (line 329) | def _set_labels(self, output_pp_ranks: List[int], microbatch_id: int, ...
    method _subscribe_forward (line 337) | def _subscribe_forward(self, microbatch_id: int, output_pp_ranks: List...
    method _ensure_backward (line 344) | def _ensure_backward(self, forward_only: bool, input_pp_ranks: List[in...
    method _collect_forward_result (line 360) | def _collect_forward_result(self, output_pp_ranks: List[int], ret_futu...

FILE: colossalai/legacy/pipeline/rpc/utils.py
  function pyobj_map (line 16) | def pyobj_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[T...
  function pytree_map (line 29) | def pytree_map(obj: Any, fn: Callable, process_types: Union[Type, Tuple[...
  function tensor_shape_list (line 53) | def tensor_shape_list(obj):
  function get_batch_lengths (line 57) | def get_batch_lengths(batch):
  function split_batch (line 63) | def split_batch(batch: Any, start, stop, device: str):
  function type_detail (line 71) | def type_detail(obj):
  function pytree_filter (line 75) | def pytree_filter(fn, obj, process_types):
  function get_real_args_kwargs (line 89) | def get_real_args_kwargs(args_or_kwargs):
  function run_worker (line 104) | def run_worker(rank, args, master_func):
  function rpc_run (line 137) | def rpc_run(args, master_func):
  function parse_args (line 142) | def parse_args():

FILE: colossalai/legacy/pipeline/utils.py
  function _binary_partition (line 12) | def _binary_partition(weights: List, start: int, end: int):
  function _heap_addition (line 40) | def _heap_addition(weights: List, intervals: int, add_cnt: int):
  function _calc_partitions (line 73) | def _calc_partitions(weights, value):
  function _binary_search (line 90) | def _binary_search(weights, num):
  function partition_uniform (line 114) | def partition_uniform(num_items, pipeline_parallel_size, num_chunks):
  function partition_balanced (line 137) | def partition_balanced(weights, pipeline_parallel_size, num_chunks):
  function build_kwargs_for_module (line 154) | def build_kwargs_for_module(function, input_tensor, kw_dict):
  function build_kwargs_for_function (line 176) | def build_kwargs_for_function(function, kw_dict):
  function exec_func_with_kwargs (line 184) | def exec_func_with_kwargs(func, kw_dict, input_tensor, kwargs):
  function exec_funcs_with_kwargs (line 221) | def exec_funcs_with_kwargs(func_dict, func_key, input_tensor, kwargs):
  function call_module (line 235) | def call_module(module, args=None, kwargs=None):
  function customized_partition (line 258) | def customized_partition(exec_seq):

FILE: colossalai/legacy/registry/registry.py
  class Registry (line 8) | class Registry:
    method __init__ (line 18) | def __init__(self, name: str, third_party_library: List[ModuleType] = ...
    method name (line 24) | def name(self):
    method register_module (line 27) | def register_module(self, module_class):
    method get_module (line 44) | def get_module(self, module_name: str):
    method has (line 64) | def has(self, module_name: str):

FILE: colossalai/legacy/tensor/compute_spec.py
  class ComputePattern (line 4) | class ComputePattern(Enum):
  class ComputeSpec (line 11) | class ComputeSpec(object):
    method __init__ (line 19) | def __init__(self, compute_pattern: ComputePattern) -> None:
    method __repr__ (line 25) | def __repr__(self):
    method set_output_replicate (line 28) | def set_output_replicate(self, flag: bool = True):

FILE: colossalai/legacy/tensor/const.py
  class TensorType (line 4) | class TensorType(Enum):

FILE: colossalai/legacy/tensor/dist_spec_mgr.py
  function divide (line 13) | def divide(numerator, denominator):
  class TransformDistSpec (line 28) | class TransformDistSpec(torch.autograd.Function):
    method forward (line 30) | def forward(ctx, tensor, old_dist_spec, dist_spec, pg, forward_trans_f...
    method backward (line 38) | def backward(ctx, grad_outputs):
  class DistSpecManager (line 49) | class DistSpecManager:
    method _sanity_check (line 53) | def _sanity_check(old_dist_spec: _DistSpec, dist_spec: _DistSpec) -> N...
    method _shard_as (line 57) | def _shard_as(
    method _gather (line 86) | def _gather(tensor: torch.Tensor, old_dist_spec: _DistSpec, pg: Proces...
    method _all_to_all (line 121) | def _all_to_all(
    method _r2r (line 149) | def _r2r(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D...
    method _r2s (line 154) | def _r2s(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D...
    method _s2r (line 159) | def _s2r(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D...
    method _s2s (line 164) | def _s2s(tensor: torch.Tensor, old_dist_spec: _DistSpec, dist_spec: _D...
    method handle_trans_spec (line 175) | def handle_trans_spec(
    method no_grad (line 201) | def no_grad():

FILE: colossalai/legacy/tensor/distspec.py
  class DistPlacementPattern (line 7) | class DistPlacementPattern(Enum):
  class _DistSpec (line 12) | class _DistSpec:
    method __init__ (line 27) | def __init__(self, dist_placement_pattern: DistPlacementPattern, **met...
    method __eq__ (line 32) | def __eq__(self, other: "_DistSpec") -> bool:
    method __repr__ (line 40) | def __repr__(self) -> str:
  function ReplicaSpec (line 49) | def ReplicaSpec() -> _DistSpec:
  function ShardSpec (line 60) | def ShardSpec(dims: List[int], num_partitions: List[int]) -> _DistSpec:

FILE: colossalai/legacy/tensor/op_wrapper.py
  function _register_colo_op (line 8) | def _register_colo_op(op, func):
  function colo_op_impl (line 13) | def colo_op_impl(func):

FILE: colossalai/legacy/tensor/process_group.py
  class PyTorchProcessGroupDict (line 9) | class PyTorchProcessGroupDict(metaclass=SingletonMeta):
    method __init__ (line 10) | def __init__(self):
    method log_pg_init (line 17) | def log_pg_init(self, rank_list: List[int], backend: str):
    method get (line 23) | def get(self, rank_list: List[int], backend: str = "nccl"):
  class ProcessGroup (line 37) | class ProcessGroup:
    method __init__ (line 52) | def __init__(
    method set_cpu_groups (line 121) | def set_cpu_groups(self):
    method has_cpu_groups (line 139) | def has_cpu_groups(self) -> bool:
    method __repr__ (line 148) | def __repr__(self):
    method __eq__ (line 156) | def __eq__(self, obj: "ProcessGroup") -> bool:
    method rank (line 173) | def rank(self) -> int:
    method ranks_in_group (line 183) | def ranks_in_group(self) -> List[int]:
    method world_size (line 193) | def world_size(self) -> int:
    method tp_rank_list (line 203) | def tp_rank_list(self) -> List[int]:
    method dp_rank_list (line 213) | def dp_rank_list(self) -> List[int]:
    method tp_local_rank (line 223) | def tp_local_rank(self) -> int:
    method dp_local_rank (line 233) | def dp_local_rank(self) -> int:
    method dp_world_size (line 243) | def dp_world_size(self) -> int:
    method tp_world_size (line 253) | def tp_world_size(self) -> int:
    method dp_process_group (line 263) | def dp_process_group(self):
    method tp_process_group (line 273) | def tp_process_group(self):
    method cpu_dp_process_group (line 283) | def cpu_dp_process_group(self):
    method cpu_tp_process_group (line 296) | def cpu_tp_process_group(self):
    method get_ranks_in_dp (line 309) | def get_ranks_in_dp(self) -> List[int]:
    method get_ranks_in_tp (line 319) | def get_ranks_in_tp(self):

FILE: colossalai/legacy/tensor/tensor_spec.py
  class ColoTensorSpec (line 11) | class ColoTensorSpec:

FILE: colossalai/legacy/trainer/_trainer.py
  class Trainer (line 14) | class Trainer:
    method __init__ (line 53) | def __init__(
    method cur_epoch (line 81) | def cur_epoch(self):
    method cur_epoch (line 86) | def cur_epoch(self, epoch: int):
    method cur_step (line 92) | def cur_step(self):
    method max_epochs (line 97) | def max_epochs(self):
    method max_steps (line 101) | def max_steps(self):
    method steps_per_epoch (line 105) | def steps_per_epoch(self):
    method engine (line 109) | def engine(self):
    method _set_current_step (line 112) | def _set_current_step(self, epoch: int):
    method _call_timer (line 120) | def _call_timer(self, action: str, item: str, *args, **kwargs) -> None:
    method _reset_states (line 133) | def _reset_states(self) -> None:
    method _call_hooks (line 137) | def _call_hooks(self, func, output=None):
    method _should_display_progress (line 152) | def _should_display_progress(display_progress: bool):
    method _train_epoch (line 156) | def _train_epoch(
    method _eval (line 205) | def _eval(
    method _exceed_max_step (line 252) | def _exceed_max_step(self):
    method fit (line 255) | def fit(
    method evaluate (line 348) | def evaluate(
    method predict (line 391) | def predict(self, data: Union[Any, List[Any]]):

FILE: colossalai/legacy/trainer/hooks/_base_hook.py
  class BaseHook (line 9) | class BaseHook(ABC):
    method __init__ (line 17) | def __init__(self, priority: int) -> None:
    method after_hook_is_attached (line 20) | def after_hook_is_attached(self, trainer):
    method before_train (line 23) | def before_train(self, trainer):
    method after_train (line 26) | def after_train(self, trainer):
    method before_train_iter (line 29) | def before_train_iter(self, trainer):
    method after_train_iter (line 32) | def after_train_iter(self, trainer, output: Tensor, label: Tensor, los...
    method before_train_epoch (line 42) | def before_train_epoch(self, trainer):
    method after_train_epoch (line 45) | def after_train_epoch(self, trainer):
    method before_test (line 48) | def before_test(self, trainer):
    method after_test (line 51) | def after_test(self, trainer):
    method before_test_epoch (line 54) | def before_test_epoch(self, trainer):
    method after_test_epoch (line 57) | def after_test_epoch(self, trainer):
    method before_test_iter (line 60) | def before_test_iter(self, trainer):
    method after_test_iter (line 63) | def after_test_iter(self, trainer, output: Tensor, label: Tensor, loss...
    method init_runner_states (line 73) | def init_runner_states(self, trainer, key, val):

FILE: colossalai/legacy/trainer/hooks/_checkpoint_hook.py
  class SaveCheckpointHook (line 14) | class SaveCheckpointHook(BaseHook):
    method __init__ (line 30) | def __init__(
    method after_hook_is_attached (line 48) | def after_hook_is_attached(self, trainer):
    method after_train_iter (line 56) | def after_train_iter(self, trainer, output, label, loss):
    method after_train_epoch (line 69) | def after_train_epoch(self, trainer):

FILE: colossalai/legacy/trainer/hooks/_commons_.py
  function _format_number (line 4) | def _format_number(val, prec=5):

FILE: colossalai/legacy/trainer/hooks/_log_hook.py
  class LogByEpochHook (line 20) | class LogByEpochHook(BaseHook):
    method __init__ (line 31) | def __init__(self, logger, interval: int = 1, priority: int = 1):
    method _is_epoch_to_log (line 36) | def _is_epoch_to_log(self, trainer):
  class LogMetricByStepHook (line 41) | class LogMetricByStepHook(BaseHook):
    method __init__ (line 50) | def __init__(self, priority: int = 10):
    method after_train_iter (line 53) | def after_train_iter(self, trainer, *args):
    method after_test_iter (line 61) | def after_test_iter(self, trainer, *args):
  class LogMetricByEpochHook (line 71) | class LogMetricByEpochHook(LogByEpochHook):
    method __init__ (line 82) | def __init__(self, logger, interval: int = 1, priority: int = 10) -> N...
    method _get_str (line 86) | def _get_str(self, trainer, mode):
    method after_train_epoch (line 93) | def after_train_epoch(self, trainer):
    method after_test_epoch (line 101) | def after_test_epoch(self, trainer):
  class TensorboardHook (line 110) | class TensorboardHook(BaseHook):
    method __init__ (line 123) | def __init__(
    method _log_by_iter (line 168) | def _log_by_iter(self, trainer, mode: str):
    method _log_by_epoch (line 177) | def _log_by_epoch(self, trainer, mode: str):
    method after_test_iter (line 184) | def after_test_iter(self, trainer, *args):
    method after_test_epoch (line 187) | def after_test_epoch(self, trainer):
    method after_train_iter (line 190) | def after_train_iter(self, trainer, *args):
    method after_train_epoch (line 193) | def after_train_epoch(self, trainer):
  class LogTimingByEpochHook (line 198) | class LogTimingByEpochHook(LogByEpochHook):
    method __init__ (line 212) | def __init__(
    method _get_message (line 231) | def _get_message(self, mode):
    method after_train_epoch (line 251) | def after_train_epoch(self, trainer):
    method after_test_epoch (line 257) | def after_test_epoch(self, trainer):
  class LogMemoryByEpochHook (line 265) | class LogMemoryByEpochHook(LogByEpochHook):
    method __init__ (line 277) | def __init__(
    method before_train (line 289) | def before_train(self, trainer):
    method after_train_epoch (line 294) | def after_train_epoch(self, trainer):
    method after_test (line 299) | def after_test(self, trainer):

FILE: colossalai/legacy/trainer/hooks/_lr_scheduler_hook.py
  class LRSchedulerHook (line 9) | class LRSchedulerHook(MetricHook):
    method __init__ (line 23) | def __init__(
    method after_hook_is_attached (line 35) | def after_hook_is_attached(self, trainer):
    method after_train_epoch (line 41) | def after_train_epoch(self, trainer):
    method after_train_iter (line 46) | def after_train_iter(self, trainer, output: Tensor, label: Tensor, los...

FILE: colossalai/legacy/trainer/hooks/_metric_hook.py
  class Metric (line 21) | class Metric(ABC):
    method __init__ (line 32) | def __init__(self, epoch_only: bool):
    method epoch_only (line 37) | def epoch_only(self):
    method reset (line 42) | def reset(self) -> None:
    method update (line 48) | def update(self, *args, **kwargs) -> None:
    method get_last_step_value (line 54) | def get_last_step_value(self) -> float:
    method get_accumulated_value (line 58) | def get_accumulated_value(self):
    method is_better (line 68) | def is_better(a, b) -> bool:
  class LossMetric (line 76) | class LossMetric(Metric):
    method __init__ (line 83) | def __init__(self, epoch_only):
    method reset (line 89) | def reset(self) -> None:
    method update (line 95) | def update(self, loss) -> None:
    method get_accumulated_value (line 108) | def get_accumulated_value(self):
    method get_last_step_value (line 117) | def get_last_step_value(self) -> float:
    method is_better (line 122) | def is_better(a, b):
  class LearningRateMetric (line 126) | class LearningRateMetric(Metric):
    method __init__ (line 134) | def __init__(self, epoch_only: bool, initial_lr: float = 0.0):
    method reset (line 138) | def reset(self) -> None:
    method update (line 141) | def update(self, lr) -> None:
    method get_last_step_value (line 144) | def get_last_step_value(self) -> float:
    method get_accumulated_value (line 147) | def get_accumulated_value(self):
    method is_better (line 151) | def is_better(a, b) -> bool:
  class AccuracyMetric (line 155) | class AccuracyMetric(Metric):
    method __init__ (line 164) | def __init__(self, epoch_only: bool, accuracy_func: Callable):
    method reset (line 172) | def reset(self) -> None:
    method update (line 178) | def update(self, logits, targets, batch_size) -> None:
    method get_last_step_value (line 199) | def get_last_step_value(self) -> float:
    method get_accumulated_value (line 204) | def get_accumulated_value(self):
    method is_better (line 210) | def is_better(a, b) -> bool:
  class MetricHook (line 214) | class MetricHook(BaseHook):
    method __init__ (line 226) | def __init__(
    method _check_metric_states_initialization (line 233) | def _check_metric_states_initialization(self, trainer):
  class LossHook (line 239) | class LossHook(MetricHook):
    method __init__ (line 248) | def __init__(self, priority: int = 0):
    method after_hook_is_attached (line 251) | def after_hook_is_attached(self, trainer):
    method before_train_epoch (line 262) | def before_train_epoch(self, trainer):
    method after_train_iter (line 266) | def after_train_iter(self, trainer, logits, label, loss):
    method before_test_epoch (line 270) | def before_test_epoch(self, trainer):
    method after_test_iter (line 274) | def after_test_iter(self, trainer, logits, label, loss):
  class AccuracyHook (line 280) | class AccuracyHook(MetricHook):
    method __init__ (line 290) | def __init__(self, accuracy_func: Callable, priority: int = 0):
    method after_hook_is_attached (line 294) | def after_hook_is_attached(self, trainer):
    method before_test (line 302) | def before_test(self, trainer):
    method after_test_iter (line 306) | def after_test_iter(self, trainer, logits, targets, *args):
  class ThroughputMetric (line 312) | class ThroughputMetric(Metric):
    method __init__ (line 319) | def __init__(self, epoch_only: bool, ignored_steps: int = 0, tflop_per...
    method reset (line 330) | def reset(self) -> None:
    method update (line 337) | def update(self, num_samples, time) -> None:
    method get_last_step_value (line 345) | def get_last_step_value(self) -> float:
    method get_last_step_info (line 357) | def get_last_step_info(self) -> str:
    method get_accumulated_value (line 373) | def get_accumulated_value(self) -> float:
    method is_better (line 381) | def is_better(a, b) -> bool:
  class ThroughputHook (line 386) | class ThroughputHook(MetricHook):
    method __init__ (line 398) | def __init__(self, ignored_steps: int = 0, priority: int = 10, tflop_p...
    method after_hook_is_attached (line 404) | def after_hook_is_attached(self, trainer):
    method before_train_epoch (line 418) | def before_train_epoch(self, trainer):
    method after_train_iter (line 422) | def after_train_iter(self, trainer, *args):
    method before_test (line 428) | def before_test(self, trainer):
    method after_test_iter (line 432) | def after_test_iter(self, trainer, *args):

FILE: colossalai/legacy/utils/activation_checkpoint.py
  function copy_to_device (line 13) | def copy_to_device(obj, device):
  class CheckpointFunction (line 30) | class CheckpointFunction(torch.autograd.Function):
    method forward (line 32) | def forward(ctx, run_function, activation_offload=False, *args):
    method backward (line 79) | def backward(ctx, *args):
  function checkpoint (line 141) | def checkpoint(function, activation_offload, *args, use_reentrant: bool ...
  function _checkpoint_without_reentrant (line 164) | def _checkpoint_without_reentrant(function, activation_offload=False, *a...

FILE: colossalai/legacy/utils/checkpoint/module_checkpoint.py
  function save_checkpoint (line 12) | def save_checkpoint(
  function load_checkpoint (line 79) | def load_checkpoint(

FILE: colossalai/legacy/utils/checkpoint/utils.py
  function robust_broadcast (line 9) | def robust_broadcast(tensor):
  function gather_tensor (line 23) | def gather_tensor(colo_tensor: ColoTensor) -> None:
  function scatter_tensor (line 41) | def scatter_tensor(colo_tensor: ColoTensor, dist_spec: _DistSpec) -> None:

FILE: colossalai/legacy/utils/checkpointing.py
  function broadcast_state_dict (line 21) | def broadcast_state_dict(state_dict, parallel_mode):
  function partition_tensor_parallel_state_dict (line 28) | def partition_tensor_parallel_state_dict(
  function gather_tensor_parallel_state_dict (line 66) | def gather_tensor_parallel_state_dict(
  function _send_state_dict (line 99) | def _send_state_dict(state_dict, dst, parallel_mode):
  function _recv_state_dict (line 105) | def _recv_state_dict(src, parallel_mode):
  function partition_pipeline_parallel_state_dict (line 114) | def partition_pipeline_parallel_state_dict(model, state_dict):
  function gather_pipeline_parallel_state_dict (line 139) | def gather_pipeline_parallel_state_dict(state_dict):
  function save_checkpoint (line 161) | def save_checkpoint(
  function broadcast_model (line 202) | def broadcast_model(model: torch.nn.Module):
  function load_checkpoint (line 214) | def load_checkpoint(

FILE: colossalai/legacy/utils/common.py
  function print_rank_0 (line 26) | def print_rank_0(msg: str, logger=None):
  function sync_model_param (line 41) | def sync_model_param(model, parallel_mode):
  function is_dp_rank_0 (line 58) | def is_dp_rank_0():
  function is_tp_rank_0 (line 62) | def is_tp_rank_0():
  function is_no_pp_or_last_stage (line 66) | def is_no_pp_or_last_stage():
  function is_using_ddp (line 70) | def is_using_ddp():
  function is_using_pp (line 74) | def is_using_pp():
  function is_using_sequence (line 78) | def is_using_sequence():
  class model_branch_context (line 82) | class model_branch_context(object):
    method __enter__ (line 83) | def __enter__(self):
    method __exit__ (line 86) | def __exit__(self, *exc_info):
  function is_model_parallel_parameter (line 90) | def is_model_parallel_parameter(p):
  function _calc_l2_norm (line 94) | def _calc_l2_norm(grads):
  function _calc_lp (line 112) | def _calc_lp(grads, norm_type):
  function _move_norm_to_cuda (line 120) | def _move_norm_to_cuda(norm: Union[float, torch.Tensor]) -> Union[float,...
  function _get_tensor_norm (line 126) | def _get_tensor_norm(norm: Union[float, torch.Tensor], move_to_cuda) -> ...
  function _compute_local_lp (line 137) | def _compute_local_lp(params: List[ColoParameter], norm_type: float) -> ...
  function _compute_buckets_lp (line 153) | def _compute_buckets_lp(params: List[ColoParameter], norm_type: float) -...
  function _compute_pp_grad_lp (line 179) | def _compute_pp_grad_lp(total_lp: float, norm_type: float) -> float:
  function _compute_grad_lp (line 190) | def _compute_grad_lp(parameters, norm_type: float = 2.0) -> float:
  function compute_grad_norm (line 217) | def compute_grad_norm(parameters, norm_type: float = 2.0) -> float:
  function _clip_grad_norm (line 225) | def _clip_grad_norm(parameters, max_norm: float, total_norm: float) -> N...
  function clip_grad_norm (line 248) | def clip_grad_norm(parameters, max_norm: float, norm_type: float = 2.0) ...
  function clip_grad_norm_fp32 (line 254) | def clip_grad_norm_fp32(parameters, max_norm, norm_type=2):
  function count_zeros_fp32 (line 375) | def count_zeros_fp32(parameters):
  function copy_tensor_parallel_attributes (line 413) | def copy_tensor_parallel_attributes(src_tensor, dst_tensor):
  function param_is_not_tensor_parallel_duplicate (line 420) | def param_is_not_tensor_parallel_duplicate(param):
  function switch_virtual_pipeline_parallel_rank (line 427) | def switch_virtual_pipeline_parallel_rank(rank):

FILE: colossalai/legacy/utils/data_sampler/base_sampler.py
  class BaseSampler (line 7) | class BaseSampler(ABC):
    method __init__ (line 8) | def __init__(self, dataset, batch_size):
    method __len__ (line 13) | def __len__(self):
    method __iter__ (line 17) | def __iter__(self):

FILE: colossalai/legacy/utils/data_sampler/data_parallel_sampler.py
  class DataParallelSampler (line 19) | class DataParallelSampler(Sampler):
    method __init__ (line 31) | def __init__(self, dataset: Dataset, shuffle: bool = False, seed: int ...
    method __iter__ (line 56) | def __iter__(self) -> Iterator[T_co]:
    method __len__ (line 88) | def __len__(self) -> int:
    method set_epoch (line 91) | def set_epoch(self, epoch: int) -> None:
  function get_dataloader (line 102) | def get_dataloader(

FILE: colossalai/legacy/utils/memory.py
  function _bytes_to_MB (line 17) | def _bytes_to_MB(val, decimal=2):
  function _get_cpu_memory_info (line 27) | def _get_cpu_memory_info():
  function report_memory_usage (line 60) | def report_memory_usage(message, logger=None, report_cpu=False):
  function colo_device_memory_capacity (line 100) | def colo_device_memory_capacity(device: torch.device) -> int:
  function colo_device_memory_used (line 121) | def colo_device_memory_used(device: torch.device) -> int:
  function colo_set_process_memory_fraction (line 145) | def colo_set_process_memory_fraction(ratio: float) -> None:
  function colo_set_cpu_memory_capacity (line 162) | def colo_set_cpu_memory_capacity(size: int) -> None:
  function colo_get_cpu_memory_capacity (line 172) | def colo_get_cpu_memory_capacity() -> int:

FILE: colossalai/legacy/utils/profiler/extention.py
  class ProfilerExtension (line 4) | class ProfilerExtension(ABC):
    method prepare_trace (line 6) | def prepare_trace(self):
    method start_trace (line 10) | def start_trace(self):
    method stop_trace (line 14) | def stop_trace(self):
    method extend_chrome_trace (line 18) | def extend_chrome_trace(self, trace: dict) -> dict:

FILE: colossalai/legacy/utils/profiler/legacy/comm_profiler.py
  function _get_code_location (line 16) | def _get_code_location(depth: int):
  class CommEvent (line 40) | class CommEvent(object):
    method __init__ (line 45) | def __init__(self, count: int = 0, comm_vol: float = 0.0, cuda_time: i...
    method add (line 50) | def add(self, rhs):
  class CommProfiler (line 56) | class CommProfiler(BaseProfiler):
    method __init__ (line 59) | def __init__(self, depth: int = 0, total_count: int = 0, total_comm_vo...
    method reset (line 72) | def reset(self):
    method enable (line 83) | def enable(self):
    method disable (line 90) | def disable(self):
    method to_tensorboard (line 97) | def to_tensorboard(self, writer):
    method to_file (line 100) | def to_file(self, filename: Path):
    method show (line 104) | def show(self):
    method result_str (line 107) | def result_str(self, sep: str = "\n"):
    method has_aync_op (line 155) | def has_aync_op(self):
    method activate_profiler (line 158) | def activate_profiler(self, kn: str, vol: float):
    method close_profiler (line 163) | def close_profiler(self, group=None):
    method wait_async_op (line 196) | def wait_async_op(self):
  class CommHandler (line 203) | class CommHandler(object):
    method __init__ (line 206) | def __init__(self, profiler: CommProfiler):
    method wait (line 210) | def wait(self):
  function async_check (line 214) | def async_check(profiler: CommProfiler):
  function all_reduce (line 220) | def all_reduce(
  function reduce_scatter (line 237) | def reduce_scatter(
  function all_gather (line 262) | def all_gather(
  function broadcast (line 286) | def broadcast(
  function reduce (line 301) | def reduce(

FILE: colossalai/legacy/utils/profiler/legacy/pcie_profiler.py
  function _get_size (line 9) | def _get_size(dtype: str):
  function _get_numel (line 18) | def _get_numel(my_list: List[int]) -> int:
  function _reduce_location (line 25) | def _reduce_location(locations: List[str]) -> str:
  class PcieEvent (line 34) | class PcieEvent(object):
    method __init__ (line 37) | def __init__(self, count: int = 0, pcie_vol: int = 0, cuda_time: int =...
    method add (line 42) | def add(self, rhs):
  class PcieProfiler (line 48) | class PcieProfiler(BaseProfiler):
    method __init__ (line 54) | def __init__(self, dtype: str = "fp32", depth: int = 1):
    method reset (line 66) | def reset(self):
    method enable (line 75) | def enable(self):
    method disable (line 81) | def disable(self):
    method to_tensorboard (line 106) | def to_tensorboard(self, writer):
    method to_file (line 109) | def to_file(self, filename: Path):
    method show (line 113) | def show(self):
    method result_str (line 116) | def result_str(self, sep: str = "\n"):

FILE: colossalai/legacy/utils/profiler/legacy/prof_utils.py
  function _format_time (line 9) | def _format_time(time_us):
  function _format_memory (line 21) | def _format_memory(nbytes):
  function _format_bandwidth (line 36) | def _format_bandwidth(volume: float or int, time_us: int):
  class BaseProfiler (line 46) | class BaseProfiler(ABC):
    method __init__ (line 47) | def __init__(self, profiler_name: str, priority: int):
    method enable (line 52) | def enable(self):
    method disable (line 56) | def disable(self):
    method to_tensorboard (line 60) | def to_tensorboard(self, writer):
    method to_file (line 64) | def to_file(self, filename: Path):
    method show (line 68) | def show(self):
  class ProfilerContext (line 72) | class ProfilerContext(object):
    method __init__ (line 95) | def __init__(self, profilers: List[BaseProfiler] = None, enable: bool ...
    method __enter__ (line 99) | def __enter__(self):
    method __exit__ (line 105) | def __exit__(self, exc_type, exc_val, exc_tb):
    method to_tensorboard (line 110) | def to_tensorboard(self, writer):
    method to_file (line 120) | def to_file(self, log_dir: Union[str, Path]):
    method show (line 130) | def show(self):

FILE: colossalai/legacy/utils/profiler/profiler.py
  class profile (line 17) | class profile(torch_profile):
    method __init__ (line 123) | def __init__(
    method prepare_trace (line 155) | def prepare_trace(self) -> None:
    method _start_warmup (line 163) | def _start_warmup(self):
    method start_trace (line 166) | def start_trace(self):
    method _start_trace (line 174) | def _start_trace(self):
    method stop_trace (line 177) | def stop_trace(self):
    method _stop_trace (line 185) | def _stop_trace(self):
    method export_chrome_trace (line 188) | def export_chrome_trace(self, path: str):

FILE: colossalai/legacy/utils/profiler/stateful_tensor_mem_extention.py
  class DeviceType (line 15) | class DeviceType(Enum):
  function get_timestamp_us (line 20) | def get_timestamp_us():
  function generic_instant_event (line 24) | def generic_instant_event(name, pid, tid, timestamp, args):
  class StatefulTensorMemoryEvent (line 28) | class StatefulTensorMemoryEvent:
    method __init__ (line 31) | def __init__(self, timestamp: int, device_type: DeviceType, bytes_: in...
    method state_dict (line 39) | def state_dict(self):
  class StatefulTensorMemoryTracer (line 49) | class StatefulTensorMemoryTracer:
    method __init__ (line 50) | def __init__(self) -> None:
    method sample (line 54) | def sample(self):
    method start_trace (line 62) | def start_trace(self):
    method stop_trace (line 66) | def stop_trace(self):
    method state_dict (line 69) | def state_dict(self):
  class StatefulTensorMemoryTracerHook (line 73) | class StatefulTensorMemoryTracerHook(BaseOpHook):
    method __init__ (line 74) | def __init__(self, tracer: StatefulTensorMemoryTracer):
    method pre_fwd_exec (line 79) | def pre_fwd_exec(self, module: torch.nn.Module, *args):
    method post_fwd_exec (line 83) | def post_fwd_exec(self, module: torch.nn.Module, *args):
    method pre_bwd_exec (line 87) | def pre_bwd_exec(self, module: torch.nn.Module, input_, output):
    method post_bwd_exec (line 91) | def post_bwd_exec(self, module: torch.nn.Module, input_):
    method post_iter (line 95) | def post_iter(self):
    method enable (line 99) | def enable(self):
    method disable (line 102) | def disable(self):
  class StatefulTensorMemoryProfilerExtention (line 106) | class StatefulTensorMemoryProfilerExtention(ProfilerExtension):
    method __init__ (line 107) | def __init__(self, engine: Engine) -> None:
    method prepare_trace (line 113) | def prepare_trace(self):
    method start_trace (line 119) | def start_trace(self):
    method stop_trace (line 123) | def stop_trace(self):
    method extend_chrome_trace (line 132) | def extend_chrome_trace(self, trace: dict) -> dict:

FILE: colossalai/legacy/zero/__init__.py
  function convert_to_zero_v2 (line 14) | def convert_to_zero_v2(

FILE: colossalai/legacy/zero/gemini/colo_init_context.py
  function _named_params_with_replica (line 13) | def _named_params_with_replica(
  function _convert_to_coloparam (line 28) | def _convert_to_coloparam(
  function ColoModulize (line 61) | def ColoModulize(module):
  class ColoInitContext (line 69) | class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
    method __init__ (line 70) | def __init__(
    method _register_colo_modules (line 92) | def _register_colo_modules(self):
    method _pre_context_exec (line 98) | def _pre_context_exec(self):
    method _post_init_method (line 101) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
  function post_process_colo_init_ctx (line 158) | def post_process_colo_init_ctx(

FILE: colossalai/legacy/zero/gemini/gemini_context.py
  class GeminiMemoryManager (line 4) | class GeminiMemoryManager(object):
    method __init__ (line 5) | def __init__(self, states_cls: EnumMeta):
    method total_number (line 18) | def total_number(self):
    method reset (line 21) | def reset(self):
    method register_new_instance (line 32) | def register_new_instance(self):
    method delete_instance (line 35) | def delete_instance(self):
    method print_info (line 38) | def print_info(self):

FILE: colossalai/legacy/zero/gemini/ophooks/_shard_grad_ophook.py
  class ShardGradMemTracerHook (line 9) | class ShardGradMemTracerHook(BaseOpHook):
    method __init__ (line 14) | def __init__(self):
    method pre_fwd_exec (line 17) | def pre_fwd_exec(self, module: torch.nn.Module, *args):
    method post_fwd_exec (line 20) | def post_fwd_exec(self, module: torch.nn.Module, *args):
    method pre_bwd_exec (line 23) | def pre_bwd_exec(self, module: torch.nn.Module, input, output):
    method post_bwd_exec (line 28) | def post_bwd_exec(self, module: torch.nn.Module, input):
    method post_iter (line 31) | def post_iter(self):

FILE: colossalai/legacy/zero/gemini/ophooks/_shard_param_ophook.py
  class ShardParamHook (line 9) | class ShardParamHook(BaseOpHook):
    method __init__ (line 14) | def __init__(self):
    method niter (line 17) | def niter(self):
    method pre_fwd_exec (line 20) | def pre_fwd_exec(self, module: torch.nn.Module, *args):
    method post_fwd_exec (line 26) | def post_fwd_exec(self, module: torch.nn.Module, *args):
    method pre_bwd_exec (line 32) | def pre_bwd_exec(self, module: torch.nn.Module, input, output):
    method post_bwd_exec (line 38) | def post_bwd_exec(self, module: torch.nn.Module, input):
    method pre_iter (line 44) | def pre_iter(self):
    method post_iter (line 47) | def post_iter(self):

FILE: colossalai/legacy/zero/gemini/ophooks/runtime_mem_tracer_hook.py
  class TrainingPhase (line 13) | class TrainingPhase(Enum):
  class GradMemStats (line 18) | class GradMemStats:
    method __init__ (line 19) | def __init__(self) -> None:
    method clear (line 23) | def clear(self):
  class GradMemTracerHook (line 28) | class GradMemTracerHook:
    method __init__ (line 29) | def __init__(self, grad_stats: GradMemStats):
    method grad_handle (line 33) | def grad_handle(self, p, grad):
    method register_grad_hook (line 39) | def register_grad_hook(self, module: torch.nn.Module):
    method remove_grad_hook (line 45) | def remove_grad_hook(self):
  class ParamMemTracerHook (line 50) | class ParamMemTracerHook(ColoParamOpHook):
    method __init__ (line 51) | def __init__(self, memstats: MemStats, gradstats: GradMemStats) -> None:
    method _free_cuda_params (line 58) | def _free_cuda_params(self, params):
    method _allocate_params_on_cuda (line 64) | def _allocate_params_on_cuda(self, params: List[torch.nn.Parameter]):
    method record_model_data_volume (line 85) | def record_model_data_volume(self, params):
    method pre_op (line 102) | def pre_op(self, params):
    method post_op (line 116) | def post_op(self, params):
    method pre_forward (line 119) | def pre_forward(self, params: List[torch.Tensor]) -> None:
    method post_forward (line 122) | def post_forward(self, params: List[torch.Tensor]) -> None:
    method pre_backward (line 125) | def pre_backward(self, params: List[torch.Tensor]) -> None:
    method post_backward (line 128) | def post_backward(self, params: List[torch.Tensor]) -> None:
    method switch_training_phase (line 132) | def switch_training_phase(self, training_phase: TrainingPhase = Traini...

FILE: colossalai/legacy/zero/gemini/ophooks/utils.py
  class BaseOpHook (line 8) | class BaseOpHook(ABC):
    method __init__ (line 12) | def __init__(self):
    method pre_fwd_exec (line 16) | def pre_fwd_exec(self, module: torch.nn.Module, *args):
    method post_fwd_exec (line 20) | def post_fwd_exec(self, module: torch.nn.Module, *args):
    method pre_bwd_exec (line 24) | def pre_bwd_exec(self, module: torch.nn.Module, input, output):
    method post_bwd_exec (line 28) | def post_bwd_exec(self, module: torch.nn.Module, input):
    method post_iter (line 32) | def post_iter(self):
  function _apply_to_tensors_only (line 37) | def _apply_to_tensors_only(module, functional, backward_function, outputs):
  class PreBackwardFunction (line 50) | class PreBackwardFunction(torch.autograd.Function):
    method forward (line 52) | def forward(ctx, module, pre_backward_function, outputs):
    method backward (line 60) | def backward(ctx, *args):
  class PostBackwardFunction (line 65) | class PostBackwardFunction(torch.autograd.Function):
    method forward (line 67) | def forward(ctx, module, pre_backward_function, output):
    method backward (line 74) | def backward(ctx, *args):
  function register_ophooks_recursively (line 85) | def register_ophooks_recursively(

FILE: colossalai/legacy/zero/gemini/paramhooks/_param_hookmgr.py
  class BaseParamHookMgr (line 7) | class BaseParamHookMgr(object):
    method __init__ (line 8) | def __init__(self, param_list: List[torch.nn.Parameter]) -> None:
    method register_backward_hooks (line 15) | def register_backward_hooks(self, hook_call: Callable) -> None:
    method remove_hooks (line 31) | def remove_hooks(self) -> None:

FILE: colossalai/legacy/zero/gemini/stateful_tensor.py
  function sizeof_tensor (line 9) | def sizeof_tensor(tensor: torch.Tensor):
  class TensorState (line 13) | class TensorState(Enum):
  class StatefulTensor (line 21) | class StatefulTensor(object):
    method __init__ (line 32) | def __init__(self, maybe_tensor: Optional[torch.Tensor], state: Option...
    method data_ptr (line 49) | def data_ptr(self):
    method set_null (line 54) | def set_null(self) -> None:
    method is_null (line 60) | def is_null(self) -> bool:
    method trans_state (line 67) | def trans_state(self, state: TensorState) -> None:
    method move_to (line 80) | def move_to(self, device: Union[torch.device, int]):
    method payload_copy (line 97) | def payload_copy(self, tensor) -> None:
    method payload_reset (line 100) | def payload_reset(self, tensor) -> None:
    method payload_relay (line 116) | def payload_relay(self, rhs):
    method payload (line 134) | def payload(self) -> Optional[torch.Tensor]:
    method payload_size (line 138) | def payload_size(self) -> int:
    method state (line 142) | def state(self) -> TensorState:
    method device (line 146) | def device(self) -> torch.device:
    method dtype (line 150) | def dtype(self) -> torch.dtype:
    method shape (line 154) | def shape(self):
    method to (line 157) | def to(self, device: torch.device):
    method to_ (line 160) | def to_(self, device: torch.device):
    method __release (line 163) | def __release(self):
    method __trans_state_update (line 170) | def __trans_state_update(self, from_state: TensorState, to_state: Tens...
    method __trans_device_update (line 190) | def __trans_device_update(self, from_type: str, to_type: str):
    method __del__ (line 204) | def __del__(self):

FILE: colossalai/legacy/zero/gemini/stateful_tensor_mgr.py
  class StatefulTensorMgr (line 13) | class StatefulTensorMgr(object):
    method __init__ (line 21) | def __init__(self, tensor_placement_policy: TensorPlacementPolicy) -> ...
    method register_stateful_tensor_list (line 33) | def register_stateful_tensor_list(self, tensor_list: List[StatefulTens...
    method start_iter (line 40) | def start_iter(self):
    method finish_iter (line 43) | def finish_iter(self):
    method adjust_layout (line 51) | def adjust_layout(self) -> None:
    method cpu_gpu_move_volume (line 75) | def cpu_gpu_move_volume(self):
    method _trans_state (line 78) | def _trans_state(self, trans_state_func, stateful_tensor, state):
    method _get_layout_info (line 86) | def _get_layout_info(self, compute_idx: int, warmup: bool):

FILE: colossalai/legacy/zero/gemini/tensor_placement_policy.py
  class TensorPlacementPolicy (line 16) | class TensorPlacementPolicy(ABC):
    method __init__ (line 17) | def __init__(self, device: Optional[torch.device], mem_stats_collector...
    method evict_tensors (line 22) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *...
  class CPUTensorPlacementPolicy (line 26) | class CPUTensorPlacementPolicy(TensorPlacementPolicy):
    method __init__ (line 27) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ...
    method evict_tensors (line 30) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *...
  class CUDATensorPlacementPolicy (line 38) | class CUDATensorPlacementPolicy(TensorPlacementPolicy):
    method __init__ (line 39) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ...
    method evict_tensors (line 43) | def evict_tensors(self, hold_cuda_tensor_list: List[StatefulTensor], *...
  class AutoTensorPlacementPolicy (line 47) | class AutoTensorPlacementPolicy(TensorPlacementPolicy):
    method __init__ (line 48) | def __init__(self, mem_stats_collector: Optional[MemStatsCollector] = ...
    method evict_tensors (line 55) | def evict_tensors(
    method _sort_hold_cuda_tensors (line 118) | def _sort_hold_cuda_tensors(hold_cuda_tensors: tuple, compute_idx: int...
  class TensorPlacementPolicyFactory (line 127) | class TensorPlacementPolicyFactory:
    method create (line 129) | def create(policy_name: str) -> Type[TensorPlacementPolicy]:

FILE: colossalai/legacy/zero/gemini/tensor_utils.py
  function is_storage_empty (line 8) | def is_storage_empty(tensor: torch.Tensor) -> bool:
  function free_storage (line 12) | def free_storage(tensor: torch.Tensor) -> None:
  function alloc_storage (line 17) | def alloc_storage(tensor: torch.Tensor) -> None:
  function colo_tensor_mem_usage (line 22) | def colo_tensor_mem_usage(tensor: Union[torch.Tensor, StatefulTensor]) -...
  function colo_model_data_tensor_move (line 41) | def colo_model_data_tensor_move(
  function colo_model_data_tensor_move_inline (line 75) | def colo_model_data_tensor_move_inline(
  function colo_model_data_move_to_cpu (line 95) | def colo_model_data_move_to_cpu(t: Union[StatefulTensor, torch.Tensor]) ...
  function colo_model_tensor_clone (line 110) | def colo_model_tensor_clone(t: Union[StatefulTensor, torch.Tensor], targ...

FILE: colossalai/legacy/zero/init_ctx/init_context.py
  class ZeroContextConfig (line 23) | class ZeroContextConfig:
    method __post_init__ (line 37) | def __post_init__(self):
  class ZeroInitContext (line 45) | class ZeroInitContext(InsertPostInitMethodToModuleSubClasses):
    method __init__ (line 62) | def __init__(
    method target_device (line 88) | def target_device(self):
    method is_replicated (line 92) | def is_replicated(self):
    method shard_param (line 96) | def shard_param(self):
    method calc_fanin_fanout (line 100) | def calc_fanin_fanout(tensor: torch.Tensor):
    method _pre_context_exec (line 129) | def _pre_context_exec(self):
    method _post_context_exec (line 162) | def _post_context_exec(self):
    method _post_init_method (line 186) | def _post_init_method(self, module: torch.nn.Module, *args, **kwargs):
  class ZeroContextMgr (line 242) | class ZeroContextMgr(metaclass=SingletonMeta):
    method hijack_context_config (line 246) | def hijack_context_config(self, **kwargs):
  function no_shard_zero_context (line 256) | def no_shard_zero_context(is_replicated: bool = True) -> AbstractContext...
  function no_shard_zero_decrator (line 262) | def no_shard_zero_decrator(is_replicated: bool = True):

FILE: colossalai/legacy/zero/shard_utils/base_shard_strategy.py
  class BaseShardStrategy (line 9) | class BaseShardStrategy(ABC):
    method __init__ (line 10) | def __init__(self) -> None:
    method shard (line 15) | def shard(self, tensor_list: List[ShardedTensor], process_group: Optio...
    method gather (line 19) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti...

FILE: colossalai/legacy/zero/shard_utils/bucket_tensor_shard_strategy.py
  class BucketTensorShardStrategy (line 13) | class BucketTensorShardStrategy(TensorShardStrategy):
    method gather (line 20) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti...

FILE: colossalai/legacy/zero/shard_utils/commons.py
  function get_shard (line 6) | def get_shard(tensor: torch.Tensor, rank: int, world_size: int) -> Tuple...

FILE: colossalai/legacy/zero/shard_utils/tensor_shard_strategy.py
  class TensorShardStrategy (line 13) | class TensorShardStrategy(BaseShardStrategy):
    method shard (line 18) | def shard(self, tensor_list: List[ShardedTensor], process_group: Optio...
    method gather (line 22) | def gather(self, tensor_list: List[ShardedTensor], process_group: Opti...
    method _shard_tensor (line 26) | def _shard_tensor(self, t: ShardedTensor, process_group: Optional[dist...
    method _gather_tensor (line 45) | def _gather_tensor(self, t: ShardedTensor, process_group: Optional[dis...

FILE: colossalai/legacy/zero/sharded_model/_utils.py
  function get_gradient_predivide_factor (line 9) | def get_gradient_predivide_factor(world_size: int) -> float:
  function free_storage (line 16) | def free_storage(data: torch.Tensor) -> None:
  function alloc_storage (line 26) | def alloc_storage(data: torch.Tensor, size: torch.Size) -> None:
  function cast_tensor_to_fp16 (line 34) | def cast_tensor_to_fp16(tensor: torch.Tensor) -> torch.Tensor:
  function cast_tensor_to_fp32 (line 42) | def cast_tensor_to_fp32(tensor: Union[torch.Tensor, StatefulTensor]) -> ...
  function cast_tensor_to_bf16 (line 51) | def cast_tensor_to_bf16(tensor: torch.Tensor) -> torch.Tensor:
  function apply_to_tensors (line 59) | def apply_to_tensors(x: Any, fn: Callable):
  function cast_float_arguments (line 72) | def cast_float_arguments(fn: Callable, *args: Any, **kwargs: Any) -> Tup...
  function chunk_and_pad (line 76) | def chunk_and_pad(tensor: torch.Tensor, num_chunks: int) -> List[torch.T...

FILE: colossalai/legacy/zero/sharded_model/reduce_scatter.py
  class Bucket (line 22) | class Bucket:
    method __init__ (line 23) | def __init__(self, shard_size: int, dtype: torch.dtype, device: torch....
    method flush (line 30) | def flush(self) -> None:
    method alloc (line 53) | def alloc(self) -> None:
    method free (line 65) | def free(self) -> None:
    method append (line 71) | def append(self, tensor_list: List[Tensor], callback_fn: Callable):
  class ReduceScatterBucketer (line 85) | class ReduceScatterBucketer:
    method __init__ (line 113) | def __init__(self, bucket_size_mb: int = 25):
    method reduce_scatter_async (line 118) | def reduce_scatter_async(
    method flush (line 173) | def flush(self) -> None:
    method free (line 179) | def free(self) -> None:
    method _get_shard_size (line 185) | def _get_shard_size(self, element_size: int, num_shards: int) -> int:
    method _get_bucket (line 192) | def _get_bucket(self, tensor: Tensor, group: ProcessGroup) -> Bucket:

FILE: colossalai/legacy/zero/sharded_model/sharded_model_v2.py
  class ShardedModelV2 (line 46) | class ShardedModelV2(nn.Module):
    method __init__ (line 81) | def __init__(
    method adjust_stateful_tensor_layout (line 187) | def adjust_stateful_tensor_layout(self) -> None:
    method use_memory_tracer (line 191) | def use_memory_tracer(self):
    method cuda_margin_space (line 195) | def cuda_margin_space(self):
    method cpu_offload (line 199) | def cpu_offload(self):
    method dump_memory_stats (line 202) | def dump_memory_stats(self, filename: Optional[str] = "dump_mem_stats....
    method _pre_forward_operations (line 230) | def _pre_forward_operations(self, *args):
    method _post_forward_operations (line 241) | def _post_forward_operations(self):
    method forward (line 246) | def forward(self, *args: Any, **kwargs: Any) -> torch.Tensor:
    method backward (line 254) | def backward(self, loss):
    method backward_by_grad (line 260) | def backward_by_grad(self, tensor, grad):
    method _update_memstats (line 266) | def _update_memstats(self):
    method _post_backward_operations (line 279) | def _post_backward_operations(self) -> None:
    method _grad_post_backward_hook (line 322) | def _grad_post_backward_hook(self, param: Parameter, grad: torch.Tenso...
    method _reduce_scatter_handler (line 358) | def _reduce_scatter_handler(self, param: Parameter, grad: torch.Tensor...
    method _reduce_scatter_callback (line 377) | def _reduce_scatter_callback(self, param: Parameter, reduced_grad: tor...
    method _save_grad (line 388) | def _save_grad(self, param: Parameter, grad: torch.Tensor):
    method parameters (line 423) | def parameters(self, recurse: bool = True) -> Iterator[Parameter]:
    method named_parameters (line 426) | def named_parameters(self, prefix: str = "", recurse: bool = True) -> ...
    method state_dict (line 429) | def state_dict(self, destination=None, prefix="", keep_vars=False) -> ...
    method load_state_dict (line 441) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"...
    method _colo_state_dict (line 453) | def _colo_state_dict(
    method _colo_load_from_state_dict (line 481) | def _colo_load_from_state_dict(
    method __getitem__ (line 583) | def __getitem__(self, idx: int):
    method __len__ (line 587) | def __len__(self):
    method __iter__ (line 591) | def __iter__(self):

FILE: colossalai/legacy/zero/sharded_model/utils.py
  function col_model_deepcopy (line 8) | def col_model_deepcopy(sharded_model: ShardedModelV2, other_model: torch...

FILE: colossalai/legacy/zero/sharded_model/zero_hook.py
  class ZeroHook (line 17) | class ZeroHook(BaseOpHook):
    method __init__ (line 23) | def __init__(
    method gather_parameters (line 41) | def gather_parameters(self, module: torch.nn.Module):
    method shard_parameters (line 50) | def shard_parameters(self, module: torch.nn.Module):
    method adjust_module_data (line 59) | def adjust_module_data(self, module: torch.nn.Module):
    method pre_fwd_exec (line 74) | def pre_fwd_exec(self, module: torch.nn.Module, *args):
    method post_fwd_exec (line 81) | def post_fwd_exec(self, module: torch.nn.Module, *args):
    method pre_bwd_exec (line 92) | def pre_bwd_exec(self, module: torch.nn.Module, input, output):
    method post_bwd_exec (line 99) | def post_bwd_exec(self, module: torch.nn.Module, input):
    method pre_iter (line 110) | def pre_iter(self):
    method post_iter (line 113) | def post_iter(self):

FILE: colossalai/legacy/zero/sharded_optim/sharded_optim_v2.py
  class OptimState (line 25) | class OptimState(Enum):
  class ShardedOptimizerV2 (line 30) | class ShardedOptimizerV2(OptimizerWrapper):
    method __init__ (line 76) | def __init__(
    method loss_scale (line 148) | def loss_scale(self):
    method get_memory_usage (line 151) | def get_memory_usage(self) -> Tuple[int, int]:
    method zero_grad (line 178) | def zero_grad(self, *args, **kwargs):
    method backward (line 181) | def backward(self, loss: Tensor) -> None:
    method backward_by_grad (line 188) | def backward_by_grad(self, tensor: Tensor, grad: Tensor) -> None:
    method clip_grad_norm (line 198) | def clip_grad_norm(self, model: nn.Module, max_norm: float):
    method step (line 204) | def step(self, *args, **kwargs):
    method _check_overflow (line 240) | def _check_overflow(self):
    method _unscale_grads (line 252) | def _unscale_grads(self):
    method _zero_grad (line 260) | def _zero_grad(self, recover_data: bool = False):
    method sync_grad (line 286) | def sync_grad(self):
    method _register_master_weight (line 289) | def _register_master_weight(self):
    method _maybe_move_fp32_shards (line 304) | def _maybe_move_fp32_shards(self):
    method _prepare_grads (line 325) | def _prepare_grads(self):
    method _point_param_fp16_to_master_param (line 346) | def _point_param_fp16_to_master_param(self):
    method _copy_master_model_to_model_fp16 (line 356) | def _copy_master_model_to_model_fp16(self):
    method _copy_master_param_to_param_fp16 (line 364) | def _copy_master_param_to_param_fp16(self, p):
    method state_dict (line 394) | def state_dict(self):
    method load_state_dict (line 400) | def load_state_dict(self, *args, **kwargs):

FILE: colossalai/legacy/zero/sharded_param/sharded_param.py
  function get_empty_tensor (line 13) | def get_empty_tensor(device: torch.device, dtype: torch.dtype):
  class ShardedParamV2 (line 21) | class ShardedParamV2(object):
    method __init__ (line 22) | def __init__(self, param: torch.nn.Parameter, set_data_none: bool = Fa...
    method get_payload_tensors (line 37) | def get_payload_tensors(self) -> List[StatefulTensor]:
    method set_data_none (line 41) | def set_data_none(self):
    method set_grad_none (line 44) | def set_grad_none(self):
    method sharded_data_tensor (line 48) | def sharded_data_tensor(self):
    method data_payload (line 52) | def data_payload(self):
    method grad_payload (line 57) | def grad_payload(self):
    method param_is_sharded (line 62) | def param_is_sharded(self):
    method data_payload_reset (line 65) | def data_payload_reset(self, tensor: torch.Tensor):
    method grad_payload_reset (line 70) | def grad_payload_reset(self, tensor: torch.Tensor):
    method get_memory_usage (line 75) | def get_memory_usage(self) -> Tuple[int, int]:

FILE: colossalai/legacy/zero/sharded_param/sharded_tensor.py
  class ShardedTensor (line 6) | class ShardedTensor(StatefulTensor):
    method __init__ (line 7) | def __init__(self, tensor: torch.Tensor, state: TensorState = TensorSt...
    method dtype (line 21) | def dtype(self) -> torch.dtype:
    method origin_numel (line 26) | def origin_numel(self) -> int:
    method origin_shape (line 30) | def origin_shape(self) -> int:
    method is_sharded (line 34) | def is_sharded(self):
    method is_sharded (line 38) | def is_sharded(self, flag: bool):

FILE: colossalai/logging/__init__.py
  function get_dist_logger (line 9) | def get_dist_logger(name: str = "colossalai") -> DistributedLogger:
  function disable_existing_loggers (line 22) | def disable_existing_loggers(include: Optional[List[str]] = None, exclud...

FILE: colossalai/logging/logger.py
  class DistributedLogger (line 12) | class DistributedLogger:
    method get_instance (line 27) | def get_instance(name: str):
    method __init__ (line 42) | def __init__(self, name):
    method rank (line 69) | def rank(self):
    method __get_call_info (line 73) | def __get_call_info():
    method _check_valid_logging_level (line 86) | def _check_valid_logging_level(level: str):
    method set_level (line 89) | def set_level(self, level: str) -> None:
    method log_to_file (line 98) | def log_to_file(self, path: Union[str, Path], mode: str = "a", level: ...
    method _log (line 129) | def _log(self, level, message: str, ranks: List[int] = None) -> None:
    method info (line 136) | def info(self, message: str, ranks: List[int] = None) -> None:
    method warning (line 147) | def warning(self, message: str, ranks: List[int] = None) -> None:
    method debug (line 158) | def debug(self, message: str, ranks: List[int] = None) -> None:
    method error (line 169) | def error(self, message: str, ranks: List[int] = None) -> None:

FILE: colossalai/moe/_operation.py
  function load_moe (line 14) | def load_moe():
  class AllGather (line 21) | class AllGather(torch.autograd.Function):
    method forward (line 23) | def forward(
    method backward (line 54) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]:
  class ReduceScatter (line 62) | class ReduceScatter(torch.autograd.Function):
    method forward (line 64) | def forward(
    method backward (line 98) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]:
  class AllToAll (line 107) | class AllToAll(torch.autograd.Function):
    method forward (line 113) | def forward(
    method backward (line 141) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]:
  class HierarchicalAllToAll (line 149) | class HierarchicalAllToAll(torch.autograd.Function):
    method forward (line 151) | def forward(ctx: Any, inputs: Tensor, groups: Tuple[ProcessGroup, Proc...
    method backward (line 196) | def backward(ctx: Any, *grad_outputs) -> Tuple[Tensor, None, None]:
  class MoeDispatch (line 204) | class MoeDispatch(torch.autograd.Function):
    method forward (line 207) | def forward(ctx, tokens, mask, dest_idx, ec):
    method backward (line 229) | def backward(ctx, output_grad):
  class MoeCombine (line 239) | class MoeCombine(torch.autograd.Function):
    method forward (line 242) | def forward(ctx, expert_tokens, logits, mask, dest_idx, ec):
    method backward (line 270) | def backward(ctx, tokens_grad):
  function moe_cumsum (line 284) | def moe_cumsum(inputs: Tensor, use_kernel: bool = False):
  class EPGradScalerIn (line 295) | class EPGradScalerIn(torch.autograd.Function):
    method forward (line 302) | def forward(ctx: Any, inputs: Tensor, ep_size: int) -> Tensor:
    method backward (line 307) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None]:
  class EPGradScalerOut (line 315) | class EPGradScalerOut(torch.autograd.Function):
    method forward (line 322) | def forward(ctx: Any, inputs: Tensor, ep_size: int) -> Tensor:
    method backward (line 327) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None]:
  class DPGradScalerIn (line 335) | class DPGradScalerIn(torch.autograd.Function):
    method forward (line 342) | def forward(ctx: Any, inputs: Tensor, moe_dp_size: int, activated_expe...
    method backward (line 349) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None, N...
  class DPGradScalerOut (line 357) | class DPGradScalerOut(torch.autograd.Function):
    method forward (line 364) | def forward(ctx: Any, inputs: Tensor, moe_dp_size: int, activated_expe...
    method backward (line 371) | def backward(ctx: Any, *grad_outputs: Tensor) -> Tuple[Tensor, None, N...
  function _all_to_all (line 379) | def _all_to_all(
  class AllToAllUneven (line 409) | class AllToAllUneven(torch.autograd.Function):
    method forward (line 411) | def forward(
    method backward (line 433) | def backward(ctx: Any, *grad_outputs):
  function all_to_all_uneven (line 444) | def all_to_all_uneven(

FILE: colossalai/nn/init.py
  function zeros_ (line 8) | def zeros_():
  function ones_ (line 17) | def ones_():
  function uniform_ (line 26) | def uniform_(a: float = 0.0, b: float = 1.0):
  function normal_ (line 41) | def normal_(mean: float = 0.0, std: float = 1.0):
  function trunc_normal_ (line 58) | def trunc_normal_(mean: float = 0.0, std: float = 1.0, a: float = -2.0, ...
  function kaiming_uniform_ (line 79) | def kaiming_uniform_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
  function kaiming_normal_ (line 123) | def kaiming_normal_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
  function xavier_uniform_ (line 166) | def xavier_uniform_(a: float = math.sqrt(3.0), scale: float = 2.0, gain:...
  function xavier_normal_ (line 200) | def xavier_normal_(scale: float = 2.0, gain: float = 1.0):
  function lecun_uniform_ (line 232) | def lecun_uniform_():
  function lecun_normal_ (line 244) | def lecun_normal_():

FILE: colossalai/nn/layer/layernorm.py
  class FusedLayerNormAffineFunction (line 20) | class FusedLayerNormAffineFunction(torch.autograd.Function):
    method forward (line 23) | def forward(ctx, input, weight, bias, normalized_shape, eps):
    method backward (line 41) | def backward(ctx, grad_output):
  class MixedFusedLayerNorm (line 51) | class MixedFusedLayerNorm(torch.nn.Module):
    method __init__ (line 52) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None):
    method reset_parameters (line 63) | def reset_parameters(self):
    method forward (line 67) | def forward(self, input):
    method __repr__ (line 70) | def __repr__(self):

FILE: colossalai/nn/layer/scaled_softmax.py
  class AttnMaskType (line 20) | class AttnMaskType(enum.Enum):
  class ScaledUpperTriangMaskedSoftmax (line 26) | class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
    method forward (line 36) | def forward(ctx, inputs, scale):
    method backward (line 48) | def backward(ctx, output_grads):
  class ScaledMaskedSoftmax (line 55) | class ScaledMaskedSoftmax(torch.autograd.Function):
    method forward (line 65) | def forward(ctx, inputs, mask, scale):
    method backward (line 78) | def backward(ctx, output_grads):
  class FusedScaleMaskSoftmax (line 85) | class FusedScaleMaskSoftmax(nn.Module):
    method __init__ (line 99) | def __init__(
    method forward (line 123) | def forward(self, input, mask):
    method is_kernel_available (line 132) | def is_kernel_available(self, mask, b, np, sq, sk):
    method forward_fused_softmax (line 154) | def forward_fused_softmax(self, input, mask):
    method forward_torch_softmax (line 169) | def forward_torch_softmax(self, input, mask):
    method get_batch_per_block (line 186) | def get_batch_per_block(self, sq, sk, b, np):

FILE: colossalai/nn/layer/utils.py
  function divide (line 1) | def divide(numerator, denominator):

FILE: colossalai/nn/lr_scheduler/cosine.py
  class CosineAnnealingLR (line 6) | class CosineAnnealingLR(_CosineAnnealingLR):
    method __init__ (line 45) | def __init__(self, optimizer, total_steps: int, eta_min: int = 0, last...
  class CosineAnnealingWarmupLR (line 49) | class CosineAnnealingWarmupLR(WarmupScheduler):
    method __init__ (line 61) | def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0,...
  class FlatAnnealingLR (line 68) | class FlatAnnealingLR(DelayerScheduler):
    method __init__ (line 79) | def __init__(self, optimizer, total_steps: int, pct_start: float = 0.7...
  class FlatAnnealingWarmupLR (line 88) | class FlatAnnealingWarmupLR(WarmupDelayerScheduler):
    method __init__ (line 102) | def __init__(

FILE: colossalai/nn/lr_scheduler/delayed.py
  class _enable_get_lr_call (line 12) | class _enable_get_lr_call:
    method __init__ (line 13) | def __init__(self, o):
    method __enter__ (line 16) | def __enter__(self):
    method __exit__ (line 20) | def __exit__(self, type, value, traceback):
  class TwoStageScheduler (line 24) | class TwoStageScheduler(_LRScheduler):
    method __init__ (line 25) | def __init__(self, optimizer, after_scheduler: _LRScheduler, last_epoc...
    method state_dict (line 30) | def state_dict(self):
    method load_state_dict (line 40) | def load_state_dict(self, state_dict):
  class DelayerScheduler (line 56) | class DelayerScheduler(TwoStageScheduler):
    method __init__ (line 68) | def __init__(self, optimizer, delay_epochs, after_scheduler, last_epoc...
    method get_lr (line 74) | def get_lr(self):
    method step (line 84) | def step(self, epoch=None):
  class WarmupScheduler (line 96) | class WarmupScheduler(TwoStageScheduler):
    method __init__ (line 108) | def __init__(self, optimizer, warmup_epochs, after_scheduler, last_epo...
    method get_lr (line 112) | def get_lr(self):
    method step (line 121) | def step(self, epoch=None):
  class WarmupDelayerScheduler (line 133) | class WarmupDelayerScheduler(TwoStageScheduler):
    method __init__ (line 146) | def __init__(self, optimizer, warmup_epochs, delay_epochs, after_sched...
    method get_lr (line 155) | def get_lr(self):
    method step (line 170) | def step(self, epoch=None):

FILE: colossalai/nn/lr_scheduler/linear.py
  class LinearWarmupLR (line 4) | class LinearWarmupLR(_LRScheduler):
    method __init__ (line 15) | def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0,...
    method get_lr (line 20) | def get_lr(self):

FILE: colossalai/nn/lr_scheduler/multistep.py
  class MultiStepLR (line 8) | class MultiStepLR(_MultiStepLR):
    method __init__ (line 23) | def __init__(
  class MultiStepWarmupLR (line 35) | class MultiStepWarmupLR(WarmupScheduler):
    method __init__ (line 49) | def __init__(

FILE: colossalai/nn/lr_scheduler/onecycle.py
  class OneCycleLR (line 4) | class OneCycleLR(_OneCycleLR):
    method __init__ (line 68) | def __init__(

FILE: colossalai/nn/lr_scheduler/poly.py
  class PolynomialLR (line 6) | class PolynomialLR(_LRScheduler):
    method __init__ (line 18) | def __init__(
    method get_lr (line 28) | def get_lr(self):
    method _get_closed_form_lr (line 31) | def _get_closed_form_lr(self):
  class PolynomialWarmupLR (line 39) | class PolynomialWarmupLR(WarmupScheduler):
    method __init__ (line 52) | def __init__(

FILE: colossalai/nn/lr_scheduler/torch.py
  class LambdaLR (line 7) | class LambdaLR(_LambdaLR):
    method __init__ (line 20) | def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch:...
  class MultiplicativeLR (line 24) | class MultiplicativeLR(_MultiplicativeLR):
    method __init__ (line 37) | def __init__(self, optimizer, total_steps, lr_lambda=None, last_epoch:...
  class StepLR (line 41) | class StepLR(_StepLR):
    method __init__ (line 55) | def __init__(self, optimizer, total_steps, step_size: int = 1, gamma: ...
  class ExponentialLR (line 59) | class ExponentialLR(_ExponentialLR):
    method __init__ (line 70) | def __init__(self, optimizer, total_steps, gamma: float = 1.0, last_ep...

FILE: colossalai/nn/optimizer/__init__.py
  function cast_to_distributed (line 48) | def cast_to_distributed(optim):

FILE: colossalai/nn/optimizer/adafactor.py
  class Adafactor (line 25) | class Adafactor(Optimizer):
    method __init__ (line 26) | def __init__(
    method _get_lr (line 59) | def _get_lr(param_group, param_state):
    method _get_options (line 70) | def _get_options(param_group, param_shape):
    method _rms (line 76) | def _rms(tensor):
    method _approx_sq_grad (line 80) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col):
    method step (line 86) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/came.py
  class CAME (line 6) | class CAME(torch.optim.Optimizer):
    method __init__ (line 23) | def __init__(
    method supports_memory_efficient_fp16 (line 45) | def supports_memory_efficient_fp16(self):
    method supports_flat_params (line 49) | def supports_flat_params(self):
    method _get_options (line 52) | def _get_options(self, param_shape):
    method _rms (line 56) | def _rms(self, tensor):
    method _approx_sq_grad (line 59) | def _approx_sq_grad(self, exp_avg_sq_row, exp_avg_sq_col):
    method step (line 64) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/cpu_adam.py
  class CPUAdam (line 11) | class CPUAdam(NVMeOptimizer):
    method __init__ (line 65) | def __init__(
    method load_state_dict (line 84) | def load_state_dict(self, state_dict):
    method torch_adam_update (line 92) | def torch_adam_update(
    method step (line 127) | def step(self, closure=None, div_scale: float = -1):

FILE: colossalai/nn/optimizer/distributed_adafactor.py
  class DistributedAdaFactor (line 15) | class DistributedAdaFactor(DistributedOptim):
    method __init__ (line 16) | def __init__(
    method setup_distributed (line 60) | def setup_distributed(
    method _get_lr (line 106) | def _get_lr(param_group, param_state):
    method _get_options (line 117) | def _get_options(param_group, param_shape):
    method _rms (line 130) | def _rms(tensor, param_is_dtensor, use_zero, tp_size, dp_size, tp_grou...
    method _approx_sq_grad (line 145) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col):
    method _approx_sq_grad_row_parallel (line 152) | def _approx_sq_grad_row_parallel(exp_avg_sq_row, exp_avg_sq_col, sq_ro...
    method _col_parallel_factor (line 158) | def _col_parallel_factor(self, update, grad, state, grad_shape, beta2t):
    method _row_parallel_factor (line 190) | def _row_parallel_factor(self, update, grad, state, grad_shape, beta2t):
    method _base_factor (line 233) | def _base_factor(self, update, grad, state, grad_shape, beta2t):
    method step (line 286) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/distributed_came.py
  class DistributedCAME (line 11) | class DistributedCAME(DistributedOptim):
    method __init__ (line 28) | def __init__(
    method supports_memory_efficient_fp16 (line 61) | def supports_memory_efficient_fp16(self):
    method supports_flat_params (line 65) | def supports_flat_params(self):
    method setup_distributed (line 68) | def setup_distributed(
    method _get_options (line 119) | def _get_options(param_shape):
    method _rms (line 124) | def _rms(tensor, param_is_dtensor, use_zero, tp_size, dp_size, tp_grou...
    method _approx_sq_grad (line 139) | def _approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col):
    method _approx_sq_grad_row_parallel (line 146) | def _approx_sq_grad_row_parallel(exp_avg_sq_row, exp_avg_sq_col, sq_ro...
    method _col_parallel_factor (line 151) | def _col_parallel_factor(self, update, grad, state_row, state_col, gra...
    method _row_parallel_factor (line 183) | def _row_parallel_factor(self, update, grad, state_row, state_col, gra...
    method _base_factor (line 226) | def _base_factor(self, update, grad, state_row, state_col, grad_shape,...
    method _base_res_factor (line 280) | def _base_res_factor(self, res, exp_avg, state_row, state_col, grad_sh...
    method step (line 333) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/distributed_galore.py
  class DistGaloreAwamW (line 21) | class DistGaloreAwamW(DistributedOptim, Optimizer2State):
    method __init__ (line 49) | def __init__(
    method setup_distributed (line 94) | def setup_distributed(
    method step (line 143) | def step(self, closure=None):
    method to_master_shape (line 268) | def to_master_shape(self, data, padding):
    method __del__ (line 277) | def __del__(self):

FILE: colossalai/nn/optimizer/distributed_lamb.py
  class DistributedLamb (line 15) | class DistributedLamb(DistributedOptim):
    method __init__ (line 40) | def __init__(
    method setup_distributed (line 65) | def setup_distributed(
    method step (line 104) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/fused_adam.py
  class FusedAdam (line 14) | class FusedAdam(torch.optim.Optimizer):
    method __init__ (line 54) | def __init__(
    method zero_grad (line 83) | def zero_grad(self, set_to_none=False):
    method step (line 91) | def step(self, closure=None, grads=None, output_params=None, scale=Non...

FILE: colossalai/nn/optimizer/fused_lamb.py
  class FusedLAMB (line 7) | class FusedLAMB(torch.optim.Optimizer):
    method __init__ (line 52) | def __init__(
    method zero_grad (line 97) | def zero_grad(self):
    method step (line 105) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/fused_sgd.py
  class FusedSGD (line 8) | class FusedSGD(Optimizer):
    method __init__ (line 57) | def __init__(
    method __setstate__ (line 87) | def __setstate__(self, state):
    method get_momentums (line 92) | def get_momentums(self, params):
    method step (line 109) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/galore.py
  function get_galore_param_groups (line 11) | def get_galore_param_groups(
  function make_low_rank_buffer (line 49) | def make_low_rank_buffer(p, grad):
  class GaLoreProjector (line 61) | class GaLoreProjector:
    method __init__ (line 62) | def __init__(self, rank, verbose=False, update_proj_gap=200, scale=1.0...
    method project (line 71) | def project(self, full_rank_grad, iter):
    method project_back (line 105) | def project_back(self, low_rank_grad):
    method get_orthogonal_matrix (line 118) | def get_orthogonal_matrix(self, weights, rank, type):
  class GaLoreAdamW8bit (line 163) | class GaLoreAdamW8bit(Optimizer2State):
    method __init__ (line 192) | def __init__(
    method step (line 235) | def step(self, closure=None):
    method __del__ (line 312) | def __del__(self):

FILE: colossalai/nn/optimizer/hybrid_adam.py
  class HybridAdam (line 11) | class HybridAdam(CPUAdam):
    method __init__ (line 63) | def __init__(
    method step (line 93) | def step(self, closure=None, div_scale: float = -1):

FILE: colossalai/nn/optimizer/lamb.py
  class Lamb (line 9) | class Lamb(Optimizer):
    method __init__ (line 29) | def __init__(
    method step (line 44) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/lars.py
  class Lars (line 9) | class Lars(Optimizer):
    method __init__ (line 22) | def __init__(
    method step (line 40) | def step(self, closure=None):

FILE: colossalai/nn/optimizer/nvme_optimizer.py
  class NVMeOptimizer (line 10) | class NVMeOptimizer(torch.optim.Optimizer):
    method __init__ (line 24) | def __init__(
    method _get_numel (line 51) | def _get_numel(self) -> int:
    method _post_state_init (line 58) | def _post_state_init(self, param: Parameter) -> None:
    method _setup_prefetch_params (line 70) | def _setup_prefetch_params(self) -> List[Parameter]:
    method _pre_step (line 83) | def _pre_step(self, *state_keys: str) -> None:
    method _pre_update (line 94) | def _pre_update(self, param: Parameter, *state_keys: str) -> None:
    method _post_update (line 104) | def _post_update(self, param: Parameter, *state_keys: str) -> None:
    method _post_step (line 113) | def _post_step(self) -> None:
    method step (line 119) | def step(self, closure: Optional[Callable[[], float]] = ...) -> Option...
    method state_dict (line 148) | def state_dict(self) -> dict:
    method load_state_dict (line 154) | def load_state_dict(self, state_dict: dict) -> None:
    method __del__ (line 160) | def __del__(self) -> None:

FILE: colossalai/pipeline/p2p.py
  function _cuda_safe_tensor_to_object (line 22) | def _cuda_safe_tensor_to_object(tensor: torch.Tensor, tensor_size: torch...
  function check_for_nccl_backend (line 50) | def check_for_nccl_backend(group):
  function _broadcast_object_list (line 63) | def _broadcast_object_list(
  function _check_for_nccl_hccl_backend (line 151) | def _check_for_nccl_hccl_backend(group):
  function _check_device (line 164) | def _check_device(group):
  function create_send_metadata (line 176) | def create_send_metadata(
  function _filling_ops_queue (line 201) | def _filling_ops_queue(
  function _create_recv_buffer (line 218) | def _create_recv_buffer(tensor_metadata: List[TensorMetadata], current_d...
  function _batch_send_recv_tensor (line 228) | def _batch_send_recv_tensor(
  function _send_recv_serialization_object (line 273) | def _send_recv_serialization_object(
  function _communicate (line 364) | def _communicate(
  function _p2p_comm (line 459) | def _p2p_comm(
  class PipelineP2PCommunication (line 539) | class PipelineP2PCommunication:
    method __init__ (line 540) | def __init__(self, stage_manager: PipelineStageManager, overlap_p2p: b...
    method recv_forward (line 544) | def recv_forward(
    method recv_backward (line 569) | def recv_backward(
    method send_forward (line 594) | def send_forward(self, output_object: Any, next_rank: Optional[int] = ...
    method send_backward (line 616) | def send_backward(self, input_object: Any, prev_rank: Optional[int] = ...
    method send_forward_recv_forward (line 638) | def send_forward_recv_forward(
    method send_backward_recv_backward (line 676) | def send_backward_recv_backward(
    method send_forward_recv_backward (line 716) | def send_forward_recv_backward(
    method send_backward_recv_forward (line 746) | def send_backward_recv_forward(
    method p2p_communicate (line 776) | def p2p_communicate(

FILE: colossalai/pipeline/schedule/_utils.py
  function _odict_flatten (line 12) | def _odict_flatten(d: "OrderedDict[Any, Any]") -> Tuple[List[Any], Any]:
  function _odict_unflatten (line 16) | def _odict_unflatten(values: List[Any], context: Any) -> "OrderedDict[An...
  function tree_map_hf (line 28) | def tree_map_hf(fn: Any, pytree: Any):
  function tree_flatten_hf (line 34) | def tree_flatten_hf(pytree: Any) -> Tuple[List[Any], TreeSpec]:
  function to_device (line 56) | def to_device(x: Any, device: Optional[torch.device] = None) -> Any:
  function get_batch_size (line 71) | def get_batch_size(batch: Any) -> int:
  function get_micro_batch (line 90) | def get_micro_batch(batch: Any, start: int, micro_batch_size: int) -> Any:
  function model_forward (line 110) | def model_forward(model: Module, data: Any, internal_inputs: Optional[di...
  function retain_grad (line 130) | def retain_grad(x: Any) -> None:
  function require_grad (line 140) | def require_grad(x: Any) -> None:
  function detach (line 150) | def detach(x: Any) -> Any:
  function clone (line 164) | def clone(x: Any) -> Any:
  function release_tensor_data (line 178) | def release_tensor_data(x: Any) -> Any:
  function merge_batch (line 192) | def merge_batch(data: List[Any], batch_size_dim=0) -> Any:

FILE: colossalai/pipeline/schedule/base.py
  class PipelineSchedule (line 10) | class PipelineSchedule:
    method __init__ (line 11) | def __init__(self, stage_manager: PipelineStageManager) -> None:
    method forward_backward_step (line 14) | def forward_backward_step(

FILE: colossalai/pipeline/schedule/generate.py
  class ActionIntervalBuffer (line 19) | class ActionIntervalBuffer:
    method __int__ (line 25) | def __int__(self):
    method clear (line 29) | def clear(self):
  class GenerateSchedule (line 34) | class GenerateSchedule(PipelineSchedule):
    method __init__ (line 46) | def __init__(self, stage_manager: PipelineStageManager, mb_manager: Mi...
    method load_batch (line 60) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic...
    method load_micro_batch (line 81) | def load_micro_batch(self) -> Any:
    method _prepare_inputs_for_interval_stage (line 91) | def _prepare_inputs_for_interval_stage(self):
    method _prepare_inputs_for_new_token (line 101) | def _prepare_inputs_for_new_token(self, new_token: torch.Tensor):
    method _get_token_id (line 114) | def _get_token_id(self, hidden_state: torch.Tensor) -> torch.Tensor:
    method _recv_pre_stage (line 119) | def _recv_pre_stage(self) -> Any:
    method _init_infer_state_action (line 130) | def _init_infer_state_action(self) -> None:
    method _load_stage_action (line 138) | def _load_stage_action(self, model: Module) -> None:
    method _gen_token_action (line 153) | def _gen_token_action(self, model: Module):
    method _head_encoding_action (line 174) | def _head_encoding_action(self, model: Module):
    method _body_encoding_action (line 186) | def _body_encoding_action(self, model: Module):
    method _comm_action (line 194) | def _comm_action(self, recv_pre: bool) -> torch.Tensor:
    method _gen_action (line 203) | def _gen_action(self, model: Module):
    method _gen_one_stage_action (line 236) | def _gen_one_stage_action(self, model: Module):
    method generate_step (line 258) | def generate_step(self, model: Module, data_iter: Iterable) -> Union[t...
    method generate_step_one_stage (line 267) | def generate_step_one_stage(self, model: Module, data_iter: Iterable) ...
    method generate_step_p2p (line 304) | def generate_step_p2p(self, model: Module, data_iter: Iterable) -> Uni...
    method generate_step_broadcast (line 348) | def generate_step_broadcast(self, model: Module, data_iter: Iterable) ...

FILE: colossalai/pipeline/schedule/interleaved_pp.py
  function _wait_p2p (line 20) | def _wait_p2p(wait_handles) -> None:
  class InterleavedSchedule (line 26) | class InterleavedSchedule(PipelineSchedule):
    method __init__ (line 27) | def __init__(
    method load_batch (line 62) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic...
    method load_micro_batch (line 104) | def load_micro_batch(self, model_chunk_id: int) -> Any:
    method get_model_chunk_id (line 118) | def get_model_chunk_id(self, microbatch_id: int, is_forward: bool) -> ...
    method recv_forward (line 138) | def recv_forward(self, model_chunk_id: int, prev_rank: int = None) -> ...
    method recv_backward (line 160) | def recv_backward(self, model_chunk_id: int, next_rank: int = None) ->...
    method send_forward (line 183) | def send_forward(self, model_chunk_id: int, output_tensor: Any, next_r...
    method send_backward (line 206) | def send_backward(self, model_chunk_id: int, input_tensor_grad: Any, p...
    method send_forward_recv_forward (line 231) | def send_forward_recv_forward(
    method send_backward_recv_backward (line 257) | def send_backward_recv_backward(
    method forward_step (line 282) | def forward_step(
    method backward_step (line 327) | def backward_step(
    method run_forward_only (line 373) | def run_forward_only(
    method run_forward_backward (line 419) | def run_forward_backward(
    method forward_backward_step (line 584) | def forward_backward_step(

FILE: colossalai/pipeline/schedule/one_f_one_b.py
  class OneForwardOneBackwardSchedule (line 28) | class OneForwardOneBackwardSchedule(PipelineSchedule):
    method __init__ (line 29) | def __init__(
    method load_batch (line 67) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic...
    method load_micro_batch (line 109) | def load_micro_batch(self) -> Any:
    method recv_forward (line 120) | def recv_forward(self, prev_rank: int = None) -> Any:
    method recv_backward (line 139) | def recv_backward(self, next_rank: int = None) -> Any:
    method send_forward (line 158) | def send_forward(self, output_tensor: Any, next_rank: int = None) -> N...
    method send_backward (line 175) | def send_backward(self, input_tensor_grad: Any, prev_rank: int = None)...
    method send_forward_recv_backward (line 191) | def send_forward_recv_backward(self, output_tensor: Any, send_first: O...
    method send_backward_recv_forward (line 219) | def send_backward_recv_forward(self, input_tensor_grad: Any, send_firs...
    method forward_step (line 247) | def forward_step(
    method backward_step (line 282) | def backward_step(
    method run_forward_only (line 327) | def run_forward_only(
    method run_forward_backward (line 359) | def run_forward_backward(
    method forward_backward_step (line 443) | def forward_backward_step(

FILE: colossalai/pipeline/schedule/v_schedule.py
  class ScheduledNode (line 36) | class ScheduledNode:
  class PipelineGraph (line 46) | class PipelineGraph(object):
    method __init__ (line 49) | def __init__(
    method get_id (line 76) | def get_id(self, cat, chunk, stage, micro):
    method try_v_schedule (line 81) | def try_v_schedule(self, fill_f=True, fill_b=True, approved_bubble=None):
    method print_details (line 290) | def print_details(self, end_time, print_scaling=1):
    method get_v_schedule (line 318) | def get_v_schedule(self, only_run_time=False):

FILE: colossalai/pipeline/schedule/zero_bubble_pp.py
  function _wait_p2p (line 34) | def _wait_p2p(wait_handles: List[torch.cuda.Event]) -> None:
  class ZeroBubbleVPipeScheduler (line 40) | class ZeroBubbleVPipeScheduler(PipelineSchedule):
    method __init__ (line 54) | def __init__(
    method _free_buffers (line 117) | def _free_buffers(self):
    method assert_buffer_empty (line 149) | def assert_buffer_empty(self):
    method load_batch (line 170) | def load_batch(self, data_iter: Iterable, device: Optional[torch.devic...
    method load_micro_batch (line 205) | def load_micro_batch(self, model_chunk_id: int) -> Any:
    method get_model_chunk_id (line 219) | def get_model_chunk_id(self, microbatch_id: int, is_forward: bool) -> ...
    method recv_forward (line 239) | def recv_forward(self, model_chunk_id: int, prev_rank: int = None) -> ...
    method recv_backward (line 297) | def recv_backward(self, model_chunk_id: int, next_rank: int = None) ->...
    method send_forward (line 356) | def send_forward(self, model_chunk_id: int, next_rank: int = None) -> ...
    method send_backward (line 415) | def send_backward(self, model_chunk_id: int, prev_rank: int = None) ->...
    method forward_step (line 474) | def forward_step(
    method backward_b_step (line 516) | def backward_b_step(
    method backward_w_step (line 591) | def backward_w_step(
    method schedule_f (line 636) | def schedule_f(
    method schedule_b (line 741) | def schedule_b(
    method schedule_w (line 809) | def schedule_w(
    method run_forward_only (line 827) | def run_forward_only(
    method run_forward_backward (line 871) | def run_forward_backward(
    method forward_backward_step (line 938) | def forward_backward_step(

FILE: colossalai/pipeline/stage_manager.py
  class PipelineStageManager (line 11) | class PipelineStageManager:
    method __init__ (line 24) | def __init__(
    method get_stage_index (line 62) | def get_stage_index(
    method is_first_stage (line 107) | def is_first_stage(self, ignore_chunk: bool = False) -> bool:
    method is_last_stage (line 124) | def is_last_stage(self, ignore_chunk: bool = False) -> bool:
    method num_stages (line 146) | def num_stages(self) -> int:
    method stage (line 155) | def stage(self) -> int:
    method get_rank (line 163) | def get_rank(self) -> int:
    method get_prev_rank (line 171) | def get_prev_rank(self) -> int:
    method get_next_rank (line 179) | def get_next_rank(self) -> int:
    method get_p2p_process_group (line 187) | def get_p2p_process_group(self) -> ProcessGroup:
    method init_process_group_by_stages (line 194) | def init_process_group_by_stages(self, stages: List[int]) -> ProcessGr...
    method switch_model_chunk_id (line 206) | def switch_model_chunk_id(self, model_chunk_id: int):
    method distribute_layers (line 212) | def distribute_layers(

FILE: colossalai/pipeline/weight_grad_store.py
  class WeightGradStore (line 4) | class WeightGradStore:
    method put (line 10) | def put(cls, total_input, grad_output, weight, func):
    method flush (line 14) | def flush(cls, chunk=0):
    method pop (line 19) | def pop(cls, chunk=0):

FILE: colossalai/quantization/bnb.py
  function quantize_model (line 30) | def quantize_model(
  function replace_with_bnb_layers (line 109) | def replace_with_bnb_layers(model, bnb_quantization_config, modules_to_n...
  function _replace_with_bnb_layers (line 141) | def _replace_with_bnb_layers(
  function get_keys_to_not_convert (line 208) | def get_keys_to_not_convert(model):
  function find_tied_parameters (line 261) | def find_tied_parameters(model: nn.Module, **kwargs):
  class FindTiedParametersResult (line 319) | class FindTiedParametersResult(list):
    method __init__ (line 325) | def __init__(self, *args, **kwargs):
    method values (line 328) | def values(self):

FILE: colossalai/quantization/bnb_config.py
  class BnbQuantizationConfig (line 11) | class BnbQuantizationConfig:
    method __post_init__ (line 66) | def __post_init__(self):

FILE: colossalai/quantization/fp8.py
  class Handle (line 21) | class Handle:
    method __init__ (line 22) | def __init__(self, handles=[], remain_ops=None) -> None:
    method wait (line 26) | def wait(self):
  function process_group_is_intranode (line 33) | def process_group_is_intranode(pg):
  function cast_to_fp8 (line 51) | def cast_to_fp8(
  function cast_from_fp8 (line 93) | def cast_from_fp8(
  function _all_reduce_fp8 (line 120) | def _all_reduce_fp8(
  function all_reduce_fp8 (line 187) | def all_reduce_fp8(
  function _all_to_all_single_fp8 (line 195) | def _all_to_all_single_fp8(
  function all_to_all_single_fp8 (line 258) | def all_to_all_single_fp8(
  function cast_to_fp8_pipeline (line 285) | def cast_to_fp8_pipeline(inp: Any) -> None:
  function cast_from_fp8_pipeline (line 327) | def cast_from_fp8_pipeline(inp: Any, del_metadata=True) -> None:
  function _reduce_scatter_fp8 (line 356) | def _reduce_scatter_fp8(
  function reduce_scatter_fp8 (line 401) | def reduce_scatter_fp8(
  function fp8_compress_ddp_grad_comm_hook_async (line 408) | def fp8_compress_ddp_grad_comm_hook_async(
  function fp8_compress_ddp_grad_comm_hook_sync (line 506) | def fp8_compress_ddp_grad_comm_hook_sync(
  function fp8_compress_fsdp_grad_comm_hook (line 532) | def fp8_compress_fsdp_grad_comm_hook(
  function fp8_compress_fsdp_params_comm_hook (line 566) | def fp8_compress_fsdp_params_comm_hook(
  function split_chunk_by_channel (line 601) | def split_chunk_by_channel(
  function _all_to_all_fp8 (line 616) | def _all_to_all_fp8(output_list, input_list, group=None, fp8_format="e5m...
  function all_to_all_fp8 (line 648) | def all_to_all_fp8(output_list, input_list, group=None, fp8_format="e5m2...
  function _all_gather_fp8 (line 656) | def _all_gather_fp8(output_list, input_, group=None, fp8_format="e5m2", ...
  function all_gather_fp8 (line 680) | def all_gather_fp8(output_list, input_, group=None, fp8_format="e5m2", a...
  function all_gather_fp8_lagacy (line 688) | def all_gather_fp8_lagacy(
  function all_gather_fp8_ring (line 716) | def all_gather_fp8_ring(output_list, input_, group=None, fp8_format="e5m...
  class _LinearFp8 (line 773) | class _LinearFp8(torch.autograd.Function):
    method forward (line 775) | def forward(
    method backward (line 812) | def backward(ctx: Any, out_grad) -> Any:
  function _linear_fp8 (line 838) | def _linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: Optiona...
  function linear_fp8 (line 842) | def linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: Optional...

FILE: colossalai/quantization/fp8_hook.py
  class FP8Hook (line 7) | class FP8Hook(ColoParamOpHook):
    method pre_forward (line 8) | def pre_forward(self, params) -> None:
    method post_forward (line 11) | def post_forward(self, params) -> None:
    method pre_backward (line 14) | def pre_backward(self, params) -> None:
    method post_backward (line 17) | def post_backward(self, params) -> None:
    method rewrite_op (line 20) | def rewrite_op(self, func):

FILE: colossalai/quantization/utils.py
  function _all_gather_flat_param (line 9) | def _all_gather_flat_param(
  function register_params_comm_hook (line 56) | def register_params_comm_hook(self, state: object, hook: callable):
  function patch_fsdp_params_comm_hook (line 102) | def patch_fsdp_params_comm_hook():

FILE: colossalai/shardformer/_utils.py
  function get_obj_list_element (line 4) | def get_obj_list_element(obj, attr: str):
  function set_obj_list_element (line 31) | def set_obj_list_element(obj, attr: str, value):
  function hasattr_ (line 55) | def hasattr_(obj, attr: str):
  function setattr_ (line 72) | def setattr_(obj, attr: str, value, ignore: bool = False):
  function getattr_ (line 94) | def getattr_(obj, attr: str, ignore: bool = False):

FILE: colossalai/shardformer/examples/convergence_benchmark.py
  function to_device (line 21) | def to_device(x: Any, device: torch.device) -> Any:
  function train (line 30) | def train(args):
  function fit (line 86) | def fit(
  function evaluate_model (line 124) | def evaluate_model(

FILE: colossalai/shardformer/examples/data.py
  class GLUEDataBuilder (line 8) | class GLUEDataBuilder:
    method __init__ (line 45) | def __init__(
    method setup (line 68) | def setup(self):
    method prepare_data (line 82) | def prepare_data(self):
    method train_dataloader (line 86) | def train_dataloader(self):
    method val_dataloader (line 95) | def val_dataloader(self):
    method test_dataloader (line 106) | def test_dataloader(self):
    method convert_to_features (line 117) | def convert_to_features(self, example_batch):
    method native_prepare_dataloader (line 134) | def native_prepare_dataloader(self, dataset, batch_size, shuffle=False...

FILE: colossalai/shardformer/examples/performance_benchmark.py
  function data_gen (line 14) | def data_gen(batch_size, seq_length):
  function data_gen_for_sequence_classification (line 20) | def data_gen_for_sequence_classification(batch_size, seq_length):
  function train (line 56) | def train(model, data):
  function bench_shardformer (line 63) | def bench_shardformer(BATCH, N_CTX, provider, model_func, dtype=torch.fl...

FILE: colossalai/shardformer/layer/_operation.py
  class FusedLayerNormAffineFunction1D (line 38) | class FusedLayerNormAffineFunction1D(torch.autograd.Function):
    method forward (line 53) | def forward(ctx, input, weight, bias, normalized_shape, eps):
    method backward (line 66) | def backward(ctx, grad_output):
  class MatmulWithAsyncCommunication (line 76) | class MatmulWithAsyncCommunication(torch.autograd.Function):
    method forward (line 82) | def forward(ctx, input_, weight, bias, process_group, async_grad_allre...
    method backward (line 98) | def backward(ctx, grad_output):
  class MatmulWithGradAccum (line 170) | class MatmulWithGradAccum(torch.autograd.Function):
    method forward (line 176) | def forward(ctx, input_, weight, bias, async_grad_allreduce, use_zbv=F...
    method backward (line 189) | def backward(ctx, grad_output):
  class LinearWithAsyncCommunication (line 251) | class LinearWithAsyncCommunication(torch.autograd.Function):
    method forward (line 257) | def forward(ctx, input_, weight, bias, process_group, async_grad_allre...
    method backward (line 272) | def backward(ctx, grad_output):
  class LinearWithGradAccum (line 341) | class LinearWithGradAccum(torch.autograd.Function):
    method forward (line 347) | def forward(ctx, input_, weight, bias, async_grad_allreduce, use_zbv=F...
    method backward (line 360) | def backward(ctx, grad_output):
  function _ring_as_gather (line 418) | def _ring_as_gather(func, input_to_gather=None, input_local=None, proces...
  class _GatherForwardReduceScatterBackward (line 485) | class _GatherForwardReduceScatterBackward(torch.autograd.Function):
    method forward (line 496) | def forward(ctx, input_, process_group, dim, fp8_communication=False):
    method backward (line 504) | def backward(ctx, grad_output):
  class _LinearWithGatherForwardReduceScatterBackward (line 527) | class _LinearWithGatherForwardReduceScatterBackward(torch.autograd.Funct...
    method forward (line 538) | def forward(ctx, input_, weight, bias, process_group, async_grad_reduc...
    method backward (line 571) | def backward(ctx, grad_output):
  function _ring_as_reducescatter (line 646) | def _ring_as_reducescatter(
  class _LinearWithReduceScatterForwardGatherBackward (line 704) | class _LinearWithReduceScatterForwardGatherBackward(torch.autograd.Funct...
    method forward (line 715) | def forward(ctx, input_, weight, bias, process_group, dim, ring, use_z...
    method backward (line 756) | def backward(ctx, grad_output):
  class _ReduceScatterForwardGatherBackward (line 819) | class _ReduceScatterForwardGatherBackward(torch.autograd.Function):
    method forward (line 829) | def forward(ctx, input_, process_group, dim, fp8_communication=False):
    method backward (line 850) | def backward(ctx, grad_output):
  class _MatmulWithGatherForwardReduceScatterBackward (line 858) | class _MatmulWithGatherForwardReduceScatterBackward(torch.autograd.Funct...
    method forward (line 870) | def forward(
    method backward (line 904) | def backward(ctx, grad_output):
  class _SplitForwardGatherBackward (line 982) | class _SplitForwardGatherBackward(torch.autograd.Function):
    method forward (line 994) | def forward(ctx, input_, dim, process_group, grad_scale=None, fp8_comm...
    method backward (line 1002) | def backward(ctx, grad_output):
  class _ReduceForward (line 1015) | class _ReduceForward(torch.autograd.Function):
    method forward (line 1026) | def forward(ctx, input_, process_group, grad_scale=None, fp8_communica...
    method backward (line 1031) | def backward(ctx, grad_output):
  class _ReduceBackward (line 1037) | class _ReduceBackward(torch.autograd.Function):
    method forward (line 1047) | def forward(ctx, input_, process_group, fp8_communication=False):
    method backward (line 1053) | def backward(ctx, grad_output):
  class _GatherForwardSplitBackward (line 1058) | class _GatherForwardSplitBackward(torch.autograd.Function):
    method forward (line 1068) | def forward(ctx, input_, dim, process_group, grad_scale=None, fp8_comm...
    method backward (line 1076) | def backward(ctx, grad_output):
  class _AllToAll (line 1082) | class _AllToAll(torch.autograd.Function):
    method forward (line 1093) | def forward(ctx, input_, process_group, scatter_dim, gather_dim, fp8_c...
    method backward (line 1124) | def backward(ctx, grad_output):
  class HookParameter (line 1156) | class HookParameter(torch.autograd.Function):
    method forward (line 1160) | def forward(ctx, input, weight, bias):
    method backward (line 1166) | def backward(ctx, grad_output):
  function hook_parameter_in_backward (line 1175) | def hook_parameter_in_backward(input, weight=None, bias=None):
  function _reduce (line 1179) | def _reduce(input_, process_group, fp8_communication=False, fp8_format="...
  function _split (line 1191) | def _split(input_, dim=-1, process_group=None):
  function _gather (line 1211) | def _gather(input_, dim=-1, process_group=None, fp8_communication=False,...
  function _reduce_scatter (line 1229) | def _reduce_scatter(input_, dim=1, process_group=None):
  function _all_to_all (line 1253) | def _all_to_all(input_, world_size, group, scatter_dim, gather_dim, fp8_...
  function _all_to_all_single (line 1263) | def _all_to_all_single(
  function matmul_with_async_comm (line 1296) | def matmul_with_async_comm(
  function matmul_with_grad_comm (line 1304) | def matmul_with_grad_comm(input_, weight, bias, async_grad_allreduce, us...
  function linear_with_async_comm (line 1308) | def linear_with_async_comm(
  function linear_with_grad_accum (line 1316) | def linear_with_grad_accum(input_, weight, bias, async_grad_allreduce, u...
  function linear_gather_forward_reducescatter_backward (line 1320) | def linear_gather_forward_reducescatter_backward(
  function gather_forward_reducescatter_backward (line 1328) | def gather_forward_reducescatter_backward(input_, process_group, dim, fp...
  function reducescatter_forward_gather_backward (line 1332) | def reducescatter_forward_gather_backward(input_, process_group, dim, fp...
  function linear_reducescatter_forward_gather_backward (line 1336) | def linear_reducescatter_forward_gather_backward(
  function matmul_gather_forward_reducescatter_backward (line 1342) | def matmul_gather_forward_reducescatter_backward(
  function gather_forward_split_backward (line 1358) | def gather_forward_split_backward(input_, dim, process_group, grad_scale...
  function split_forward_gather_backward (line 1362) | def split_forward_gather_backward(input_, dim, process_group, grad_scale...
  function reduce_forward (line 1366) | def reduce_forward(input_, process_group, grad_scale=None, fp8_communica...
  function reduce_backward (line 1370) | def reduce_backward(input_, process_group, fp8_communication=False):
  function all_to_all_comm (line 1374) | def all_to_all_comm(input_, process_group=None, scatter_dim=2, gather_di...
  function gather_sp_output (line 1378) | def gather_sp_output(hidden_states, shard_config, sp_dim=1):

FILE: colossalai/shardformer/layer/attn.py
  class AttnMaskType (line 33) | class AttnMaskType(Enum):
  function invert_mask (line 40) | def invert_mask(mask: torch.Tensor) -> torch.Tensor:
  function get_pad_info (line 54) | def get_pad_info(
  class ColoAttention (line 82) | class ColoAttention:
    method _init_kernels_dispatch (line 87) | def _init_kernels_dispatch():
    method _dispatch_kernel (line 114) | def _dispatch_kernel(dtype: torch.dtype, mask_type: Optional[AttnMaskT...
    method prepare_attn_kwargs (line 139) | def prepare_attn_kwargs(
    method attention (line 229) | def attention(
  function _load_varlen_helpers (line 334) | def _load_varlen_helpers():
  function _load_flash_attn (line 356) | def _load_flash_attn():
  function _rescale_out_lse (line 376) | def _rescale_out_lse(out, block_out, lse, block_lse):
  class RingAttention (line 406) | class RingAttention(torch.autograd.Function):
    method get_double_ring_groups (line 445) | def get_double_ring_groups(sp_axis, pg_mesh, inner_ring_size=None):
    method attention (line 498) | def attention(
    method forward (line 623) | def forward(
    method backward (line 942) | def backward(ctx, dout, _):
    method prepare_varlen_batch (line 1178) | def prepare_varlen_batch(

FILE: colossalai/shardformer/layer/dropout.py
  class DropoutForParallelInput (line 13) | class DropoutForParallelInput(ParallelModule, nn.Dropout):
    method __init__ (line 25) | def __init__(self, p: float = 0.5, inplace: bool = False, process_grou...
    method from_native_module (line 34) | def from_native_module(
    method forward (line 44) | def forward(self, input):
  class DropoutForReplicatedInput (line 50) | class DropoutForReplicatedInput(ParallelModule, nn.Dropout):
    method __init__ (line 62) | def __init__(self, p: float = 0.5, inplace: bool = False, process_grou...
    method from_native_module (line 71) | def from_native_module(
    method forward (line 81) | def forward(self, input):

FILE: colossalai/shardformer/layer/embedding.py
  class Embedding1D (line 30) | class Embedding1D(ParallelModule):
    method __init__ (line 60) | def __init__(
    method from_native_module (line 107) | def from_native_module(
    method reset_parameters (line 147) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 152) | def _fill_padding_idx_with_zero(self) -> None:
    method forward (line 157) | def forward(self, input_: Tensor) -> Tensor:
  class PaddingEmbedding (line 168) | class PaddingEmbedding(PaddingParallelModule):
    method __init__ (line 169) | def __init__(
    method reset_parameters (line 202) | def reset_parameters(self) -> None:
    method _fill_padding_idx_with_zero (line 206) | def _fill_padding_idx_with_zero(self) -> None:
    method forward (line 211) | def forward(self, input: Tensor) -> Tensor:
    method from_native_module (line 215) | def from_native_module(
  class VocabParallelEmbedding1D (line 241) | class VocabParallelEmbedding1D(PaddingParallelModule):
    method __init__ (line 270) | def __init__(
    method from_native_module (line 330) | def from_native_module(
    method reset_parameters (line 362) | def reset_parameters(self, weight_initializer) -> None:
    method _fill_padding_idx_with_zero (line 368) | def _fill_padding_idx_with_zero(self) -> None:
    method _select_padding_idx (line 377) | def _select_padding_idx(self, padding_idx: int):
    method forward (line 386) | def forward(self, input_: Tensor) -> Tensor:

FILE: colossalai/shardformer/layer/linear.py
  class LinearWithGradAccum (line 40) | class LinearWithGradAccum(ParallelModule):
    method __init__ (line 63) | def __init__(
    method from_native_module (line 123) | def from_native_module(module: nn.Linear, **kwargs) -> ParallelModule:
    method reset_parameters (line 146) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 153) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class Linear1D_Col (line 181) | class Linear1D_Col(ParallelModule):
    method __init__ (line 209) | def __init__(
    method from_native_module (line 285) | def from_native_module(
    method reset_parameters (line 324) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 331) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class Linear1D_Row (line 379) | class Linear1D_Row(ParallelModule):
    method __init__ (line 402) | def __init__(
    method from_native_module (line 481) | def from_native_module(
    method chunk_weight (line 521) | def chunk_weight(self):
    method reset_parameters (line 525) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 542) | def forward(self, input_: Tensor) -> Tensor:
  class PaddingLMHead (line 598) | class PaddingLMHead(PaddingParallelModule):
    method __init__ (line 599) | def __init__(
    method reset_parameters (line 640) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method from_native_module (line 647) | def from_native_module(
    method forward (line 673) | def forward(self, input: Tensor) -> Tensor:
  class VocabParallelLMHead1D (line 679) | class VocabParallelLMHead1D(Linear1D_Col, PaddingParallelModule):
    method __init__ (line 707) | def __init__(
    method from_native_module (line 762) | def from_native_module(
    method forward (line 788) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:

FILE: colossalai/shardformer/layer/loss.py
  class DistCrossEntropy (line 25) | class DistCrossEntropy(Function):
    method forward (line 34) | def forward(
    method backward (line 130) | def backward(ctx, grad_output):
  class DistLogProb (line 148) | class DistLogProb(Function):
    method forward (line 157) | def forward(
    method backward (line 218) | def backward(ctx, grad_output):
  function cross_entropy_1d (line 240) | def cross_entropy_1d(
  function dist_log_prob_1d (line 252) | def dist_log_prob_1d(
  function dist_cross_entropy (line 262) | def dist_cross_entropy(
  function dist_log_prob (line 358) | def dist_log_prob(

FILE: colossalai/shardformer/layer/normalization.py
  class FusedLayerNormWithHook (line 38) | class FusedLayerNormWithHook(ApexFusedLayerNorm):
    method __init__ (line 39) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T...
    method forward (line 42) | def forward(self, input):
  class NPUFusedRMSNormWithHook (line 53) | class NPUFusedRMSNormWithHook(nn.Module):
    method __init__ (line 54) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T...
    method reset_parameters (line 67) | def reset_parameters(self):
    method forward (line 71) | def forward(self, input):
  class CUDAFusedRMSNormWithHook (line 82) | class CUDAFusedRMSNormWithHook(ApexFusedRMSNorm):
    method __init__ (line 83) | def __init__(self, normalized_shape, eps=0.00001, elementwise_affine=T...
    method forward (line 86) | def forward(self, input):
  class FastLayerNormWithHook (line 127) | class FastLayerNormWithHook(FastLayerNorm):
    method __init__ (line 128) | def __init__(self, hidden_size, eps=0.00001):
    method forward (line 131) | def forward(self, input):
  class BaseLayerNorm (line 137) | class BaseLayerNorm(ABC):
    method from_native_module (line 139) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F...
  class RMSNorm (line 156) | class RMSNorm(BaseLayerNorm):
    method __init__ (line 161) | def __init__(self) -> None:
    method from_native_module (line 168) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F...
  class LayerNorm (line 192) | class LayerNorm(BaseLayerNorm):
    method __init__ (line 197) | def __init__(self) -> None:
    method from_native_module (line 204) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F...
  class FusedLayerNorm (line 231) | class FusedLayerNorm(BaseLayerNorm):
    method __init__ (line 236) | def __init__(self) -> None:
    method from_native_module (line 243) | def from_native_module(module: nn.LayerNorm, sp_partial_derived: bool ...
  class FusedRMSNorm (line 300) | class FusedRMSNorm(BaseLayerNorm):
    method __init__ (line 305) | def __init__(self) -> None:
    method from_native_module (line 312) | def from_native_module(module: nn.Module, sp_partial_derived: bool = F...

FILE: colossalai/shardformer/layer/parallel_module.py
  class ParallelModule (line 28) | class ParallelModule(nn.Module, ABC):
    method __init__ (line 29) | def __init__(self, **kwargs):
    method from_native_module (line 33) | def from_native_module(
    method _save_to_state_dict (line 46) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method _load_from_state_dict (line 70) | def _load_from_state_dict(
  class PaddingParallelModule (line 180) | class PaddingParallelModule(ParallelModule):
    method __init__ (line 181) | def __init__(
    method from_native_module (line 204) | def from_native_module(
    method _save_to_state_dict (line 218) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method _load_from_state_dict (line 245) | def _load_from_state_dict(
    method resize_embedding_weight (line 357) | def resize_embedding_weight(self):
    method resize_embedding_bias (line 360) | def resize_embedding_bias(self):

FILE: colossalai/shardformer/layer/qkv_fused_linear.py
  function split_fused_qkv_in_gpt2_style (line 55) | def split_fused_qkv_in_gpt2_style(
  function gather_fused_qkv_in_gpt2_style (line 100) | def gather_fused_qkv_in_gpt2_style(
  class _SplitForwardGatherBackwardFusedQKV (line 157) | class _SplitForwardGatherBackwardFusedQKV(torch.autograd.Function):
    method forward (line 159) | def forward(ctx, qkv: torch.Tensor, split_sizes: List[int], process_gr...
    method backward (line 165) | def backward(ctx, grad_output):
  function split_forward_gather_backward_fused_qkv (line 172) | def split_forward_gather_backward_fused_qkv(qkv: torch.Tensor, split_siz...
  class _GatherForwardSplitBackwardFusedQKV (line 176) | class _GatherForwardSplitBackwardFusedQKV(torch.autograd.Function):
    method forward (line 178) | def forward(ctx, qkv: torch.Tensor, split_sizes: List[int], process_gr...
    method backward (line 184) | def backward(ctx, grad_output):
  function gather_forward_split_backward_fused_qkv (line 189) | def gather_forward_split_backward_fused_qkv(qkv: torch.Tensor, split_siz...
  class GPT2FusedLinearConv1D_Col (line 193) | class GPT2FusedLinearConv1D_Col(ParallelModule):
    method __init__ (line 222) | def __init__(
    method from_native_module (line 310) | def from_native_module(
    method reset_parameters (line 361) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 368) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class GPT2FusedLinearConv1D_Row (line 417) | class GPT2FusedLinearConv1D_Row(ParallelModule):
    method __init__ (line 439) | def __init__(
    method from_native_module (line 518) | def from_native_module(
    method chunk_weight (line 559) | def chunk_weight(self):
    method reset_parameters (line 562) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 579) | def forward(self, input_: Tensor) -> Tensor:
  class GPT2FusedLinearConv (line 637) | class GPT2FusedLinearConv(ParallelModule):
    method __init__ (line 658) | def __init__(
    method from_native_module (line 720) | def from_native_module(
    method reset_parameters (line 752) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 759) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class FusedLinear1D_Col (line 788) | class FusedLinear1D_Col(ParallelModule):
    method __init__ (line 816) | def __init__(
    method from_native_module (line 905) | def from_native_module(
    method reset_parameters (line 948) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 955) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:
  class FusedLinear1D_Row (line 1001) | class FusedLinear1D_Row(ParallelModule):
    method __init__ (line 1024) | def __init__(
    method from_native_module (line 1109) | def from_native_module(
    method reset_parameters (line 1142) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 1159) | def forward(self, input_: Tensor) -> Tensor:
  class FusedLinear (line 1205) | class FusedLinear(ParallelModule):
    method __init__ (line 1233) | def __init__(
    method from_native_module (line 1295) | def from_native_module(
    method reset_parameters (line 1329) | def reset_parameters(self, weight_initializer, bias_initializer) -> None:
    method forward (line 1336) | def forward(self, input_: Tensor) -> Tuple[Tensor, Tensor]:

FILE: colossalai/shardformer/layer/utils.py
  function execute_conv1d_w_pass_grad_accum (line 21) | def execute_conv1d_w_pass_grad_accum(_input_, _grad_output_, _weight_mai...
  function execute_conv1d_w_pass (line 31) | def execute_conv1d_w_pass(_input_, _grad_output_, _weight_main_grad_=Non...
  function execute_w_pass_grad_accum (line 36) | def execute_w_pass_grad_accum(_input_, _grad_output_, _weight_main_grad_):
  function execute_w_pass (line 46) | def execute_w_pass(_input_, _grad_output_, _weight_main_grad_=None, wgra...
  class SeqParallelUtils (line 50) | class SeqParallelUtils:
    method marked_as_sp_partial_derived_param (line 52) | def marked_as_sp_partial_derived_param(param):
    method is_sp_partial_derived_param (line 62) | def is_sp_partial_derived_param(param):
    method allreduce_partial_data_grad (line 75) | def allreduce_partial_data_grad(
  class Randomizer (line 129) | class Randomizer:
    method __init__ (line 151) | def __init__(self, seed: int):
    method _set_device_rng_state (line 169) | def _set_device_rng_state(self, rng_state):
    method _get_device_rng_state (line 172) | def _get_device_rng_state(self):
    method _set_cpu_rng_state (line 176) | def _set_cpu_rng_state(self, rng_state):
    method _get_cpu_rng_state (line 179) | def _get_cpu_rng_state(self):
    method fork_rng (line 184) | def fork_rng(self, enable_cpu: bool = False):
    method index (line 210) | def index():
    method increment_index (line 237) | def increment_index():
    method reset_index (line 244) | def reset_index():
    method is_randomizer_index_synchronized (line 251) | def is_randomizer_index_synchronized(process_group: ProcessGroup = None):
    method synchronize_index (line 272) | def synchronize_index(process_group: ProcessGroup = None):
  function create_randomizer_with_offset (line 295) | def create_randomizer_with_offset(
  function split_batch_zigzag (line 331) | def split_batch_zigzag(
  function split_varlen_zigzag (line 381) | def split_varlen_zigzag(
  function is_share_sp_tp (line 467) | def is_share_sp_tp(sp_mode: str):
  class RingComm (line 475) | class RingComm:
    method __init__ (line 476) | def __init__(self, process_group: dist.ProcessGroup):
    method send_recv (line 489) | def send_recv(
    method commit (line 510) | def commit(self):
    method wait (line 514) | def wait(self):
  function get_half_index (line 523) | def get_half_index(cu_seqlens, *, front: bool):

FILE: colossalai/shardformer/modeling/bert.py
  class BertPipelineForwards (line 35) | class BertPipelineForwards:
    method bert_model_forward (line 42) | def bert_model_forward(
    method bert_for_pretraining_forward (line 286) | def bert_for_pretraining_forward(
    method bert_lm_head_model_forward (line 363) | def bert_lm_head_model_forward(
    method bert_for_masked_lm_forward (line 472) | def bert_for_masked_lm_forward(
    method bert_for_next_sentence_prediction_forward (line 551) | def bert_for_next_sentence_prediction_forward(
    method bert_for_sequence_classification_forward (line 659) | def bert_for_sequence_classification_forward(
    method bert_for_token_classification_forward (line 753) | def bert_for_token_classification_forward(
    method bert_for_multiple_choice_forward (line 828) | def bert_for_multiple_choice_forward(
    method bert_for_question_answering_forward (line 919) | def bert_for_question_answering_forward(
  function get_jit_fused_bert_self_output_forward (line 1016) | def get_jit_fused_bert_self_output_forward():
  function get_jit_fused_bert_output_forward (line 1028) | def get_jit_fused_bert_output_forward():
  function get_bert_sequence_parallel_attention_forward (line 1042) | def get_bert_sequence_parallel_attention_forward(shard_config: ShardConf...
  function bert_sequence_parallel_forward_fn (line 1123) | def bert_sequence_parallel_forward_fn(shard_config: ShardConfig):
  function get_jit_fused_bert_intermediate_forward (line 1283) | def get_jit_fused_bert_intermediate_forward():

FILE: colossalai/shardformer/modeling/blip2.py
  function forward_fn (line 9) | def forward_fn():
  function get_blip2_flash_attention_forward (line 64) | def get_blip2_flash_attention_forward():
  function get_jit_fused_blip2_QFormer_self_output_forward (line 102) | def get_jit_fused_blip2_QFormer_self_output_forward():
  function get_jit_fused_blip2_QFormer_output_forward (line 118) | def get_jit_fused_blip2_QFormer_output_forward():
  function get_jit_fused_blip2_mlp_forward (line 134) | def get_jit_fused_blip2_mlp_forward():

FILE: colossalai/shardformer/modeling/bloom.py
  function build_bloom_alibi_tensor_fn (line 37) | def build_bloom_alibi_tensor_fn(process_group: ProcessGroup) -> torch.Te...
  class BloomPipelineForwards (line 103) | class BloomPipelineForwards:
    method bloom_model_forward (line 109) | def bloom_model_forward(
    method bloom_for_causal_lm_forward (line 320) | def bloom_for_causal_lm_forward(
    method bloom_for_sequence_classification_forward (line 412) | def bloom_for_sequence_classification_forward(
    method bloom_for_token_classification_forward (line 538) | def bloom_for_token_classification_forward(
    method bloom_for_question_answering_forward (line 632) | def bloom_for_question_answering_forward(
  function get_jit_fused_bloom_attention_forward (line 726) | def get_jit_fused_bloom_attention_forward():
  function get_jit_fused_bloom_mlp_forward (line 810) | def get_jit_fused_bloom_mlp_forward():
  function get_jit_fused_bloom_gelu_forward (line 832) | def get_jit_fused_bloom_gelu_forward():
  function get_bloom_sequence_parallel_attention_forward (line 848) | def get_bloom_sequence_parallel_attention_forward(shard_config: ShardCon...
  function get_bloom_sequence_parallel_forward_fn (line 933) | def get_bloom_sequence_parallel_forward_fn(shard_config: ShardConfig):
  function get_lm_forward_with_dist_cross_entropy (line 1095) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/chatglm2.py
  function get_flash_core_attention_forward (line 23) | def get_flash_core_attention_forward():
  function get_jit_fused_glm_block_forward (line 37) | def get_jit_fused_glm_block_forward():
  class ChatGLMPipelineForwards (line 87) | class ChatGLMPipelineForwards:
    method chatglm_model_forward (line 93) | def chatglm_model_forward(
    method chatglm_for_conditional_generation_forward (line 267) | def chatglm_for_conditional_generation_forward(
  function get_chatglm_sequence_parallel_forward_fn (line 339) | def get_chatglm_sequence_parallel_forward_fn(shard_config: ShardConfig, ...
  function get_chatglm_sequence_parallel_attention_forward (line 466) | def get_chatglm_sequence_parallel_attention_forward(shard_config: ShardC...
  function get_flash_attention_forward_for_chat_glm_model (line 642) | def get_flash_attention_forward_for_chat_glm_model():

FILE: colossalai/shardformer/modeling/chatglm2_6b/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 7) | def __init__(

FILE: colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py
  function default_init (line 78) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 82) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 83) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class PrefixEncoder (line 90) | class PrefixEncoder(torch.nn.Module):
    method __init__ (line 97) | def __init__(self, config: ChatGLMConfig):
    method forward (line 115) | def forward(self, prefix: torch.Tensor):
  function split_tensor_along_last_dim (line 124) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 152) | class RotaryEmbedding(nn.Module):
    method __init__ (line 153) | def __init__(self, dim, original_impl=False, device=None, dtype=None):
    method forward_impl (line 160) | def forward_impl(
    method forward (line 190) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 200) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 220) | class RMSNorm(torch.nn.Module):
    method __init__ (line 221) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 228) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 235) | class CoreAttention(torch.nn.Module):
    method __init__ (line 236) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 261) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SelfAttention (line 374) | class SelfAttention(torch.nn.Module):
    method __init__ (line 381) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 413) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 427) | def forward(
  function _config_to_kwargs (line 546) | def _config_to_kwargs(args):
  class MLP (line 553) | class MLP(torch.nn.Module):
    method __init__ (line 561) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 590) | def forward(self, hidden_states):
  class GLMBlock (line 599) | class GLMBlock(torch.nn.Module):
    method __init__ (line 606) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 638) | def forward(
  class GLMTransformer (line 686) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 689) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 716) | def _get_layer(self, layer_number):
    method forward (line 719) | def forward(
  class ChatGLMPreTrainedModel (line 776) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 788) | def _init_weights(self, module: nn.Module):
    method get_masks (line 792) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 815) | def get_position_ids(self, input_ids, device):
    method _set_gradient_checkpointing (line 820) | def _set_gradient_checkpointing(self, module, value=False):
  class Embedding (line 825) | class Embedding(torch.nn.Module):
    method __init__ (line 828) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 841) | def forward(self, input_ids):
  class ChatGLMModel (line 853) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 854) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 898) | def get_input_embeddings(self):
    method get_prompt (line 901) | def get_prompt(self, batch_size, device, dtype=torch.half):
    method forward (line 916) | def forward(
    method quantize (line 996) | def quantize(self, weight_bit_width: int):
  class ChatGLMForConditionalGeneration (line 1003) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 1004) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 1015) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 1045) | def prepare_inputs_for_generation(
    method forward (line 1068) | def forward(
    method _reorder_cache (line 1129) | def _reorder_cache(
    method process_response (line 1147) | def process_response(self, response):
    method build_inputs (line 1152) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,...
    method build_stream_inputs (line 1158) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup...
    method chat (line 1171) | def chat(
    method stream_chat (line 1207) | def stream_chat(
    method stream_generate (line 1265) | def stream_generate(
    method quantize (line 1372) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs):

FILE: colossalai/shardformer/modeling/command.py
  class CommandPipelineForwards (line 33) | class CommandPipelineForwards:
    method command_model_forward (line 40) | def command_model_forward(
    method command_for_causal_lm_forward (line 244) | def command_for_causal_lm_forward(
  function get_command_flash_attention_forward (line 349) | def get_command_flash_attention_forward(shard_config: ShardConfig, sp_mo...
  function get_command_flash_attention_model_forward (line 430) | def get_command_flash_attention_model_forward(shard_config: ShardConfig,...
  function get_lm_forward_with_dist_cross_entropy (line 573) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/deepseek.py
  class AddAuxiliaryLoss (line 42) | class AddAuxiliaryLoss(torch.autograd.Function):
    method forward (line 49) | def forward(ctx, x, loss):
    method backward (line 56) | def backward(ctx, grad_output):
  class EPDeepseekMoE (line 63) | class EPDeepseekMoE(ParallelModule):
    method __init__ (line 64) | def __init__(self):
    method setup_process_groups (line 67) | def setup_process_groups(
    method from_native_module (line 126) | def from_native_module(
    method forward (line 142) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class DeepseekMoEGate_Col (line 232) | class DeepseekMoEGate_Col(ParallelModule):
    method parallel_linear (line 233) | def parallel_linear(self, hidden_states):
    method forward (line 250) | def forward(self, hidden_states):
    method from_native_module (line 293) | def from_native_module(
  class DeepseekPipelineForwards (line 305) | class DeepseekPipelineForwards:
    method deepseek_model_forward (line 312) | def deepseek_model_forward(
    method deepseek_for_causal_lm_forward (line 502) | def deepseek_for_causal_lm_forward(
  function get_deepseek_flash_attention_forward (line 614) | def get_deepseek_flash_attention_forward(shard_config, sp_mode=None, sp_...
  function get_deepseek_flash_attention_model_forward (line 733) | def get_deepseek_flash_attention_model_forward(shard_config, sp_mode=Non...

FILE: colossalai/shardformer/modeling/deepseek_v3.py
  class EpDeepseekV3MoE (line 26) | class EpDeepseekV3MoE(ParallelModule):
    method __init__ (line 31) | def __init__(self, config):
    method setup_process_groups (line 34) | def setup_process_groups(
    method from_native_module (line 63) | def from_native_module(
    method forward (line 76) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method moe_forward (line 86) | def moe_forward(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_we...
  function deepseek_v3_model_forward (line 161) | def deepseek_v3_model_forward(
  function deepseek_v3_for_causal_lm_forward (line 297) | def deepseek_v3_for_causal_lm_forward(

FILE: colossalai/shardformer/modeling/falcon.py
  function build_falcon_alibi_tensor_fn (line 32) | def build_falcon_alibi_tensor_fn(process_group: ProcessGroup) -> torch.T...
  function get_tp_falcon_decoder_layer_forward (line 98) | def get_tp_falcon_decoder_layer_forward():
  class FalconPipelineForwards (line 182) | class FalconPipelineForwards:
    method falcon_model_forward (line 188) | def falcon_model_forward(
    method falcon_for_causal_lm_forward (line 354) | def falcon_for_causal_lm_forward(
    method falcon_for_sequence_classification_forward (line 453) | def falcon_for_sequence_classification_forward(
    method falcon_for_token_classification_forward (line 567) | def falcon_for_token_classification_forward(
    method falcon_for_question_answering_forward (line 649) | def falcon_for_question_answering_forward(
  function get_lm_forward_with_dist_cross_entropy (line 742) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/gpt2.py
  function _get_attention_mask (line 34) | def _get_attention_mask(
  class GPT2PipelineForwards (line 109) | class GPT2PipelineForwards:
    method gpt2_model_forward (line 116) | def gpt2_model_forward(
    method gpt2_lmhead_model_forward (line 344) | def gpt2_lmhead_model_forward(
    method gpt2_double_heads_model_forward (line 433) | def gpt2_double_heads_model_forward(
    method gpt2_for_question_answering_forward (line 527) | def gpt2_for_question_answering_forward(
    method gpt2_for_token_classification_forward (line 618) | def gpt2_for_token_classification_forward(
    method gpt2_for_sequence_classification_forward (line 693) | def gpt2_for_sequence_classification_forward(
  function get_gpt2_flash_attention_forward (line 811) | def get_gpt2_flash_attention_forward(shard_config: Optional[ShardConfig]...
  function get_jit_fused_gpt2_mlp_forward (line 889) | def get_jit_fused_gpt2_mlp_forward():

FILE: colossalai/shardformer/modeling/gptj.py
  function _get_attention_mask (line 30) | def _get_attention_mask(
  class GPTJPipelineForwards (line 73) | class GPTJPipelineForwards:
    method gptj_model_forward (line 80) | def gptj_model_forward(
    method gptj_causallm_model_forward (line 262) | def gptj_causallm_model_forward(
    method gptj_for_sequence_classification_forward (line 356) | def gptj_for_sequence_classification_forward(
    method gptj_for_question_answering_forward (line 475) | def gptj_for_question_answering_forward(
  function get_gptj_flash_attention_forward (line 567) | def get_gptj_flash_attention_forward():
  function gptj_model_forward_for_flash_attention (line 649) | def gptj_model_forward_for_flash_attention(shard_config: ShardConfig):
  function gptj_sequence_parallel_forward_fn (line 825) | def gptj_sequence_parallel_forward_fn(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/jit.py
  function get_dropout_add_func (line 4) | def get_dropout_add_func():
  function get_jit_fused_dropout_add_func (line 13) | def get_jit_fused_dropout_add_func():
  function get_jit_fused_gelu_forward_func (line 25) | def get_jit_fused_gelu_forward_func():

FILE: colossalai/shardformer/modeling/llama.py
  class LlamaPipelineForwards (line 36) | class LlamaPipelineForwards:
    method llama_model_forward (line 43) | def llama_model_forward(
    method llama_for_causal_lm_forward (line 260) | def llama_for_causal_lm_forward(
    method llama_for_sequence_classification_forward (line 373) | def llama_for_sequence_classification_forward(
  function get_llama_flash_attention_forward (line 489) | def get_llama_flash_attention_forward(shard_config: ShardConfig, sp_mode...

FILE: colossalai/shardformer/modeling/mistral.py
  class MistralForwards (line 23) | class MistralForwards:
    method mistral_model_forward (line 25) | def mistral_model_forward(
    method mistral_for_causal_lm_forward (line 187) | def mistral_for_causal_lm_forward(
    method mistral_for_sequence_classification_forward (line 276) | def mistral_for_sequence_classification_forward(
  function get_mistral_model_forward_for_flash_attn (line 375) | def get_mistral_model_forward_for_flash_attn(shard_config: ShardConfig):
  function get_mistral_flash_attention_forward (line 497) | def get_mistral_flash_attention_forward(shard_config: ShardConfig):
  function get_lm_forward_with_dist_cross_entropy (line 557) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/mixtral.py
  class EPMixtralSparseMoeBlock (line 54) | class EPMixtralSparseMoeBlock(ParallelModule):
    method __init__ (line 55) | def __init__(self, *args, **kwargs):
    method setup_process_groups (line 58) | def setup_process_groups(
    method from_native_module (line 108) | def from_native_module(
    method forward (line 123) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class MixtralPipelineForwards (line 211) | class MixtralPipelineForwards:
    method mixtral_model_forward (line 218) | def mixtral_model_forward(
    method mixtral_for_causal_lm_forward (line 443) | def mixtral_for_causal_lm_forward(
  function get_mixtral_flash_attention_forward (line 573) | def get_mixtral_flash_attention_forward(shard_config, sp_mode=None, sp_s...
  function get_mixtral_flash_attention_model_forward (line 719) | def get_mixtral_flash_attention_model_forward(shard_config, sp_mode=None...

FILE: colossalai/shardformer/modeling/opt.py
  function _get_attention_mask (line 30) | def _get_attention_mask(
  class OPTPipelineForwards (line 57) | class OPTPipelineForwards:
    method opt_model_forward (line 64) | def opt_model_forward(
    method opt_for_causal_lm_forward (line 286) | def opt_for_causal_lm_forward(
    method opt_for_sequence_classification_forward (line 359) | def opt_for_sequence_classification_forward(
    method opt_for_question_answering_forward (line 461) | def opt_for_question_answering_forward(
  function get_opt_flash_attention_forward (line 542) | def get_opt_flash_attention_forward(shard_config: ShardConfig):
  function get_opt_decoder_forward_for_flash_attention (line 612) | def get_opt_decoder_forward_for_flash_attention(shard_config: ShardConfig):
  function get_jit_fused_opt_decoder_layer_forward (line 764) | def get_jit_fused_opt_decoder_layer_forward():
  function get_lm_forward_with_dist_cross_entropy (line 846) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/qwen2.py
  class Qwen2PipelineForwards (line 35) | class Qwen2PipelineForwards:
    method qwen2_model_forward (line 42) | def qwen2_model_forward(
    method qwen2_for_causal_lm_forward (line 262) | def qwen2_for_causal_lm_forward(
    method qwen2_for_sequence_classification_forward (line 366) | def qwen2_for_sequence_classification_forward(
  function get_qwen2_flash_attention_forward (line 481) | def get_qwen2_flash_attention_forward(shard_config: ShardConfig, sp_mode...
  function get_qwen2_model_forward_for_flash_attn (line 606) | def get_qwen2_model_forward_for_flash_attn(shard_config: ShardConfig, sp...
  function get_lm_forward_with_dist_cross_entropy (line 761) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/qwen3.py
  class Qwen3PipelineForwards (line 35) | class Qwen3PipelineForwards:
    method qwen3_model_forward (line 42) | def qwen3_model_forward(
    method qwen3_for_causal_lm_forward (line 260) | def qwen3_for_causal_lm_forward(
    method qwen3_for_sequence_classification_forward (line 364) | def qwen3_for_sequence_classification_forward(
  function get_qwen3_flash_attention_forward (line 467) | def get_qwen3_flash_attention_forward(shard_config: ShardConfig, sp_mode...
  function get_qwen3_model_forward_for_flash_attn (line 598) | def get_qwen3_model_forward_for_flash_attn(shard_config: ShardConfig, sp...
  function get_lm_forward_with_dist_cross_entropy (line 753) | def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):

FILE: colossalai/shardformer/modeling/sam.py
  function forward_fn (line 6) | def forward_fn():

FILE: colossalai/shardformer/modeling/t5.py
  class T5PipelineForwards (line 25) | class T5PipelineForwards:
    method t5_stack_forward (line 32) | def t5_stack_forward(
    method t5_model_forward (line 264) | def t5_model_forward(
    method t5_for_conditional_generation_forward (line 403) | def t5_for_conditional_generation_forward(
    method t5_encoder_model_forward (line 566) | def t5_encoder_model_forward(
    method t5_for_token_classification_forward (line 609) | def t5_for_token_classification_forward(
  function get_t5_flash_attention_forward (line 674) | def get_t5_flash_attention_forward():
  function get_jit_fused_T5_layer_ff_forward (line 801) | def get_jit_fused_T5_layer_ff_forward():
  function get_T5_layer_self_attention_forward (line 813) | def get_T5_layer_self_attention_forward():
  function get_T5_layer_cross_attention_forward (line 845) | def get_T5_layer_cross_attention_forward():

FILE: colossalai/shardformer/modeling/vit.py
  function _encoder_forward (line 11) | def _encoder_forward(
  function ViTModel_pipeline_forward (line 50) | def ViTModel_pipeline_forward(stage_manager: PipelineStageManager, stage...
  function ViTForImageClassification_pipeline_forward (line 139) | def ViTForImageClassification_pipeline_forward(stage_manager: PipelineSt...
  function ViTForMaskedImageModeling_pipeline_forward (line 225) | def ViTForMaskedImageModeling_pipeline_forward(stage_manager: PipelineSt...
  function get_vit_flash_self_attention_forward (line 336) | def get_vit_flash_self_attention_forward():
  function get_jit_fused_vit_output_forward (line 366) | def get_jit_fused_vit_output_forward():
  function get_jit_fused_vit_intermediate_forward (line 377) | def get_jit_fused_vit_intermediate_forward():

FILE: colossalai/shardformer/modeling/whisper.py
  function _get_attention_mask (line 37) | def _get_attention_mask(
  function get_whisper_flash_attention_forward (line 74) | def get_whisper_flash_attention_forward():
  function get_whisper_decoder_forward_for_flash_attention (line 161) | def get_whisper_decoder_forward_for_flash_attention(shard_config: ShardC...
  function get_jit_fused_whisper_encoder_layer_forward (line 319) | def get_jit_fused_whisper_encoder_layer_forward():
  function get_jit_fused_whisper_decoder_layer_forward (line 373) | def get_jit_fused_whisper_decoder_layer_forward():
  class WhisperPipelineForwards (line 465) | class WhisperPipelineForwards:
    method whisper_encoder_forward (line 472) | def whisper_encoder_forward(
    method whisper_decoder_forward (line 601) | def whisper_decoder_forward(
    method whisper_model_forward (line 845) | def whisper_model_forward(
    method whisper_for_conditional_generation_forward (line 992) | def whisper_for_conditional_generation_forward(
    method whisper_for_audio_classification_forward (line 1113) | def whisper_for_audio_classification_forward(

FILE: colossalai/shardformer/policies/auto_policy.py
  class PolicyLocation (line 12) | class PolicyLocation:
  function import_policy (line 243) | def import_policy(policy_location: PolicyLocation) -> Policy:
  function _fullname (line 252) | def _fullname(obj):
  function get_autopolicy (line 273) | def get_autopolicy(model: nn.Module) -> Policy:

FILE: colossalai/shardformer/policies/base_policy.py
  class SubModuleReplacementDescription (line 21) | class SubModuleReplacementDescription:
  class ModulePolicyDescription (line 39) | class ModulePolicyDescription:
  class Policy (line 65) | class Policy(ABC):
    method __init__ (line 75) | def __init__(self) -> None:
    method set_model (line 80) | def set_model(self, model: nn.Module) -> None:
    method set_shard_config (line 88) | def set_shard_config(self, shard_config: ShardConfig) -> None:
    method pipeline_stage_manager (line 99) | def pipeline_stage_manager(self) -> Optional[PipelineStageManager]:
    method config_sanity_check (line 105) | def config_sanity_check(self):
    method preprocess (line 113) | def preprocess(self) -> nn.Module:
    method module_policy (line 119) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 127) | def postprocess(self) -> nn.Module:
    method append_or_create_submodule_replacement (line 133) | def append_or_create_submodule_replacement(
    method append_or_create_method_replacement (line 162) | def append_or_create_method_replacement(
    method get_held_layers (line 186) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 194) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
    method tie_weight_check (line 202) | def tie_weight_check(self):

FILE: colossalai/shardformer/policies/bert.py
  class BertPolicy (line 36) | class BertPolicy(Policy):
    method config_sanity_check (line 37) | def config_sanity_check(self):
    method preprocess (line 40) | def preprocess(self):
    method module_policy (line 45) | def module_policy(self):
    method add_lm_head_policy (line 362) | def add_lm_head_policy(self, base_policy):
    method add_lm_prediction_policy (line 404) | def add_lm_prediction_policy(self, base_policy):
    method postprocess (line 418) | def postprocess(self):
    method set_pipeline_forward (line 421) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 460) | def get_held_layers(self) -> List[Module]:
  class BertModelPolicy (line 495) | class BertModelPolicy(BertPolicy):
    method module_policy (line 496) | def module_policy(self):
    method get_held_layers (line 508) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 513) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForPreTrainingPolicy (line 519) | class BertForPreTrainingPolicy(BertPolicy):
    method module_policy (line 520) | def module_policy(self):
    method get_held_layers (line 534) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 543) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertLMHeadModelPolicy (line 558) | class BertLMHeadModelPolicy(BertPolicy):
    method module_policy (line 559) | def module_policy(self):
    method get_held_layers (line 573) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 583) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForMaskedLMPolicy (line 598) | class BertForMaskedLMPolicy(BertPolicy):
    method module_policy (line 599) | def module_policy(self):
    method get_held_layers (line 613) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 623) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForSequenceClassificationPolicy (line 638) | class BertForSequenceClassificationPolicy(BertPolicy):
    method module_policy (line 639) | def module_policy(self):
    method get_held_layers (line 665) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 676) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForTokenClassificationPolicy (line 682) | class BertForTokenClassificationPolicy(BertPolicy):
    method module_policy (line 683) | def module_policy(self):
    method get_held_layers (line 709) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 720) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForNextSentencePredictionPolicy (line 726) | class BertForNextSentencePredictionPolicy(BertPolicy):
    method module_policy (line 727) | def module_policy(self):
    method get_held_layers (line 740) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 750) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForMultipleChoicePolicy (line 756) | class BertForMultipleChoicePolicy(BertPolicy):
    method module_policy (line 757) | def module_policy(self):
    method get_held_layers (line 783) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 794) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BertForQuestionAnsweringPolicy (line 799) | class BertForQuestionAnsweringPolicy(BertPolicy):
    method module_policy (line 800) | def module_policy(self):
    method get_held_layers (line 813) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 823) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/blip2.py
  class BlipPolicy (line 16) | class BlipPolicy(Policy):
    method config_sanity_check (line 17) | def config_sanity_check(self):
    method preprocess (line 20) | def preprocess(self):
    method module_policy (line 27) | def module_policy(self):
    method postprocess (line 689) | def postprocess(self):
  class Blip2ModelPolicy (line 694) | class Blip2ModelPolicy(BlipPolicy):
    method __init__ (line 695) | def __init__(self) -> None:
  class Blip2ForConditionalGenerationPolicy (line 700) | class Blip2ForConditionalGenerationPolicy(BlipPolicy):
    method __init__ (line 701) | def __init__(self) -> None:

FILE: colossalai/shardformer/policies/bloom.py
  class BloomPolicy (line 25) | class BloomPolicy(Policy):
    method __init__ (line 26) | def __init__(self) -> None:
    method config_sanity_check (line 29) | def config_sanity_check(self):
    method preprocess (line 32) | def preprocess(self):
    method module_policy (line 36) | def module_policy(self):
    method postprocess (line 276) | def postprocess(self):
    method set_pipeline_forward (line 279) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 301) | def get_held_layers(self) -> List[Module]:
  class BloomModelPolicy (line 337) | class BloomModelPolicy(BloomPolicy):
    method module_policy (line 338) | def module_policy(self):
    method get_held_layers (line 348) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 355) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BloomForCausalLMPolicy (line 360) | class BloomForCausalLMPolicy(BloomPolicy):
    method module_policy (line 361) | def module_policy(self):
    method get_held_layers (line 402) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 416) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BloomForSequenceClassificationPolicy (line 430) | class BloomForSequenceClassificationPolicy(BloomPolicy):
    method module_policy (line 431) | def module_policy(self):
    method get_held_layers (line 468) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 482) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BloomForTokenClassificationPolicy (line 487) | class BloomForTokenClassificationPolicy(BloomPolicy):
    method module_policy (line 488) | def module_policy(self):
    method get_held_layers (line 538) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 554) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class BloomForQuestionAnsweringPolicy (line 559) | class BloomForQuestionAnsweringPolicy(BloomPolicy):
    method module_policy (line 561) | def module_policy(self):
    method get_held_layers (line 573) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 587) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/chatglm2.py
  class ChatGLMPolicy (line 28) | class ChatGLMPolicy(Policy):
    method config_sanity_check (line 29) | def config_sanity_check(self):
    method preprocess (line 32) | def preprocess(self):
    method module_policy (line 41) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 281) | def postprocess(self):
    method get_held_layers (line 284) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 322) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class ChatGLMModelPolicy (line 346) | class ChatGLMModelPolicy(ChatGLMPolicy):
    method module_policy (line 347) | def module_policy(self):
    method get_held_layers (line 360) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 363) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class ChatGLMForConditionalGenerationPolicy (line 368) | class ChatGLMForConditionalGenerationPolicy(ChatGLMModelPolicy):
    method module_policy (line 369) | def module_policy(self):
    method get_held_layers (line 380) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 393) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/command.py
  class CommandPolicy (line 29) | class CommandPolicy(Policy):
    method config_sanity_check (line 30) | def config_sanity_check(self):
    method preprocess (line 33) | def preprocess(self):
    method module_policy (line 38) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 275) | def postprocess(self):
    method set_pipeline_forward (line 278) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 308) | def get_held_layers(self) -> List[Module]:
  class CommandModelPolicy (line 345) | class CommandModelPolicy(CommandPolicy):
    method module_policy (line 346) | def module_policy(self):
    method get_held_layers (line 357) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 362) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class CommandForCausalLMPolicy (line 367) | class CommandForCausalLMPolicy(CommandPolicy):
    method module_policy (line 368) | def module_policy(self):
    method get_held_layers (line 419) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 433) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/deepseek.py
  class DeepseekPolicy (line 24) | class DeepseekPolicy(Policy):
    method config_sanity_check (line 25) | def config_sanity_check(self):
    method preprocess (line 28) | def preprocess(self):
    method module_policy (line 47) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 294) | def postprocess(self):
    method set_pipeline_forward (line 297) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 320) | def get_held_layers(self) -> List[Module]:
  class DeepseekModelPolicy (line 355) | class DeepseekModelPolicy(DeepseekPolicy):
    method __init__ (line 356) | def __init__(self) -> None:
    method module_policy (line 359) | def module_policy(self):
    method get_held_layers (line 370) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 375) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class DeepseekForCausalLMPolicy (line 380) | class DeepseekForCausalLMPolicy(DeepseekPolicy):
    method module_policy (line 381) | def module_policy(self):
    method get_held_layers (line 432) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 446) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/deepseek_v3.py
  class DeepseekV3Policy (line 17) | class DeepseekV3Policy(Policy):
    method config_sanity_check (line 18) | def config_sanity_check(self):
    method preprocess (line 24) | def preprocess(self):
    method module_policy (line 27) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 83) | def postprocess(self):
    method set_pipeline_forward (line 86) | def set_pipeline_forward(self, model_cls: str, new_forward: Callable, ...
    method get_held_layers (line 102) | def get_held_layers(self) -> List[nn.Module]:
  class DeepseekV3ModelPolicy (line 142) | class DeepseekV3ModelPolicy(DeepseekV3Policy):
    method module_policy (line 143) | def module_policy(self):
  class DeepseekV3ForCausalLMPolicy (line 150) | class DeepseekV3ForCausalLMPolicy(DeepseekV3Policy):
    method module_policy (line 151) | def module_policy(self):
    method get_held_layers (line 157) | def get_held_layers(self):

FILE: colossalai/shardformer/policies/falcon.py
  class FalconPolicy (line 21) | class FalconPolicy(Policy):
    method __init__ (line 22) | def __init__(self) -> None:
    method config_sanity_check (line 25) | def config_sanity_check(self):
    method preprocess (line 28) | def preprocess(self):
    method module_policy (line 32) | def module_policy(self):
    method postprocess (line 216) | def postprocess(self):
    method set_pipeline_forward (line 219) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 240) | def get_held_layers(self) -> List[Module]:
  class FalconModelPolicy (line 274) | class FalconModelPolicy(FalconPolicy):
    method __init__ (line 275) | def __init__(self) -> None:
    method module_policy (line 278) | def module_policy(self):
    method get_held_layers (line 289) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 296) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class FalconForCausalLMPolicy (line 301) | class FalconForCausalLMPolicy(FalconPolicy):
    method __init__ (line 302) | def __init__(self) -> None:
    method module_policy (line 305) | def module_policy(self):
    method get_held_layers (line 349) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 363) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class FalconForSequenceClassificationPolicy (line 377) | class FalconForSequenceClassificationPolicy(FalconPolicy):
    method __init__ (line 378) | def __init__(self) -> None:
    method module_policy (line 381) | def module_policy(self):
    method get_held_layers (line 416) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 430) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class FalconForTokenClassificationPolicy (line 435) | class FalconForTokenClassificationPolicy(FalconPolicy):
    method __init__ (line 436) | def __init__(self) -> None:
    method module_policy (line 439) | def module_policy(self):
    method get_held_layers (line 487) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 503) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class FalconForQuestionAnsweringPolicy (line 508) | class FalconForQuestionAnsweringPolicy(FalconPolicy):
    method __init__ (line 509) | def __init__(self) -> None:
    method module_policy (line 512) | def module_policy(self):
    method get_held_layers (line 548) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 562) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/gpt2.py
  class GPT2Policy (line 22) | class GPT2Policy(Policy):
    method config_sanity_check (line 23) | def config_sanity_check(self):
    method preprocess (line 26) | def preprocess(self):
    method module_policy (line 38) | def module_policy(self):
    method postprocess (line 287) | def postprocess(self):
    method get_held_layers (line 290) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 325) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class GPT2ModelPolicy (line 361) | class GPT2ModelPolicy(GPT2Policy):
    method module_policy (line 362) | def module_policy(self):
    method get_held_layers (line 375) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 378) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPT2LMHeadModelPolicy (line 384) | class GPT2LMHeadModelPolicy(GPT2Policy):
    method module_policy (line 385) | def module_policy(self):
    method get_held_layers (line 431) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 446) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPT2DoubleHeadsModelPolicy (line 463) | class GPT2DoubleHeadsModelPolicy(GPT2Policy):
    method module_policy (line 464) | def module_policy(self):
    method get_held_layers (line 508) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 531) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPT2ForQuestionAnsweringPolicy (line 548) | class GPT2ForQuestionAnsweringPolicy(GPT2Policy):
    method module_policy (line 549) | def module_policy(self):
    method get_held_layers (line 563) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 578) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPT2ForTokenClassificationPolicy (line 584) | class GPT2ForTokenClassificationPolicy(GPT2Policy):
    method module_policy (line 585) | def module_policy(self):
    method get_held_layers (line 611) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 629) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPT2ForSequenceClassificationPolicy (line 635) | class GPT2ForSequenceClassificationPolicy(GPT2Policy):
    method module_policy (line 636) | def module_policy(self):
    method get_held_layers (line 649) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 665) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/gptj.py
  class GPTJPolicy (line 27) | class GPTJPolicy(Policy):
    method config_sanity_check (line 28) | def config_sanity_check(self):
    method preprocess (line 31) | def preprocess(self):
    method module_policy (line 36) | def module_policy(self):
    method postprocess (line 260) | def postprocess(self):
    method get_held_layers (line 263) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 296) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class GPTJModelPolicy (line 321) | class GPTJModelPolicy(GPTJPolicy):
    method __init__ (line 322) | def __init__(self) -> None:
    method module_policy (line 325) | def module_policy(self):
    method get_held_layers (line 338) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 341) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPTJForCausalLMPolicy (line 347) | class GPTJForCausalLMPolicy(GPTJPolicy):
    method __init__ (line 348) | def __init__(self) -> None:
    method module_policy (line 351) | def module_policy(self):
    method get_held_layers (line 394) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 407) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPTJForSequenceClassificationPolicy (line 424) | class GPTJForSequenceClassificationPolicy(GPTJPolicy):
    method __init__ (line 425) | def __init__(self) -> None:
    method module_policy (line 428) | def module_policy(self):
    method get_held_layers (line 441) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 454) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class GPTJForQuestionAnsweringPolicy (line 460) | class GPTJForQuestionAnsweringPolicy(GPTJPolicy):
    method __init__ (line 461) | def __init__(self) -> None:
    method module_policy (line 464) | def module_policy(self):
    method get_held_layers (line 477) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 490) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/llama.py
  class LlamaPolicy (line 26) | class LlamaPolicy(Policy):
    method config_sanity_check (line 27) | def config_sanity_check(self):
    method preprocess (line 30) | def preprocess(self):
    method module_policy (line 35) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 300) | def postprocess(self):
    method set_pipeline_forward (line 303) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 333) | def get_held_layers(self) -> List[Module]:
  class LlamaModelPolicy (line 369) | class LlamaModelPolicy(LlamaPolicy):
    method module_policy (line 370) | def module_policy(self):
    method get_held_layers (line 381) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 386) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class LlamaForCausalLMPolicy (line 391) | class LlamaForCausalLMPolicy(LlamaPolicy):
    method module_policy (line 392) | def module_policy(self):
    method get_held_layers (line 442) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 452) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class LlamaForSequenceClassificationPolicy (line 471) | class LlamaForSequenceClassificationPolicy(LlamaPolicy):
    method module_policy (line 472) | def module_policy(self):
    method get_held_layers (line 526) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 536) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/mistral.py
  class MistralPolicy (line 31) | class MistralPolicy(Policy):
    method config_sanity_check (line 32) | def config_sanity_check(self):
    method preprocess (line 35) | def preprocess(self):
    method module_policy (line 40) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 261) | def postprocess(self):
    method set_pipeline_forward (line 264) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 294) | def get_held_layers(self) -> List[Module]:
  class MistralModelPolicy (line 329) | class MistralModelPolicy(MistralPolicy):
    method __init__ (line 330) | def __init__(self) -> None:
    method module_policy (line 333) | def module_policy(self):
    method get_held_layers (line 344) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 349) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class MistralForCausalLMPolicy (line 354) | class MistralForCausalLMPolicy(MistralPolicy):
    method module_policy (line 355) | def module_policy(self):
    method get_held_layers (line 406) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 420) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class MistralForSequenceClassificationPolicy (line 437) | class MistralForSequenceClassificationPolicy(MistralPolicy):
    method module_policy (line 438) | def module_policy(self):
    method get_held_layers (line 468) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 482) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/mixtral.py
  class MixtralPolicy (line 33) | class MixtralPolicy(Policy):
    method config_sanity_check (line 34) | def config_sanity_check(self):
    method preprocess (line 37) | def preprocess(self):
    method module_policy (line 42) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 284) | def postprocess(self):
    method set_pipeline_forward (line 287) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 310) | def get_held_layers(self) -> List[Module]:
  class MixtralModelPolicy (line 348) | class MixtralModelPolicy(MixtralPolicy):
    method __init__ (line 349) | def __init__(self) -> None:
    method module_policy (line 352) | def module_policy(self):
    method get_held_layers (line 363) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 368) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class MixtralForCausalLMPolicy (line 373) | class MixtralForCausalLMPolicy(MixtralPolicy):
    method module_policy (line 374) | def module_policy(self):
    method get_held_layers (line 423) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 433) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class MixtralForSequenceClassificationPolicy (line 450) | class MixtralForSequenceClassificationPolicy(MixtralPolicy):
    method module_policy (line 451) | def module_policy(self):
    method get_held_layers (line 481) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 489) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/opt.py
  class OPTPolicy (line 40) | class OPTPolicy(Policy):
    method __init__ (line 41) | def __init__(self) -> None:
    method config_sanity_check (line 44) | def config_sanity_check(self):
    method preprocess (line 47) | def preprocess(self):
    method module_policy (line 52) | def module_policy(self):
    method postprocess (line 277) | def postprocess(self):
    method get_held_layers (line 280) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 318) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class OPTModelPolicy (line 343) | class OPTModelPolicy(OPTPolicy):
    method module_policy (line 344) | def module_policy(self):
    method get_held_layers (line 356) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 359) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class OPTForCausalLMPolicy (line 364) | class OPTForCausalLMPolicy(OPTPolicy):
    method module_policy (line 365) | def module_policy(self):
    method get_held_layers (line 407) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 420) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
    method postprocess (line 433) | def postprocess(self):
  class OPTForSequenceClassificationPolicy (line 447) | class OPTForSequenceClassificationPolicy(OPTPolicy):
    method module_policy (line 448) | def module_policy(self):
    method get_held_layers (line 461) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 467) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class OPTForQuestionAnsweringPolicy (line 472) | class OPTForQuestionAnsweringPolicy(OPTPolicy):
    method module_policy (line 473) | def module_policy(self):
    method get_held_layers (line 486) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 499) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/qwen2.py
  class Qwen2Policy (line 37) | class Qwen2Policy(Policy):
    method __init__ (line 38) | def __init__(self) -> None:
    method config_sanity_check (line 47) | def config_sanity_check(self):
    method preprocess (line 50) | def preprocess(self):
    method module_policy (line 55) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 307) | def postprocess(self):
    method set_pipeline_forward (line 310) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 343) | def get_held_layers(self) -> List[Module]:
  class Qwen2ModelPolicy (line 381) | class Qwen2ModelPolicy(Qwen2Policy):
    method module_policy (line 382) | def module_policy(self):
    method get_held_layers (line 392) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 397) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class Qwen2ForCausalLMPolicy (line 402) | class Qwen2ForCausalLMPolicy(Qwen2Policy):
    method module_policy (line 403) | def module_policy(self):
    method get_held_layers (line 460) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 474) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class Qwen2ForSequenceClassificationPolicy (line 491) | class Qwen2ForSequenceClassificationPolicy(Qwen2Policy):
    method module_policy (line 492) | def module_policy(self):
    method get_held_layers (line 540) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 554) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/qwen3.py
  class Qwen3Policy (line 37) | class Qwen3Policy(Policy):
    method __init__ (line 38) | def __init__(self) -> None:
    method config_sanity_check (line 47) | def config_sanity_check(self):
    method preprocess (line 50) | def preprocess(self):
    method module_policy (line 55) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 307) | def postprocess(self):
    method set_pipeline_forward (line 310) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
    method get_held_layers (line 343) | def get_held_layers(self) -> List[Module]:
  class Qwen3ModelPolicy (line 381) | class Qwen3ModelPolicy(Qwen3Policy):
    method module_policy (line 382) | def module_policy(self):
    method get_held_layers (line 391) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 396) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class Qwen3ForCausalLMPolicy (line 401) | class Qwen3ForCausalLMPolicy(Qwen3Policy):
    method module_policy (line 402) | def module_policy(self):
    method get_held_layers (line 446) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 460) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class Qwen3ForSequenceClassificationPolicy (line 477) | class Qwen3ForSequenceClassificationPolicy(Qwen3Policy):
    method module_policy (line 478) | def module_policy(self):
    method get_held_layers (line 525) | def get_held_layers(self) -> List[Module]:
    method get_shared_params (line 539) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/sam.py
  class SamPolicy (line 9) | class SamPolicy(Policy):
    method config_sanity_check (line 10) | def config_sanity_check(self):
    method preprocess (line 13) | def preprocess(self):
    method module_policy (line 16) | def module_policy(self):
    method postprocess (line 504) | def postprocess(self):
  class SamModelPolicy (line 509) | class SamModelPolicy(SamPolicy):
    method __init__ (line 510) | def __init__(self) -> None:

FILE: colossalai/shardformer/policies/t5.py
  class T5BasePolicy (line 44) | class T5BasePolicy(Policy):
    method config_sanity_check (line 45) | def config_sanity_check(self):
    method preprocess (line 48) | def preprocess(self):
    method module_policy (line 52) | def module_policy(self):
    method postprocess (line 445) | def postprocess(self):
    method distribute_t5_layers (line 448) | def distribute_t5_layers(
    method get_t5_stage_index (line 484) | def get_t5_stage_index(
    method get_held_layers (line 501) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 575) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class T5ModelPolicy (line 608) | class T5ModelPolicy(T5BasePolicy):
    method module_policy (line 609) | def module_policy(self):
    method get_held_layers (line 643) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 646) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class T5ForConditionalGenerationPolicy (line 659) | class T5ForConditionalGenerationPolicy(T5BasePolicy):
    method module_policy (line 660) | def module_policy(self):
    method get_held_layers (line 723) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 736) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class T5EncoderPolicy (line 762) | class T5EncoderPolicy(T5BasePolicy):
    method module_policy (line 763) | def module_policy(self):
    method get_held_layers (line 799) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 802) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class T5ForTokenClassificationPolicy (line 806) | class T5ForTokenClassificationPolicy(T5EncoderPolicy):
    method module_policy (line 807) | def module_policy(self):
    method get_held_layers (line 833) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 851) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/policies/vit.py
  class ViTPolicy (line 23) | class ViTPolicy(Policy):
    method config_sanity_check (line 24) | def config_sanity_check(self):
    method preprocess (line 27) | def preprocess(self):
    method module_policy (line 31) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method new_model_class (line 248) | def new_model_class(self):
    method postprocess (line 251) | def postprocess(self):
    method get_held_layers (line 254) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 281) | def set_pipeline_forward(self, model_cls: nn.Module, pipeline_forward:...
  class ViTModelPolicy (line 298) | class ViTModelPolicy(ViTPolicy):
    method module_policy (line 299) | def module_policy(self):
    method get_held_layers (line 308) | def get_held_layers(self) -> List[nn.Module]:
  class ViTForImageClassificationPolicy (line 329) | class ViTForImageClassificationPolicy(ViTPolicy):
    method module_policy (line 330) | def module_policy(self):
    method get_held_layers (line 381) | def get_held_layers(self) -> List[nn.Module]:
  class ViTForMaskedImageModelingPolicy (line 402) | class ViTForMaskedImageModelingPolicy(ViTPolicy):
    method module_policy (line 403) | def module_policy(self):
    method get_held_layers (line 417) | def get_held_layers(self) -> List[nn.Module]:

FILE: colossalai/shardformer/policies/whisper.py
  class WhisperPolicy (line 29) | class WhisperPolicy(Policy):
    method __init__ (line 30) | def __init__(self) -> None:
    method config_sanity_check (line 33) | def config_sanity_check(self):
    method preprocess (line 36) | def preprocess(self):
    method module_policy (line 44) | def module_policy(self):
    method add_lm_head_policy (line 505) | def add_lm_head_policy(self, base_policy):
    method postprocess (line 536) | def postprocess(self):
    method distribute_whisper_layers (line 539) | def distribute_whisper_layers(
    method get_whisper_stage_index (line 575) | def get_whisper_stage_index(
    method get_held_layers (line 593) | def get_held_layers(self) -> List[nn.Module]:
    method set_pipeline_forward (line 681) | def set_pipeline_forward(self, model_cls: nn.Module, new_forward: Call...
  class WhisperModelPolicy (line 726) | class WhisperModelPolicy(WhisperPolicy):
    method module_policy (line 727) | def module_policy(self):
    method get_held_layers (line 741) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 744) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class WhisperForConditionalGenerationPolicy (line 750) | class WhisperForConditionalGenerationPolicy(WhisperPolicy):
    method module_policy (line 751) | def module_policy(self):
    method postprocess (line 765) | def postprocess(self):
    method get_held_layers (line 768) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 781) | def get_shared_params(self) -> List[Dict[int, Tensor]]:
  class WhisperForAudioClassificationPolicy (line 815) | class WhisperForAudioClassificationPolicy(WhisperPolicy):
    method module_policy (line 816) | def module_policy(self):
    method get_held_layers (line 829) | def get_held_layers(self) -> List[nn.Module]:
    method get_shared_params (line 844) | def get_shared_params(self) -> List[Dict[int, Tensor]]:

FILE: colossalai/shardformer/shard/grad_ckpt_config.py
  class GradientCheckpointConfig (line 6) | class GradientCheckpointConfig:
    method get_num_ckpt_layers (line 9) | def get_num_ckpt_layers(self, num_layers: int) -> int:
  class PipelineGradientCheckpointConfig (line 14) | class PipelineGradientCheckpointConfig(GradientCheckpointConfig):
    method __post_init__ (line 52) | def __post_init__(self):
    method _enable_gradient_checkpointing_ratio (line 60) | def _enable_gradient_checkpointing_ratio(self) -> bool:
    method _enable_customized_ckpt_layers_per_stage (line 64) | def _enable_customized_ckpt_layers_per_stage(self) -> bool:
    method get_num_ckpt_layers (line 67) | def get_num_ckpt_layers(

FILE: colossalai/shardformer/shard/shard_config.py
  class ShardConfig (line 17) | class ShardConfig:
    method tensor_parallel_size (line 64) | def tensor_parallel_size(self):
    method sequence_parallel_size (line 68) | def sequence_parallel_size(self):
    method expert_parallel_size (line 72) | def expert_parallel_size(self):
    method __post_init__ (line 75) | def __post_init__(self):
    method _turn_on_all_optimization (line 112) | def _turn_on_all_optimization(self):

FILE: colossalai/shardformer/shard/sharder.py
  class ModelSharder (line 18) | class ModelSharder(object):
    method __init__ (line 28) | def __init__(self, model: nn.Module, policy: Policy, shard_config: Sha...
    method shard (line 33) | def shard(self) -> List[Dict[int, Tensor]]:
    method _preprocess (line 48) | def _preprocess(self) -> None:
    method _postprocess (line 51) | def _postprocess(self) -> None:
    method _replace_module (line 54) | def _replace_module(self, include: Optional[Set[nn.Module]] = None) ->...
    method _recursive_replace_layer (line 77) | def _recursive_replace_layer(
    method _replace_attr (line 125) | def _replace_attr(
    method _replace_param (line 140) | def _replace_param(
    method _replace_method (line 155) | def _replace_method(self, module: nn.Module, method_replacement: Dict[...
    method _replace_sub_module (line 161) | def _replace_sub_module(
    method _get_recursive_held_layers (line 209) | def _get_recursive_held_layers(self, held_layers: Optional[List[nn.Mod...
    method _release_unheld_layers (line 222) | def _release_unheld_layers(self) -> Optional[Set[nn.Module]]:
    method _materialize (line 232) | def _materialize(self) -> None:

FILE: colossalai/shardformer/shard/shardformer.py
  class ShardFormer (line 14) | class ShardFormer:
    method __init__ (line 35) | def __init__(self, shard_config: ShardConfig):
    method optimize (line 43) | def optimize(self, model: nn.Module, policy: Policy = None) -> Tuple[n...

FILE: colossalai/shardformer/shard/utils.py
  function set_tensors_to_none (line 6) | def set_tensors_to_none(model: nn.Module, exclude: Set[nn.Module] = set(...

FILE: colossalai/tensor/colo_parameter.py
  function is_no_hook_op (line 14) | def is_no_hook_op(func) -> bool:
  function filter_colo_parameters (line 18) | def filter_colo_parameters(*args, **kwargs):
  function replace_args (line 39) | def replace_args(args, kwargs, new_args):
  class ColoParameter (line 46) | class ColoParameter(ColoTensor, torch.nn.Parameter):
    method __new__ (line 49) | def __new__(cls, data: Optional[torch.Tensor] = None, requires_grad: b...
    method __torch_function__ (line 55) | def __torch_function__(cls, func, types, args=..., kwargs=None):
    method __deepcopy__ (line 72) | def __deepcopy__(self, memo):
    method __reduce_ex__ (line 82) | def __reduce_ex__(self, proto):

FILE: colossalai/tensor/colo_tensor.py
  function _get_my_nowrap_functions (line 15) | def _get_my_nowrap_functions() -> Set[Callable]:
  function _convert (line 25) | def _convert(output):
  function _convert_output (line 33) | def _convert_output(output, func):
  class ColoTensor (line 39) | class ColoTensor(torch.Tensor):
    method __new__ (line 51) | def __new__(cls, data: torch.Tensor) -> "ColoTensor":
    method __torch_function__ (line 66) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __deepcopy__ (line 94) | def __deepcopy__(self, memo):

FILE: colossalai/tensor/comm_spec.py
  function _all_gather (line 15) | def _all_gather(tensor, comm_spec):
  function _split (line 33) | def _split(tensor, comm_spec):
  function _all_to_all (line 47) | def _all_to_all(tensor, comm_spec):
  function _all_reduce (line 68) | def _all_reduce(tensor, comm_spec, async_op=False):
  function _mix_gather (line 81) | def _mix_gather(tensor, comm_spec):
  function _mix_split (line 166) | def _mix_split(tensor, comm_spec):
  class _ReduceGrad (line 208) | class _ReduceGrad(torch.autograd.Function):
    method symbolic (line 219) | def symbolic(graph, input_):
    method forward (line 223) | def forward(ctx, input_, comm_spec):
    method backward (line 228) | def backward(ctx, grad_output):
  class _ReduceInput (line 232) | class _ReduceInput(torch.autograd.Function):
    method symbolic (line 243) | def symbolic(graph, input_):
    method forward (line 247) | def forward(ctx, input_, comm_spec):
    method backward (line 251) | def backward(ctx, grad_output):
  class _SplitForwardGatherBackward (line 255) | class _SplitForwardGatherBackward(torch.autograd.Function):
    method symbolic (line 266) | def symbolic(graph, input_):
    method forward (line 270) | def forward(ctx, input_, comm_spec):
    method backward (line 275) | def backward(ctx, grad_output):
  class _GatherForwardSplitBackward (line 279) | class _GatherForwardSplitBackward(torch.autograd.Function):
    method symbolic (line 290) | def symbolic(graph, input_):
    method forward (line 294) | def forward(ctx, input_, comm_spec):
    method backward (line 299) | def backward(ctx, grad_output):
  class _AllToAll (line 303) | class _AllToAll(torch.autograd.Function):
    method symbolic (line 314) | def symbolic(graph, input_):
    method forward (line 318) | def forward(ctx, input_, comm_spec):
    method backward (line 331) | def backward(ctx, grad_outputs):
  class _MixGatherForwardMixSplitBackward (line 335) | class _MixGatherForwardMixSplitBackward(torch.autograd.Function):
    method symbolic (line 337) | def symbolic(graph, input_):
    method forward (line 341) | def forward(ctx, input_, comm_spec):
    method backward (line 346) | def backward(ctx, grad_output):
  function reduce_grad (line 350) | def reduce_grad(input_, comm_spec):
  function reduce_input (line 354) | def reduce_input(input_, comm_spec):
  function split_forward_gather_backward (line 358) | def split_forward_gather_backward(input_, comm_spec):
  function gather_forward_split_backward (line 362) | def gather_forward_split_backward(input_, comm_spec):
  function all_to_all (line 366) | def all_to_all(input_, comm_spec):
  function mixgather_forward_split_backward (line 370) | def mixgather_forward_split_backward(input_, comm_spec):
  class CollectiveCommPattern (line 374) | class CollectiveCommPattern(Enum):
  class CommSpec (line 383) | class CommSpec:
    method __init__ (line 400) | def __init__(
    method __repr__ (line 428) | def __repr__(self):
    method get_comm_cost (line 458) | def get_comm_cost(self):
    method covert_spec_to_action (line 506) | def covert_spec_to_action(self, tensor):

FILE: colossalai/tensor/d_tensor/api.py
  function get_shard_dim_1d (line 22) | def get_shard_dim_1d(p: torch.Tensor):
  function clear_layout_converter (line 36) | def clear_layout_converter():
  function is_distributed_tensor (line 41) | def is_distributed_tensor(tensor: torch.Tensor) -> bool:
  function is_sharded (line 54) | def is_sharded(dtensor: torch.Tensor) -> bool:
  function _hijack_detach_and_clone (line 68) | def _hijack_detach_and_clone(dtensor: torch.Tensor) -> torch.Tensor:
  function _construct_default_sharding_spec (line 97) | def _construct_default_sharding_spec(
  function _apply_layout (line 112) | def _apply_layout(tensor, layout):
  function distribute_tensor (line 125) | def distribute_tensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sha...
  function init_as_dtensor (line 149) | def init_as_dtensor(
  function redistribute (line 164) | def redistribute(dtensor: torch.Tensor, device_mesh: DeviceMesh, shardin...
  function to_global (line 183) | def to_global(dtensor: torch.Tensor) -> torch.Tensor:
  function shard_rowwise (line 206) | def shard_rowwise(
  function shard_colwise (line 238) | def shard_colwise(tensor: torch.Tensor, group_or_device_mesh: Union[Proc...
  function sharded_tensor_to_param (line 266) | def sharded_tensor_to_param(dtensor: torch.Tensor, requires_grad: bool =...
  function sharded_tensor_to_existing_param (line 277) | def sharded_tensor_to_existing_param(dtensor: torch.Tensor, param: torch...
  function compute_global_numel (line 285) | def compute_global_numel(dtensor: torch.Tensor) -> int:
  function get_layout (line 300) | def get_layout(dtensor: torch.Tensor) -> Layout:
  function get_global_shape (line 315) | def get_global_shape(dtensor: torch.Tensor) -> torch.Size:
  function get_device_mesh (line 329) | def get_device_mesh(dtensor: torch.Tensor) -> DeviceMesh:
  function get_sharding_spec (line 343) | def get_sharding_spec(dtensor: torch.Tensor) -> ShardingSpec:
  function is_customized_distributed_tensor (line 362) | def is_customized_distributed_tensor(tensor: torch.Tensor):
  function _hijack_detach_and_clone_for_customized_distributed_tensor (line 375) | def _hijack_detach_and_clone_for_customized_distributed_tensor(dtensor: ...
  function distribute_tensor_with_customization (line 406) | def distribute_tensor_with_customization(tensor: torch.Tensor, shard_fn,...
  function init_tensor_as_customization_distributed (line 455) | def init_tensor_as_customization_distributed(tensor: torch.Tensor, shard...
  function to_global_for_customized_distributed_tensor (line 502) | def to_global_for_customized_distributed_tensor(dtensor: torch.Tensor) -...
  function customized_distributed_tensor_to_param (line 516) | def customized_distributed_tensor_to_param(dtensor: torch.Tensor, requir...
  function customized_distributed_tensor_to_existing_param (line 531) | def customized_distributed_tensor_to_existing_param(dtensor: torch.Tenso...

FILE: colossalai/tensor/d_tensor/comm_spec.py
  class CollectiveCommPattern (line 14) | class CollectiveCommPattern(Enum):
  class CommSpec (line 23) | class CommSpec:
    method __init__ (line 38) | def __init__(
    method __repr__ (line 52) | def __repr__(self):
    method covert_spec_to_action (line 78) | def covert_spec_to_action(self, tensor):
  function _all_gather (line 93) | def _all_gather(tensor: torch.Tensor, comm_spec: CommSpec):
  function _split (line 107) | def _split(tensor: torch.Tensor, comm_spec: CommSpec):
  function _all_to_all (line 119) | def _all_to_all(tensor: torch.Tensor, comm_spec: CommSpec):
  function _all_reduce (line 138) | def _all_reduce(tensor: torch.Tensor, comm_spec: CommSpec, async_op: boo...
  class _ReduceGrad (line 149) | class _ReduceGrad(torch.autograd.Function):
    method symbolic (line 160) | def symbolic(graph, input_):
    method forward (line 164) | def forward(ctx, input_, comm_spec):
    method backward (line 169) | def backward(ctx, grad_output):
  class _ReduceInput (line 173) | class _ReduceInput(torch.autograd.Function):
    method symbolic (line 184) | def symbolic(graph, input_):
    method forward (line 188) | def forward(ctx, input_, comm_spec):
    method backward (line 192) | def backward(ctx, grad_output):
  class _SplitForwardGatherBackward (line 196) | class _SplitForwardGatherBackward(torch.autograd.Function):
    method symbolic (line 207) | def symbolic(graph, input_):
    method forward (line 211) | def forward(ctx, input_, comm_spec):
    method backward (line 216) | def backward(ctx, grad_output):
  class _GatherForwardSplitBackward (line 220) | class _GatherForwardSplitBackward(torch.autograd.Function):
    method symbolic (line 231) | def symbolic(graph, input_):
    method forward (line 235) | def forward(ctx, input_, comm_spec):
    method backward (line 240) | def backward(ctx, grad_output):
  class _AllToAll (line 244) | class _AllToAll(torch.autograd.Function):
    method symbolic (line 255) | def symbolic(graph, input_):
    method forward (line 259) | def forward(ctx, input_, comm_spec):
    method backward (line 272) | def backward(ctx, grad_outputs):
  function reduce_grad (line 276) | def reduce_grad(input_, comm_spec):
  function reduce_input (line 280) | def reduce_input(input_, comm_spec):
  function split_forward_gather_backward (line 284) | def split_forward_gather_backward(input_, comm_spec):
  function gather_forward_split_backward (line 288) | def gather_forward_split_backward(input_, comm_spec):
  function all_to_all (line 292) | def all_to_all(input_, comm_spec):

FILE: colossalai/tensor/d_tensor/layout.py
  class Layout (line 12) | class Layout:
    method __init__ (line 21) | def __init__(self, device_mesh: DeviceMesh, sharding_spec: ShardingSpe...
    method __hash__ (line 27) | def __hash__(self) -> int:
    method get_sharded_shape_per_device (line 30) | def get_sharded_shape_per_device(self):
    method _sanity_check (line 41) | def _sanity_check(self):

FILE: colossalai/tensor/d_tensor/layout_converter.py
  class LayoutConverterOptions (line 23) | class LayoutConverterOptions:
  function set_layout_converting_options (line 31) | def set_layout_converting_options(options: LayoutConverterOptions):
  class LayoutConverter (line 39) | class LayoutConverter(metaclass=SingletonMeta):
    method __init__ (line 44) | def __init__(self):
    method options (line 50) | def options(self):
    method options (line 54) | def options(self, options_: LayoutConverterOptions):
    method forward_only (line 59) | def forward_only(self):
    method forward_only (line 63) | def forward_only(self, value):
    method all_gather_transform_layouts (line 67) | def all_gather_transform_layouts(self, source_layout: Layout) -> Dict[...
    method all_to_all_transform_layout (line 149) | def all_to_all_transform_layout(self, source_layout: Layout) -> Dict[L...
    method shard_transform_layout (line 267) | def shard_transform_layout(self, source_layout: Layout) -> Dict[Layout...
    method get_all_one_step_transform_spec (line 361) | def get_all_one_step_transform_spec(self, source_layout: Layout) -> Di...
    method layout_converting (line 382) | def layout_converting(
    method get_total_comm_cost (line 537) | def get_total_comm_cost(self, source_layout: Layout, target_layout: La...
    method apply (line 549) | def apply(self, tensor: torch.Tensor, source_layout: Layout, target_la...

FILE: colossalai/tensor/d_tensor/misc.py
  class LayoutException (line 1) | class LayoutException(Exception):
  class DuplicatedShardingDimensionError (line 5) | class DuplicatedShardingDimensionError(LayoutException):
  class ShardingNotDivisibleError (line 9) | class ShardingNotDivisibleError(LayoutException):
  class ShardingOutOfIndexError (line 13) | class ShardingOutOfIndexError(LayoutException):

FILE: colossalai/tensor/d_tensor/sharding_spec.py
  class DimSpec (line 14) | class DimSpec:
    method __init__ (line 27) | def __init__(self, shard_list):
    method __eq__ (line 31) | def __eq__(self, other):
    method __repr__ (line 34) | def __repr__(self):
    method difference_dict (line 43) | def difference_dict(self):
    method dim_diff (line 56) | def dim_diff(self, other):
    method _build_difference_2d_dict (line 78) | def _build_difference_2d_dict(cls):
    method _convert_str_to_shard_list (line 138) | def _convert_str_to_shard_list(str_spec):
  class ShardingSpec (line 156) | class ShardingSpec:
    method __init__ (line 168) | def __init__(
    method _sanity_check (line 191) | def _sanity_check(self):
    method __repr__ (line 202) | def __repr__(self):
    method convert_dict_to_shard_sequence (line 207) | def convert_dict_to_shard_sequence(self):
    method convert_shard_sequence_to_dict (line 216) | def convert_shard_sequence_to_dict(self):
    method spec_diff (line 228) | def spec_diff(self, other):

FILE: colossalai/tensor/d_tensor/utils.py
  function get_comm_cost (line 9) | def get_comm_cost(layout: Layout, comm_spec: CommSpec, forward_only: boo...

FILE: colossalai/tensor/moe_tensor/api.py
  function is_moe_tensor (line 10) | def is_moe_tensor(tensor: torch.Tensor) -> bool:
  function set_moe_tensor_ep_group (line 23) | def set_moe_tensor_ep_group(tensor: torch.Tensor, ep_group: ProcessGroup...
  function get_moe_info (line 35) | def get_moe_info(ep_size: int, dp_size: int, pp_size: int, ep_inside: bo...
  function get_ep_group (line 51) | def get_ep_group(tensor: torch.Tensor) -> ProcessGroup:
  function get_ep_size (line 64) | def get_ep_size(tensor: torch.Tensor) -> int:
  function get_dp_size (line 78) | def get_dp_size(tensor: torch.Tensor) -> int:
  function get_dp_group (line 91) | def get_dp_group(tensor: torch.Tensor) -> ProcessGroup:
  function get_ep_rank (line 104) | def get_ep_rank(tensor: torch.Tensor) -> int:
  function get_dp_rank (line 117) | def get_dp_rank(tensor: torch.Tensor) -> int:
  function get_ep_group_ranks (line 130) | def get_ep_group_ranks(tensor: torch.Tensor) -> List[int]:
  function get_dp_group_ranks (line 143) | def get_dp_group_ranks(tensor: torch.Tensor) -> List[int]:

FILE: colossalai/tensor/moe_tensor/moe_info.py
  class MoeParallelInfo (line 4) | class MoeParallelInfo:
    method __init__ (line 7) | def __init__(self, ep_inside: bool, ep_size: int, dp_size: int, pp_siz...

FILE: colossalai/tensor/padded_tensor/api.py
  function _hijack_detach_and_clone (line 4) | def _hijack_detach_and_clone(ptensor: torch.Tensor) -> torch.Tensor:
  function _hijack_back_detach_and_clone (line 37) | def _hijack_back_detach_and_clone(ptensor: torch.Tensor) -> torch.Tensor:
  function is_padded_tensor (line 56) | def is_padded_tensor(tensor: torch.Tensor) -> bool:
  function to_padded_tensor (line 69) | def to_padded_tensor(
  function to_unpadded_tensor (line 101) | def to_unpadded_tensor(ptensor: torch.Tensor):
  function init_as_padded_tensor (line 118) | def init_as_padded_tensor(tensor: torch.Tensor, current_length: int, ori...

FILE: colossalai/tensor/param_op_hook.py
  class ColoParamOpHook (line 9) | class ColoParamOpHook(ABC):
    method pre_forward (line 18) | def pre_forward(self, params: List[torch.Tensor]) -> None:
    method post_forward (line 22) | def post_forward(self, params: List[torch.Tensor]) -> None:
    method pre_backward (line 26) | def pre_backward(self, params: List[torch.Tensor]) -> None:
    method post_backward (line 30) | def post_backward(self, params: List[torch.Tensor]) -> None:
    method rewrite_op (line 33) | def rewrite_op(self, func) -> Any:
  class ColoParamOpHookManager (line 37) | class ColoParamOpHookManager:
    method use_hooks (line 47) | def use_hooks(*hooks: ColoParamOpHook):
    method _trigger_pre_forward (line 65) | def _trigger_pre_forward(params: List[torch.Tensor]) -> None:
    method _trigger_post_forward (line 70) | def _trigger_post_forward(params: List[torch.Tensor]) -> None:
    method _trigger_pre_backward (line 75) | def _trigger_pre_backward(params: List[torch.Tensor]) -> None:
    method _trigger_post_backward (line 80) | def _trigger_post_backward(params: List[torch.Tensor]) -> None:
    method pre_op (line 85) | def pre_op(params: List[torch.Tensor], *args: Any) -> list:
    method post_op (line 96) | def post_op(params: List[torch.Tensor], arg: Any) -> Any:
    method has_hook (line 104) | def has_hook() -> bool:
    method rewrite_op (line 108) | def rewrite_op(func) -> Any:
  class PreFwdPostBwd (line 114) | class PreFwdPostBwd(torch.autograd.Function):
    method forward (line 116) | def forward(ctx, params, *args):
    method backward (line 121) | def backward(ctx, *grads):
  class PostFwdPreBwd (line 126) | class PostFwdPreBwd(torch.autograd.Function):
    method forward (line 128) | def forward(ctx, params, *args):
    method backward (line 133) | def backward(ctx, *grads):
  function _is_grad_tensor (line 138) | def _is_grad_tensor(obj) -> bool:
  function _flatten_grad_args (line 145) | def _flatten_grad_args(args) -> Tuple[list, list, List[bool], TreeSpec]:
  function _merge_args (line 160) | def _merge_args(grad_args, other_args, grad_flags, spec):

FILE: colossalai/tensor/shape_consistency.py
  class ShapeConsistencyOptions (line 20) | class ShapeConsistencyOptions:
  function to_global (line 28) | def to_global(distributed_tensor: torch.Tensor, sharding_spec: ShardingS...
  function set_shape_consistency_options (line 38) | def set_shape_consistency_options(options: ShapeConsistencyOptions):
  class ShapeConsistencyManager (line 46) | class ShapeConsistencyManager(metaclass=SingletonMeta):
    method __init__ (line 47) | def __init__(self):
    method options (line 55) | def options(self):
    method options (line 59) | def options(self, options_: ShapeConsistencyOptions):
    method forward_only (line 64) | def forward_only(self):
    method forward_only (line 68) | def forward_only(self, value):
    method get_all_all_gather_spec (line 72) | def get_all_all_gather_spec(
    method get_all_all_to_all_spec (line 146) | def get_all_all_to_all_spec(
    method get_all_shard_spec (line 257) | def get_all_shard_spec(self, source_spec: ShardingSpec, orig_cost_dict):
    method get_all_mix_gather_spec (line 340) | def get_all_mix_gather_spec(
    method get_all_one_step_transform_spec (line 396) | def get_all_one_step_transform_spec(self, source_spec: ShardingSpec, o...
    method mem_cost (line 418) | def mem_cost(self, comm_action_sequence: List[CommSpec]) -> TrainCycle...
    method shape_consistency (line 581) | def shape_consistency(
    method apply (line 689) | def apply(self, tensor_with_sharding_spec: torch.Tensor, target_spec: ...
    method apply_for_autoparallel_runtime (line 754) | def apply_for_autoparallel_runtime(self, tensor, source_spec, target_s...

FILE: colossalai/tensor/sharding_spec.py
  class _DimSpec (line 18) | class _DimSpec:
    method __init__ (line 31) | def __init__(self, shard_list):
    method __eq__ (line 35) | def __eq__(self, other):
    method __repr__ (line 38) | def __repr__(self):
    method difference_dict (line 47) | def difference_dict(self):
    method difference (line 60) | def difference(self, other):
    method _build_difference_2d_dict (line 82) | def _build_difference_2d_dict(cls):
    method _convert_str_to_shard_list (line 142) | def _convert_str_to_shard_list(str_spec):
  class ShardingSpecException (line 160) | class ShardingSpecException(Exception):
  class ShardingOutOfIndexError (line 164) | class ShardingOutOfIndexError(ShardingSpecException):
  class DuplicatedShardingDimensionError (line 168) | class DuplicatedShardingDimensionError(ShardingSpecException):
  class ShardingNotDivisibleError (line 172) | class ShardingNotDivisibleError(ShardingSpecException):
  class ShardingSpec (line 176) | class ShardingSpec:
    method __init__ (line 190) | def __init__(
    method __repr__ (line 215) | def __repr__(self):
    method _sanity_check (line 221) | def _sanity_check(self):
    method convert_dict_to_shard_sequence (line 253) | def convert_dict_to_shard_sequence(self):
    method convert_shard_sequence_to_dict (line 262) | def convert_shard_sequence_to_dict(self):
    method sharding_sequence_difference (line 274) | def sharding_sequence_difference(self, other):
    method get_sharded_shape_per_device (line 309) | def get_sharded_shape_per_device(self):

FILE: colossalai/tensor/utils.py
  function all_gather_simulator (line 9) | def all_gather_simulator(target_pair):
  function all_to_all_simulator (line 29) | def all_to_all_simulator(f_target_pair, b_target_pair):
  function shard_simulator (line 63) | def shard_simulator(target_pair, legal_sharding_dims):
  function mix_gather_simulator (line 93) | def mix_gather_simulator(f_target_pair, b_target_pair):
  function named_params_with_colotensor (line 119) | def named_params_with_colotensor(
  function _convert_tensor (line 165) | def _convert_tensor(tensor: torch.Tensor) -> ColoTensor:
  function convert_parameter (line 169) | def convert_parameter(module: torch.nn.Module, param_name: str):
  function convert_dim_partition_dict (line 196) | def convert_dim_partition_dict(dim_size: int, dim_partition_dict: Dict[i...
  function merge_same_dim_mesh_list (line 210) | def merge_same_dim_mesh_list(dim_size: int, dim_partition_dict: Dict[int...

FILE: colossalai/testing/comparison.py
  function assert_equal (line 11) | def assert_equal(a: Tensor, b: Tensor):
  function assert_not_equal (line 15) | def assert_not_equal(a: Tensor, b: Tensor):
  function assert_close_loose (line 19) | def assert_close_loose(a: Tensor, b: Tensor, rtol: float = 1e-3, atol: f...
  function assert_equal_in_group (line 28) | def assert_equal_in_group(tensor: Tensor, process_group: ProcessGroup = ...
  function check_state_dict_equal (line 41) | def check_state_dict_equal(
  function check_state_dict_equal_pytree (line 84) | def check_state_dict_equal_pytree(d1: OrderedDict, d2: OrderedDict, igno...
  function assert_hf_output_close (line 99) | def assert_hf_output_close(

FILE: colossalai/testing/pytest_wrapper.py
  function run_on_environment_flag (line 10) | def run_on_environment_flag(name: str):

FILE: colossalai/testing/random.py
  function seed_all (line 7) | def seed_all(seed, cuda_deterministic=False):

FILE: colossalai/testing/utils.py
  function parameterize (line 16) | def parameterize(argument: str, values: List[Any]) -> Callable:
  function rerun_on_exception (line 71) | def rerun_on_exception(exception_type: Exception = Exception, pattern: s...
  function rerun_if_address_is_in_use (line 157) | def rerun_if_address_is_in_use():
  function skip_if_not_enough_gpus (line 183) | def skip_if_not_enough_gpus(min_gpus: int):
  function free_port (line 212) | def free_port() -> int:
  function spawn (line 229) | def spawn(func, nprocs=1, **kwargs):
  function clear_cache_before_run (line 255) | def clear_cache_before_run():
  class DummyDataloader (line 280) | class DummyDataloader:
    method __init__ (line 281) | def __init__(self, data_gen_fn: Callable, length: int = 10):
    method __iter__ (line 286) | def __iter__(self):
    method __next__ (line 290) | def __next__(self):
    method __len__ (line 297) | def __len__(self):

FILE: colossalai/utils/common.py
  function get_current_device (line 17) | def get_current_device():
  function ensure_path_exists (line 24) | def ensure_path_exists(filename: str):
  function conditional_context (line 32) | def conditional_context(context_manager, enable=True):
  function is_ddp_ignored (line 40) | def is_ddp_ignored(p):
  function disposable (line 44) | def disposable(func: Callable) -> Callable:
  function free_storage (line 57) | def free_storage(data: torch.Tensor) -> None:
  function _cast_float (line 66) | def _cast_float(args, dtype: torch.dtype):
  function set_seed (line 76) | def set_seed(seed):
  function get_non_persistent_buffers_set (line 82) | def get_non_persistent_buffers_set(

FILE: colossalai/utils/memory.py
  function _get_cpu_memory_info (line 14) | def _get_cpu_memory_info():
  function colo_device_memory_capacity (line 47) | def colo_device_memory_capacity(device: torch.device) -> int:
  function colo_get_cpu_memory_capacity (line 66) | def colo_get_cpu_memory_capacity() -> int:

FILE: colossalai/utils/model/utils.py
  function substitute_init_recursively (line 11) | def substitute_init_recursively(cls, func, visited: set):
  function call_to_str (line 19) | def call_to_str(base, *args, **kwargs):
  class InsertPostInitMethodToModuleSubClasses (line 41) | class InsertPostInitMethodToModuleSubClasses(object):
    method __init__ (line 42) | def __init__(self, default_dtype: Optional[torch.dtype] = None):
    method __enter__ (line 46) | def __enter__(self):
    method __exit__ (line 82) | def __exit__(self, exc_type, exc_value, traceback):
    method _post_init_method (line 105) | def _post_init_method(self, module, *args, **kwargs):
    method _pre_context_exec (line 108) | def _pre_context_exec(self):
    method _post_context_exec (line 111) | def _post_context_exec(self):

FILE: colossalai/utils/multi_tensor_apply/multi_tensor_apply.py
  class MultiTensorApply (line 4) | class MultiTensorApply(object):
    method __init__ (line 15) | def __init__(self, chunk_size):
    method check_avail (line 23) | def check_avail(self):
    method __call__ (line 32) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args):

FILE: colossalai/utils/rank_recorder/rank_recorder.py
  class Event (line 19) | class Event:
    method __init__ (line 20) | def __init__(self, start: int, end: int, name: str, rank: int) -> None:
  class Recorder (line 27) | class Recorder:
    method __init__ (line 28) | def __init__(self) -> None:
    method start (line 46) | def start(self, name: str, rank: int):
    method end (line 52) | def end(self):
    method get_history (line 63) | def get_history(self):
    method __call__ (line 66) | def __call__(self, name: str, rank: str):
    method __enter__ (line 71) | def __enter__(self):
    method __exit__ (line 76) | def __exit__(self, *args):
    method dump_record (line 79) | def dump_record(self):
    method merge_recode (line 96) | def merge_recode(self):
    method visualize_record (line 129) | def visualize_record(self):
    method exit_worker (line 157) | def exit_worker(self):

FILE: colossalai/utils/safetensors.py
  function _object_to_tensor (line 24) | def _object_to_tensor(obj, device):
  function _tensor_to_object (line 35) | def _tensor_to_object(tensor, tensor_size):
  class TensorInfo (line 42) | class TensorInfo:
  class PreparedData (line 49) | class PreparedData:
  function _cast_to_tensor (line 55) | def _cast_to_tensor(obj):
  function _cast_to_object (line 61) | def _cast_to_object(tensor: torch.Tensor):
  function _flatten_optim_state_dict (line 65) | def _flatten_optim_state_dict(state_dict: dict, seperator: str = ".") ->...
  function _unflatten_optim_state_dict (line 93) | def _unflatten_optim_state_dict(flat_dict: dict, metadata: Optional[dict...
  function prepare (line 122) | def prepare(
  function save (line 162) | def save(path: str, state_dict: Dict[str, torch.Tensor], metadata: Optio...
  function save_nested (line 174) | def save_nested(path: str, state_dict: Dict[str, torch.Tensor]) -> None:
  function move_and_save (line 179) | def move_and_save(
  function load_flat (line 200) | def load_flat(checkpoint_path, seperator: str = "."):

FILE: colossalai/utils/tensor_detector/tensor_detector.py
  class TensorDetector (line 13) | class TensorDetector:
    method __init__ (line 14) | def __init__(
    method get_tensor_mem (line 47) | def get_tensor_mem(self, tensor):
    method mem_format (line 55) | def mem_format(self, real_memory_size):
    method collect_tensors_state (line 65) | def collect_tensors_state(self):
    method print_tensors_state (line 110) | def print_tensors_state(self):
    method detect (line 170) | def detect(self, include_cpu=False):
    method close (line 180) | def close(self):

FILE: colossalai/utils/timer.py
  class Timer (line 9) | class Timer:
    method __init__ (line 12) | def __init__(self):
    method has_history (line 19) | def has_history(self):
    method current_time (line 23) | def current_time(self) -> float:
    method start (line 27) | def start(self):
    method lap (line 34) | def lap(self):
    method stop (line 38) | def stop(self, keep_in_history: bool = False):
    method get_history_mean (line 56) | def get_history_mean(self):
    method get_history_sum (line 64) | def get_history_sum(self):
    method get_elapsed_time (line 72) | def get_elapsed_time(self):
    method reset (line 84) | def reset(self):
  class MultiTimer (line 91) | class MultiTimer:
    method __init__ (line 98) | def __init__(self, on: bool = True):
    method start (line 102) | def start(self, name: str):
    method stop (line 113) | def stop(self, name: str, keep_in_history: bool):
    method get_timer (line 125) | def get_timer(self, name):
    method reset (line 135) | def reset(self, name=None):
    method is_on (line 149) | def is_on(self):
    method set_status (line 152) | def set_status(self, mode: bool):
    method __iter__ (line 155) | def __iter__(self) -> Tuple[str, Timer]:

FILE: colossalai/zero/gemini/chunk/chunk.py
  class TensorState (line 13) | class TensorState(Enum):
  class TensorInfo (line 35) | class TensorInfo:
  class ChunkFullError (line 41) | class ChunkFullError(Exception):
  function is_storage_empty (line 45) | def is_storage_empty(tensor: torch.Tensor) -> bool:
  function free_storage (line 49) | def free_storage(tensor: torch.Tensor) -> None:
  function alloc_storage (line 54) | def alloc_storage(tensor: torch.Tensor) -> None:
  class Chunk (line 59) | class Chunk:
    method __init__ (line 62) | def __init__(
    method memory_usage (line 173) | def memory_usage(self) -> Dict[str, int]:
    method device_type (line 194) | def device_type(self) -> str:
    method payload (line 203) | def payload(self) -> torch.Tensor:
    method payload_mem (line 215) | def payload_mem(self) -> int:
    method can_move (line 225) | def can_move(self) -> bool:
    method can_release (line 229) | def can_release(self) -> bool:
    method can_reduce (line 239) | def can_reduce(self):
    method has_inf_or_nan (line 243) | def has_inf_or_nan(self) -> bool:
    method set_l2_norm (line 253) | def set_l2_norm(self) -> None:
    method append_tensor (line 264) | def append_tensor(self, tensor: torch.Tensor):
    method close_chunk (line 290) | def close_chunk(self):
    method shard_move (line 321) | def shard_move(self, device: torch.device, force_copy: bool = False, n...
    method access_chunk (line 365) | def access_chunk(self, async_access: bool = False) -> Optional[dist.Wo...
    method release_chunk (line 375) | def release_chunk(self):
    method reduce (line 383) | def reduce(self, async_op: bool = False):
    method wait_async_reduce (line 421) | def wait_async_reduce(self) -> None:
    method tensor_trans_state (line 426) | def tensor_trans_state(self, tensor: torch.Tensor, tensor_state: Tenso...
    method copy_tensor_to_chunk_slice (line 445) | def copy_tensor_to_chunk_slice(
    method add_tensor_to_chunk_slice (line 463) | def add_tensor_to_chunk_slice(self, tensor: torch.Tensor, data_slice: ...
    method get_valid_length (line 478) | def get_valid_length(self) -> int:
    method init_pair (line 485) | def init_pair(self, friend_chunk: "Chunk") -> None:
    method optim_update (line 494) | def optim_update(self) -> None:
    method get_tensors (line 516) | def get_tensors(self) -> List[torch.Tensor]:
    method __gather (line 519) | def __gather(self, async_op: bool = False) -> Optional[dist.Work]:
    method __scatter (line 544) | def __scatter(self):
    method __paired_shard_move (line 559) | def __paired_shard_move(self, non_blocking=False):
    method __update_tensors_ptr (line 574) | def __update_tensors_ptr(self) -> None:
    method __update_one_tensor_info (line 582) | def __update_one_tensor_info(self, tensor_info: TensorInfo, next_state...
    method __update_tensors_state (line 587) | def __update_tensors_state(self, next_state: TensorState, prev_state: ...
    method __hash__ (line 592) | def __hash__(self) -> int:
    method __eq__ (line 595) | def __eq__(self, __o: object) -> bool:
    method __repr__ (line 598) | def __repr__(self, detailed: bool = True):
    method init_grad_chunk (line 640) | def init_grad_chunk(self) -> "Chunk":

FILE: colossalai/zero/gemini/chunk/manager.py
  class ChunkManager (line 14) | class ChunkManager:
    method __init__ (line 23) | def __init__(
    method register_tensor (line 50) | def register_tensor(
    method close_all_groups (line 115) | def close_all_groups(self):
    method access_chunk (line 120) | def access_chunk(self, chunk: Chunk, async_access: bool = False) -> Op...
    method release_chunk (line 131) | def release_chunk(self, chunk: Chunk) -> None:
    method move_chunk (line 140) | def move_chunk(self, chunk: Chunk, device: torch.device, force_copy: b...
    method trans_tensor_state (line 148) | def trans_tensor_state(self, tensor: torch.Tensor, state: TensorState)...
    method reduce_chunk (line 153) | def reduce_chunk(self, chunk: Chunk, async_op: bool = False) -> bool:
    method fake_release_chunk (line 163) | def fake_release_chunk(self, chunk: Chunk) -> None:
    method copy_tensor_to_chunk_slice (line 171) | def copy_tensor_to_chunk_slice(self, tensor: torch.Tensor, data: torch...
    method get_chunk (line 182) | def get_chunk(self, tensor: torch.Tensor) -> Chunk:
    method get_cuda_movable_chunks (line 191) | def get_cuda_movable_chunks(self) -> List[Chunk]:
    method get_chunks (line 202) | def get_chunks(self, tensors: Iterable[torch.Tensor]) -> Tuple[Chunk, ...
    method add_extern_static_tensor (line 216) | def add_extern_static_tensor(self, tensor: torch.Tensor) -> None:
    method __repr__ (line 231) | def __repr__(self) -> str:
    method __get_chunk_group (line 242) | def __get_chunk_group(self, group_name: str) -> Deque[Chunk]:
    method __close_one_chunk (line 248) | def __close_one_chunk(self, chunk: Chunk):
    method __sub_memory_usage (line 253) | def __sub_memory_usage(self, usage: Dict[str, int]):
    method __add_memory_usage (line 257) | def __add_memory_usage(self, usage: Dict[str, int]):
    method __add_accessed_chunk (line 261) | def __add_accessed_chunk(self, chunk: Chunk, async_access: bool = Fals...
    method __sub_accessed_chunk (line 267) | def __sub_accessed_chunk(self, chunk: Chunk):
    method init_grad_chunk (line 272) | def init_grad_chunk(self, chunk: Chunk) -> Chunk:
    method rearrange_accumulated_grad_chunk (line 282) | def rearrange_accumulated_grad_chunk(self, chunk: Chunk) -> Chunk:

FILE: colossalai/zero/gemini/chunk/search_utils.py
  function _filter_exlarge_params (line 14) | def _filter_exlarge_params(model: nn.Module, size_dict: Dict[int, List[i...
  function _get_unused_byte (line 41) | def _get_unused_byte(size_list: List[int], chunk_size: int) -> int:
  function _tensor_numel (line 63) | def _tensor_numel(local_param: ColoParameter) -> int:
  function classify_params_by_dp_degree (line 79) | def classify_params_by_dp_degree(
  function search_chunk_configuration (line 108) | def search_chunk_configuration(

FILE: colossalai/zero/gemini/chunk/utils.py
  function safe_div (line 12) | def safe_div(a, b):
  function init_chunk_manager (line 18) | def init_chunk_manager(

FILE: colossalai/zero/gemini/gemini_ddp.py
  class GeminiDDP (line 56) | class GeminiDDP(ModelWrapper):
    method __init__ (line 75) | def __init__(
    method remove_hooks (line 212) | def remove_hooks(self):
    method __del__ (line 221) | def __del__(self):
    method parameters (line 224) | def parameters(self, recurse: bool = True):
    method named_parameters (line 227) | def named_parameters(self, prefix: str = "", recurse: bool = True):
    method named_buffers (line 230) | def named_buffers(self, prefix: str = "", recurse: bool = True):
    method named_children (line 233) | def named_children(self):
    method named_modules (line 236) | def named_modules(
    method set_params_to_ignore (line 242) | def set_params_to_ignore(params_to_ignore: Iterable[torch.Tensor]) -> ...
    method _post_forward (line 260) | def _post_forward(self):
    method forward (line 274) | def forward(self, *args, **kwargs):
    method _inference_forward (line 295) | def _inference_forward(self, *args, **kwargs):
    method _setup_grads_ptr (line 312) | def _setup_grads_ptr(self):
    method _pre_backward (line 318) | def _pre_backward(self):
    method _post_backward (line 325) | def _post_backward(self):
    method backward (line 348) | def backward(self, loss: torch.Tensor):
    method backward_by_grad (line 354) | def backward_by_grad(self, tensor, grad, inputs: torch.Tensor = None, ...
    method grad_handle (line 358) | def grad_handle(
    method zero_grad (line 438) | def zero_grad(self, set_to_none: bool = False) -> None:
    method set_chunk_grad_device (line 441) | def set_chunk_grad_device(self, chunk: Chunk, device: torch.device) ->...
    method state_dict (line 445) | def state_dict(self, destination=None, prefix="", keep_vars=False, onl...
    method _get_chunk_to_save_data (line 472) | def _get_chunk_to_save_data(self, chunk: Chunk, only_rank_0: bool) -> ...
    method _get_param_to_save_data (line 516) | def _get_param_to_save_data(self, param_list: List[torch.nn.Parameter]...
    method _save_to_state_dict (line 534) | def _save_to_state_dict(self, destination, prefix, keep_vars, only_ran...
    method load_state_dict (line 587) | def load_state_dict(self, state_dict: "OrderedDict[str, torch.Tensor]"...
    method _load_from_state_dict (line 644) | def _load_from_state_dict(
    method _init_chunks (line 819) | def _init_chunks(self, param_order, strict_ddp_mode: bool, cpu_offload...
    method _cast_buffers (line 876) | def _cast_buffers(self):
    method _preprocess_param (line 885) | def _preprocess_param(self, p: Union[nn.Parameter, ColoParameter, "Laz...
    method state_dict_shard (line 900) | def state_dict_shard(

FILE: colossalai/zero/gemini/gemini_hook.py
  class TrainingPhase (line 15) | class TrainingPhase(Enum):
  class GeminiZeROHook (line 20) | class GeminiZeROHook(ColoParamOpHook):
    method __init__ (line 21) | def __init__(self, gemini_manager: GeminiManager) -> None:
    method pre_op (line 27) | def pre_op(self, params):
    method post_op (line 76) | def post_op(self, params):
    method pre_forward (line 86) | def pre_forward(self, params: List[torch.Tensor]) -> None:
    method post_forward (line 89) | def post_forward(self, params: List[torch.Tensor]) -> None:
    method pre_backward (line 92) | def pre_backward(self, params: List[torch.Tensor]) -> None:
    method post_backward (line 95) | def post_backward(self, params: List[torch.Tensor]) -> None:
    method switch_training_phase (line 99) | def switch_training_phase(self, training_phase: TrainingPhase = Traini...

FILE: colossalai/zero/gemini/gemini_mgr.py
  class GeminiManager (line 13) | class GeminiManager:
    method __init__ (line 28) | def __init__(
    method reset_attributes (line 59) | def reset_attributes(self):
    method need_warmup (line 68) | def need_warmup(self) -> bool:
    method is_warmup (line 71) | def is_warmup(self):
    method memstats (line 74) | def memstats(self):
    method pre_iter (line 87) | def pre_iter(self, *args):
    method post_iter (line 91) | def post_iter(self):
    method adjust_layout (line 98) | def adjust_layout(self, chunks: Tuple[Chunk, ...], record_anyway: bool...
    method wait_chunks (line 123) | def wait_chunks(self, chunks: Iterable[Chunk]) -> Tuple[Chunk]:
    method add_work (line 133) | def add_work(self, chunk: Chunk, work: dist.Work):
    method _get_layout_info (line 139) | def _get_layout_info(self, compute_idx: int, warmup: bool, chunks: Tup...
    method _record_warmup_chunks_order (line 157) | def _record_warmup_chunks_order(self, chunks: Tuple[Chunk, ...], recor...
    method sample_overall_data (line 162) | def sample_overall_data(self):
    method record_model_data_volume (line 166) | def record_model_data_volume(self):
    method chunk_manager (line 171) | def chunk_manager(self):
    method cuda_margin_mem (line 175) | def cuda_margin_mem(self) -> Optional[float]:
    method placement_policy (line 181) | def placement_policy(self) -> PlacementPolicy:
    method compute_list (line 185) | def compute_list(self) -> List[Tuple[Chunk, ...]]:
    method compute_idx (line 189) | def compute_idx(self) -> int:
    method async_works (line 193) | def async_works(self) -> Dict[Chunk, dist.Work]:
    method is_cuda_margin_mem_avail (line 197) | def is_cuda_margin_mem_avail(self) -> bool:
    method setup_grads_device (line 200) | def setup_grads_device(

FILE: colossalai/zero/gemini/gemini_optimizer.py
  class GeminiFP16MixedPrecisionMixin (line 46) | class GeminiFP16MixedPrecisionMixin(FP16MixedPrecisionMixin):
    method __init__ (line 47) | def __init__(
    method check_local_overflow (line 63) | def check_local_overflow(self) -> bool:
    method pre_zero_grad (line 66) | def pre_zero_grad(self) -> None:
  class GeminiOptimizer (line 70) | class GeminiOptimizer(OptimizerWrapper):
    method __init__ (line 100) | def __init__(
    method _set_grad_ptr (line 200) | def _set_grad_ptr(self):
    method _update_fp16_params (line 213) | def _update_fp16_params(self):
    method _clear_global_norm (line 223) | def _clear_global_norm(self) -> None:
    method _calc_global_norm (line 228) | def _calc_global_norm(self) -> float:
    method _get_combined_scale (line 254) | def _get_combined_scale(self):
    method zero_grad (line 266) | def zero_grad(self, *args, **kwargs):
    method step (line 270) | def step(self, *args, **kwargs):
    method clip_grad_norm (line 296) | def clip_grad_norm(self, model: torch.nn.Module, max_norm: float, norm...
    method backward (line 299) | def backward(self, loss: torch.Tensor):
    method backward_by_grad (line 303) | def backward_by_grad(
    method _maybe_move_fp32_params (line 313) | def _maybe_move_fp32_params(self):
    method _register_states_ (line 345) | def _register_states_(self):
    method __init__optimizer (line 353) | def __init__optimizer(self):
    method get_offsets (line 392) | def get_offsets(self, param_id: int) -> tuple:
    method collect_states (line 421) | def collect_states(self, param_id: int, only_rank_0: bool = True) -> d...
    method pack_optimizer_states_to_tensor (line 574) | def pack_optimizer_states_to_tensor(
    method load_from_compacted_states (line 616) | def load_from_compacted_states(
    method get_param_groups_for_saving (line 642) | def get_param_groups_for_saving(self) -> list:
    method state_dict (line 670) | def state_dict(self, only_rank_0: bool = True) -> dict:
    method load_param_groups (line 698) | def load_param_groups(self, saved_param_groups: list):
    method load_single_param_states (line 719) | def load_single_param_states(self, param_id: int, saved_states: dict):
    method load_param_states (line 778) | def load_param_states(self, param_states: dict):
    method optimizer_loading_epilogue (line 789) | def optimizer_loading_epilogue(self):
    method load_state_dict (line 797) | def load_state_dict(self, state_dict: dict):
    method state_shard (line 811) | def state_shard(
    method clip_grad_by_value (line 852) | def clip_grad_by_value(self, clip_value: float, *args, **kwargs) -> None:
    method clip_grad_by_norm (line 855) | def clip_grad_by_norm(
    method get_grad_norm (line 867) | def get_grad_norm(self, norm_type=2, **kwargs):
  class GeminiAdamOptimizer (line 871) | class GeminiAdamOptimizer(GeminiOptimizer):
    method __init__ (line 872) | def __init__(self, model: torch.nn.Module, **defaults: Any) -> None:

FILE: colossalai/zero/gemini/memory_tracer/chunk_memstats_collector.py
  class ChunkMemStatsCollector (line 10) | class ChunkMemStatsCollector(MemStatsCollector):
    method __init__ (line 11) | def __init__(self, chunk_manager: ChunkManager, memstats: Optional[Mem...
    method record_model_data_volume (line 24) | def record_model_data_volume(self) -> None:
    method cuda_margin_mem (line 33) | def cuda_margin_mem(self) -> float:

FILE: colossalai/zero/gemini/memory_tracer/memory_monitor.py
  class MemoryMonitor (line 11) | class MemoryMonitor:
    method __init__ (line 16) | def __init__(self):
    method __len__ (line 20) | def __len__(self):
    method start (line 24) | def start(self):
    method finish (line 28) | def finish(self):
    method state_dict (line 31) | def state_dict(self):
    method save (line 37) | def save(self, filename):
    method clear (line 41) | def clear(self):
  class AsyncMemoryMonitor (line 46) | class AsyncMemoryMonitor(MemoryMonitor):
    method __init__ (line 76) | def __init__(self, power: int = 10):
    method set_interval (line 89) | def set_interval(self, power: int):
    method is_measuring (line 93) | def is_measuring(self):
    method start (line 96) | def start(self):
    method finish (line 100) | def finish(self):
    method _measure_usage (line 112) | def _measure_usage(self):
  class SyncCudaMemoryMonitor (line 125) | class SyncCudaMemoryMonitor(MemoryMonitor):
    method __init__ (line 131) | def __init__(self, power: int = 10):
    method start (line 134) | def start(self):
    method finish (line 138) | def finish(self) -> int:

FILE: colossalai/zero/gemini/memory_tracer/memory_stats.py
  class MemStats (line 8) | class MemStats(object):
    method __init__ (line 9) | def __init__(self) -> None:
    method calc_max_cuda_non_model_data (line 37) | def calc_max_cuda_non_model_data(self):
    method record_max_cuda_model_data (line 44) | def record_max_cuda_model_data(self, val):
    method record_max_cuda_overall_data (line 47) | def record_max_cuda_overall_data(self, val):
    method max_overall_cuda (line 52) | def max_overall_cuda(self):
    method increase_preop_step (line 55) | def increase_preop_step(self, param_list: List[torch.nn.Parameter]):
    method param_used_step (line 72) | def param_used_step(self, param: torch.nn.Parameter) -> Optional[List[...
    method param_order (line 87) | def param_order(self):
    method non_model_data_list (line 93) | def non_model_data_list(self, device_type: str) -> List[int]:
    method max_non_model_data (line 101) | def max_non_model_data(self, device_type: str) -> float:
    method clear (line 109) | def clear(self):

FILE: colossalai/zero/gemini/memory_tracer/memstats_collector.py
  class MemStatsCollector (line 8) | class MemStatsCollector:
    method __init__ (line 20) | def __init__(self, memstats: Optional[MemStats] = None) -> None:
    method next_period_non_model_data_usage (line 34) | def next_period_non_model_data_usage(self, device_type: str) -> int:
    method sampling_time (line 54) | def sampling_time(self):
    method start_collection (line 57) | def start_collection(self):
    method finish_collection (line 61) | def finish_collection(self):
    method record_model_data_volume (line 69) | def record_model_data_volume(self) -> None:
    method sample_overall_data (line 80) | def sample_overall_data(self) -> None:
    method clear (line 94) | def clear(self) -> None:

FILE: colossalai/zero/gemini/memory_tracer/param_runtime_order.py
  class ParamGenerator (line 6) | class ParamGenerator(ABC):
    method append (line 7) | def append(self, param: torch.nn.Parameter):
    method generate (line 10) | def generate(self):
    method clear (line 13) | def clear(self):
  class OrderedParamGenerator (line 17) | class OrderedParamGenerator(ParamGenerator):
    method __init__ (line 23) | def __init__(self) -> None:
    method append (line 26) | def append(self, param: torch.nn.Parameter):
    method generate (line 29) | def generate(self):
    method is_empty (line 37) | def is_empty(self):
    method clear (line 40) | def clear(self):

FILE: colossalai/zero/gemini/memory_tracer/runtime_mem_tracer.py
  class RuntimeMemTracer (line 11) | class RuntimeMemTracer:
    method __init__ (line 23) | def __init__(self, module: torch.nn.Module, dtype: torch.dtype = torch...
    method parameters_in_runtime_order (line 44) | def parameters_in_runtime_order(self):
    method memstats (line 47) | def memstats(self):
    method __call__ (line 50) | def __call__(self, *args, **kwargs):
    method _backup_params (line 53) | def _backup_params(self):
    method _restore_params (line 61) | def _restore_params(self):
    method _pre_forward (line 70) | def _pre_forward(self):
    method forward (line 76) | def forward(self, *args, **kwargs):
    method backward (line 84) | def backward(self, loss):
    method _post_backward (line 89) | def _post_backward(self):
    method _clear_cuda_mem_info (line 97) | def _clear_cuda_mem_info(self):
    method _cast_buffers_to_cuda_dtype (line 101) | def _cast_buffers_to_cuda_dtype(self):

FILE: colossalai/zero/gemini/memory_tracer/static_memstats_collector.py
  class ModuleInfos (line 17) | class ModuleInfos:
    method __init__ (line 18) | def __init__(
  class StaticMemStatsCollector (line 27) | class StaticMemStatsCollector(ChunkMemStatsCollector):
    method __init__ (line 32) | def __init__(self, module: nn.Module, chunk_manager: ChunkManager) -> ...
    method init_mem_stats (line 37) | def init_mem_stats(self, *inputs):
    method refactor_module (line 80) | def refactor_module(self):
    method recover_module (line 85) | def recover_module(self):
    method register_opnodes_recursively (line 89) | def register_opnodes_recursively(

FILE: colossalai/zero/gemini/memory_tracer/utils.py
  function colo_model_optimizer_usage (line 6) | def colo_model_optimizer_usage(optim) -> Tuple[int, int]:
  function colo_model_mem_usage (line 21) | def colo_model_mem_usage(model: torch.nn.Module) -> Tuple[int, int]:

FILE: colossalai/zero/gemini/placement_policy.py
  class PlacementPolicy (line 17) | class PlacementPolicy(ABC):
    method __init__ (line 20) | def __init__(
    method evict_tensors (line 32) | def evict_tensors(self, can_evict_chunks: List[Chunk], **kwargs) -> Tu...
    method setup_grads_device (line 36) | def setup_grads_device(
    method get_prefetch_chunks (line 41) | def get_prefetch_chunks(
  class StaticPlacementPolicy (line 47) | class StaticPlacementPolicy(PlacementPolicy):
    method __init__ (line 48) | def __init__(
    method evict_tensors (line 69) | def evict_tensors(self, can_evict_chunks: List[Chunk], **kwargs) -> Tu...
    method setup_grads_device (line 86) | def setup_grads_device(
    method get_prefetch_chunks (line 109) | def get_prefetch_chunks(
  class AutoPlacementPolicy (line 128) | class AutoPlacementPolicy(PlacementPolicy):
    method __init__ (line 131) | def __init__(
    method evict_tensors (line 149) | def evict_tensors(
    method _sort_can_evict_chunks (line 215) | def _sort_can_evict_chunks(can_evict_chunks: tuple, compute_idx: int, ...
    method setup_grads_device (line 224) | def setup_grads_device(
    method get_prefetch_chunks (line 236) | def get_prefetch_chunks(
  class PlacementPolicyFactory (line 261) | class PlacementPolicyFactory:
    method create (line 268) | def create(policy_name: str) -> Type[PlacementPolicy]:
    method get_policy_names (line 274) | def get_policy_names():

FILE: colossalai/zero/gemini/utils.py
  function get_temp_total_chunk_on_cuda (line 14) | def get_temp_total_chunk_on_cuda(chunk: Chunk, dtype: torch.dtype):
  function _get_dfs_module_list (line 32) | def _get_dfs_module_list(module: nn.Module, memo: Optional[Set[nn.Module...
  function _get_shallow_copy_model (line 48) | def _get_shallow_copy_model(model: nn.Module):
  function get_static_torch_model (line 64) | def get_static_torch_model(

FILE: colossalai/zero/low_level/_utils.py
  function flatten (line 10) | def flatten(input_):
  function unflatten (line 14) | def unflatten(flat, tensors):
  function count_numel (line 18) | def count_numel(tensor_list):
  function calculate_padding (line 25) | def calculate_padding(numel, unit_size):
  function shuffle_by_round_robin (line 30) | def shuffle_by_round_robin(tensor_list, num_partitions):
  function flatten_dense_tensors_with_padding (line 53) | def flatten_dense_tensors_with_padding(tensor_list, unit_size):
  function is_nccl_aligned (line 66) | def is_nccl_aligned(tensor):
  function get_grad_accumulate_object (line 70) | def get_grad_accumulate_object(tensor):
  function split_by_dtype (line 91) | def split_by_dtype(tensor_list):
  function reduce_tensor_dp_group (line 109) | def reduce_tensor_dp_group(
  function has_inf_or_nan (line 161) | def has_inf_or_nan(tensor):
  function release_param_grad (line 182) | def release_param_grad(tensor_list):
  function calculate_global_norm_from_list (line 187) | def calculate_global_norm_from_list(norm_list):
  function sync_tensor (line 195) | def sync_tensor(flat_tensor, tensor_list):
  function all_gather_into_flat_tensor_nd (line 215) | def all_gather_into_flat_tensor_nd(
  function get_nd_world_size (line 238) | def get_nd_world_size(group) -> int:
  function get_nd_rank (line 245) | def get_nd_rank(group) -> int:

FILE: colossalai/zero/low_level/bookkeeping/base_store.py
  class BaseStore (line 8) | class BaseStore:
    method __init__ (line 9) | def __init__(self, torch_pg: Union[ProcessGroup, Tuple[ProcessGroup, ....
    method world_size (line 21) | def world_size(self):
    method local_rank (line 25) | def local_rank(self):

FILE: colossalai/zero/low_level/bookkeeping/bucket_store.py
  class BucketStore (line 13) | class BucketStore(BaseStore):
    method __init__ (line 14) | def __init__(
    method reset_all (line 24) | def reset_all(self) -> None:
    method num_elements_in_bucket (line 40) | def num_elements_in_bucket(self) -> int:
    method reset_num_elements_in_bucket (line 49) | def reset_num_elements_in_bucket(self):
    method add_param_grad (line 54) | def add_param_grad(self, group_id: int, param: Tensor, padding_size: i...
    method build_grad_in_bucket (line 71) | def build_grad_in_bucket(self):
    method get_grad (line 94) | def get_grad(self) -> Dict:
    method get_flatten_grad (line 103) | def get_flatten_grad(self) -> Tensor:
    method get_param_id_of_grad (line 117) | def get_param_id_of_grad(self, grad: Tensor) -> int:
    method reset (line 129) | def reset(self):

FILE: colossalai/zero/low_level/bookkeeping/gradient_store.py
  class GradientStore (line 8) | class GradientStore(BaseStore):
    method __init__ (line 9) | def __init__(self, *args, partition_grad: bool = False):
    method get_partitioned_gradients_by_param_id (line 26) | def get_partitioned_gradients_by_param_id(self, group_id: int, param_i...
    method append_gradients_by_param_id (line 43) | def append_gradients_by_param_id(self, grad: Tensor, group_id: int, pa...
    method add_gradients_by_param_id (line 61) | def add_gradients_by_param_id(self, grad: Tensor, grad_idx: int, group...
    method get_working_grads_by_group_id (line 74) | def get_working_grads_by_group_id(self, group_id: int) -> List:
    method get_working_grad_by_param_id (line 93) | def get_working_grad_by_param_id(self, param_id) -> Optional[Tensor]:
    method reset_grads_by_group_id (line 109) | def reset_grads_by_group_id(self, group_id: int):
    method reset_all_gradients (line 112) | def reset_all_gradients(self):
    method get_param_id_for_grad (line 116) | def get_param_id_for_grad(self, grad: Tensor) -> Optional[int]:

FILE: colossalai/zero/low_level/bookkeeping/tensor_bucket.py
  class TensorBucket (line 12) | class TensorBucket:
    method __init__ (line 13) | def __init__(self, size):
    method max_size (line 20) | def max_size(self):
    method current_size (line 24) | def current_size(self):
    method is_full_or_oversized (line 27) | def is_full_or_oversized(self):
    method is_empty (line 30) | def is_empty(self):
    method add_to_bucket (line 33) | def add_to_bucket(self, tensor, allow_oversize=False, write_back_tenso...
    method will_exceed_max_size (line 45) | def will_exceed_max_size(self, tensor_size):
    method get_bucket (line 49) | def get_bucket(self):
    method empty (line 52) | def empty(self):
    method flatten (line 57) | def flatten(self):
    method unflatten (line 60) | def unflatten(self, flat_tensor):
    method unflatten_and_copy (line 63) | def unflatten_and_copy(self, flat_tensor):
    method all_gather (line 68) | def all_gather(self, group=None, fp8_communication: bool = False):

FILE: colossalai/zero/low_level/low_level_optim.py
  class LowLevelZeroFP16MixedPrecisionMixin (line 40) | class LowLevelZeroFP16MixedPrecisionMixin(FP16MixedPrecisionMixin):
    method __init__ (line 41) | def __init__(
    method check_local_overflow (line 65) | def check_local_overflow(self) -> bool:
  class LowLevelZeroOptimizer (line 74) | class LowLevelZeroOptimizer(OptimizerWrapper):
    method __init__ (line 77) | def __init__(
    method __del__ (line 241) | def __del__(self):
    method dtype (line 246) | def dtype(self):
    method num_param_groups (line 250) | def num_param_groups(self):
    method _sanity_checks (line 253) | def _sanity_checks(self):
    method _create_master_param_current_rank (line 263) | def _create_master_param_current_rank(self, param_list):
    method _attach_reduction_hook (line 305) | def _attach_reduction_hook(self):
    method _run_reduction (line 327) | def _run_reduction(self):
    method _update_unpartitoned_grad (line 393) | def _update_unpartitoned_grad(
    method _update_partitoned_grad (line 402) | def _update_partitoned_grad(
    method _add_grad (line 415) | def _add_grad(
    method _add_to_bucket (line 431) | def _add_to_bucket(self, param, group_id):
    method backward (line 451) | def backward(self, loss, inputs=None, retain_graph=False):
    method backward_by_grad (line 472) | def backward_by_grad(self, tensor, grad, inputs: Tensor = None, retain...
    method zero_bucket_stores (line 494) | def zero_bucket_stores(self):
    method zero_grad_stores (line 498) | def zero_grad_stores(self):
    method zero_grad (line 502) | def zero_grad(self, set_to_none=True):
    method step (line 527) | def step(self, closure=None):
    method _compute_grad_norm (line 637) | def _compute_grad_norm(
    method _unscale_and_clip_grads (line 702) | def _unscale_and_clip_grads(self, grad_groups_flat, total_norm):
    method _sync_grad (line 722) | def _sync_grad(self):
    method _reduce_grad (line 736) | def _reduce_grad(self, partition_grad):
    method no_sync (line 746) | def no_sync(self):
    method _pack_state (line 758) | def _pack_state(self, state: Dict) -> Dict:
    method state_dict (line 779) | def state_dict(
    method load_state_dict (line 824) | def load_state_dict(self, state_dict: Dict):
    method state_dict_shard (line 853) | def state_dict_shard(
    method update_master_params (line 923) | def update_master_params(self, model: nn.Module) -> None:
    method get_working_to_master_map (line 942) | def get_working_to_master_map(self) -> Dict[int, torch.Tensor]:
    method get_master_to_working_map (line 945) | def get_master_to_working_map(self) -> Dict[int, torch.Tensor]:
    method get_param_padding_map (line 948) | def get_param_padding_map(self) -> Dict[int, torch.Tensor]:
    method record_param_padding_size (line 951) | def record_param_padding_size(self, param: Tensor, padding_size: int):
    method get_param_padding_size (line 961) | def get_param_padding_size(self, param: Tensor) -> int:
    method link_master_and_working_param (line 973) | def link_master_and_working_param(self, master_param: Tensor, working_...
    method get_padding_map (line 984) | def get_padding_map(self) -> Dict[int, Tensor]:
    method get_param_grad (line 993) | def get_param_grad(self, working_param: nn.Parameter) -> Tensor:
    method get_working_grads_by_group_id (line 1005) | def get_working_grads_by_group_id(self, group_id: int) -> List[Tensor]:
    method get_param_id_for_grad (line 1011) | def get_param_id_for_grad(self, grad: Tensor) -> int:
    method get_working_grad_by_param_id (line 1021) | def get_working_grad_by_param_id(self, param_id: int) -> Tensor:
    method get_partitioned_gradients_by_param_id (line 1025) | def get_partitioned_gradients_by_param_id(self, group_id: int, param_i...
    method _force_wait_all_gather (line 1029) | def _force_wait_all_gather(self):
    method get_grad_norm (line 1033) | def get_grad_norm(self, norm_type=2, **kwargs):

FILE: colossalai/zero/low_level/zero_hook.py
  function wait_all_gather_handle (line 10) | def wait_all_gather_handle(p):
  function set_all_gather_handle (line 17) | def set_all_gather_handle(p, handle):
  class ZeroOpHook (line 21) | class ZeroOpHook(ColoParamOpHook):
    method pre_forward (line 22) | def pre_forward(self, params: List[Tensor]) -> None:
    method post_forward (line 26) | def post_forward(self, params: List[Tensor]) -> None:
    method pre_backward (line 29) | def pre_backward(self, params: List[Tensor]) -> None:
    method post_backward (line 32) | def post_backward(self, params: List[Tensor]) -> None:

FILE: colossalai/zero/wrapper.py
  function zero_model_wrapper (line 10) | def zero_model_wrapper(
  function zero_optim_wrapper (line 52) | def zero_optim_wrapper(

FILE: examples/community/fp8/mnist/main.py
  class Net (line 22) | class Net(nn.Module):
    method __init__ (line 23) | def __init__(self, use_te=False):
    method forward (line 37) | def forward(self, x):
  function train (line 55) | def train(args, model, device, train_loader, optimizer, epoch, use_fp8):
  function calibrate (line 77) | def calibrate(model, device, test_loader):
  function test (line 87) | def test(model, device, test_loader, use_fp8):
  function main (line 110) | def main():

FILE: examples/community/roberta/preprocessing/get_mask.py
  function map_to_numpy (line 17) | def map_to_numpy(data):
  class PreTrainingDataset (line 21) | class PreTrainingDataset:
    method __init__ (line 22) | def __init__(
    method tokenize (line 45) | def tokenize(self, doc):
    method create_training_instance (line 51) | def create_training_instance(self, instance):
    method create_masked_lm_predictions (line 111) | def create_masked_lm_predictions(self, tokens):
    method get_new_segment (line 159) | def get_new_segment(self, segment):
    method create_whole_masked_lm_predictions (line 190) | def create_whole_masked_lm_predictions(self, tokens):

FILE: examples/community/roberta/preprocessing/mask.cpp
  type MaskedLMInstance (line 22) | struct MaskedLMInstance {
    method MaskedLMInstance (line 25) | MaskedLMInstance(int index, std::string label) {
  function get_new_segment (line 31) | auto get_new_segment(
  function startsWith (line 78) | bool startsWith(const std::string &s, const std::string &sub) {
  function create_whole_masked_lm_predictions (line 82) | auto create_whole_masked_lm_predictions(
  function PYBIND11_MODULE (line 186) | PYBIND11_MODULE(mask, m) {

FILE: examples/community/roberta/preprocessing/sentence_split.py
  function split_sentence (line 13) | def split_sentence(document: str, flag: str = "all", limit: int = 510) -...
  function get_sent (line 50) | def get_sent(output_path, input_path, fin_list=[], host=-1, seq_len=512)...
  function getFileSize (line 85) | def getFileSize(filepath, shard):
  function get_start_end (line 110) | def get_start_end(real_shard, base=0, server_num=10, server_name="GPU"):

FILE: examples/community/roberta/preprocessing/tokenize_mask.py
  function get_raw_instance (line 15) | def get_raw_instance(document, max_sequence_length=512):
  function split_numpy_chunk (line 58) | def split_numpy_chunk(path, tokenizer, pretrain_data, host):
  function split_numpy_chunk_pool (line 121) | def split_numpy_chunk_pool(input_path, output_path, pretrain_data, worke...

FILE: examples/community/roberta/pretraining/arguments.py
  function parse_args (line 6) | def parse_args():

FILE: examples/community/roberta/pretraining/bert_dataset_provider.py
  class BertDatasetProviderInterface (line 1) | class BertDatasetProviderInterface:
    method get_shard (line 2) | def get_shard(self, index, shuffle=True):
    method release_shard (line 5) | def release_shard(self, index):
    method prefetch_shard (line 8) | def prefetch_shard(self, index):
    method get_batch (line 11) | def get_batch(self, batch_iter):
    method prefetch_batch (line 14) | def prefetch_batch(self):

FILE: examples/community/roberta/pretraining/evaluation.py
  function evaluate (line 10) | def evaluate(model, args, logger, global_step, criterion):

FILE: examples/community/roberta/pretraining/loss.py
  class LossForPretraining (line 6) | class LossForPretraining(torch.nn.Module):
    method __init__ (line 7) | def __init__(self, vocab_size):
    method forward (line 12) | def forward(self, prediction_scores, masked_lm_labels, next_sentence_l...

FILE: examples/community/roberta/pretraining/model/bert.py
  function load_tf_weights_in_bert (line 105) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 178) | class BertEmbeddings(nn.Module):
    method __init__ (line 181) | def __init__(self, config):
    method forward (line 201) | def forward(
  class BertSelfAttention (line 243) | class BertSelfAttention(nn.Module):
    method __init__ (line 244) | def __init__(self, config, position_embedding_type=None):
    method transpose_for_scores (line 268) | def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
    method forward (line 273) | def forward(
  class BertSelfOutput (line 368) | class BertSelfOutput(nn.Module):
    method __init__ (line 369) | def __init__(self, config):
    method forward (line 375) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
  class BertAttention (line 382) | class BertAttention(nn.Module):
    method __init__ (line 383) | def __init__(self, config, position_embedding_type=None):
    method prune_heads (line 389) | def prune_heads(self, heads):
    method forward (line 407) | def forward(
  class BertIntermediate (line 431) | class BertIntermediate(nn.Module):
    method __init__ (line 432) | def __init__(self, config):
    method forward (line 440) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class BertOutput (line 446) | class BertOutput(nn.Module):
    method __init__ (line 447) | def __init__(self, config):
    method forward (line 453) | def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Ten...
  class BertLayer (line 460) | class BertLayer(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method forward (line 475) | def forward(
    method feed_forward_chunk (line 540) | def feed_forward_chunk(self, attention_output):
  class BertEncoder (line 546) | class BertEncoder(nn.Module):
    method __init__ (line 547) | def __init__(self, config):
    method forward (line 553) | def forward(
  class BertPooler (line 642) | class BertPooler(nn.Module):
    method __init__ (line 643) | def __init__(self, config):
    method forward (line 648) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class BertPredictionHeadTransform (line 657) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 658) | def __init__(self, config):
    method forward (line 667) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class BertLMPredictionHead (line 674) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 675) | def __init__(self, config):
    method forward (line 688) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 694) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 695) | def __init__(self, config):
    method forward (line 699) | def forward(self, sequence_output: torch.Tensor) -> torch.Tensor:
  class BertOnlyNSPHead (line 704) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 705) | def __init__(self, config):
    method forward (line 709) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 714) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 715) | def __init__(self, config):
    method forward (line 720) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 726) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 738) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 754) | def _set_gradient_checkpointing(self, module, value=False):
  class BertForPreTrainingOutput (line 760) | class BertForPreTrainingOutput(ModelOutput):
  class BertModel (line 863) | class BertModel(BertPreTrainedModel):
    method __init__ (line 876) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 888) | def get_input_embeddings(self):
    method set_input_embeddings (line 891) | def set_input_embeddings(self, value):
    method _prune_heads (line 894) | def _prune_heads(self, heads_to_prune):
    method forward (line 909) | def forward(
  class BertForPreTraining (line 1046) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 1047) | def __init__(self, config):
    method get_output_embeddings (line 1056) | def get_output_embeddings(self):
    method set_output_embeddings (line 1059) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1064) | def forward(
  class BertLMHeadModel (line 1150) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1154) | def __init__(self, config):
    method get_output_embeddings (line 1166) | def get_output_embeddings(self):
    method set_output_embeddings (line 1169) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1179) | def forward(
    method prepare_inputs_for_generation (line 1264) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1276) | def _reorder_cache(self, past, beam_idx):
  class BertForMaskedLM (line 1284) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1288) | def __init__(self, config):
    method get_output_embeddings (line 1303) | def get_output_embeddings(self):
    method set_output_embeddings (line 1306) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1318) | def forward(
    method prepare_inputs_for_generation (line 1375) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1396) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1397) | def __init__(self, config):
    method forward (line 1408) | def forward(
  class BertForSequenceClassification (line 1501) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1502) | def __init__(self, config):
    method forward (line 1526) | def forward(
  class BertForMultipleChoice (line 1605) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1606) | def __init__(self, config):
    method forward (line 1626) | def forward(
  class BertForTokenClassification (line 1700) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1703) | def __init__(self, config):
    method forward (line 1726) | def forward(
  class BertForQuestionAnswering (line 1786) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1789) | def __init__(self, config):
    method forward (line 1810) | def forward(

FILE: examples/community/roberta/pretraining/model/deberta_v2.py
  class ContextPooler (line 60) | class ContextPooler(nn.Module):
    method __init__ (line 61) | def __init__(self, config):
    method forward (line 67) | def forward(self, hidden_states):
    method output_dim (line 78) | def output_dim(self):
  class XSoftmax (line 83) | class XSoftmax(torch.autograd.Function):
    method forward (line 112) | def forward(self, input, mask, dim):
    method backward (line 123) | def backward(self, grad_output):
    method symbolic (line 129) | def symbolic(g, self, mask, dim):
  class DropoutContext (line 147) | class DropoutContext(object):
    method __init__ (line 148) | def __init__(self):
  function get_mask (line 156) | def get_mask(input, local_context):
  class XDropout (line 176) | class XDropout(torch.autograd.Function):
    method forward (line 180) | def forward(ctx, input, local_ctx):
    method backward (line 190) | def backward(ctx, grad_output):
  class StableDropout (line 199) | class StableDropout(nn.Module):
    method __init__ (line 207) | def __init__(self, drop_prob):
    method forward (line 213) | def forward(self, x):
    method clear_context (line 224) | def clear_context(self):
    method init_context (line 228) | def init_context(self, reuse_mask=True, scale=1):
    method get_context (line 236) | def get_context(self):
  class DebertaV2SelfOutput (line 249) | class DebertaV2SelfOutput(nn.Module):
    method __init__ (line 250) | def __init__(self, config):
    method forward (line 256) | def forward(self, hidden_states, input_tensor):
  class DebertaV2Attention (line 264) | class DebertaV2Attention(nn.Module):
    method __init__ (line 265) | def __init__(self, config):
    method forward (line 271) | def forward(
  class DebertaV2Intermediate (line 301) | class DebertaV2Intermediate(nn.Module):
    method __init__ (line 302) | def __init__(self, config):
    method forward (line 310) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class DebertaV2Output (line 317) | class DebertaV2Output(nn.Module):
    method __init__ (line 318) | def __init__(self, config):
    method forward (line 325) | def forward(self, hidden_states, input_tensor):
  class DebertaV2Layer (line 333) | class DebertaV2Layer(nn.Module):
    method __init__ (line 334) | def __init__(self, config):
    method forward (line 340) | def forward(
  class ConvLayer (line 367) | class ConvLayer(nn.Module):
    method __init__ (line 368) | def __init__(self, config):
    method forward (line 380) | def forward(self, hidden_states, residual_states, input_mask):
  class DebertaV2Encoder (line 403) | class DebertaV2Encoder(nn.Module):
    method __init__ (line 406) | def __init__(self, config):
    method get_rel_embedding (line 435) | def get_rel_embedding(self):
    method get_attention_mask (line 445) | def get_attention_mask(self, attention_mask):
    method get_rel_pos (line 455) | def get_rel_pos(self, hidden_states, query_states=None, relative_pos=N...
    method forward (line 463) | def forward(
  function make_log_bucket_position (line 545) | def make_log_bucket_position(relative_pos, bucket_size, max_position):
  function build_relative_position (line 554) | def build_relative_position(query_size, key_size, bucket_size=-1, max_po...
  function c2p_dynamic_expand (line 585) | def c2p_dynamic_expand(c2p_pos, query_layer, relative_pos):
  function p2c_dynamic_expand (line 591) | def p2c_dynamic_expand(c2p_pos, query_layer, key_layer):
  function pos_dynamic_expand (line 597) | def pos_dynamic_expand(pos_index, p2c_att, key_layer):
  class DisentangledSelfAttention (line 601) | class DisentangledSelfAttention(nn.Module):
    method __init__ (line 612) | def __init__(self, config):
    method transpose_for_scores (line 651) | def transpose_for_scores(self, x, attention_heads):
    method forward (line 656) | def forward(
    method disentangled_attention_bias (line 740) | def disentangled_attention_bias(self, query_layer, key_layer, relative...
  class DebertaV2Embeddings (line 823) | class DebertaV2Embeddings(nn.Module):
    method __init__ (line 826) | def __init__(self, config):
    method forward (line 850) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
  class DebertaV2PreTrainedModel (line 898) | class DebertaV2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 910) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 923) | def _set_gradient_checkpointing(self, module, value=False):
  class DebertaV2Model (line 994) | class DebertaV2Model(DebertaV2PreTrainedModel):
    method __init__ (line 995) | def __init__(self, config):
    method get_input_embeddings (line 1005) | def get_input_embeddings(self):
    method set_input_embeddings (line 1008) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 1011) | def _prune_heads(self, heads_to_prune):
    method forward (line 1025) | def forward(
  class DebertaV2ForMaskedLM (line 1107) | class DebertaV2ForMaskedLM(DebertaV2PreTrainedModel):
    method __init__ (line 1111) | def __init__(self, config):
    method get_output_embeddings (line 1120) | def get_output_embeddings(self):
    method set_output_embeddings (line 1123) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1133) | def forward(
  class DebertaV2PredictionHeadTransform (line 1186) | class DebertaV2PredictionHeadTransform(nn.Module):
    method __init__ (line 1187) | def __init__(self, config):
    method forward (line 1196) | def forward(self, hidden_states):
  class DebertaV2LMPredictionHead (line 1204) | class DebertaV2LMPredictionHead(nn.Module):
    method __init__ (line 1205) | def __init__(self, config):
    method forward (line 1218) | def forward(self, hidden_states):
  class DebertaV2OnlyMLMHead (line 1225) | class DebertaV2OnlyMLMHead(nn.Module):
    method __init__ (line 1226) | def __init__(self, config):
    method forward (line 1230) | def forward(self, sequence_output):
  class DebertaV2ForSequenceClassification (line 1243) | class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
    method __init__ (line 1244) | def __init__(self, config):
    method get_input_embeddings (line 1262) | def get_input_embeddings(self):
    method set_input_embeddings (line 1265) | def set_input_embeddings(self, new_embeddings):
    method forward (line 1275) | def forward(
  class DebertaV2ForTokenClassification (line 1363) | class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
    method __init__ (line 1366) | def __init__(self, config):
    method forward (line 1384) | def forward(
  class DebertaV2ForQuestionAnswering (line 1440) | class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
    method __init__ (line 1443) | def __init__(self, config):
    method forward (line 1460) | def forward(
  class DebertaV2ForMultipleChoice (line 1540) | class DebertaV2ForMultipleChoice(DebertaV2PreTrainedModel):
    method __init__ (line 1541) | def __init__(self, config):
    method get_input_embeddings (line 1558) | def get_input_embeddings(self):
    method set_input_embeddings (line 1561) | def set_input_embeddings(self, new_embeddings):
    method forward (line 1571) | def forward(

FILE: examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py
  class WorkerInitObj (line 16) | class WorkerInitObj(object):
    method __init__ (line 17) | def __init__(self, seed):
    method __call__ (line 20) | def __call__(self, id):
  function create_pretraining_dataset (line 25) | def create_pretraining_dataset(
  class pretraining_dataset (line 40) | class pretraining_dataset(Dataset):
    method __init__ (line 41) | def __init__(self, input_file, max_predictions_per_seq):
    method __len__ (line 49) | def __len__(self):
    method __getitem__ (line 53) | def __getitem__(self, index):
  class NvidiaBertDatasetProvider (line 66) | class NvidiaBertDatasetProvider(BertDatasetProviderInterface):
    method __init__ (line 67) | def __init__(self, args, evaluate=False):
    method get_shard (line 111) | def get_shard(self, index):
    method release_shard (line 132) | def release_shard(self):
    method prefetch_shard (line 136) | def prefetch_shard(self, index):
    method get_batch (line 148) | def get_batch(self, batch_iter):
    method prefetch_batch (line 151) | def prefetch_batch(self):
    method _get_shard_file (line 154) | def _get_shard_file(self, shard_index):
    method _get_shard_file_index (line 158) | def _get_shard_file_index(self, shard_index, global_rank):
    method shuffle_dataset (line 168) | def shuffle_dataset(self, epoch):

FILE: examples/community/roberta/pretraining/pretrain_utils.py
  function get_new_state_dict (line 21) | def get_new_state_dict(state_dict, start_index=13):
  class LMModel (line 29) | class LMModel(nn.Module):
    method __init__ (line 30) | def __init__(self, model, config, args):
    method forward (line 39) | def forward(self, input_ids, token_type_ids=None, attention_mask=None):
  function get_model (line 44) | def get_model(args, logger):
  function get_optimizer (line 74) | def get_optimizer(model, lr):
  function get_lr_scheduler (line 87) | def get_lr_scheduler(optimizer, total_steps, warmup_steps=2000, last_epo...
  function save_ckpt (line 96) | def save_ckpt(model, optimizer, lr_scheduler, path, epoch, shard, global...

FILE: examples/community/roberta/pretraining/run_pretraining.py
  function main (line 26) | def main():

FILE: examples/community/roberta/pretraining/utils/WandbLog.py
  class WandbLog (line 8) | class WandbLog:
    method init_wandb (line 10) | def init_wandb(cls, project, notes=None, name=time.strftime("%Y-%m-%d ...
    method log (line 14) | def log(cls, result, model=None, gradient=None):
  class TensorboardLog (line 24) | class TensorboardLog:
    method __init__ (line 25) | def __init__(self, location, name=time.strftime("%Y-%m-%d %H:%M:%S", t...
    method log_train (line 30) | def log_train(self, result, step):
    method log_eval (line 34) | def log_eval(self, result, step):
    method log_zeroshot (line 38) | def log_zeroshot(self, result, step):

FILE: examples/community/roberta/pretraining/utils/exp_util.py
  function logging (line 11) | def logging(s, log_path, print_=True, log_=True):
  function get_logger (line 19) | def get_logger(log_path, **kwargs):
  function create_exp_dir (line 23) | def create_exp_dir(dir_path, scripts_to_save=None, debug=False):
  function get_cpu_mem (line 43) | def get_cpu_mem():
  function get_gpu_mem (line 47) | def get_gpu_mem():
  function get_mem_info (line 51) | def get_mem_info(prefix=""):
  function get_tflops (line 55) | def get_tflops(model_numel, batch_size, seq_len, step_time):
  function get_parameters_in_billions (line 59) | def get_parameters_in_billions(model, world_size=1):
  function throughput_calculator (line 72) | def throughput_calculator(numel, args, config, iteration_time, total_ite...
  function synchronize (line 98) | def synchronize():
  function log_args (line 109) | def log_args(logger, args):

FILE: examples/community/roberta/pretraining/utils/global_vars.py
  function set_global_variables (line 11) | def set_global_variables(launch_time, tensorboard_path):
  function _set_timers (line 16) | def _set_timers():
  function _set_tensorboard_writer (line 23) | def _set_tensorboard_writer(launch_time, tensorboard_path):
  function get_timers (line 31) | def get_timers():
  function get_tensorboard_writer (line 37) | def get_tensorboard_writer():
  function _ensure_var_is_initialized (line 43) | def _ensure_var_is_initialized(var, name):
  function _ensure_var_is_not_initialized (line 48) | def _ensure_var_is_not_initialized(var, name):
  class _Timer (line 53) | class _Timer:
    method __init__ (line 56) | def __init__(self, name):
    method start (line 62) | def start(self):
    method stop (line 69) | def stop(self):
    method reset (line 76) | def reset(self):
    method elapsed (line 81) | def elapsed(self, reset=True):
  class Timers (line 98) | class Timers:
    method __init__ (line 101) | def __init__(self):
    method __call__ (line 104) | def __call__(self, name):
    method write (line 109) | def write(self, names, writer, iteration, normalizer=1.0, reset=False):
    method log (line 119) | def log(self, names, normalizer=1.0, reset=True):

FILE: examples/community/roberta/pretraining/utils/logger.py
  class Logger (line 11) | class Logger:
    method __init__ (line 12) | def __init__(self, log_path, cuda=False, debug=False):
    method info (line 18) | def info(self, message, log_=True, print_=True, *args, **kwargs):
    method error (line 27) | def error(self, message, *args, **kwargs):

FILE: examples/images/diffusion/ldm/data/base.py
  class Txt2ImgIterableBaseDataset (line 9) | class Txt2ImgIterableBaseDataset(IterableDataset):
    method __init__ (line 14) | def __init__(self, file_path: str, rank, world_size):
    method __len__ (line 36) | def __len__(self):
    method __iter__ (line 40) | def __iter__(self):
    method _sample_generator (line 45) | def _sample_generator(self, start, end):
    method _get_file_info (line 57) | def _get_file_info(self, file_path):

FILE: examples/images/diffusion/ldm/data/cifar10.py
  function make_multi_folder_data (line 15) | def make_multi_folder_data(paths, caption_files=None, **kwargs):
  class FolderData (line 35) | class FolderData(Dataset):
    method __init__ (line 36) | def __init__(
    method __len__ (line 86) | def __len__(self):
    method __getitem__ (line 92) | def __getitem__(self, index):
    method process_im (line 120) | def process_im(self, im):
  function hf_dataset (line 125) | def hf_dataset(
  class TextOnly (line 171) | class TextOnly(Dataset):
    method __init__ (line 172) | def __init__(self, captions, output_size, image_key="image", caption_k...
    method __len__ (line 188) | def __len__(self):
    method __getitem__ (line 191) | def __getitem__(self, index):
    method _load_caption_file (line 196) | def _load_caption_file(self, filename):

FILE: examples/images/diffusion/ldm/data/imagenet.py
  function synset2idx (line 23) | def synset2idx(path_to_yaml="data/index_synset.yaml"):
  class ImageNetBase (line 29) | class ImageNetBase(Dataset):
    method __init__ (line 30) | def __init__(self, config=None):
    method __len__ (line 42) | def __len__(self):
    method __getitem__ (line 45) | def __getitem__(self, i):
    method _prepare (line 48) | def _prepare(self):
    method _filter_relpaths (line 51) | def _filter_relpaths(self, relpaths):
    method _prepare_synset_to_human (line 71) | def _prepare_synset_to_human(self):
    method _prepare_idx_to_synset (line 78) | def _prepare_idx_to_synset(self):
    method _prepare_human_to_integer_label (line 84) | def _prepare_human_to_integer_label(self):
    method _load (line 97) | def _load(self):
  class ImageNetTrain (line 139) | class ImageNetTrain(ImageNetBase):
    method __init__ (line 150) | def __init__(self, process_images=True, data_root=None, **kwargs):
    method _prepare (line 155) | def _prepare(self):
  class ImageNetValidation (line 202) | class ImageNetValidation(ImageNetBase):
    method __init__ (line 216) | def __init__(self, process_images=True, data_root=None, **kwargs):
    method _prepare (line 221) | def _prepare(self):
  class ImageNetSR (line 276) | class ImageNetSR(Dataset):
    method __init__ (line 277) | def __init__(self, size=None, degradation=None, downscale_f=4, min_cro...
    method __len__ (line 339) | def __len__(self):
    method __getitem__ (line 342) | def __getitem__(self, i):
  class ImageNetSRTrain (line 378) | class ImageNetSRTrain(ImageNetSR):
    method __init__ (line 379) | def __init__(self, **kwargs):
    method get_base (line 382) | def get_base(self):
  class ImageNetSRValidation (line 391) | class ImageNetSRValidation(ImageNetSR):
    method __init__ (line 392) | def __init__(self, **kwargs):
    method get_base (line 395) | def get_base(self):

FILE: examples/images/diffusion/ldm/data/lsun.py
  class LSUNBase (line 11) | class LSUNBase(Dataset):
    method __init__ (line 12) | def __init__(
    method __len__ (line 44) | def __len__(self):
    method __getitem__ (line 48) | def __getitem__(self, i):
  class LSUNChurchesTrain (line 84) | class LSUNChurchesTrain(LSUNBase):
    method __init__ (line 85) | def __init__(self, **kwargs):
  class LSUNChurchesValidation (line 91) | class LSUNChurchesValidation(LSUNBase):
    method __init__ (line 92) | def __init__(self, flip_p=0.0, **kwargs):
  class LSUNBedroomsTrain (line 100) | class LSUNBedroomsTrain(LSUNBase):
    method __init__ (line 101) | def __init__(self, **kwargs):
  class LSUNBedroomsValidation (line 107) | class LSUNBedroomsValidation(LSUNBase):
    method __init__ (line 108) | def __init__(self, flip_p=0.0, **kwargs):
  class LSUNCatsTrain (line 115) | class LSUNCatsTrain(LSUNBase):
    method __init__ (line 116) | def __init__(self, **kwargs):
  class LSUNCatsValidation (line 122) | class LSUNCatsValidation(LSUNBase):
    method __init__ (line 123) | def __init__(self, flip_p=0.0, **kwargs):

FILE: examples/images/diffusion/ldm/data/teyvat.py
  function make_multi_folder_data (line 15) | def make_multi_folder_data(paths, caption_files=None, **kwargs):
  class FolderData (line 35) | class FolderData(Dataset):
    method __init__ (line 36) | def __init__(
    method __len__ (line 86) | def __len__(self):
    method __getitem__ (line 92) | def __getitem__(self, index):
    method process_im (line 120) | def process_im(self, im):
  function hf_dataset (line 125) | def hf_dataset(

FILE: examples/images/diffusion/ldm/lr_scheduler.py
  class LambdaWarmUpCosineScheduler (line 4) | class LambdaWarmUpCosineScheduler:
    method __init__ (line 9) | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_...
    method schedule (line 18) | def schedule(self, n, **kwargs):
    method __call__ (line 33) | def __call__(self, n, **kwargs):
  class LambdaWarmUpCosineScheduler2 (line 37) | class LambdaWarmUpCosineScheduler2:
    method __init__ (line 43) | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths...
    method find_in_interval (line 54) | def find_in_interval(self, n):
    method schedule (line 61) | def schedule(self, n, **kwargs):
    method __call__ (line 78) | def __call__(self, n, **kwargs):
  class LambdaLinearScheduler (line 82) | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
    method schedule (line 83) | def schedule(self, n, **kwargs):

FILE: examples/images/diffusion/ldm/models/autoencoder.py
  class AutoencoderKL (line 12) | class AutoencoderKL(pl.LightningModule):
    method __init__ (line 13) | def __init__(
    method init_from_ckpt (line 52) | def init_from_ckpt(self, path, ignore_keys=list()):
    method ema_scope (line 64) | def ema_scope(self, context=None):
    method on_train_batch_end (line 78) | def on_train_batch_end(self, *args, **kwargs):
    method encode (line 82) | def encode(self, x):
    method decode (line 88) | def decode(self, z):
    method forward (line 93) | def forward(self, input, sample_posterior=True):
    method get_input (line 102) | def get_input(self, batch, k):
    method training_step (line 109) | def training_step(self, batch, batch_idx, optimizer_idx):
    method validation_step (line 144) | def validation_step(self, batch, batch_idx):
    method _validation_step (line 150) | def _validation_step(self, batch, batch_idx, postfix=""):
    method configure_optimizers (line 178) | def configure_optimizers(self):
    method get_last_layer (line 193) | def get_last_layer(self):
    method log_images (line 197) | def log_images(self, batch, only_inputs=False, log_ema=False, **kwargs):
    method to_rgb (line 222) | def to_rgb(self, x):
  class IdentityFirstStage (line 231) | class IdentityFirstStage(torch.nn.Module):
    method __init__ (line 232) | def __init__(self, *args, vq_interface=False, **kwargs):
    method encode (line 236) | def encode(self, x, *args, **kwargs):
    method decode (line 239) | def decode(self, x, *args, **kwargs):
    method quantize (line 242) | def quantize(self, x, *args, **kwargs):
    method forward (line 247) | def forward(self, x, *args, **kwargs):

FILE: examples/images/diffusion/ldm/models/diffusion/classifier.py
  function disabled_train (line 21) | def disabled_train(self, mode=True):
  class NoisyLatentImageClassifier (line 27) | class NoisyLatentImageClassifier(pl.LightningModule):
    method __init__ (line 28) | def __init__(
    method init_from_ckpt (line 71) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
    method load_diffusion (line 90) | def load_diffusion(self):
    method load_classifier (line 97) | def load_classifier(self, ckpt_path, pool):
    method get_x_noisy (line 112) | def get_x_noisy(self, x, t, noise=None):
    method forward (line 123) | def forward(self, x_noisy, t, *args, **kwargs):
    method get_input (line 127) | def get_input(self, batch, k):
    method get_conditioning (line 136) | def get_conditioning(self, batch, k=None):
    method compute_top_k (line 153) | def compute_top_k(self, logits, labels, k, reduction="mean"):
    method on_train_epoch_start (line 160) | def on_train_epoch_start(self):
    method write_logs (line 165) | def write_logs(self, loss, logits, targets):
    method shared_step (line 178) | def shared_step(self, batch, t=None):
    method training_step (line 197) | def training_step(self, batch, batch_idx):
    method reset_noise_accs (line 201) | def reset_noise_accs(self):
    method on_validation_start (line 207) | def on_validation_start(self):
    method validation_step (line 211) | def validation_step(self, batch, batch_idx):
    method configure_optimizers (line 221) | def configure_optimizers(self):
    method log_images (line 236) | def log_images(self, batch, N=8, *args, **kwargs):

FILE: examples/images/diffusion/ldm/models/diffusion/ddim.py
  class DDIMSampler (line 14) | class DDIMSampler(object):
    method __init__ (line 15) | def __init__(self, model, schedule="linear", **kwargs):
    method register_buffer (line 21) | def register_buffer(self, name, attr):
    method make_schedule (line 27) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi...
    method sample (line 65) | def sample(
    method ddim_sampling (line 138) | def ddim_sampling(
    method p_sample_ddim (line 221) | def p_sample_ddim(
    method encode (line 305) | def encode(
    method stochastic_encode (line 365) | def stochastic_encode(self, x0, t, use_original_steps=False, noise=None):
    method decode (line 383) | def decode(

FILE: examples/images/diffusion/ldm/models/diffusion/ddpm.py
  function disabled_train (line 48) | def disabled_train(self, mode=True):
  function uniform_on_device (line 54) | def uniform_on_device(r1, r2, shape, device):
  class DDPM (line 58) | class DDPM(pl.LightningModule):
    method __init__ (line 60) | def __init__(
    method register_schedule (line 178) | def register_schedule(
    method ema_scope (line 246) | def ema_scope(self, context=None):
    method init_from_ckpt (line 261) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
    method q_mean_variance (line 321) | def q_mean_variance(self, x_start, t):
    method predict_start_from_noise (line 333) | def predict_start_from_noise(self, x_t, t, noise):
    method predict_start_from_z_and_v (line 339) | def predict_start_from_z_and_v(self, x_t, t, v):
    method predict_eps_from_z_and_v (line 347) | def predict_eps_from_z_and_v(self, x_t, t, v):
    method q_posterior (line 353) | def q_posterior(self, x_start, x_t, t):
    method p_mean_variance (line 362) | def p_mean_variance(self, x, t, clip_denoised: bool):
    method p_sample (line 375) | def p_sample(self, x, t, clip_denoised=True, repeat_noise=False):
    method p_sample_loop (line 384) | def p_sample_loop(self, shape, return_intermediates=False):
    method sample (line 400) | def sample(self, batch_size=16, return_intermediates=False):
    method q_sample (line 407) | def q_sample(self, x_start, t, noise=None):
    method get_v (line 414) | def get_v(self, x, noise, t):
    method get_loss (line 420) | def get_loss(self, pred, target, mean=True):
    method p_losses (line 435) | def p_losses(self, x_start, t, noise=None):
    method forward (line 466) | def forward(self, x, *args, **kwargs):
    method get_input (line 472) | def get_input(self, batch, k):
    method shared_step (line 483) | def shared_step(self, batch):
    method training_step (line 488) | def training_step(self, batch, batch_idx):
    method validation_step (line 511) | def validation_step(self, batch, batch_idx):
    method on_train_batch_end (line 519) | def on_train_batch_end(self, *args, **kwargs):
    method _get_rows_from_list (line 523) | def _get_rows_from_list(self, samples):
    method log_images (line 531) | def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=Non...
    method configure_optimizers (line 568) | def configure_optimizers(self):
  class LatentDiffusion (line 577) | class LatentDiffusion(DDPM):
    method __init__ (line 580) | def __init__(
    method configure_sharded_model (line 644) | def configure_sharded_model(self) -> None:
    method make_cond_schedule (line 687) | def make_cond_schedule(
    method on_train_batch_start (line 696) | def on_train_batch_start(self, batch, batch_idx):
    method register_schedule (line 717) | def register_schedule(
    method instantiate_first_stage (line 732) | def instantiate_first_stage(self, config):
    method instantiate_cond_stage (line 739) | def instantiate_cond_stage(self, config):
    method _get_denoise_row_from_list (line 758) | def _get_denoise_row_from_list(self, samples, desc="", force_no_decode...
    method get_first_stage_encoding (line 771) | def get_first_stage_encoding(self, encoder_posterior):
    method get_learned_conditioning (line 780) | def get_learned_conditioning(self, c):
    method meshgrid (line 793) | def meshgrid(self, h, w):
    method delta_border (line 800) | def delta_border(self, h, w):
    method get_weighting (line 814) | def get_weighting(self, h, w, Ly, Lx, device):
    method get_fold_unfold (line 835) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1):  # todo...
    method get_input (line 894) | def get_input(
    method decode_first_stage (line 957) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize...
    method encode_first_stage (line 968) | def encode_first_stage(self, x):
    method shared_step (line 971) | def shared_step(self, batch, **kwargs):
    method forward (line 976) | def forward(self, x, c, *args, **kwargs):
    method apply_model (line 987) | def apply_model(self, x_noisy, t, cond, return_ids=False):
    method _predict_eps_from_xstart (line 1004) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
    method _prior_bpd (line 1009) | def _prior_bpd(self, x_start):
    method p_losses (line 1023) | def p_losses(self, x_start, cond, t, noise=None):
    method p_mean_variance (line 1061) | def p_mean_variance(
    method p_sample (line 1103) | def p_sample(
    method progressive_denoising (line 1152) | def progressive_denoising(
    method p_sample_loop (line 1240) | def p_sample_loop(
    method sample (line 1305) | def sample(
    method sample_log (line 1346) | def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs):
    method get_unconditional_conditioning (line 1358) | def get_unconditional_conditioning(self, batch_size, null_label=None):
    method log_images (line 1383) | def log_images(
    method configure_optimizers (line 1539) | def configure_optimizers(self):
    method to_rgb (line 1563) | def to_rgb(self, x):
  class DiffusionWrapper (line 1572) | class DiffusionWrapper(pl.LightningModule):
    method __init__ (line 1573) | def __init__(self, diff_model_config, conditioning_key):
    method forward (line 1580) | def forward(self, x, t, c_concat: list = None, c_crossattn: list = Non...
  class LatentUpscaleDiffusion (line 1614) | class LatentUpscaleDiffusion(LatentDiffusion):
    method __init__ (line 1615) | def __init__(self, *args, low_scale_config, low_scale_key="LR", noise_...
    method instantiate_low_stage (line 1623) | def instantiate_low_stage(self, config):
    method get_input (line 1631) | def get_input(self, batch, k, cond_key=None, bs=None, log_mode=False):
    method log_images (line 1662) | def log_images(
  class LatentFinetuneDiffusion (line 1780) | class LatentFinetuneDiffusion(LatentDiffusion):
    method __init__ (line 1786) | def __init__(
    method init_from_ckpt (line 1813) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
    method log_images (line 1847) | def log_images(
  class LatentInpaintDiffusion (line 1949) | class LatentInpaintDiffusion(LatentFinetuneDiffusion):
    method __init__ (line 1956) | def __init__(self, concat_keys=("mask", "masked_image"), masked_image_...
    method get_input (line 1962) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta...
    method log_images (line 1997) | def log_images(self, *args, **kwargs):
  class LatentDepth2ImageDiffusion (line 2005) | class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion):
    method __init__ (line 2010) | def __init__(self, depth_stage_config, concat_keys=("midas_in",), *arg...
    method get_input (line 2016) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta...
    method log_images (line 2056) | def log_images(self, *args, **kwargs):
  class LatentUpscaleFinetuneDiffusion (line 2066) | class LatentUpscaleFinetuneDiffusion(LatentFinetuneDiffusion):
    method __init__ (line 2071) | def __init__(
    method instantiate_low_stage (line 2083) | def instantiate_low_stage(self, config):
    method get_input (line 2091) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta...
    method log_images (line 2135) | def log_images(self, *args, **kwargs):

FILE: examples/images/diffusion/ldm/models/diffusion/dpm_solver/dpm_solver.py
  class NoiseScheduleVP (line 7) | class NoiseScheduleVP:
    method __init__ (line 8) | def __init__(
    method marginal_log_mean_coeff (line 118) | def marginal_log_mean_coeff(self, t):
    method marginal_alpha (line 133) | def marginal_alpha(self, t):
    method marginal_std (line 139) | def marginal_std(self, t):
    method marginal_lambda (line 145) | def marginal_lambda(self, t):
    method inverse_lambda (line 153) | def inverse_lambda(self, lamb):
  function model_wrapper (line 182) | def model_wrapper(
  class DPM_Solver (line 340) | class DPM_Solver:
    method __init__ (line 341) | def __init__(self, model_fn, noise_schedule, predict_x0=False, thresho...
    method noise_prediction_fn (line 367) | def noise_prediction_fn(self, x, t):
    method data_prediction_fn (line 373) | def data_prediction_fn(self, x, t):
    method model_fn (line 388) | def model_fn(self, x, t):
    method get_time_steps (line 397) | def get_time_steps(self, skip_type, t_T, t_0, N, device):
    method get_orders_and_timesteps_for_singlestep_solver (line 427) | def get_orders_and_timesteps_for_singlestep_solver(self, steps, order,...
    method denoise_to_zero_fn (line 513) | def denoise_to_zero_fn(self, x, s):
    method dpm_solver_first_update (line 519) | def dpm_solver_first_update(self, x, s, t, model_s=None, return_interm...
    method singlestep_dpm_solver_second_update (line 562) | def singlestep_dpm_solver_second_update(
    method singlestep_dpm_solver_third_update (line 646) | def singlestep_dpm_solver_third_update(
    method multistep_dpm_solver_second_update (line 785) | def multistep_dpm_solver_second_update(self, x, model_prev_list, t_pre...
    method multistep_dpm_solver_third_update (line 845) | def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev...
    method singlestep_dpm_solver_update (line 896) | def singlestep_dpm_solver_update(
    method multistep_dpm_solver_update (line 927) | def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list,...
    method dpm_solver_adaptive (line 950) | def dpm_solver_adaptive(
    method sample (line 1012) | def sample(
  function interpolate_fn (line 1194) | def interpolate_fn(x, xp, yp):
  function expand_dims (line 1239) | def expand_dims(v, dims):

FILE: examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py
  class DPMSolverSampler (line 10) | class DPMSolverSampler(object):
    method __init__ (line 11) | def __init__(self, model, **kwargs):
    method register_buffer (line 17) | def register_buffer(self, name, attr):
    method sample (line 24) | def sample(

FILE: examples/images/diffusion/ldm/models/diffusion/plms.py
  class PLMSSampler (line 10) | class PLMSSampler(object):
    method __init__ (line 11) | def __init__(self, model, schedule="linear", **kwargs):
    method register_buffer (line 17) | def register_buffer(self, name, attr):
    method make_schedule (line 23) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi...
    method sample (line 63) | def sample(
    method plms_sampling (line 126) | def plms_sampling(
    method p_sample_plms (line 211) | def p_sample_plms(

FILE: examples/images/diffusion/ldm/models/diffusion/sampling_util.py
  function append_dims (line 1) | def append_dims(x, target_dims):
  function norm_thresholding (line 10) | def norm_thresholding(x0, value):
  function spatial_norm_thresholding (line 15) | def spatial_norm_thresholding(x0, value):

FILE: examples/images/diffusion/ldm/modules/attention.py
  function exists (line 20) | def exists(val):
  function uniq (line 24) | def uniq(arr):
  function default (line 28) | def default(val, d):
  function max_neg_value (line 34) | def max_neg_value(t):
  function init_ (line 38) | def init_(tensor):
  class GEGLU (line 46) | class GEGLU(nn.Module):
    method __init__ (line 47) | def __init__(self, dim_in, dim_out):
    method forward (line 51) | def forward(self, x):
  class FeedForward (line 56) | class FeedForward(nn.Module):
    method __init__ (line 57) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0):
    method forward (line 65) | def forward(self, x):
  function zero_module (line 69) | def zero_module(module):
  function Normalize (line 78) | def Normalize(in_channels):
  class SpatialSelfAttention (line 82) | class SpatialSelfAttention(nn.Module):
    method __init__ (line 83) | def __init__(self, in_channels):
    method forward (line 93) | def forward(self, x):
  class CrossAttention (line 119) | class CrossAttention(nn.Module):
    method __init__ (line 120) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ...
    method forward (line 134) | def forward(self, x, context=None, mask=None):
  class MemoryEfficientCrossAttention (line 161) | class MemoryEfficientCrossAttention(nn.Module):
    method __init__ (line 163) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ...
    method forward (line 182) | def forward(self, x, context=None, mask=None):
  class BasicTransformerBlock (line 212) | class BasicTransformerBlock(nn.Module):
    method __init__ (line 218) | def __init__(
    method forward (line 250) | def forward(self, x, context=None):
    method _forward (line 253) | def _forward(self, x, context=None):
  class SpatialTransformer (line 260) | class SpatialTransformer(nn.Module):
    method __init__ (line 270) | def __init__(
    method forward (line 313) | def forward(self, x, context=None):

FILE: examples/images/diffusion/ldm/modules/diffusionmodules/model.py
  function get_timestep_embedding (line 27) | def get_timestep_embedding(timesteps, embedding_dim):
  function nonlinearity (line 48) | def nonlinearity(x):
  function Normalize (line 53) | def Normalize(in_channels, num_groups=32):
  class Upsample (line 57) | class Upsample(nn.Module):
    method __init__ (line 58) | def __init__(self, in_channels, with_conv):
    method forward (line 64) | def forward(self, x):
  class Downsample (line 71) | class Downsample(nn.Module):
    method __init__ (line 72) | def __init__(self, in_channels, with_conv):
    method forward (line 79) | def forward(self, x):
  class ResnetBlock (line 89) | class ResnetBlock(nn.Module):
    method __init__ (line 90) | def __init__(self, *, in_channels, out_channels=None, conv_shortcut=Fa...
    method forward (line 110) | def forward(self, x, temb):
  class AttnBlock (line 133) | class AttnBlock(nn.Module):
    method __init__ (line 134) | def __init__(self, in_channels):
    method forward (line 144) | def forward(self, x):
  class MemoryEfficientAttnBlock (line 171) | class MemoryEfficientAttnBlock(nn.Module):
    method __init__ (line 179) | def __init__(self, in_channels):
    method forward (line 190) | def forward(self, x):
  class MemoryEfficientCrossAttentionWrapper (line 217) | class MemoryEfficientCrossAttentionWrapper(MemoryEfficientCrossAttention):
    method forward (line 218) | def forward(self, x, context=None, mask=None):
  function make_attn (line 226) | def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
  class Model (line 251) | class Model(nn.Module):
    method __init__ (line 252) | def __init__(
    method forward (line 360) | def forward(self, x, t=None, context=None):
    method get_last_layer (line 407) | def get_last_layer(self):
  class Encoder (line 411) | class Encoder(nn.Module):
    method __init__ (line 412) | def __init__(
    method forward (line 485) | def forward(self, x):
  class Decoder (line 513) | class Decoder(nn.Module):
    method __init__ (line 514) | def __init__(
    method forward (line 592) | def forward(self, z):
  class SimpleDecoder (line 628) | class SimpleDecoder(nn.Module):
    method __init__ (line 629) | def __init__(self, in_channels, out_channels, *args, **kwargs):
    method forward (line 645) | def forward(self, x):
  class UpsampleDecoder (line 658) | class UpsampleDecoder(nn.Module):
    method __init__ (line 659) | def __init__(self, in_channels, out_channels, ch, num_res_blocks, reso...
    method forward (line 688) | def forward(self, x):
  class LatentRescaler (line 702) | class LatentRescaler(nn.Module):
    method __init__ (line 703) | def __init__(self, factor, in_channels, mid_channels, out_channels, de...
    method forward (line 728) | def forward(self, x):
  class MergedRescaleEncoder (line 742) | class MergedRescaleEncoder(nn.Module):
    method __init__ (line 743) | def __init__(
    method forward (line 780) | def forward(self, x):
  class MergedRescaleDecoder (line 786) | class MergedRescaleDecoder(nn.Module):
    method __init__ (line 787) | def __init__(
    method forward (line 823) | def forward(self, x):
  class Upsampler (line 829) | class Upsampler(nn.Module):
    method __init__ (line 830) | def __init__(self, in_size, out_size, in_channels, out_channels, ch_mu...
    method forward (line 852) | def forward(self, x):
  class Resize (line 858) | class Resize(nn.Module):
    method __init__ (line 859) | def __init__(self, in_channels=None, learned=False, mode="bilinear"):
    method forward (line 872) | def forward(self, x, scale_factor=1.0):

FILE: examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py
  function convert_module_to_f16 (line 22) | def convert_module_to_f16(x):
  function convert_module_to_f32 (line 26) | def convert_module_to_f32(x):
  class AttentionPool2d (line 31) | class AttentionPool2d(nn.Module):
    method __init__ (line 36) | def __init__(
    method forward (line 50) | def forward(self, x):
  class TimestepBlock (line 61) | class TimestepBlock(nn.Module):
    method forward (line 67) | def forward(self, x, emb):
  class TimestepEmbedSequential (line 73) | class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
    method forward (line 79) | def forward(self, x, emb, context=None):
  class Upsample (line 90) | class Upsample(nn.Module):
    method __init__ (line 99) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd...
    method forward (line 108) | def forward(self, x):
  class TransposedUpsample (line 119) | class TransposedUpsample(nn.Module):
    method __init__ (line 122) | def __init__(self, channels, out_channels=None, ks=5):
    method forward (line 129) | def forward(self, x):
  class Downsample (line 133) | class Downsample(nn.Module):
    method __init__ (line 142) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd...
    method forward (line 155) | def forward(self, x):
  class ResBlock (line 160) | class ResBlock(TimestepBlock):
    method __init__ (line 176) | def __init__(
    method forward (line 236) | def forward(self, x, emb):
    method _forward (line 245) | def _forward(self, x, emb):
  class AttentionBlock (line 268) | class AttentionBlock(nn.Module):
    method __init__ (line 275) | def __init__(
    method forward (line 304) | def forward(self, x):
    method _forward (line 310) | def _forward(self, x):
  function count_flops_attn (line 319) | def count_flops_attn(model, _x, y):
  class QKVAttentionLegacy (line 339) | class QKVAttentionLegacy(nn.Module):
    method __init__ (line 344) | def __init__(self, n_heads):
    method forward (line 348) | def forward(self, qkv):
    method count_flops (line 365) | def count_flops(model, _x, y):
  class QKVAttention (line 369) | class QKVAttention(nn.Module):
    method __init__ (line 374) | def __init__(self, n_heads):
    method forward (line 378) | def forward(self, qkv):
    method count_flops (line 399) | def count_flops(model, _x, y):
  class UNetModel (line 403) | class UNetModel(nn.Module):
    method __init__ (line 433) | def __init__(
    method convert_to_fp16 (line 757) | def convert_to_fp16(self):
    method convert_to_fp32 (line 765) | def convert_to_fp32(self):
    method forward (line 773) | def forward(self, x, timesteps=None, context=None, y=None, **kwargs):

FILE: examples/images/diffusion/ldm/modules/diffusionmodules/upscaling.py
  class AbstractLowScaleModel (line 10) | class AbstractLowScaleModel(nn.Module):
    method __init__ (line 12) | def __init__(self, noise_schedule_config=None):
    method register_schedule (line 17) | def register_schedule(
    method q_sample (line 46) | def q_sample(self, x_start, t, noise=None):
    method forward (line 53) | def forward(self, x):
    method decode (line 56) | def decode(self, x):
  class SimpleImageConcat (line 60) | class SimpleImageConcat(AbstractLowScaleModel):
    method __init__ (line 62) | def __init__(self):
    method forward (line 66) | def forward(self, x):
  class ImageConcatWithNoiseAugmentation (line 71) | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel):
    method __init__ (line 72) | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cud...
    method forward (line 76) | def forward(self, x, noise_level=None):

FILE: examples/images/diffusion/ldm/modules/diffusionmodules/util.py
  function make_beta_schedule (line 19) | def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_e...
  function make_ddim_timesteps (line 40) | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_...
  function make_ddim_sampling_parameters (line 57) | def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbos...
  function betas_for_alpha_bar (line 73) | def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.9...
  function extract_into_tensor (line 92) | def extract_into_tensor(a, t, x_shape):
  function checkpoint (line 98) | def checkpoint(func, inputs, params, flag):
  class CheckpointFunction (line 118) | class CheckpointFunction(torch.autograd.Function):
    method forward (line 120) | def forward(ctx, run_function, length, *args):
    method backward (line 134) | def backward(ctx, *output_grads):
  function timestep_embedding (line 154) | def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=Fal...
  function zero_module (line 177) | def zero_module(module):
  function scale_module (line 186) | def scale_module(module, scale):
  function mean_flat (line 195) | def mean_flat(tensor):
  function normalization (line 202) | def normalization(channels):
  class SiLU (line 213) | class SiLU(nn.Module):
    method forward (line 214) | def forward(self, x):
  class GroupNorm32 (line 218) | class GroupNorm32(nn.GroupNorm):
    method forward (line 219) | def forward(self, x):
  function conv_nd (line 223) | def conv_nd(dims, *args, **kwargs):
  function linear (line 236) | def linear(*args, **kwargs):
  function avg_pool_nd (line 243) | def avg_pool_nd(dims, *args, **kwargs):
  class HybridConditioner (line 256) | class HybridConditioner(nn.Module):
    method __init__ (line 257) | def __init__(self, c_concat_config, c_crossattn_config):
    method forward (line 262) | def forward(self, c_concat, c_crossattn):
  function noise_like (line 268) | def noise_like(shape, device, repeat=False):

FILE: examples/images/diffusion/ldm/modules/distributions/distributions.py
  class AbstractDistribution (line 5) | class AbstractDistribution:
    method sample (line 6) | def sample(self):
    method mode (line 9) | def mode(self):
  class DiracDistribution (line 13) | class DiracDistribution(AbstractDistribution):
    method __init__ (line 14) | def __init__(self, value):
    method sample (line 17) | def sample(self):
    method mode (line 20) | def mode(self):
  class DiagonalGaussianDistribution (line 24) | class DiagonalGaussianDistribution(object):
    method __init__ (line 25) | def __init__(self, parameters, deterministic=False):
    method sample (line 35) | def sample(self):
    method kl (line 39) | def kl(self, other=None):
    method nll (line 55) | def nll(self, sample, dims=[1, 2, 3]):
    method mode (line 61) | def mode(self):
  function normal_kl (line 65) | def normal_kl(mean1, logvar1, mean2, logvar2):

FILE: examples/images/diffusion/ldm/modules/ema.py
  class LitEma (line 5) | class LitEma(nn.Module):
    method __init__ (line 6) | def __init__(self, model, decay=0.9999, use_num_upates=True):
    method reset_num_updates (line 26) | def reset_num_updates(self):
    method forward (line 30) | def forward(self, model):
    method copy_to (line 51) | def copy_to(self, model):
    method store (line 60) | def store(self, parameters):
    method restore (line 69) | def restore(self, parameters):

FILE: examples/images/diffusion/ldm/modules/encoders/modules.py
  class AbstractEncoder (line 9) | class AbstractEncoder(nn.Module):
    method __init__ (line 10) | def __init__(self):
    method encode (line 13) | def encode(self, *args, **kwargs):
  class IdentityEncoder (line 17) | class IdentityEncoder(AbstractEncoder):
    method encode (line 18) | def encode(self, x):
  class ClassEmbedder (line 22) | class ClassEmbedder(nn.Module):
    method __init__ (line 23) | def __init__(self, embed_dim, n_classes=1000, key="class", ucg_rate=0.1):
    method forward (line 30) | def forward(self, batch, key=None, disable_dropout=False):
    method get_unconditional_conditioning (line 42) | def get_unconditional_conditioning(self, bs, device="cuda"):
  function disabled_train (line 49) | def disabled_train(self, mode=True):
  class FrozenT5Embedder (line 55) | class FrozenT5Embedder(AbstractEncoder):
    method __init__ (line 58) | def __init__(
    method freeze (line 69) | def freeze(self):
    method forward (line 75) | def forward(self, text):
    method encode (line 91) | def encode(self, text):
  class FrozenCLIPEmbedder (line 95) | class FrozenCLIPEmbedder(AbstractEncoder):
    method __init__ (line 100) | def __init__(
    method freeze (line 123) | def freeze(self):
    method forward (line 129) | def forward(self, text):
    method encode (line 149) | def encode(self, text):
  class FrozenOpenCLIPEmbedder (line 153) | class FrozenOpenCLIPEmbedder(AbstractEncoder):
    method __init__ (line 164) | def __init__(
    method freeze (line 185) | def freeze(self):
    method forward (line 190) | def forward(self, text):
    method encode_with_transformer (line 195) | def encode_with_transformer(self, text):
    method text_transformer_forward (line 204) | def text_transformer_forward(self, x: torch.Tensor, attn_mask=None):
    method encode (line 214) | def encode(self, text):
  class FrozenCLIPT5Encoder (line 218) | class FrozenCLIPT5Encoder(AbstractEncoder):
    method __init__ (line 219) | def __init__(
    method encode (line 235) | def encode(self, text):
    method forward (line 238) | def forward(self, text):

FILE: examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py
  function modcrop_np (line 28) | def modcrop_np(img, sf):
  function analytic_kernel (line 48) | def analytic_kernel(k):
  function anisotropic_Gaussian (line 64) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
  function gm_blur_kernel (line 85) | def gm_blur_kernel(mean, cov, size=15):
  function shift_pixel (line 98) | def shift_pixel(x, sf, upper_left=True):
  function blur (line 127) | def blur(x, k):
  function gen_kernel (line 144) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),...
  function fspecial_gaussian (line 185) | def fspecial_gaussian(hsize, sigma):
  function fspecial_laplacian (line 199) | def fspecial_laplacian(alpha):
  function fspecial (line 208) | def fspecial(filter_type, *args, **kwargs):
  function bicubic_degradation (line 226) | def bicubic_degradation(x, sf=3):
  function srmd_degradation (line 238) | def srmd_degradation(x, k, sf=3):
  function dpsr_degradation (line 260) | def dpsr_degradation(x, k, sf=3):
  function classical_degradation (line 282) | def classical_degradation(x, k, sf=3):
  function add_sharpening (line 297) | def add_sharpening(img, weight=0.5, radius=50, threshold=10):
  function add_blur (line 323) | def add_blur(img, sf=4):
  function add_resize (line 337) | def add_resize(img, sf=4):
  function add_Gaussian_noise (line 368) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
  function add_speckle_noise (line 385) | def add_speckle_noise(img, noise_level1=2, noise_level2=25):
  function add_Poisson_noise (line 403) | def add_Poisson_noise(img):
  function add_JPEG_noise (line 417) | def add_JPEG_noise(img):
  function random_crop (line 426) | def random_crop(lq, hq, sf=4, lq_patchsize=64):
  function degradation_bsrgan (line 437) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
  function degradation_bsrgan_variant (line 532) | def degradation_bsrgan_variant(image, sf=4, isp_model=None):
  function degradation_bsrgan_plus (line 623) | def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True,...

FILE: examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py
  function modcrop_np (line 28) | def modcrop_np(img, sf):
  function analytic_kernel (line 48) | def analytic_kernel(k):
  function anisotropic_Gaussian (line 64) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
  function gm_blur_kernel (line 85) | def gm_blur_kernel(mean, cov, size=15):
  function shift_pixel (line 98) | def shift_pixel(x, sf, upper_left=True):
  function blur (line 127) | def blur(x, k):
  function gen_kernel (line 144) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),...
  function fspecial_gaussian (line 185) | def fspecial_gaussian(hsize, sigma):
  function fspecial_laplacian (line 199) | def fspecial_laplacian(alpha):
  function fspecial (line 208) | def fspecial(filter_type, *args, **kwargs):
  function bicubic_degradation (line 226) | def bicubic_degradation(x, sf=3):
  function srmd_degradation (line 238) | def srmd_degradation(x, k, sf=3):
  function dpsr_degradation (line 260) | def dpsr_degradation(x, k, sf=3):
  function classical_degradation (line 282) | def classical_degradation(x, k, sf=3):
  function add_sharpening (line 297) | def add_sharpening(img, weight=0.5, radius=50, threshold=10):
  function add_blur (line 323) | def add_blur(img, sf=4):
  function add_resize (line 341) | def add_resize(img, sf=4):
  function add_Gaussian_noise (line 372) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
  function add_speckle_noise (line 389) | def add_speckle_noise(img, noise_level1=2, noise_level2=25):
  function add_Poisson_noise (line 407) | def add_Poisson_noise(img):
  function add_JPEG_noise (line 421) | def add_JPEG_noise(img):
  function random_crop (line 430) | def random_crop(lq, hq, sf=4, lq_patchsize=64):
  function degradation_bsrgan (line 441) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
  function degradation_bsrgan_variant (line 536) | def degradation_bsrgan_variant(image, sf=4, isp_model=None, up=False):

FILE: examples/images/diffusion/ldm/modules/image_degradation/utils_image.py
  function is_image_file (line 31) | def is_image_file(filename):
  function get_timestamp (line 35) | def get_timestamp():
  function imshow (line 39) | def imshow(x, title=None, cbar=False, figsize=None):
  function surf (line 49) | def surf(Z, cmap="rainbow", figsize=None):
  function get_image_paths (line 69) | def get_image_paths(dataroot):
  function _get_paths_from_images (line 76) | def _get_paths_from_images(path):
  function patches_from_image (line 95) | def patches_from_image(img, p_size=512, p_overlap=64, p_max=800):
  function imssave (line 114) | def imssave(imgs, img_path):
  function split_imageset (line 127) | def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_si...
  function mkdir (line 156) | def mkdir(path):
  function mkdirs (line 161) | def mkdirs(paths):
  function mkdir_and_rename (line 169) | def mkdir_and_rename(path):
  function imread_uint (line 188) | def imread_uint(path, n_channels=3):
  function imsave (line 206) | def imsave(img, img_path):
  function imwrite (line 213) | def imwrite(img, img_path):
  function read_img (line 223) | def read_img(path):
  function uint2single (line 252) | def uint2single(img):
  function single2uint (line 256) | def single2uint(img):
  function uint162single (line 260) | def uint162single(img):
  function single2uint16 (line 264) | def single2uint16(img):
  function uint2tensor4 (line 274) | def uint2tensor4(img):
  function uint2tensor3 (line 281) | def uint2tensor3(img):
  function tensor2uint (line 288) | def tensor2uint(img):
  function single2tensor3 (line 301) | def single2tensor3(img):
  function single2tensor4 (line 306) | def single2tensor4(img):
  function tensor2single (line 311) | def tensor2single(img):
  function tensor2single3 (line 320) | def tensor2single3(img):
  function single2tensor5 (line 329) | def single2tensor5(img):
  function single32tensor5 (line 333) | def single32tensor5(img):
  function single42tensor4 (line 337) | def single42tensor4(img):
  function tensor2img (line 342) | def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)):
  function augment_img (line 379) | def augment_img(img, mode=0):
  function augment_img_tensor4 (line 399) | def augment_img_tensor4(img, mode=0):
  function augment_img_tensor (line 419) | def augment_img_tensor(img, mode=0):
  function augment_img_np3 (line 437) | def augment_img_np3(img, mode=0):
  function augment_imgs (line 465) | def augment_imgs(img_list, hflip=True, rot=True):
  function modcrop (line 490) | def modcrop(img_in, scale):
  function shave (line 506) | def shave(img_in, border=0):
  function rgb2ycbcr (line 525) | def rgb2ycbcr(img, only_y=True):
  function ycbcr2rgb (line 550) | def ycbcr2rgb(img):
  function bgr2ycbcr (line 571) | def bgr2ycbcr(img, only_y=True):
  function channel_convert (line 596) | def channel_convert(in_c, tar_type, img_list):
  function calculate_psnr (line 620) | def calculate_psnr(img1, img2, border=0):
  function calculate_ssim (line 641) | def calculate_ssim(img1, img2, border=0):
  function ssim (line 668) | def ssim(img1, img2):
  function cubic (line 698) | def cubic(x):
  function calculate_weights_indices (line 707) | def calculate_weights_indices(in_length, out_length, scale, kernel, kern...
  function imresize (line 766) | def imresize(img, scale, antialiasing=True):
  function imresize_np (line 841) | def imresize_np(img, scale, antialiasing=True):

FILE: examples/images/diffusion/ldm/modules/midas/api.py
  function disabled_train (line 20) | def disabled_train(self, mode=True):
  function load_midas_transform (line 26) | def load_midas_transform(model_type):
  function load_model (line 71) | def load_model(model_type):
  class MiDaSInference (line 137) | class MiDaSInference(nn.Module):
    method __init__ (line 146) | def __init__(self, model_type):
    method forward (line 153) | def forward(self, x):

FILE: examples/images/diffusion/ldm/modules/midas/midas/base_model.py
  class BaseModel (line 4) | class BaseModel(torch.nn.Module):
    method load (line 5) | def load(self, path):

FILE: examples/images/diffusion/ldm/modules/midas/midas/blocks.py
  function _make_encoder (line 7) | def _make_encoder(
  function _make_scratch (line 51) | def _make_scratch(in_shape, out_shape, groups=1, expand=False):
  function _make_pretrained_efficientnet_lite3 (line 80) | def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False):
  function _make_efficientnet_backbone (line 87) | def _make_efficientnet_backbone(effnet):
  function _make_resnet_backbone (line 98) | def _make_resnet_backbone(resnet):
  function _make_pretrained_resnext101_wsl (line 109) | def _make_pretrained_resnext101_wsl(use_pretrained):
  class Interpolate (line 114) | class Interpolate(nn.Module):
    method __init__ (line 117) | def __init__(self, scale_factor, mode, align_corners=False):
    method forward (line 131) | def forward(self, x):
  class ResidualConvUnit (line 146) | class ResidualConvUnit(nn.Module):
    method __init__ (line 149) | def __init__(self, features):
    method forward (line 163) | def forward(self, x):
  class FeatureFusionBlock (line 180) | class FeatureFusionBlock(nn.Module):
    method __init__ (line 183) | def __init__(self, features):
    method forward (line 194) | def forward(self, *xs):
  class ResidualConvUnit_custom (line 212) | class ResidualConvUnit_custom(nn.Module):
    method __init__ (line 215) | def __init__(self, features, activation, bn):
    method forward (line 239) | def forward(self, x):
  class FeatureFusionBlock_custom (line 267) | class FeatureFusionBlock_custom(nn.Module):
    method __init__ (line 270) | def __init__(self, features, activation, deconv=False, bn=False, expan...
    method forward (line 295) | def forward(self, *xs):

FILE: examples/images/diffusion/ldm/modules/midas/midas/dpt_depth.py
  function _make_fusion_block (line 9) | def _make_fusion_block(features, use_bn):
  class DPT (line 20) | class DPT(BaseModel):
    method __init__ (line 21) | def __init__(
    method forward (line 59) | def forward(self, x):
  class DPTDepthModel (line 80) | class DPTDepthModel(DPT):
    method __init__ (line 81) | def __init__(self, path=None, non_negative=True, **kwargs):
    method forward (line 99) | def forward(self, x):

FILE: examples/images/diffusion/ldm/modules/midas/midas/midas_net.py
  class MidasNet (line 13) | class MidasNet(BaseModel):
    method __init__ (line 16) | def __init__(self, path=None, features=256, non_negative=True):
    method forward (line 51) | def forward(self, x):

FILE: examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py
  class MidasNet_small (line 13) | class MidasNet_small(BaseModel):
    method __init__ (line 16) | def __init__(
    method forward (line 90) | def forward(self, x):
  function fuse_model (line 123) | def fuse_model(m):

FILE: examples/images/diffusion/ldm/modules/midas/midas/transforms.py
  function apply_min_size (line 7) | def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AR...
  class Resize (line 45) | class Resize(object):
    method __init__ (line 48) | def __init__(
    method constrain_to_multiple_of (line 90) | def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
    method get_size (line 101) | def get_size(self, width, height):
    method __call__ (line 148) | def __call__(self, sample):
  class NormalizeImage (line 179) | class NormalizeImage(object):
    method __init__ (line 182) | def __init__(self, mean, std):
    method __call__ (line 186) | def __call__(self, sample):
  class PrepareForNet (line 192) | class PrepareForNet(object):
    method __init__ (line 195) | def __init__(self):
    method __call__ (line 198) | def __call__(self, sample):

FILE: examples/images/diffusion/ldm/modules/midas/midas/vit.py
  class Slice (line 10) | class Slice(nn.Module):
    method __init__ (line 11) | def __init__(self, start_index=1):
    method forward (line 15) | def forward(self, x):
  class AddReadout (line 19) | class AddReadout(nn.Module):
    method __init__ (line 20) | def __init__(self, start_index=1):
    method forward (line 24) | def forward(self, x):
  class ProjectReadout (line 32) | class ProjectReadout(nn.Module):
    method __init__ (line 33) | def __init__(self, in_features, start_index=1):
    method forward (line 39) | def forward(self, x):
  class Transpose (line 46) | class Transpose(nn.Module):
    method __init__ (line 47) | def __init__(self, dim0, dim1):
    method forward (line 52) | def forward(self, x):
  function forward_vit (line 57) | def forward_vit(pretrained, x):
  function _resize_pos_embed (line 101) | def _resize_pos_embed(self, posemb, gs_h, gs_w):
  function forward_flex (line 118) | def forward_flex(self, x):
  function get_activation (line 154) | def get_activation(name):
  function get_readout_oper (line 161) | def get_readout_oper(vit_features, features, use_readout, start_index=1):
  function _make_vit_b16_backbone (line 174) | def _make_vit_b16_backbone(
  function _make_pretrained_vitl16_384 (line 286) | def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=...
  function _make_pretrained_vitb16_384 (line 299) | def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=...
  function _make_pretrained_deitb16_384 (line 306) | def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks...
  function _make_pretrained_deitb16_distil_384 (line 313) | def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore"...
  function _make_vit_b_rn50_backbone (line 326) | def _make_vit_b_rn50_backbone(
  function _make_pretrained_vitb_rn50_384 (line 451) | def _make_pretrained_vitb_rn50_384(pretrained, use_readout="ignore", hoo...

FILE: examples/images/diffusion/ldm/modules/midas/utils.py
  function read_pfm (line 11) | def read_pfm(path):
  function write_pfm (line 59) | def write_pfm(path, image, scale=1):
  function read_image (line 96) | def read_image(path):
  function resize_image (line 115) | def resize_image(img):
  function resize_depth (line 143) | def resize_depth(depth, width, height):
  function write_depth (line 161) | def write_depth(path, depth, bits=1):

FILE: examples/images/diffusion/ldm/util.py
  function log_txt_as_img (line 10) | def log_txt_as_img(wh, xc, size=10):
  function ismap (line 34) | def ismap(x):
  function isimage (line 40) | def isimage(x):
  function exists (line 46) | def exists(x):
  function default (line 50) | def default(val, d):
  function mean_flat (line 56) | def mean_flat(tensor):
  function count_params (line 64) | def count_params(model, verbose=False):
  function instantiate_from_config (line 71) | def instantiate_from_config(config):
  function get_obj_from_str (line 81) | def get_obj_from_str(string, reload=False):
  class AdamWwithEMAandWings (line 89) | class AdamWwithEMAandWings(optim.Optimizer):
    method __init__ (line 91) | def __init__(
    method __setstate__ (line 128) | def __setstate__(self, state):
    method step (line 134) | def step(self, closure=None):

FILE: examples/images/diffusion/main.py
  class DataLoaderX (line 34) | class DataLoaderX(DataLoader):
    method __iter__ (line 36) | def __iter__(self):
  function get_parser (line 42) | def get_parser(**parser_kwargs):
  function nondefault_trainer_args (line 161) | def nondefault_trainer_args(opt):
  class WrappedDataset (line 173) | class WrappedDataset(Dataset):
    method __init__ (line 176) | def __init__(self, dataset):
    method __len__ (line 179) | def __len__(self):
    method __getitem__ (line 182) | def __getitem__(self, idx):
  function worker_init_fn (line 187) | def worker_init_fn(_):
  class DataModuleFromConfig (line 207) | class DataModuleFromConfig(pl.LightningDataModule):
    method __init__ (line 208) | def __init__(
    method prepare_data (line 242) | def prepare_data(self):
    method setup (line 247) | def setup(self, stage=None):
    method _train_dataloader (line 256) | def _train_dataloader(self):
    method _val_dataloader (line 273) | def _val_dataloader(self, shuffle=False):
    method _test_dataloader (line 288) | def _test_dataloader(self, shuffle=False):
    method _predict_dataloader (line 308) | def _predict_dataloader(self, shuffle=False):
  class SetupCallback (line 318) | class SetupCallback(Callback):
    method __init__ (line 321) | def __init__(self, resume, now, logdir, ckptdir, cfgdir, config, light...
    method on_keyboard_interrupt (line 332) | def on_keyboard_interrupt(self, trainer, pl_module):
    method on_fit_start (line 340) | def on_fit_start(self, trainer, pl_module):
  class ImageLogger (line 383) | class ImageLogger(Callback):
    method __init__ (line 384) | def __init__(
    method _testtube (line 415) | def _testtube(
    method log_local (line 432) | def log_local(
    method log_img (line 456) | def log_img(self, pl_module, batch, batch_idx, split="train"):
    method check_frequency (line 500) | def check_frequency(self, check_idx):
    method on_train_batch_end (line 512) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch...
    method on_validation_batch_end (line 518) | def on_validation_batch_end(self, trainer, pl_module, outputs, batch, ...
  class CUDACallback (line 527) | class CUDACallback(Callback):
    method on_train_start (line 530) | def on_train_start(self, trainer, pl_module):
    method on_train_end (line 534) | def on_train_end(self, trainer, pl_module):
    method on_train_epoch_start (line 537) | def on_train_epoch_start(self, trainer, pl_module):
    method on_train_epoch_end (line 543) | def on_train_epoch_end(self, trainer, pl_module):
  function melk (line 839) | def melk(*args, **kwargs):
  function divein (line 846) | def divein(*args, **kwargs):

FILE: examples/images/diffusion/scripts/img2img.py
  function chunk (line 30) | def chunk(it, size):
  function load_model_from_config (line 35) | def load_model_from_config(config, ckpt, verbose=False):
  function load_img (line 54) | def load_img(path):
  function main (line 66) | def main():

FILE: examples/images/diffusion/scripts/inpaint.py
  function make_batch (line 14) | def make_batch(image, mask, device):

FILE: examples/images/diffusion/scripts/knn2img.py
  function chunk (line 36) | def chunk(it, size):
  function load_model_from_config (line 41) | def load_model_from_config(config, ckpt, verbose=False):
  class Searcher (line 61) | class Searcher(object):
    method __init__ (line 62) | def __init__(self, database, retriever_version="ViT-L/14"):
    method train_searcher (line 73) | def train_searcher(self, k, metric="dot_product", searcher_savedir=None):
    method load_single_file (line 86) | def load_single_file(self, saved_embeddings):
    method load_multi_files (line 91) | def load_multi_files(self, data_archive):
    method load_database (line 99) | def load_database(self):
    method load_retriever (line 119) | def load_retriever(
    method load_searcher (line 129) | def load_searcher(self):
    method search (line 134) | def search(self, x, k):
    method __call__ (line 164) | def __call__(self, x, n):

FILE: examples/images/diffusion/scripts/sample_diffusion.py
  function custom_to_pil (line 20) | def custom_to_pil(x):
  function custom_to_np (line 32) | def custom_to_np(x):
  function logs2pil (line 41) | def logs2pil(logs, keys=["sample"]):
  function convsample (line 59) | def convsample(model, shape, return_intermediates=True, verbose=True, ma...
  function convsample_ddim (line 67) | def convsample_ddim(model, steps, shape, eta=1.0):
  function make_convolutional_sample (line 82) | def make_convolutional_sample(
  function run (line 116) | def run(model, logdir, batch_size=50, vanilla=False, custom_steps=None, ...
  function save_logs (line 150) | def save_logs(logs, path, n_saved=0, key="sample", np_path=None):
  function get_parser (line 169) | def get_parser():
  function load_model_from_config (line 202) | def load_model_from_config(config, sd):
  function load_model (line 210) | def load_model(config, ckpt, gpu, eval_mode):

FILE: examples/images/diffusion/scripts/tests/test_watermark.py
  function testit (line 6) | def testit(img_path):

FILE: examples/images/diffusion/scripts/train_searcher.py
  function search_bruteforce (line 13) | def search_bruteforce(searcher):
  function search_partioned_ah (line 17) | def search_partioned_ah(
  function search_ah (line 30) | def search_ah(searcher, dims_per_block, aiq_threshold, reorder_k):
  function load_datapool (line 36) | def load_datapool(dpath):
  function train_searcher (line 71) | def train_searcher(

FILE: examples/images/diffusion/scripts/txt2img.py
  function chunk (line 32) | def chunk(it, size):
  function load_model_from_config (line 37) | def load_model_from_config(config, ckpt, verbose=False):
  function parse_args (line 56) | def parse_args():
  function put_watermark (line 184) | def put_watermark(img, wm_encoder=None):
  function main (line 192) | def main(opt):

FILE: examples/images/diffusion/scripts/utils.py
  class Linear8bit (line 6) | class Linear8bit(nn.Linear):
    method __init__ (line 7) | def __init__(
    method quant (line 31) | def quant(self):
    method forward (line 40) | def forward(self, x):
  function replace_module (line 54) | def replace_module(model):
  function getModelSize (line 70) | def getModelSize(model):

FILE: examples/images/dreambooth/train_dreambooth.py
  function import_model_class_from_model_name_or_path (line 27) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_...
  function parse_args (line 47) | def parse_args(input_args=None):
  class DreamBoothDataset (line 240) | class DreamBoothDataset(Dataset):
    method __init__ (line 246) | def __init__(
    method __len__ (line 288) | def __len__(self):
    method __getitem__ (line 291) | def __getitem__(self, index):
  class PromptDataset (line 319) | class PromptDataset(Dataset):
    method __init__ (line 322) | def __init__(self, prompt, num_samples):
    method __len__ (line 326) | def __len__(self):
    method __getitem__ (line 329) | def __getitem__(self, index):
  function get_full_repo_name (line 336) | def get_full_repo_name(model_id: str, organization: Optional[str] = None...
  function main (line 346) | def main(args):

FILE: examples/images/dreambooth/train_dreambooth_colossalai.py
  function import_model_class_from_model_name_or_path (line 33) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_...
  function parse_args (line 53) | def parse_args(input_args=None):
  class DreamBoothDataset (line 264) | class DreamBoothDataset(Dataset):
    method __init__ (line 270) | def __init__(
    method __len__ (line 315) | def __len__(self):
    method __getitem__ (line 318) | def __getitem__(self, index):
  class PromptDataset (line 346) | class PromptDataset(Dataset):
    method __init__ (line 349) | def __init__(self, prompt, num_samples):
    method __len__ (line 353) | def __len__(self):
    method __getitem__ (line 356) | def __getitem__(self, index):
  function get_full_repo_name (line 363) | def get_full_repo_name(model_id: str, organization: Optional[str] = None...
  function main (line 373) | def main(args):

FILE: examples/images/dreambooth/train_dreambooth_colossalai_lora.py
  function import_model_class_from_model_name_or_path (line 36) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_...
  function parse_args (line 56) | def parse_args(input_args=None):
  class DreamBoothDataset (line 266) | class DreamBoothDataset(Dataset):
    method __init__ (line 272) | def __init__(
    method __len__ (line 314) | def __len__(self):
    method __getitem__ (line 317) | def __getitem__(self, index):
  class PromptDataset (line 345) | class PromptDataset(Dataset):
    method __init__ (line 348) | def __init__(self, prompt, num_samples):
    method __len__ (line 352) | def __len__(self):
    method __getitem__ (line 355) | def __getitem__(self, index):
  function get_full_repo_name (line 362) | def get_full_repo_name(model_id: str, organization: Optional[str] = None...
  function main (line 372) | def main(args):

FILE: examples/images/dreambooth/train_dreambooth_inpaint.py
  function prepare_mask_and_masked_image (line 35) | def prepare_mask_and_masked_image(image, mask):
  function random_mask (line 53) | def random_mask(im_shape, ratio=1, mask_full_image=False):
  function parse_args (line 77) | def parse_args():
  class DreamBoothDataset (line 255) | class DreamBoothDataset(Dataset):
    method __init__ (line 261) | def __init__(
    method __len__ (line 303) | def __len__(self):
    method __getitem__ (line 306) | def __getitem__(self, index):
  class PromptDataset (line 338) | class PromptDataset(Dataset):
    method __init__ (line 341) | def __init__(self, prompt, num_samples):
    method __len__ (line 345) | def __len__(self):
    method __getitem__ (line 348) | def __getitem__(self, index):
  function get_full_repo_name (line 355) | def get_full_repo_name(model_id: str, organization: Optional[str] = None...
  function main (line 365) | def main():

FILE: examples/images/resnet/train.py
  function build_dataloader (line 30) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug...
  function evaluate (line 54) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:...
  function train_epoch (line 73) | def train_epoch(
  function main (line 100) | def main():

FILE: examples/images/vit/args.py
  function parse_demo_args (line 4) | def parse_demo_args():
  function parse_benchmark_args (line 54) | def parse_benchmark_args():

FILE: examples/images/vit/data.py
  class BeansDataset (line 6) | class BeansDataset(Dataset):
    method __init__ (line 7) | def __init__(self, image_processor, tp_size=1, split="train"):
    method __len__ (line 20) | def __len__(self):
    method __getitem__ (line 23) | def __getitem__(self, idx):
    method process_example (line 26) | def process_example(self, example):
  function beans_collator (line 32) | def beans_collator(batch):

FILE: examples/images/vit/vit_benchmark.py
  function format_num (line 17) | def format_num(num: int, bytes=False):
  function get_data_batch (line 27) | def get_data_batch(batch_size, num_labels, num_channels=3, height=224, w...
  function colo_memory_cap (line 35) | def colo_memory_cap(size_in_GB):
  function main (line 45) | def main():

FILE: examples/images/vit/vit_train_demo.py
  function move_to_cuda (line 24) | def move_to_cuda(batch, device):
  function run_forward_backward (line 28) | def run_forward_backward(
  function train_epoch (line 54) | def train_epoch(
  function evaluate_model (line 89) | def evaluate_model(
  function main (line 136) | def main():

FILE: examples/inference/benchmark_ops/benchmark_context_attn_unpad.py
  function bench_kernel (line 38) | def bench_kernel(

FILE: examples/inference/benchmark_ops/benchmark_decoding_attn.py
  function bench_kernel (line 44) | def bench_kernel(

FILE: examples/inference/benchmark_ops/benchmark_flash_decoding_attention.py
  function prepare_data (line 43) | def prepare_data(
  function benchmark_flash_decoding_attention (line 70) | def benchmark_flash_decoding_attention(

FILE: examples/inference/benchmark_ops/benchmark_fused_rotary_embdding_unpad.py
  function benchmark_rotary_emb (line 49) | def benchmark_rotary_emb(

FILE: examples/inference/benchmark_ops/benchmark_kv_cache_memcopy.py
  function benchmark_kvcache_copy (line 39) | def benchmark_kvcache_copy(

FILE: examples/inference/benchmark_ops/benchmark_rmsnorm.py
  function benchmark_rms_layernorm (line 44) | def benchmark_rms_layernorm(

FILE: examples/inference/benchmark_ops/benchmark_rotary_embedding.py
  function torch_rotary_emb (line 26) | def torch_rotary_emb(x, cos, sin):
  function benchmark_rotary_emb (line 38) | def benchmark_rotary_emb(

FILE: examples/inference/benchmark_ops/benchmark_xine_copy.py
  function benchmark_get_xine_cache (line 29) | def benchmark_get_xine_cache(

FILE: examples/inference/client/locustfile.py
  class QuickstartUser (line 4) | class QuickstartUser(HttpUser):
    method completion (line 9) | def completion(self):
    method completion_streaming (line 14) | def completion_streaming(self):
    method chat (line 19) | def chat(self):
    method chat_streaming (line 33) | def chat_streaming(self):
    method generate_streaming (line 48) | def generate_streaming(self):
    method generate (line 53) | def generate(self):
    method health_check (line 58) | def health_check(self):

FILE: examples/inference/llama/benchmark_llama.py
  function data_gen (line 73) | def data_gen(batch_size: int = 4, seq_len: int = 512):
  function print_details_info (line 78) | def print_details_info(model_config, args, whole_end2end, total_token_num):
  function benchmark_inference (line 104) | def benchmark_inference(args):
  function hybrid_inference (line 233) | def hybrid_inference(rank, world_size, port, args):
  function benchmark (line 240) | def benchmark(args):

FILE: examples/inference/llama/benchmark_llama3.py
  function data_gen (line 80) | def data_gen(batch_size: int = 4, seq_len: int = 512):
  function print_details_info (line 85) | def print_details_info(model_config, whole_end2end, total_token_num, dty...
  function benchmark_inference (line 111) | def benchmark_inference(args):
  function inference (line 194) | def inference(rank, world_size, port, args):
  function benchmark (line 201) | def benchmark(args):

FILE: examples/inference/llama/llama_generation.py
  function infer (line 23) | def infer(args):

FILE: examples/inference/stable_diffusion/benchmark_sd3.py
  function log_generation_time (line 25) | def log_generation_time(log_data, log_file):
  function warmup (line 31) | def warmup(engine, args):
  function profile_context (line 41) | def profile_context(args):
  function log_and_profile (line 57) | def log_and_profile(h, w, avg_time, log_msg, args, model_name, mode, pro...
  function benchmark_colossalai (line 83) | def benchmark_colossalai(rank, world_size, port, args):
  function benchmark_diffusers (line 117) | def benchmark_diffusers(args):
  function benchmark (line 144) | def benchmark(args):

FILE: examples/inference/stable_diffusion/compute_metric.py
  function read_image (line 15) | def read_image(path: str):
  class MultiImageDataset (line 27) | class MultiImageDataset(Dataset):
    method __init__ (line 28) | def __init__(self, root0, root1, is_gt=False):
    method __len__ (line 40) | def __len__(self):
    method __getitem__ (line 43) | def __getitem__(self, idx):

FILE: examples/inference/stable_diffusion/sd3_generation.py
  function infer (line 23) | def infer(args):

FILE: examples/language/bert/benchmark.py
  class RandintDataset (line 34) | class RandintDataset(Dataset):
    method __init__ (line 35) | def __init__(self, dataset_length: int, sequence_length: int, vocab_si...
    method __len__ (line 51) | def __len__(self):
    method __getitem__ (line 54) | def __getitem__(self, idx):
  function main (line 58) | def main():

FILE: examples/language/bert/benchmark_utils.py
  function format_num (line 20) | def format_num(num: int, bytes=False):
  function _is_valid (line 30) | def _is_valid(val):
  function get_call_arg_names (line 34) | def get_call_arg_names(module_or_fn):
  function measure_params (line 40) | def measure_params(model):
  function warm_up (line 51) | def warm_up(
  function fmt (line 74) | def fmt(d: dict):
  function benchmark (line 78) | def benchmark(

FILE: examples/language/bert/data.py
  class GLUEDataBuilder (line 7) | class GLUEDataBuilder:
    method __init__ (line 44) | def __init__(
    method setup (line 67) | def setup(self):
    method prepare_data (line 81) | def prepare_data(self):
    method train_dataloader (line 85) | def train_dataloader(self):
    method val_dataloader (line 90) | def val_dataloader(self):
    method test_dataloader (line 100) | def test_dataloader(self):
    method convert_to_features (line 109) | def convert_to_features(self, example_batch):

FILE: examples/language/bert/finetune.py
  function move_to_cuda (line 40) | def move_to_cuda(batch):
  function evaluate_model (line 45) | def evaluate_model(
  function train_epoch (line 128) | def train_epoch(
  function main (line 171) | def main():

FILE: examples/language/commons/utils.py
  function get_data (line 5) | def get_data(batch_size, seq_len, vocab_size):
  function get_tflops (line 11) | def get_tflops(model_numel, batch_size, seq_len, step_time):

FILE: examples/language/data_utils.py
  class StatefulDistributedSampler (line 14) | class StatefulDistributedSampler(DistributedSampler):
    method __init__ (line 15) | def __init__(
    method __iter__ (line 27) | def __iter__(self) -> Iterator:
    method __len__ (line 33) | def __len__(self) -> int:
    method set_start_index (line 36) | def set_start_index(self, start_index: int) -> None:
  function prepare_dataloader (line 40) | def prepare_dataloader(
  function load_json (line 97) | def load_json(file_path: str):
  function save_json (line 102) | def save_json(data, file_path: str):
  class RandomDataset (line 107) | class RandomDataset(Dataset):
    method __init__ (line 108) | def __init__(self, num_samples: int = 1000, max_length: int = 2048, vo...
    method __len__ (line 116) | def __len__(self):
    method __getitem__ (line 119) | def __getitem__(self, idx):

FILE: examples/language/deepseek/benchmark.py
  function main (line 75) | def main():

FILE: examples/language/gpt/experiments/auto_offload/model_zoo.py
  class GPTLMModel (line 6) | class GPTLMModel(nn.Module):
    method __init__ (line 7) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads...
    method forward (line 20) | def forward(self, input_ids, attention_mask):
  class GPTLMLoss (line 25) | class GPTLMLoss(nn.Module):
    method __init__ (line 26) | def __init__(self):
    method forward (line 30) | def forward(self, logits, labels):
  function get_gpt2_components (line 37) | def get_gpt2_components(model_type: str, batch_size: int):

FILE: examples/language/gpt/experiments/auto_offload/train_gpt_offload.py
  function parse_args (line 19) | def parse_args():
  function train_gpt (line 29) | def train_gpt(args):
  function run (line 96) | def run(rank, world_size, port, args):

FILE: examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py
  function get_cpu_mem (line 24) | def get_cpu_mem():
  function get_gpu_mem (line 28) | def get_gpu_mem():
  function get_mem_info (line 32) | def get_mem_info(prefix=""):
  function get_tflops (line 36) | def get_tflops(model_numel, batch_size, seq_len, step_time):
  function get_data (line 42) | def get_data(batch_size, seq_len, vocab_size):
  function main (line 48) | def main():

FILE: examples/language/gpt/experiments/auto_parallel/gpt_modules.py
  class GPT2MLP (line 10) | class GPT2MLP(nn.Module):
    method __init__ (line 11) | def __init__(self, intermediate_size, config):
    method forward (line 19) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -...
  class GPT2Attention (line 31) | class GPT2Attention(nn.Module):
    method __init__ (line 32) | def __init__(self, config, layer_idx=None):
    method _attn (line 62) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _split_heads (line 92) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 97) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 102) | def forward(
  class GPT2Block (line 117) | class GPT2Block(nn.Module):
    method __init__ (line 118) | def __init__(self, config, layer_idx=None):
    method forward (line 127) | def forward(
  class GPT2Model (line 151) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 152) | def __init__(self, config):
    method forward (line 167) | def forward(
  class GPT2LMHeadModel (line 213) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 214) | def __init__(self, config):
    method forward (line 222) | def forward(
  class GPTLMLoss (line 236) | class GPTLMLoss(nn.Module):
    method __init__ (line 237) | def __init__(self):
    method forward (line 241) | def forward(self, logits, labels):

FILE: examples/language/gpt/experiments/pipeline_parallel/model_zoo.py
  class GPTLMModel (line 6) | class GPTLMModel(nn.Module):
    method __init__ (line 7) | def __init__(
    method forward (line 30) | def forward(self, input_ids, attention_mask):
  function gpt2_medium (line 35) | def gpt2_medium(checkpoint=False):
  function gpt2_xl (line 39) | def gpt2_xl(checkpoint=True):
  function gpt2_10b (line 43) | def gpt2_10b(checkpoint=True):
  function gpt2_14b (line 47) | def gpt2_14b(checkpoint=True):
  function gpt2_20b (line 51) | def gpt2_20b(checkpoint=True):
  function gpt2_24b (line 55) | def gpt2_24b(checkpoint=True):
  function model_builder (line 59) | def model_builder(model_size: str) -> callable:

FILE: examples/language/gpt/experiments/pipeline_parallel/train_gpt_pp.py
  function parse_args (line 18) | def parse_args():
  class GPTLMLoss (line 33) | class GPTLMLoss(nn.Module):
    method __init__ (line 34) | def __init__(self):
    method forward (line 38) | def forward(self, logits, labels):
  function get_data (line 46) | def get_data(batch_size, seq_len, vocab_size):
  function get_tflops (line 52) | def get_tflops(model_numel, batch_size, seq_len, step_time):
  function get_annotated_model (line 57) | def get_annotated_model(model, data_kwargs, num_stages, num_microbatches):
  function create_partition_module (line 73) | def create_partition_module(pp_rank: int, num_stages: int, model, data_k...
  function partition (line 83) | def partition(model, data_kwargs, num_microbatches, pp_rank: int, chunk:...
  function run_master (line 88) | def run_master(args):

FILE: examples/language/gpt/gemini/commons/model_zoo.py
  class GPTLMModel (line 6) | class GPTLMModel(nn.Module):
    method __init__ (line 7) | def __init__(
    method forward (line 30) | def forward(self, input_ids, attention_mask):
  function gpt2_medium (line 35) | def gpt2_medium(checkpoint=False):
  function gpt2_xl (line 39) | def gpt2_xl(checkpoint=True):
  function gpt2_10b (line 43) | def gpt2_10b(checkpoint=True):
  function gpt2_14b (line 47) | def gpt2_14b(checkpoint=True):
  function gpt2_20b (line 51) | def gpt2_20b(checkpoint=True):
  function gpt2_24b (line 55) | def gpt2_24b(checkpoint=True):
  function gpt2_30b (line 59) | def gpt2_30b(checkpoint=True):
  function gpt2_40b (line 63) | def gpt2_40b(checkpoint=True):
  function model_builder (line 67) | def model_builder(model_size: str) -> callable:

FILE: examples/language/gpt/gemini/commons/utils.py
  class DummyProfiler (line 6) | class DummyProfiler:
    method __init__ (line 7) | def __init__(self):
    method step (line 10) | def step(self):
  function get_data (line 15) | def get_data(batch_size, seq_len, vocab_size):
  function get_tflops (line 21) | def get_tflops(model_numel, batch_size, seq_len, step_time):
  function get_time_stamp (line 25) | def get_time_stamp():

FILE: examples/language/gpt/gemini/train_gpt_demo.py
  function parse_args (line 26) | def parse_args():
  class GPTLMLoss (line 57) | class GPTLMLoss(nn.Module):
    method __init__ (line 58) | def __init__(self):
    method forward (line 62) | def forward(self, logits, labels):
  function get_cpu_mem (line 69) | def get_cpu_mem():
  function get_gpu_mem (line 73) | def get_gpu_mem():
  function get_mem_info (line 77) | def get_mem_info(prefix=""):
  function get_model_size (line 81) | def get_model_size(model: nn.Module):
  function model_size_formatter (line 89) | def model_size_formatter(numel: int) -> str:
  function set_cpu_maximum_parallelism (line 103) | def set_cpu_maximum_parallelism():
  function main (line 111) | def main():

FILE: examples/language/gpt/hybridparallelism/benchmark.py
  function main (line 35) | def main():

FILE: examples/language/gpt/hybridparallelism/data.py
  class GLUEDataBuilder (line 7) | class GLUEDataBuilder:
    method __init__ (line 44) | def __init__(
    method setup (line 69) | def setup(self):
    method prepare_data (line 83) | def prepare_data(self):
    method train_dataloader (line 87) | def train_dataloader(self):
    method val_dataloader (line 92) | def val_dataloader(self):
    method test_dataloader (line 101) | def test_dataloader(self):
    method convert_to_features (line 110) | def convert_to_features(self, example_batch):

FILE: examples/language/gpt/hybridparallelism/finetune.py
  function move_to_cuda (line 37) | def move_to_cuda(batch):
  function evaluate_model (line 42) | def evaluate_model(
  function train_epoch (line 124) | def train_epoch(
  function main (line 170) | def main():

FILE: examples/language/gpt/titans/dataset/webtext.py
  class WebtextDataset (line 13) | class WebtextDataset(Dataset):
    method __init__ (line 14) | def __init__(self, path: Optional[str] = None, seq_len=1024) -> None:
    method __len__ (line 38) | def __len__(self):
    method __getitem__ (line 41) | def __getitem__(self, index):

FILE: examples/language/gpt/titans/model/embed.py
  class VocabParallelEmbedding (line 18) | class VocabParallelEmbedding(torch.nn.Module):
    method __init__ (line 32) | def __init__(
    method zero_parameters (line 65) | def zero_parameters(self):
    method add_tokentype_embeddings (line 75) | def add_tokentype_embeddings(self, num_tokentypes):
    method forward (line 89) | def forward(self, input_ids, position_ids=None, tokentype_ids=None):
    method state_dict_for_save_checkpoint (line 112) | def state_dict_for_save_checkpoint(self, destination=None, prefix="", ...
    method load_state_dict (line 125) | def load_state_dict(self, state_dict, strict=True):
  class VocabParallelEmbedding1D (line 168) | class VocabParallelEmbedding1D(torch.nn.Module):
    method __init__ (line 179) | def __init__(self, num_embeddings, embedding_dim, dtype=None, init_met...
    method forward (line 203) | def forward(self, input_):
  class vocab_parallel_cross_entropy (line 231) | class vocab_parallel_cross_entropy(nn.Module):
    method __init__ (line 232) | def __init__(self):
    method forward (line 235) | def forward(self, vocab_parallel_logits, target):
  class _VocabParallelCrossEntropy (line 244) | class _VocabParallelCrossEntropy(torch.autograd.Function):
    method forward (line 246) | def forward(ctx, vocab_parallel_logits, target):
    method backward (line 299) | def backward(ctx, grad_output):
  class VocabUtility (line 319) | class VocabUtility:
    method vocab_range_from_per_partition_vocab_size (line 325) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size...
    method vocab_range_from_global_vocab_size (line 331) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_...
  class VocabParallelGPTLMHead1D (line 336) | class VocabParallelGPTLMHead1D(ParallelLayer):
    method __init__ (line 341) | def __init__(self, embed=None, vocab_size=None, dtype=None, embed_dim=...
    method forward (line 348) | def forward(self, x: Tensor) -> Tensor:
  class HiddenParallelEmbedding (line 357) | class HiddenParallelEmbedding(torch.nn.Module):
    method __init__ (line 371) | def __init__(
    method zero_parameters (line 411) | def zero_parameters(self):
    method add_tokentype_embeddings (line 421) | def add_tokentype_embeddings(self, num_tokentypes):
    method forward (line 435) | def forward(self, input_ids, position_ids=None, tokentype_ids=None):
    method state_dict_for_save_checkpoint (line 457) | def state_dict_for_save_checkpoint(self, destination=None, prefix="", ...
    method load_state_dict (line 470) | def load_state_dict(self, state_dict, strict=True):
  class HiddenParallelEmbedding1D (line 513) | class HiddenParallelEmbedding1D(torch.nn.Module):
    method __init__ (line 524) | def __init__(self, num_embeddings, embedding_dim, dtype=torch.float, p...
    method forward (line 543) | def forward(self, input_):
  class HiddenParallelGPTLMHead1D (line 555) | class HiddenParallelGPTLMHead1D(ParallelLayer):
    method __init__ (line 560) | def __init__(
    method forward (line 579) | def forward(self, x: Tensor) -> Tensor:

FILE: examples/language/gpt/titans/model/gpt1d.py
  class GPTMLP1D (line 29) | class GPTMLP1D(ParallelLayer):
    method __init__ (line 30) | def __init__(
    method _forward (line 69) | def _forward(self, hidden_states: Tensor) -> Tensor:
    method _checkpoint_forward (line 77) | def _checkpoint_forward(self, hidden_states: Tensor) -> Tensor:
    method forward (line 80) | def forward(self, hidden_states: Tensor) -> Tensor:
  class GenericGPTSelfAttention1D (line 87) | class GenericGPTSelfAttention1D(ParallelLayer):
    method __init__ (line 88) | def __init__(
    method softmax_forward (line 118) | def softmax_forward(self, attention_scores, attention_mask, query_laye...
    method _forward (line 121) | def _forward(self, hidden_states: Tensor, attention_mask=None) -> Tensor:
    method _checkpoint_forward (line 148) | def _checkpoint_forward(self, hidden_states: Tensor, attention_mask=No...
    method forward (line 151) | def forward(self, hidden_states: Tensor, attention_mask=None) -> Tensor:
  class GPTSelfAttention1D (line 158) | class GPTSelfAttention1D(GenericGPTSelfAttention1D):
    method __init__ (line 159) | def __init__(
    method softmax_forward (line 188) | def softmax_forward(self, attention_scores, attention_mask, query_laye...
  class FusedGPTSelfAttention1D (line 201) | class FusedGPTSelfAttention1D(GenericGPTSelfAttention1D):
    method __init__ (line 202) | def __init__(
    method softmax_forward (line 231) | def softmax_forward(self, attention_scores, attention_mask, query_laye...
  class GenericGPTTransformerLayer1D (line 235) | class GenericGPTTransformerLayer1D(ParallelLayer):
    method __init__ (line 236) | def __init__(
    method _forward (line 277) | def _forward(self, hidden_states, attention_mask) -> Tensor:
    method forward (line 297) | def forward(self, hidden_states, attention_mask):
  class GPTTransformerLayer1D (line 304) | class GPTTransformerLayer1D(GenericGPTTransformerLayer1D):
    method __init__ (line 305) | def __init__(
  class FusedGPTTransformerLayer1D (line 338) | class FusedGPTTransformerLayer1D(GenericGPTTransformerLayer1D):
    method __init__ (line 339) | def __init__(

FILE: examples/language/gpt/titans/model/pipeline_gpt1d.py
  class GenericPipelineGPT (line 29) | class GenericPipelineGPT(nn.Module):
    method __init__ (line 30) | def __init__(self, embedding=None, blocks=None, norm=None, head=None) ...
    method forward (line 40) | def forward(self, hidden_states=None, input_ids=None, attention_mask=N...
  class PipelineGPT1D (line 55) | class PipelineGPT1D(GenericPipelineGPT):
    method __init__ (line 56) | def __init__(
  class FusedPipelineGPT1D (line 110) | class FusedPipelineGPT1D(GenericPipelineGPT):
    method __init__ (line 111) | def __init__(
    method forward (line 164) | def forward(self, hidden_states=None, input_ids=None, attention_mask=N...
  class PipelineGPTHybrid (line 175) | class PipelineGPTHybrid(GenericPipelineGPT):
    method __init__ (line 176) | def __init__(
  function _filter_kwargs (line 228) | def _filter_kwargs(func, kwargs):
  function _build_generic_gpt_pipeline_1d (line 233) | def _build_generic_gpt_pipeline_1d(module_cls, num_layers, num_chunks, d...
  function _build_gpt_pipeline_1d (line 275) | def _build_gpt_pipeline_1d(num_layers, num_chunks, device=torch.device("...
  function _build_gpt_pipeline_hybrid (line 280) | def _build_gpt_pipeline_hybrid(num_layers, num_chunks, device=torch.devi...
  function GPT2_small_pipeline_1D (line 284) | def GPT2_small_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch.f...
  function GPT2_exlarge_pipeline_1D (line 295) | def GPT2_exlarge_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch...
  function GPT3_pipeline_1D (line 306) | def GPT3_pipeline_1D(num_chunks=1, checkpoint=False, dtype=torch.float, ...
  function GPT2_exlarge_pipeline_hybrid (line 318) | def GPT2_exlarge_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=t...
  function GPT2_small_pipeline_hybrid (line 329) | def GPT2_small_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=tor...
  function GPT3_pipeline_hybrid (line 340) | def GPT3_pipeline_hybrid(num_chunks=1, checkpoint=False, dtype=torch.flo...

FILE: examples/language/gpt/titans/train_gpt.py
  function calc_local_model_size (line 22) | def calc_local_model_size(model: torch.nn.Module):
  function main (line 32) | def main():

FILE: examples/language/grok-1/grok1_policy.py
  class Grok1Policy (line 9) | class Grok1Policy(Policy):
    method config_sanity_check (line 10) | def config_sanity_check(self):
    method preprocess (line 13) | def preprocess(self) -> nn.Module:
    method module_policy (line 20) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...
    method postprocess (line 79) | def postprocess(self):
  class Grok1ModelPolicy (line 83) | class Grok1ModelPolicy(Grok1Policy):
  class Grok1ForCausalLMPolicy (line 87) | class Grok1ForCausalLMPolicy(Grok1Policy):
    method module_policy (line 88) | def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDes...

FILE: examples/language/grok-1/utils.py
  class Bcolors (line 6) | class Bcolors:
  function print_output (line 18) | def print_output(text, output):
  function inference (line 23) | def inference(model, tokenizer, text, **generate_kwargs):
  function get_default_parser (line 36) | def get_default_parser():

FILE: examples/language/llama/benchmark.py
  function main (line 62) | def main():

FILE: examples/language/mixtral/benchmark.py
  function main (line 55) | def main():

FILE: examples/language/model_utils.py
  function low_precision_init (line 8) | def low_precision_init(target_dtype: torch.dtype = torch.float16):
  function get_model_numel (line 17) | def get_model_numel(model: nn.Module) -> int:
  function format_numel_str (line 21) | def format_numel_str(numel: int) -> str:

FILE: examples/language/opt/args.py
  function parse_demo_args (line 4) | def parse_demo_args():
  function parse_benchmark_args (line 41) | def parse_benchmark_args():

FILE: examples/language/opt/data.py
  class NetflixDataset (line 6) | class NetflixDataset(Dataset):
    method __init__ (line 7) | def __init__(self, tokenizer):
    method __len__ (line 26) | def __len__(self):
    method __getitem__ (line 29) | def __getitem__(self, idx):
  function netflix_collator (line 33) | def netflix_collator(data):

FILE: examples/language/opt/opt_benchmark.py
  function format_num (line 23) | def format_num(num: int, bytes=False):
  function get_data (line 33) | def get_data(batch_size, seq_len, vocab_size):
  function colo_memory_cap (line 39) | def colo_memory_cap(size_in_GB):
  function main (line 48) | def main():

FILE: examples/language/opt/opt_train_demo.py
  function move_to_cuda (line 28) | def move_to_cuda(batch, device):
  function train_epoch (line 32) | def train_epoch(epoch, model, optimizer, _criterion, lr_scheduler, datal...
  function main (line 67) | def main():

FILE: examples/language/palm/palm_pytorch/autoregressive_wrapper.py
  function exists (line 9) | def exists(val):
  function eval_decorator (line 13) | def eval_decorator(fn):
  function top_k (line 27) | def top_k(logits, thres=0.9):
  class AutoregressiveWrapper (line 35) | class AutoregressiveWrapper(nn.Module):
    method __init__ (line 36) | def __init__(self, net, max_seq_len=2048, pad_value=0):
    method generate (line 44) | def generate(self, start_tokens, seq_len, eos_token=None, temperature=...
    method forward (line 72) | def forward(self, x, **kwargs):

FILE: examples/language/palm/palm_pytorch/palm_pytorch.py
  class LayerNorm (line 10) | class LayerNorm(nn.Module):
    method __init__ (line 11) | def __init__(self, dim, eps=1e-5):
    method forward (line 17) | def forward(self, x):
  class ParallelResidual (line 25) | class ParallelResidual(nn.Module):
    method __init__ (line 26) | def __init__(self, *fns):
    method forward (line 30) | def forward(self, x):
  class RotaryEmbedding (line 38) | class RotaryEmbedding(nn.Module):
    method __init__ (line 39) | def __init__(self, dim):
    method forward (line 44) | def forward(self, max_seq_len, *, device):
  function rotate_half (line 53) | def rotate_half(x):
  function apply_rotary_pos_emb (line 59) | def apply_rotary_pos_emb(pos, t):
  class SwiGLU (line 68) | class SwiGLU(nn.Module):
    method forward (line 69) | def forward(self, x):
  function FeedForward (line 74) | def FeedForward(dim, mult=4):
  class Attention (line 85) | class Attention(nn.Module):
    method __init__ (line 86) | def __init__(self, dim, dim_head=64, heads=8):
    method get_mask (line 103) | def get_mask(self, n, device):
    method get_rotary_embedding (line 111) | def get_rotary_embedding(self, n, device):
    method forward (line 119) | def forward(self, x):
  function PaLM (line 189) | def PaLM(*, dim, num_tokens, depth, dim_head=64, heads=8, ff_mult=4):

FILE: examples/language/palm/train.py
  function parse_args (line 36) | def parse_args():
  function cycle (line 75) | def cycle(loader):
  function decode_token (line 81) | def decode_token(token):
  function get_tflops (line 85) | def get_tflops(model_numel, batch_size, seq_len, step_time):
  function decode_tokens (line 89) | def decode_tokens(tokens):
  function get_model_size (line 93) | def get_model_size(model: nn.Module):
  function generate_dataset (line 109) | def generate_dataset(dummy_data: bool = False):
  class TextSamplerDataset (line 127) | class TextSamplerDataset(Dataset):
    method __init__ (line 128) | def __init__(self, data, seq_len):
    method __getitem__ (line 133) | def __getitem__(self, index):
    method __len__ (line 138) | def __len__(self):

FILE: examples/language/performance_evaluator.py
  function divide (line 13) | def divide(x: float, y: float) -> float:
  function all_reduce_mean (line 22) | def all_reduce_mean(x: float, world_size: int) -> float:
  function get_profile_context (line 39) | def get_profile_context(enable_flag, warmup_steps, active_steps, save_di...
  class Timer (line 88) | class Timer:
    method __init__ (line 89) | def __init__(self) -> None:
    method start (line 93) | def start(self) -> None:
    method end (line 96) | def end(self) -> None:
    method reset (line 101) | def reset(self) -> None:
  class PerformanceEvaluator (line 105) | class PerformanceEvaluator:
    method __init__ (line 117) | def __init__(
    method on_step_start (line 142) | def on_step_start(self, step: int) -> None:
    method on_step_end (line 149) | def on_step_end(self, input_ids: Tensor, **kwargs) -> None:
    method on_fit_end (line 166) | def on_fit_end(self) -> None:

FILE: examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py
  function _benchmark (line 14) | def _benchmark(rank, world_size, port):
  function auto_activation_checkpoint_batchsize_benchmark (line 49) | def auto_activation_checkpoint_batchsize_benchmark():

FILE: examples/tutorial/auto_parallel/auto_ckpt_solver_test.py
  function _benchmark (line 14) | def _benchmark(rank, world_size, port, args):
  function auto_activation_checkpoint_benchmark (line 71) | def auto_activation_checkpoint_benchmark(args):

FILE: examples/tutorial/auto_parallel/auto_parallel_with_resnet.py
  function synthesize_data (line 13) | def synthesize_data():
  function main (line 19) | def main():

FILE: examples/tutorial/auto_parallel/bench_utils.py
  function bench (line 14) | def bench(
  function bench_rotor (line 58) | def bench_rotor(
  class GPTLMModel (line 97) | class GPTLMModel(nn.Module):
    method __init__ (line 102) | def __init__(
    method forward (line 126) | def forward(self, input_ids, attention_mask):
  class GPTLMLoss (line 131) | class GPTLMLoss(nn.Module):
    method __init__ (line 136) | def __init__(self):
    method forward (line 140) | def forward(self, logits, labels):
  function gpt2_medium (line 147) | def gpt2_medium(checkpoint=False):
  function gpt2_xl (line 151) | def gpt2_xl(checkpoint=False):
  function gpt2_6b (line 155) | def gpt2_6b(checkpoint=False):
  function data_gen_gpt2 (line 159) | def data_gen_gpt2(batch_size, seq_len, vocab_size, device="cuda:0"):
  function data_gen_resnet (line 168) | def data_gen_resnet(batch_size, shape, device="cuda:0"):

FILE: examples/tutorial/download_cifar10.py
  function main (line 6) | def main():

FILE: examples/tutorial/hybrid_parallel/train.py
  class DummyDataloader (line 17) | class DummyDataloader:
    method __init__ (line 18) | def __init__(self, length, batch_size):
    method generate (line 22) | def generate(self):
    method __iter__ (line 27) | def __iter__(self):
    method __next__ (line 31) | def __next__(self):
    method __len__ (line 38) | def __len__(self):
  function main (line 42) | def main():

FILE: examples/tutorial/large_batch_optimizer/train.py
  class DummyDataloader (line 13) | class DummyDataloader:
    method __init__ (line 14) | def __init__(self, length, batch_size):
    method generate (line 18) | def generate(self):
    method __iter__ (line 23) | def __iter__(self):
    method __next__ (line 27) | def __next__(self):
    method __len__ (line 34) | def __len__(self):
  function main (line 38) | def main():

FILE: examples/tutorial/new_api/cifar_resnet/train.py
  function build_dataloader (line 30) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug...
  function evaluate (line 54) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:...
  function train_epoch (line 73) | def train_epoch(
  function main (line 100) | def main():

FILE: examples/tutorial/new_api/cifar_vit/train.py
  function vit_cifar (line 32) | def vit_cifar(**kwargs):
  function build_dataloader (line 39) | def build_dataloader(batch_size: int, coordinator: DistCoordinator, plug...
  function evaluate (line 74) | def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator:...
  function train_epoch (line 93) | def train_epoch(
  function main (line 120) | def main():

FILE: examples/tutorial/new_api/glue_bert/data.py
  class GLUEDataBuilder (line 7) | class GLUEDataBuilder:
    method __init__ (line 44) | def __init__(
    method setup (line 67) | def setup(self):
    method prepare_data (line 81) | def prepare_data(self):
    method train_dataloader (line 85) | def train_dataloader(self):
    method val_dataloader (line 90) | def val_dataloader(self):
    method test_dataloader (line 99) | def test_dataloader(self):
    method convert_to_features (line 108) | def convert_to_features(self, example_batch):

FILE: examples/tutorial/new_api/glue_bert/finetune.py
  function move_to_cuda (line 31) | def move_to_cuda(batch):
  function evaluate (line 36) | def evaluate(
  function train_epoch (line 81) | def train_epoch(
  function main (line 108) | def main():

FILE: examples/tutorial/opt/inference/batch.py
  class BatchManagerForGeneration (line 7) | class BatchManagerForGeneration(BatchManager):
    method __init__ (line 8) | def __init__(self, max_batch_size: int = 1, pad_token_id: int = 0) -> ...
    method _left_padding (line 13) | def _left_padding(self, batch_inputs):
    method _make_batch_key (line 28) | def _make_batch_key(entry: SubmitEntry) -> tuple:
    method make_batch (line 32) | def make_batch(self, q: Deque[SubmitEntry]) -> Tuple[TaskEntry, dict]:
    method split_batch (line 56) | def split_batch(self, task_entry: TaskEntry, trunc_lens: List[int] = [...

FILE: examples/tutorial/opt/inference/benchmark/locustfile.py
  class GenerationUser (line 4) | class GenerationUser(HttpUser):
    method generate (line 6) | def generate(self):

FILE: examples/tutorial/opt/inference/cache.py
  class MissCacheError (line 7) | class MissCacheError(Exception):
  class ListCache (line 11) | class ListCache:
    method __init__ (line 12) | def __init__(self, cache_size: int, list_size: int, fixed_keys: List[H...
    method get (line 29) | def get(self, key: Hashable) -> List[Any]:
    method add (line 42) | def add(self, key: Hashable, value: Any) -> None:
    method lock (line 59) | def lock(self):

FILE: examples/tutorial/opt/inference/opt_fastapi.py
  class GenerationTaskReq (line 16) | class GenerationTaskReq(BaseModel):
  function generate (line 31) | async def generate(data: GenerationTaskReq, request: Request):
  function shutdown (line 60) | async def shutdown(*_):
  function get_model_fn (line 67) | def get_model_fn(model_name: str):
  function print_args (line 72) | def print_args(args: argparse.Namespace):

FILE: examples/tutorial/opt/inference/opt_server.py
  class GenerationTaskReq (line 19) | class GenerationTaskReq(BaseModel):
  function generate (line 36) | async def generate(request: Request, body: GenerationTaskReq):
  function shutdown (line 66) | def shutdown(*_):
  function get_model_fn (line 70) | def get_model_fn(model_name: str):
  function print_args (line 75) | def print_args(args: argparse.Namespace):

FILE: examples/tutorial/opt/inference/script/process-opt-175b/convert_ckpt.py
  function load_json (line 11) | def load_json(path: str):
  function parse_shape_info (line 16) | def parse_shape_info(flat_dir: str):
  function convert (line 31) | def convert(flat_dir: str, output_dir: str, part: int):

FILE: examples/tutorial/opt/opt/context.py
  class barrier_context (line 7) | class barrier_context:
    method __init__ (line 20) | def __init__(self, executor_rank: int = 0, parallel_mode: ParallelMode...
    method __enter__ (line 26) | def __enter__(self):
    method __exit__ (line 30) | def __exit__(self, exc_type, exc_value, exc_traceback):

FILE: examples/tutorial/opt/opt/run_clm.py
  function get_time_stamp (line 69) | def get_time_stamp():
  function parse_args (line 74) | def parse_args():
  function colo_memory_cap (line 251) | def colo_memory_cap(size_in_GB):
  class DummyDataloader (line 260) | class DummyDataloader:
    method __init__ (line 261) | def __init__(self, length, batch_size, seq_len, vocab_size):
    method generate (line 267) | def generate(self):
    method __iter__ (line 274) | def __iter__(self):
    method __next__ (line 278) | def __next__(self):
    method __len__ (line 285) | def __len__(self):
  function main (line 289) | def main():

FILE: examples/tutorial/sequence_parallel/data/__init__.py
  function cyclic_iter (line 12) | def cyclic_iter(iter):
  function build_train_valid_test_data_iterators (line 18) | def build_train_valid_test_data_iterators(

FILE: examples/tutorial/sequence_parallel/data/bert_helper.py
  function _build_key_size_numel_dictionaries (line 9) | def _build_key_size_numel_dictionaries(keys, data):
  function broadcast_data (line 53) | def broadcast_data(keys, data, datatype):
  function get_batch (line 92) | def get_batch(data_iterator):
  function get_batch_for_sequence_parallel (line 117) | def get_batch_for_sequence_parallel(data_iterator):
  class SequenceParallelDataIterator (line 153) | class SequenceParallelDataIterator:
    method __init__ (line 154) | def __init__(self, data_iter):
    method __iter__ (line 157) | def __iter__(self):
    method __next__ (line 160) | def __next__(self):

FILE: examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py
  class BertDataset (line 43) | class BertDataset(Dataset):
    method __init__ (line 44) | def __init__(
    method __len__ (line 89) | def __len__(self):
    method __getitem__ (line 92) | def __getitem__(self, idx):
  function get_samples_mapping_ (line 115) | def get_samples_mapping_(
  function build_training_sample (line 198) | def build_training_sample(

FILE: examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py
  class BlendableDataset (line 24) | class BlendableDataset(torch.utils.data.Dataset):
    method __init__ (line 25) | def __init__(self, datasets, weights):
    method __len__ (line 58) | def __len__(self):
    method __getitem__ (line 61) | def __getitem__(self, idx):

FILE: examples/tutorial/sequence_parallel/data/datasets/builder.py
  function _build_train_valid_test_datasets (line 14) | def _build_train_valid_test_datasets(
  function build_train_valid_test_datasets (line 108) | def build_train_valid_test_datasets(

FILE: examples/tutorial/sequence_parallel/data/datasets/data_samplers.py
  function build_pretraining_data_loader (line 24) | def build_pretraining_data_loader(dataset, consumed_samples, micro_batch...
  class MegatronPretrainingSampler (line 54) | class MegatronPretrainingSampler:
    method __init__ (line 55) | def __init__(
    method __len__ (line 79) | def __len__(self):
    method get_start_end_idx (line 82) | def get_start_end_idx(self):
    method __iter__ (line 87) | def __iter__(self):
  class MegatronPretrainingRandomSampler (line 103) | class MegatronPretrainingRandomSampler:
    method __init__ (line 104) | def __init__(self, total_samples, consumed_samples, micro_batch_size, ...
    method __len__ (line 124) | def __len__(self):
    method __iter__ (line 127) | def __iter__(self):

FILE: examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py
  function get_datasets_weights_and_num_samples (line 38) | def get_datasets_weights_and_num_samples(data_prefix, train_valid_test_n...
  function compile_helper (line 67) | def compile_helper():
  function get_a_and_b_segments (line 82) | def get_a_and_b_segments(sample, np_rng):
  function truncate_segments (line 114) | def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens, ...
  function create_tokens_and_tokentypes (line 134) | def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id):
  function is_start_piece (line 164) | def is_start_piece(piece):
  function create_masked_lm_predictions (line 173) | def create_masked_lm_predictions(
  function pad_and_convert_to_numpy (line 362) | def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions, maske...
  function build_train_valid_test_datasets (line 393) | def build_train_valid_test_datasets(
  function _build_train_valid_test_datasets (line 464) | def _build_train_valid_test_datasets(
  function get_indexed_dataset_ (line 571) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup):
  function get_train_valid_test_split_ (line 590) | def get_train_valid_test_split_(splits_string, size):

FILE: examples/tutorial/sequence_parallel/data/datasets/helpers.cpp
  function build_blending_indices (line 35) | void build_blending_indices(py::array_t<uint8_t>& dataset_index,
  function build_sample_idx (line 95) | py::array build_sample_idx(const py::array_t<int32_t>& sizes_,
  function get_target_sample_len (line 182) | inline int32_t get_target_sample_len(const int32_t short_seq_ratio,
  function build_mapping_impl (line 197) | py::array build_mapping_impl(const py::array_t<int64_t>& docs_,
  function build_mapping (line 425) | py::array build_mapping(const py::array_t<int64_t>& docs_,
  function build_blocks_mapping_impl (line 449) | py::array build_blocks_mapping_impl(
  function build_blocks_mapping (line 663) | py::array build_blocks_mapping(
  function PYBIND11_MODULE (line 685) | PYBIND11_MODULE(helpers, m) {

FILE: examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py
  function make_attention_mask (line 11) | def make_attention_mask(source_block, target_block):
  function get_ict_dataset (line 23) | def get_ict_dataset(use_titles=True, query_in_block_prob=1):
  class ICTDataset (line 48) | class ICTDataset(Dataset):
    method __init__ (line 51) | def __init__(
    method __len__ (line 95) | def __len__(self):
    method __getitem__ (line 98) | def __getitem__(self, idx):
    method get_block (line 146) | def get_block(self, start_idx, end_idx, doc_idx):
    method get_null_block (line 156) | def get_null_block(self):
    method concat_and_pad_tokens (line 163) | def concat_and_pad_tokens(self, tokens, title=None):

FILE: examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py
  function __best_fitting_dtype (line 22) | def __best_fitting_dtype(vocab_size=None):
  function get_available_dataset_impl (line 29) | def get_available_dataset_impl():
  function infer_dataset_impl (line 33) | def infer_dataset_impl(path):
  function make_builder (line 49) | def make_builder(out_file, impl, vocab_size=None):
  function make_dataset (line 56) | def make_dataset(path, impl, skip_warmup=False):
  function dataset_exists (line 73) | def dataset_exists(path, impl):
  function read_longs (line 80) | def read_longs(f, n):
  function write_longs (line 86) | def write_longs(f, a):
  function code (line 93) | def code(dtype):
  function index_file_path (line 100) | def index_file_path(prefix_path):
  function data_file_path (line 104) | def data_file_path(prefix_path):
  function create_doc_idx (line 108) | def create_doc_idx(sizes):
  class IndexedDataset (line 116) | class IndexedDataset(torch.utils.data.Dataset):
    method __init__ (line 121) | def __init__(self, path):
    method read_index (line 127) | def read_index(self, path):
    method read_data (line 144) | def read_data(self, path):
    method check_index (line 147) | def check_index(self, i):
    method __del__ (line 151) | def __del__(self):
    method __getitem__ (line 156) | def __getitem__(self, idx):
    method __len__ (line 180) | def __len__(self):
    method num_tokens (line 183) | def num_tokens(self, index):
    method size (line 186) | def size(self, index):
    method exists (line 190) | def exists(path):
    method supports_prefetch (line 194) | def supports_prefetch(self):
  class IndexedCachedDataset (line 198) | class IndexedCachedDataset(IndexedDataset):
    method __init__ (line 199) | def __init__(self, path):
    method supports_prefetch (line 205) | def supports_prefetch(self):
    method prefetch (line 208) | def prefetch(self, indices):
    method __getitem__ (line 233) | def __getitem__(self, idx):
  class IndexedDatasetBuilder (line 250) | class IndexedDatasetBuilder(object):
    method __init__ (line 253) | def __init__(self, out_file, dtype=np.int32):
    method add_item (line 262) | def add_item(self, tensor):
    method end_document (line 269) | def end_document(self):
    method merge_file_ (line 272) | def merge_file_(self, another_file):
    method finalize (line 292) | def finalize(self, index_file):
  function _warmup_mmap_file (line 307) | def _warmup_mmap_file(path):
  class MMapIndexedDataset (line 313) | class MMapIndexedDataset(torch.utils.data.Dataset):
    class Index (line 314) | class Index(object):
      method writer (line 318) | def writer(cls, path, dtype):
      method __init__ (line 363) | def __init__(self, path, skip_warmup=False):
      method __del__ (line 400) | def __del__(self):
      method dtype (line 405) | def dtype(self):
      method sizes (line 409) | def sizes(self):
      method doc_idx (line 413) | def doc_idx(self):
      method __getitem__ (line 417) | def __getitem__(self, i):
      method __len__ (line 420) | def __len__(self):
    method __init__ (line 423) | def __init__(self, path, skip_warmup=False):
    method __getstate__ (line 432) | def __getstate__(self):
    method __setstate__ (line 435) | def __setstate__(self, state):
    method _do_init (line 438) | def _do_init(self, path, skip_warmup):
    method __del__ (line 450) | def __del__(self):
    method __len__ (line 455) | def __len__(self):
    method __getitem__ (line 459) | def __getitem__(self, idx):
    method get (line 476) | def get(self, idx, offset=0, length=None):
    method sizes (line 490) | def sizes(self):
    method doc_idx (line 494) | def doc_idx(self):
    method get_doc_idx (line 497) | def get_doc_idx(self):
    method set_doc_idx (line 500) | def set_doc_idx(self, doc_idx_):
    method supports_prefetch (line 504) | def supports_prefetch(self):
    method exists (line 508) | def exists(path):
  class MMapIndexedDatasetBuilder (line 512) | class MMapIndexedDatasetBuilder(object):
    method __init__ (line 513) | def __init__(self, out_file, dtype=np.int64):
    method add_item (line 519) | def add_item(self, tensor):
    method end_document (line 524) | def end_document(self):
    method merge_file_ (line 527) | def merge_file_(self, another_file):
    method finalize (line 539) | def finalize(self, index_file):

FILE: examples/tutorial/sequence_parallel/data/datasets/test/test_indexed_dataset.py
  function test_indexed_dataset (line 16) | def test_indexed_dataset(args):
  function test_indexed_dataset_get (line 42) | def test_indexed_dataset_get(args):
  function main (line 82) | def main():

FILE: examples/tutorial/sequence_parallel/data/dummy_dataloader.py
  class DummyDataloader (line 4) | class DummyDataloader:
    method __init__ (line 5) | def __init__(self, batch_size, vocab_size, seq_length):
    method generate (line 11) | def generate(self):
    method __iter__ (line 48) | def __iter__(self):
    method __next__ (line 51) | def __next__(self):

FILE: examples/tutorial/sequence_parallel/data/tokenizer/__init__.py
  function initialize_tokenizer (line 23) | def initialize_tokenizer(vocab_file, tokenizer_type, vocab_extra_ids=0):
  function get_tokenizer (line 30) | def get_tokenizer():
  function get_padded_vocab_size (line 35) | def get_padded_vocab_size():

FILE: examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py
  function validate_case_matches_checkpoint (line 27) | def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
  function convert_to_unicode (line 76) | def convert_to_unicode(text):
  function printable_text (line 96) | def printable_text(text):
  function load_vocab (line 119) | def load_vocab(vocab_file):
  function convert_by_vocab (line 134) | def convert_by_vocab(vocab, items):
  function convert_tokens_to_ids (line 142) | def convert_tokens_to_ids(vocab, tokens):
  function convert_ids_to_tokens (line 146) | def convert_ids_to_tokens(inv_vocab, ids):
  function whitespace_tokenize (line 150) | def whitespace_tokenize(text):
  class FullTokenizer (line 159) | class FullTokenizer(object):
    method __init__ (line 162) | def __init__(self, vocab_file, do_lower_case=True):
    method tokenize (line 168) | def tokenize(self, text):
    method convert_tokens_to_ids (line 176) | def convert_tokens_to_ids(self, tokens):
    method convert_ids_to_tokens (line 179) | def convert_ids_to_tokens(self, ids):
    method convert_tokens_to_string (line 183) | def convert_tokens_to_string(tokens, clean_up_tokenization_spaces=True):
    method vocab_size (line 211) | def vocab_size(self):
  class BasicTokenizer (line 215) | class BasicTokenizer(object):
    method __init__ (line 218) | def __init__(self, do_lower_case=True):
    method tokenize (line 226) | def tokenize(self, text):
    method _run_strip_accents (line 250) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 261) | def _run_split_on_punc(self, text):
    method _tokenize_chinese_chars (line 281) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 294) | def _is_chinese_char(self, cp):
    method _clean_text (line 318) | def _clean_text(self, text):
  class WordpieceTokenizer (line 332) | class WordpieceTokenizer(object):
    method __init__ (line 335) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=...
    method tokenize (line 340) | def tokenize(self, text):
  function _is_whitespace (line 394) | def _is_whitespace(char):
  function _is_control (line 406) | def _is_control(char):
  function _is_punctuation (line 418) | def _is_punctuation(char):

FILE: examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py
  function build_tokenizer (line 25) | def build_tokenizer(vocab_file, tokenizer_type, vocab_extra_ids=0):
  function _vocab_size_with_padding (line 44) | def _vocab_size_with_padding(orig_vocab_size, make_vocab_size_divisible_...
  class AbstractTokenizer (line 65) | class AbstractTokenizer(ABC):
    method __init__ (line 68) | def __init__(self, name):
    method vocab_size (line 74) | def vocab_size(self):
    method vocab (line 79) | def vocab(self):
    method inv_vocab (line 84) | def inv_vocab(self):
    method tokenize (line 88) | def tokenize(self, text):
    method detokenize (line 91) | def detokenize(self, token_ids):
    method cls (line 95) | def cls(self):
    method sep (line 99) | def sep(self):
    method pad (line 103) | def pad(self):
    method eod (line 107) | def eod(self):
    method mask (line 111) | def mask(self):
  class _BertWordPieceTokenizer (line 115) | class _BertWordPieceTokenizer(AbstractTokenizer):
    method __init__ (line 118) | def __init__(self, vocab_file, lower_case=True, vocab_extra_ids=0):
    method add_token (line 147) | def add_token(self, token):
    method add_additional_special_tokens (line 154) | def add_additional_special_tokens(self, tokens_list):
    method vocab_size (line 160) | def vocab_size(self):
    method vocab (line 164) | def vocab(self):
    method inv_vocab (line 168) | def inv_vocab(self):
    method tokenize (line 171) | def tokenize(self, text):
    method decode (line 175) | def decode(self, ids):
    method decode_token_ids (line 179) | def decode_token_ids(self, token_ids):
    method cls (line 194) | def cls(self):
    method sep (line 198) | def sep(self):
    method pad (line 202) | def pad(self):
    method mask (line 206) | def mask(self):
    method bos_token (line 210) | def bos_token(self):
    method eos_token (line 215) | def eos_token(self):
    method additional_special_tokens (line 220) | def additional_special_tokens(self):
    method bos_token_id (line 225) | def bos_token_id(self):
    method eos_token_id (line 230) | def eos_token_id(self):
    method additional_special_tokens_ids (line 235) | def additional_special_tokens_ids(self):
    method additional_special_tokens (line 240) | def additional_special_tokens(self, value):

FILE: examples/tutorial/sequence_parallel/loss_func/bert_loss.py
  class BertLoss (line 9) | class BertLoss(nn.Module):
    method forward (line 10) | def forward(self, lm_loss, sop_logits, loss_mask, sentence_order):

FILE: examples/tutorial/sequence_parallel/loss_func/cross_entropy.py
  class _VocabCrossEntropy (line 5) | class _VocabCrossEntropy(torch.autograd.Function):
    method forward (line 8) | def forward(ctx, vocab_parallel_logits, target):
    method backward (line 47) | def backward(ctx, grad_output):
  function vocab_cross_entropy (line 67) | def vocab_cross_entropy(vocab_logits, target):

FILE: examples/tutorial/sequence_parallel/loss_func/utils.py
  function ensure_divisibility (line 4) | def ensure_divisibility(numerator, denominator):
  function divide (line 9) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 16) | def split_tensor_along_last_dim(tensor, num_partitions, contiguous_split...
  class VocabUtility (line 36) | class VocabUtility:
    method vocab_range_from_per_partition_vocab_size (line 42) | def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size...
    method vocab_range_from_global_vocab_size (line 48) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_...

FILE: examples/tutorial/sequence_parallel/lr_scheduler/annealing_lr.py
  class AnnealingLR (line 21) | class AnnealingLR(object):
    method __init__ (line 24) | def __init__(
    method get_lr (line 59) | def get_lr(self):
    method step (line 92) | def step(self, increment=1):
    method state_dict (line 99) | def state_dict(self):
    method _check_and_set (line 110) | def _check_and_set(self, cls_value, sd_value, name):
    method load_state_dict (line 122) | def load_state_dict(self, sd):

FILE: examples/tutorial/sequence_parallel/model/bert.py
  class BertForPretrain (line 18) | class BertForPretrain(nn.Module):
    method __init__ (line 19) | def __init__(
    method _init_normal (line 75) | def _init_normal(self, tensor):
    method _output_init_normal (line 78) | def _output_init_normal(self, tensor):
    method reset_parameters (line 81) | def reset_parameters(self):
    method forward (line 102) | def forward(self, input_ids, attention_masks, tokentype_ids, lm_labels):
  class PipelineBertForPretrain (line 129) | class PipelineBertForPretrain(nn.Module):
    method __init__ (line 130) | def __init__(
    method _init_normal (line 201) | def _init_normal(self, tensor):
    method _output_init_normal (line 204) | def _output_init_normal(self, tensor):
    method reset_parameters (line 207) | def reset_parameters(self):
    method forward (line 230) | def forward(self, input_ids, attention_masks, tokentype_ids, lm_labels):
  function _filter_kwargs (line 266) | def _filter_kwargs(func, kwargs):
  function build_pipeline_bert (line 271) | def build_pipeline_bert(num_layers, num_chunks, device=torch.device("cud...

FILE: examples/tutorial/sequence_parallel/model/layers/bert_layer.py
  function attention_mask_func (line 12) | def attention_mask_func(attention_scores, attention_mask):
  class BertLayer (line 17) | class BertLayer(nn.Module):
    method __init__ (line 23) | def __init__(
    method forward (line 66) | def forward(self, hidden_states, attention_mask):

FILE: examples/tutorial/sequence_parallel/model/layers/dropout.py
  function bias_dropout_add (line 4) | def bias_dropout_add(x, bias, residual, prob, training):
  function get_bias_dropout_add (line 11) | def get_bias_dropout_add(training):

FILE: examples/tutorial/sequence_parallel/model/layers/embedding.py
  class VocabEmbedding (line 7) | class VocabEmbedding(torch.nn.Module):
    method __init__ (line 8) | def __init__(self, num_embeddings, embedding_dim):
    method forward (line 24) | def forward(self, hidden_state):
    method __repr__ (line 36) | def __repr__(self):
  class Embedding (line 40) | class Embedding(nn.Module):
    method __init__ (line 53) | def __init__(self, hidden_size, vocab_size, max_sequence_length, embed...
    method word_embedding_weight (line 77) | def word_embedding_weight(self):
    method forward (line 80) | def forward(self, input_ids, position_ids, tokentype_ids=None):

FILE: examples/tutorial/sequence_parallel/model/layers/head.py
  class BertLMHead (line 14) | class BertLMHead(nn.Module):
    method __init__ (line 22) | def __init__(
    method forward (line 34) | def forward(self, hidden_states, word_embeddings_weight, lm_labels):
  class BertBinaryHead (line 45) | class BertBinaryHead(nn.Module):
    method __init__ (line 46) | def __init__(self, hidden_size):
    method forward (line 51) | def forward(self, hidden_states):
  class BertDualHead (line 60) | class BertDualHead(nn.Module):
    method __init__ (line 61) | def __init__(self, hidden_size, vocab_size, add_binary_head):
    method forward (line 70) | def forward(self, hidden_states, word_embeddings_weight, lm_labels):

FILE: examples/tutorial/sequence_parallel/model/layers/init_method.py
  function init_normal (line 6) | def init_normal(tensor, sigma):
  function output_init_normal (line 11) | def output_init_normal(tensor, sigma, num_layers):

FILE: examples/tutorial/sequence_parallel/model/layers/linear.py
  class Linear (line 8) | class Linear(nn.Module):
    method __init__ (line 27) | def __init__(self, input_size, output_size, bias=True, skip_bias_add=F...
    method forward (line 50) | def forward(self, input_):
    method __repr__ (line 60) | def __repr__(self):

FILE: examples/tutorial/sequence_parallel/model/layers/mlp.py
  class TransformerMLP (line 9) | class TransformerMLP(nn.Module):
    method __init__ (line 17) | def __init__(self, hidden_size, mlp_ratio, fuse_gelu=True):
    method forward (line 29) | def forward(self, hidden_states):

FILE: examples/tutorial/sequence_parallel/model/layers/pooler.py
  class Pooler (line 7) | class Pooler(nn.Module):
    method __init__ (line 19) | def __init__(self, hidden_size):
    method forward (line 23) | def forward(self, hidden_states, sequence_index=0):

FILE: examples/tutorial/sequence_parallel/model/layers/preprocess.py
  class PreProcessor (line 8) | class PreProcessor(nn.Module):
    method __init__ (line 9) | def __init__(self, sub_seq_length):
    method bert_position_ids (line 13) | def bert_position_ids(self, token_ids):
    method bert_extended_attention_mask (line 24) | def bert_extended_attention_mask(self, attention_mask):
    method forward (line 47) | def forward(self, input_ids=None, attention_mask=None):

FILE: examples/tutorial/sequence_parallel/train.py
  function process_batch_data (line 21) | def process_batch_data(batch_data):
  function parse_args (line 31) | def parse_args():
  function pipeline_data_process_func (line 37) | def pipeline_data_process_func(stage_output, micro_batch_data):
  function main (line 48) | def main():

FILE: extensions/base_extension.py
  class _Extension (line 9) | class _Extension(ABC):
    method __init__ (line 10) | def __init__(self, name: str, support_aot: bool, support_jit: bool, pr...
    method name (line 17) | def name(self):
    method support_aot (line 21) | def support_aot(self):
    method support_jit (line 25) | def support_jit(self):
    method get_jit_extension_folder_path (line 29) | def get_jit_extension_folder_path():
    method is_available (line 61) | def is_available(self) -> bool:
    method assert_compatible (line 67) | def assert_compatible(self) -> None:
    method build_aot (line 73) | def build_aot(self) -> Union["CppExtension", "CUDAExtension"]:
    method build_jit (line 77) | def build_jit(self) -> Callable:
    method load (line 81) | def load(self) -> Callable:

FILE: extensions/cpp_extension.py
  class _CppExtension (line 13) | class _CppExtension(_Extension):
    method __init__ (line 14) | def __init__(self, name: str, priority: int = 1):
    method csrc_abs_path (line 25) | def csrc_abs_path(self, path):
    method pybind_abs_path (line 28) | def pybind_abs_path(self, path):
    method relative_to_abs_path (line 31) | def relative_to_abs_path(self, code_path: str) -> str:
    method strip_empty_entries (line 51) | def strip_empty_entries(self, args):
    method import_op (line 57) | def import_op(self):
    method build_aot (line 63) | def build_aot(self) -> "CppExtension":
    method build_jit (line 73) | def build_jit(self) -> None:
    method sources_files (line 112) | def sources_files(self) -> List[str]:
    method include_dirs (line 118) | def include_dirs(self) -> List[str]:
    method cxx_flags (line 125) | def cxx_flags(self) -> List[str]:
    method load (line 130) | def load(self):

FILE: extensions/csrc/common/data_type.h
  function namespace (line 8) | namespace colossalAI {

FILE: extensions/csrc/common/mp_type_traits.h
  function namespace (line 12) | namespace colossalAI {

FILE: extensions/csrc/common/target.h
  type class (line 12) | enum class
  type class (line 17) | enum class
  function BitLen (line 25) | enum class BitLen : int {

FILE: extensions/csrc/common/vec_type_traits.h
  function namespace (line 13) | namespace colossalAI {

FILE: extensions/csrc/funcs/binary_functor.h
  function namespace (line 16) | namespace colossalAI {

FILE: extensions/csrc/funcs/cast_functor.h
  function namespace (line 22) | namespace colossalAI {

FILE: extensions/csrc/funcs/reduce_function.h
  function namespace (line 11) | namespace funcs {

FILE: extensions/csrc/funcs/ternary_functor.h
  function namespace (line 17) | namespace colossalAI {

FILE: extensions/csrc/funcs/unary_functor.h
  function namespace (line 15) | namespace colossalAI {

FILE: extensions/csrc/kernel/arm/cpu_adam_arm.cpp
  function PYBIND11_MODULE (line 300) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/csrc/kernel/arm/cpu_adam_arm.h
  function float32x4_t (line 14) | inline float32x4_t simd_load_offset(const void *ptr, at::ScalarType dtype,
  function float32x4_t (line 34) | inline float32x4_t simd_load(void const *ptr, at::ScalarType dtype) {
  function simd_store_offset (line 38) | inline void simd_store_offset(void *ptr, at::ScalarType dtype, float32x4...
  function simd_store (line 62) | inline void simd_store(void *ptr, at::ScalarType dtype, float32x4_t data) {
  function float32x4_t (line 66) | inline float32x4_t simd_set(float value) {
  function scalar_load_offset (line 73) | inline float scalar_load_offset(const void *ptr, at::ScalarType dtype,
  function scalar_store_offset (line 90) | inline void scalar_store_offset(void *ptr, at::ScalarType dtype, float d...
  function class (line 129) | class AdamOptimizer {
  function update_state (line 183) | inline void update_state(float lr, float epsilon, float weight_decay,

FILE: extensions/csrc/kernel/cuda/attention/attention_utils.h
  function namespace (line 32) | namespace colossalAI {

FILE: extensions/csrc/kernel/cuda/utils/gpu_launch_config.h
  function namespace (line 8) | namespace colossalAI {

FILE: extensions/csrc/kernel/cuda/utils/nvgpu_dev_info.h
  function namespace (line 12) | namespace colossalAI {

FILE: extensions/csrc/kernel/cuda/utils/vec_copy.h
  function namespace (line 7) | namespace colossalAI {

FILE: extensions/csrc/kernel/x86/cpu_adam.cpp
  function PYBIND11_MODULE (line 442) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/csrc/kernel/x86/cpu_adam.h
  function class (line 93) | class Adam_Optimizer {
  function update_state (line 131) | inline void update_state(float lr, float epsilon, float weight_decay,
  function simd_load (line 146) | inline void simd_load(bool is_half, float *ptr, __half *h_ptr,
  function simd_store (line 155) | inline void simd_store(bool is_half, float *ptr, __half *h_ptr,

FILE: extensions/cuda_extension.py
  class _CudaExtension (line 18) | class _CudaExtension(_CppExtension):
    method nvcc_flags (line 20) | def nvcc_flags(self) -> List[str]:
    method is_available (line 26) | def is_available(self) -> bool:
    method assert_compatible (line 38) | def assert_compatible(self) -> None:
    method get_cuda_home_include (line 48) | def get_cuda_home_include(self):
    method include_dirs (line 59) | def include_dirs(self) -> List[str]:
    method build_jit (line 65) | def build_jit(self) -> None:
    method build_aot (line 106) | def build_aot(self) -> "CUDAExtension":

FILE: extensions/pybind/cpu_adam/cpu_adam_arm.py
  class CpuAdamArmExtension (line 7) | class CpuAdamArmExtension(_CppExtension):
    method __init__ (line 8) | def __init__(self):
    method is_available (line 11) | def is_available(self) -> bool:
    method assert_compatible (line 15) | def assert_compatible(self) -> None:
    method sources_files (line 22) | def sources_files(self):
    method include_dirs (line 28) | def include_dirs(self) -> List[str]:
    method cxx_flags (line 31) | def cxx_flags(self):
    method nvcc_flags (line 41) | def nvcc_flags(self):

FILE: extensions/pybind/cpu_adam/cpu_adam_x86.py
  class CpuAdamX86Extension (line 7) | class CpuAdamX86Extension(_CudaExtension):
    method __init__ (line 8) | def __init__(self):
    method is_available (line 11) | def is_available(self) -> bool:
    method assert_compatible (line 14) | def assert_compatible(self) -> None:
    method sources_files (line 22) | def sources_files(self):
    method cxx_flags (line 28) | def cxx_flags(self):
    method nvcc_flags (line 41) | def nvcc_flags(self):

FILE: extensions/pybind/flash_attention/flash_attention_dao_cuda.py
  class FlashAttentionDaoCudaExtension (line 4) | class FlashAttentionDaoCudaExtension(_Extension):
    method __init__ (line 5) | def __init__(self):
    method is_available (line 8) | def is_available(self) -> bool:
    method assert_compatible (line 21) | def assert_compatible(self) -> bool:
    method build_aot (line 24) | def build_aot(self) -> None:
    method build_jit (line 29) | def build_jit(self) -> None:
    method load (line 34) | def load(self):

FILE: extensions/pybind/flash_attention/flash_attention_npu.py
  class FlashAttentionNpuExtension (line 6) | class FlashAttentionNpuExtension(_Extension):
    method __init__ (line 7) | def __init__(self):
    method is_available (line 10) | def is_available(self) -> bool:
    method assert_compatible (line 18) | def assert_compatible(self) -> bool:
    method build_aot (line 21) | def build_aot(self) -> None:
    method build_jit (line 26) | def build_jit(self) -> None:
    method load (line 31) | def load(self):

FILE: extensions/pybind/flash_attention/flash_attention_sdpa_cuda.py
  class FlashAttentionSdpaCudaExtension (line 4) | class FlashAttentionSdpaCudaExtension(_Extension):
    method __init__ (line 5) | def __init__(self):
    method is_available (line 8) | def is_available(self) -> bool:
    method assert_compatible (line 18) | def assert_compatible(self) -> bool:
    method build_aot (line 21) | def build_aot(self) -> None:
    method build_jit (line 24) | def build_jit(self) -> None:
    method load (line 27) | def load(self):

FILE: extensions/pybind/inference/inference.cpp
  function PYBIND11_MODULE (line 81) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/inference/inference_ops_cuda.py
  class InferenceOpsCudaExtension (line 5) | class InferenceOpsCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method cxx_flags (line 25) | def cxx_flags(self):
    method nvcc_flags (line 29) | def nvcc_flags(self):

FILE: extensions/pybind/layernorm/layer_norm.cpp
  function compute_n1_n2 (line 14) | void compute_n1_n2(at::Tensor input, at::IntArrayRef normalized_shape, i...
  function check_args (line 28) | void check_args(at::IntArrayRef normalized_shape, at::Tensor gamma,
  function check_args (line 34) | void check_args(at::Tensor input, at::IntArrayRef normalized_shape, int ...
  function check_args (line 65) | void check_args(at::Tensor input, at::IntArrayRef normalized_shape,
  function layer_norm_affine (line 84) | std::vector<at::Tensor> layer_norm_affine(at::Tensor input,
  function layer_norm_gradient_affine (line 113) | std::vector<at::Tensor> layer_norm_gradient_affine(
  function PYBIND11_MODULE (line 137) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/layernorm/layernorm_cuda.py
  class LayerNormCudaExtension (line 5) | class LayerNormCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method include_dirs (line 15) | def include_dirs(self):
    method cxx_flags (line 19) | def cxx_flags(self):
    method nvcc_flags (line 22) | def nvcc_flags(self):

FILE: extensions/pybind/moe/moe.cpp
  function moe_dispatch_forward (line 33) | torch::Tensor moe_dispatch_forward(int s, int ec, int h,
  function moe_dispatch_backward (line 43) | torch::Tensor moe_dispatch_backward(int s, int ec, int h,
  function moe_combine_forward (line 54) | torch::Tensor moe_combine_forward(int s, int e, int c, int h,
  function moe_combine_backward (line 67) | std::vector<torch::Tensor> moe_combine_backward(int s, int e, int c, int h,
  function moe_cumsum (line 82) | torch::Tensor moe_cumsum(torch::Tensor mask) {
  function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/moe/moe_cuda.py
  class MoeCudaExtension (line 5) | class MoeCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method cxx_flags (line 15) | def cxx_flags(self):
    method nvcc_flags (line 18) | def nvcc_flags(self):

FILE: extensions/pybind/optimizer/fused_optimizer_cuda.py
  class FusedOptimizerCudaExtension (line 5) | class FusedOptimizerCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method cxx_flags (line 22) | def cxx_flags(self):
    method nvcc_flags (line 26) | def nvcc_flags(self):

FILE: extensions/pybind/optimizer/optimizer.cpp
  function PYBIND11_MODULE (line 38) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/softmax/scaled_masked_softmax.cpp
  function fwd (line 19) | torch::Tensor fwd(torch::Tensor const& input, torch::Tensor const& mask,
  function bwd (line 30) | torch::Tensor bwd(torch::Tensor const& output_grads,
  function PYBIND11_MODULE (line 45) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/softmax/scaled_masked_softmax_cuda.py
  class ScaledMaskedSoftmaxCudaExtension (line 5) | class ScaledMaskedSoftmaxCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method cxx_flags (line 15) | def cxx_flags(self):
    method nvcc_flags (line 18) | def nvcc_flags(self):

FILE: extensions/pybind/softmax/scaled_upper_triang_masked_softmax.cpp
  function fwd (line 15) | torch::Tensor fwd(torch::Tensor const& input, float scale_factor) {
  function bwd (line 24) | torch::Tensor bwd(torch::Tensor const& output_grads,
  function PYBIND11_MODULE (line 39) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: extensions/pybind/softmax/scaled_upper_triangle_masked_softmax_cuda.py
  class ScaledUpperTriangleMaskedSoftmaxCudaExtension (line 5) | class ScaledUpperTriangleMaskedSoftmaxCudaExtension(_CudaExtension):
    method __init__ (line 6) | def __init__(self):
    method sources_files (line 9) | def sources_files(self):
    method cxx_flags (line 18) | def cxx_flags(self):
    method nvcc_flags (line 21) | def nvcc_flags(self):

FILE: extensions/triton_extension.py
  class _TritonExtension (line 6) | class _TritonExtension(_Extension):
    method __init__ (line 7) | def __init__(self, name: str, priority: int = 1):
    method is_hardware_compatible (line 10) | def is_hardware_compatible(self) -> bool:
    method load (line 20) | def load(self):

FILE: extensions/utils.py
  function print_rank_0 (line 8) | def print_rank_0(message: str) -> None:
  function get_cuda_version_in_pytorch (line 26) | def get_cuda_version_in_pytorch() -> List[int]:
  function get_cuda_bare_metal_version (line 45) | def get_cuda_bare_metal_version(cuda_dir) -> List[int]:
  function check_system_pytorch_cuda_match (line 84) | def check_system_pytorch_cuda_match(cuda_dir):
  function get_pytorch_version (line 104) | def get_pytorch_version() -> List[int]:
  function check_pytorch_version (line 120) | def check_pytorch_version(min_major_version, min_minor_version) -> bool:
  function check_cuda_availability (line 142) | def check_cuda_availability():
  function set_cuda_arch_list (line 154) | def set_cuda_arch_list(cuda_dir):
  function get_cuda_cc_flag (line 193) | def get_cuda_cc_flag() -> List[str]:
  function append_nvcc_threads (line 217) | def append_nvcc_threads(nvcc_extra_args: List[str]) -> List[str]:

FILE: setup.py
  function fetch_requirements (line 23) | def fetch_requirements(path) -> List[str]:
  function fetch_readme (line 37) | def fetch_readme() -> str:
  function get_version (line 48) | def get_version() -> str:

FILE: tests/conftest.py
  function pytest_runtest_setup (line 6) | def pytest_runtest_setup(item):

FILE: tests/kit/model_zoo/custom/base.py
  class CheckpointModule (line 5) | class CheckpointModule(nn.Module):
    method __init__ (line 6) | def __init__(self, checkpoint: bool = False):
    method _forward (line 11) | def _forward(self, *args, **kwargs):
    method forward (line 14) | def forward(self, *args, **kwargs):
    method train (line 20) | def train(self, mode: bool = True):
    method eval (line 24) | def eval(self):

FILE: tests/kit/model_zoo/custom/hanging_param_model.py
  class HangingParamModule (line 9) | class HangingParamModule(CheckpointModule):
    method __init__ (line 15) | def __init__(self, checkpoint=False) -> None:
    method forward (line 21) | def forward(self, x):
  function data_gen (line 28) | def data_gen():
  function loss_fn (line 32) | def loss_fn(x):
  function output_transform (line 38) | def output_transform(x: torch.Tensor):

FILE: tests/kit/model_zoo/custom/nested_model.py
  class SubNet (line 9) | class SubNet(nn.Module):
    method __init__ (line 10) | def __init__(self, out_features) -> None:
    method forward (line 14) | def forward(self, x, weight):
  class NestedNet (line 18) | class NestedNet(CheckpointModule):
    method __init__ (line 19) | def __init__(self, checkpoint=False) -> None:
    method forward (line 25) | def forward(self, x):
  function data_gen (line 33) | def data_gen():
  function loss_fn (line 37) | def loss_fn(x):
  function output_transform (line 43) | def output_transform(x: torch.Tensor):

FILE: tests/kit/model_zoo/custom/repeated_computed_layers.py
  class NetWithRepeatedlyComputedLayers (line 9) | class NetWithRepeatedlyComputedLayers(CheckpointModule):
    method __init__ (line 15) | def __init__(self, checkpoint=False) -> None:
    method forward (line 22) | def forward(self, x):
  function data_gen (line 28) | def data_gen():
  function loss_fn (line 32) | def loss_fn(x):
  function output_transform (line 38) | def output_transform(x: torch.Tensor):

FILE: tests/kit/model_zoo/custom/simple_mlp.py
  class Net (line 15) | class Net(nn.Module):
    method __init__ (line 16) | def __init__(self, in_dim=_IN_DIM, hid_dim=_HID_DIM, identity=True, dt...
    method forward (line 26) | def forward(self, x):
  class TPNet (line 30) | class TPNet(nn.Module):
    method __init__ (line 31) | def __init__(
    method forward (line 48) | def forward(self, x):
  function data_gen (line 52) | def data_gen():
  function output_transform (line 56) | def output_transform(x: torch.Tensor):

FILE: tests/kit/model_zoo/custom/simple_net.py
  class SimpleNet (line 9) | class SimpleNet(CheckpointModule):
    method __init__ (line 14) | def __init__(self, checkpoint=False) -> None:
    method forward (line 23) | def forward(self, x):
  function data_gen (line 33) | def data_gen():
  function loss_fn (line 37) | def loss_fn(x):
  function output_transform (line 43) | def output_transform(x: torch.Tensor):

FILE: tests/kit/model_zoo/diffusers/diffusers.py
  function data_clip_model (line 24) | def data_clip_model():
  function data_clip_text (line 34) | def data_clip_text():
  function data_clip_vision (line 40) | def data_clip_vision():

FILE: tests/kit/model_zoo/executor.py
  function run_fwd (line 10) | def run_fwd(
  function run_fwd_bwd (line 34) | def run_fwd_bwd(

FILE: tests/kit/model_zoo/registry.py
  class ModelAttribute (line 9) | class ModelAttribute:
  class ModelZooRegistry (line 22) | class ModelZooRegistry(dict):
    method register (line 27) | def register(
    method get_sub_registry (line 64) | def get_sub_registry(

FILE: tests/kit/model_zoo/torchaudio/torchaudio.py
  function conformer_data_gen_fn (line 18) | def conformer_data_gen_fn():
  function emformer_data_gen_fn (line 53) | def emformer_data_gen_fn():
  function wavernn_data_gen_fn (line 82) | def wavernn_data_gen_fn():
  function tacotron_data_gen_fn (line 108) | def tacotron_data_gen_fn():
  function wav2vec_data_gen_fn (line 130) | def wav2vec_data_gen_fn():

FILE: tests/kit/model_zoo/torchrec/torchrec.py
  function gen_kt (line 15) | def gen_kt():
  function gen_kjt (line 21) | def gen_kjt():
  function interaction_arch_data_gen_fn (line 31) | def interaction_arch_data_gen_fn():
  function simple_dfm_data_gen_fn (line 36) | def simple_dfm_data_gen_fn():
  function sparse_arch_data_gen_fn (line 41) | def sparse_arch_data_gen_fn():
  function output_transform_fn (line 46) | def output_transform_fn(x):
  function get_ebc (line 56) | def get_ebc():
  function sparse_arch_model_fn (line 63) | def sparse_arch_model_fn():
  function simple_deep_fmnn_model_fn (line 68) | def simple_deep_fmnn_model_fn():
  function dlrm_model_fn (line 73) | def dlrm_model_fn():
  function dlrm_sparsearch_model_fn (line 78) | def dlrm_sparsearch_model_fn():

FILE: tests/kit/model_zoo/torchvision/torchvision.py
  function swin_s (line 16) | def swin_s():

FILE: tests/kit/model_zoo/transformers/albert.py
  function data_gen_fn (line 13) | def data_gen_fn():
  function data_gen_for_pretrain (line 20) | def data_gen_for_pretrain():
  function data_gen_for_qa (line 74) | def data_gen_for_qa():
  function data_gen_for_mcq (line 81) | def data_gen_for_mcq():

FILE: tests/kit/model_zoo/transformers/bert.py
  function data_gen (line 12) | def data_gen():
  function data_gen_for_lm (line 27) | def data_gen_for_lm():
  function data_gen_for_pretraining (line 35) | def data_gen_for_pretraining():
  function data_gen_for_sequence_classification (line 43) | def data_gen_for_sequence_classification():
  function data_gen_for_token_classification (line 51) | def data_gen_for_token_classification():
  function data_gen_for_mcq (line 59) | def data_gen_for_mcq():
  function data_gen_for_qa (line 345) | def data_gen_for_qa():

FILE: tests/kit/model_zoo/transformers/blip2.py
  function data_gen (line 12) | def data_gen():

FILE: tests/kit/model_zoo/transformers/bloom.py
  function data_gen (line 11) | def data_gen():
  function data_gen_for_lm (line 24) | def data_gen_for_lm():
  function data_gen_for_token_classification (line 32) | def data_gen_for_token_classification():
  function data_gen_for_sequence_classification (line 40) | def data_gen_for_sequence_classification():
  function data_gen_for_question_answering (line 47) | def data_gen_for_question_answering():

FILE: tests/kit/model_zoo/transformers/chatglm2.py
  function data_gen (line 12) | def data_gen():
  function data_gen_for_conditional_generation (line 18) | def data_gen_for_conditional_generation():
  function init_chatglm (line 54) | def init_chatglm():

FILE: tests/kit/model_zoo/transformers/command.py
  function data_gen (line 18) | def data_gen():
  function data_gen_for_causal_lm (line 36) | def data_gen_for_causal_lm():

FILE: tests/kit/model_zoo/transformers/deepseek.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_lm (line 27) | def data_gen_for_lm():
  function data_gen_for_sequence_classification (line 35) | def data_gen_for_sequence_classification():
  function init_deepseek (line 51) | def init_deepseek():

FILE: tests/kit/model_zoo/transformers/deepseek_v3.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_lm (line 27) | def data_gen_for_lm():
  function init_deepseek (line 43) | def init_deepseek():

FILE: tests/kit/model_zoo/transformers/falcon.py
  function data_gen (line 11) | def data_gen():
  function data_gen_for_lm (line 24) | def data_gen_for_lm():
  function data_gen_for_token_classification (line 32) | def data_gen_for_token_classification():
  function data_gen_for_sequence_classification (line 40) | def data_gen_for_sequence_classification():
  function data_gen_for_question_answering (line 47) | def data_gen_for_question_answering():

FILE: tests/kit/model_zoo/transformers/gpt.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_lm (line 26) | def data_gen_for_lm():
  function data_gen_for_question_answering (line 43) | def data_gen_for_question_answering():
  function data_gen_for_token_classification (line 54) | def data_gen_for_token_classification():
  function data_gen_for_sequence_classification (line 62) | def data_gen_for_sequence_classification():
  function date_gen_for_double_heads (line 69) | def date_gen_for_double_heads():

FILE: tests/kit/model_zoo/transformers/gptj.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_lm (line 27) | def data_gen_for_lm():
  function data_gen_for_question_answering (line 35) | def data_gen_for_question_answering():
  function data_gen_for_sequence_classification (line 46) | def data_gen_for_sequence_classification():

FILE: tests/kit/model_zoo/transformers/llama.py
  function data_gen (line 18) | def data_gen():
  function data_gen_for_causal_lm (line 40) | def data_gen_for_causal_lm():

FILE: tests/kit/model_zoo/transformers/mistral.py
  function data_gen (line 12) | def data_gen():
  function data_gen_for_lm (line 26) | def data_gen_for_lm():
  function data_gen_for_sequence_classification (line 34) | def data_gen_for_sequence_classification():

FILE: tests/kit/model_zoo/transformers/mixtral.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_lm (line 27) | def data_gen_for_lm():
  function data_gen_for_sequence_classification (line 35) | def data_gen_for_sequence_classification():

FILE: tests/kit/model_zoo/transformers/opt.py
  function data_gen (line 13) | def data_gen():
  function data_gen_for_causal_lm (line 19) | def data_gen_for_causal_lm():
  function data_gen_for_sequence_classification (line 28) | def data_gen_for_sequence_classification():
  function data_gen_for_question_answering (line 37) | def data_gen_for_question_answering():

FILE: tests/kit/model_zoo/transformers/qwen2.py
  function data_gen (line 18) | def data_gen():
  function data_gen_for_causal_lm (line 37) | def data_gen_for_causal_lm():

FILE: tests/kit/model_zoo/transformers/qwen3.py
  function data_gen (line 18) | def data_gen():
  function data_gen_for_causal_lm (line 66) | def data_gen_for_causal_lm():

FILE: tests/kit/model_zoo/transformers/sam.py
  function data_gen (line 12) | def data_gen():

FILE: tests/kit/model_zoo/transformers/t5.py
  function data_gen_for_encoder_only (line 12) | def data_gen_for_encoder_only():
  function data_gen_for_conditional_generation (line 24) | def data_gen_for_conditional_generation():
  function data_gen_for_t5_model (line 34) | def data_gen_for_t5_model():
  function data_gen_for_token_classification (line 43) | def data_gen_for_token_classification():

FILE: tests/kit/model_zoo/transformers/vit.py
  function data_gen (line 14) | def data_gen():
  function data_gen_for_image_classification (line 19) | def data_gen_for_image_classification():
  function data_gen_for_masked_image_modeling (line 25) | def data_gen_for_masked_image_modeling():

FILE: tests/kit/model_zoo/transformers/whisper.py
  function data_gen (line 12) | def data_gen():
  function data_gen_for_conditional_generation (line 30) | def data_gen_for_conditional_generation():
  function data_gen_for_audio_classification (line 40) | def data_gen_for_audio_classification():

FILE: tests/test_analyzer/test_fx/test_bias_addition.py
  class LinearModel (line 14) | class LinearModel(torch.nn.Module):
    method __init__ (line 15) | def __init__(self, in_features, out_features, bias):
    method forward (line 19) | def forward(self, x):
  class ConvModel (line 24) | class ConvModel(torch.nn.Module):
    method __init__ (line 25) | def __init__(self, in_channel, out_channels, kernel_size, bias) -> None:
    method forward (line 34) | def forward(self, x, select=0):
  class SiuModel (line 42) | class SiuModel(torch.nn.Module):
    method __init__ (line 43) | def __init__(self, bias) -> None:
    method forward (line 48) | def forward(self, x, select=torch.Tensor([0])):
  class AddmmModel (line 58) | class AddmmModel(torch.nn.Module):
    method __init__ (line 59) | def __init__(self, alpha, beta) -> None:
    method forward (line 64) | def forward(self, x):
  function test_siu_model (line 75) | def test_siu_model(bias, bias_addition_split, shape, select):
  function test_addmm_model (line 97) | def test_addmm_model(alpha, beta, bias_addition_split, shape):

FILE: tests/test_analyzer/test_fx/test_mod_dir.py
  class LinearModel (line 12) | class LinearModel(torch.nn.Module):
    method __init__ (line 13) | def __init__(self, in_features, out_features, bias):
    method forward (line 17) | def forward(self, x):
  class ConvModel (line 22) | class ConvModel(torch.nn.Module):
    method __init__ (line 23) | def __init__(self, in_channel, out_channels, kernel_size, bias) -> None:
    method forward (line 32) | def forward(self, x):
  class AModel (line 38) | class AModel(torch.nn.Module):
    method __init__ (line 39) | def __init__(self, bias) -> None:
    method forward (line 45) | def forward(self, x):
  function test_mod_dir (line 58) | def test_mod_dir(bias, bias_addition_split, shape):

FILE: tests/test_analyzer/test_fx/test_nested_ckpt.py
  class MyModule (line 14) | class MyModule(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method checkpoint_0 (line 23) | def checkpoint_0(self, x):
    method checkpoint_0_0 (line 26) | def checkpoint_0_0(self, x):
    method checkpoint_0_0_0 (line 29) | def checkpoint_0_0_0(self, x):
    method checkpoint_0_0_0_0 (line 32) | def checkpoint_0_0_0_0(self, x):
    method checkpoint_0_0_1 (line 35) | def checkpoint_0_0_1(self, x):
    method checkpoint_0_1 (line 38) | def checkpoint_0_1(self, x):
    method forward (line 41) | def forward(self, x):
  function test_nested_ckpt (line 47) | def test_nested_ckpt():

FILE: tests/test_analyzer/test_fx/test_shape_prop.py
  function linear_impl (line 15) | def linear_impl(*args, **kwargs):
  function _check_gm_validity (line 23) | def _check_gm_validity(gm: torch.fx.GraphModule):
  function test_torchvision_shape_prop (line 37) | def test_torchvision_shape_prop(m):
  function test_timm_shape_prop (line 52) | def test_timm_shape_prop(m):

FILE: tests/test_analyzer/test_fx/test_symbolic_profile.py
  function _check_gm_validity (line 15) | def _check_gm_validity(gm: torch.fx.GraphModule):
  function test_torchvision_profile (line 23) | def test_torchvision_profile(m, verbose=False, bias_addition_split=False):
  function test_timm_profile (line 38) | def test_timm_profile(m, verbose=False, bias_addition_split=False):

FILE: tests/test_analyzer/test_subclasses/test_aten.py
  function compare_all (line 64) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor) -> Any:
  function run_and_compare (line 76) | def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requ...
  function test_meta_aten (line 89) | def test_meta_aten():

FILE: tests/test_analyzer/test_subclasses/test_flop_tensor.py
  function test_flop_count_module (line 17) | def test_flop_count_module(m):
  function test_flop_count_function (line 47) | def test_flop_count_function(func, args, kwargs):

FILE: tests/test_analyzer/test_subclasses/test_meta_mode.py
  function compare_all (line 15) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor):
  function run_and_compare (line 27) | def run_and_compare(model):
  function test_meta_mode_shape (line 42) | def test_meta_mode_shape(m):

FILE: tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py
  function _run_C_solver_consistency_test (line 29) | def _run_C_solver_consistency_test(rank, world_size, port):
  function test_C_solver_consistency (line 76) | def test_C_solver_consistency():

FILE: tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py
  function _is_activation_checkpoint_available (line 37) | def _is_activation_checkpoint_available(gm: GraphModule):
  function _is_all_gradient_close (line 43) | def _is_all_gradient_close(m: torch.nn.Module, gm: GraphModule):
  function _is_graph_linearized (line 50) | def _is_graph_linearized(gm: GraphModule):
  function check_backward_consistency (line 60) | def check_backward_consistency(
  function _run_ckpt_solver (line 77) | def _run_ckpt_solver(rank, world_size, port):
  function test_ckpt_solver (line 109) | def test_ckpt_solver():
  function _run_ckpt_solver_torch11 (line 113) | def _run_ckpt_solver_torch11(rank, world_size, port):
  function test_ckpt_solver_torch11 (line 144) | def test_ckpt_solver_torch11():

FILE: tests/test_auto_parallel/test_ckpt_solvers/test_linearize.py
  function test_linearize (line 32) | def test_linearize():
  function test_linearize_torch11 (line 96) | def test_linearize_torch11():

FILE: tests/test_auto_parallel/test_offload/model_utils.py
  class GPTLMModel (line 8) | class GPTLMModel(nn.Module):
    method __init__ (line 9) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads...
    method forward (line 22) | def forward(self, input_ids, attention_mask):
  class LMLoss (line 27) | class LMLoss(nn.Module):
    method __init__ (line 28) | def __init__(self):
    method forward (line 32) | def forward(self, logits, labels):
  class BertLMModel (line 39) | class BertLMModel(nn.Module):
    method __init__ (line 40) | def __init__(self, hidden_size=768, num_layers=12, num_attention_heads...
    method forward (line 53) | def forward(self, input_ids, attention_mask):
  function get_bert_components (line 59) | def get_bert_components():
  function get_gpt2_components (line 78) | def get_gpt2_components():

FILE: tests/test_auto_parallel/test_offload/test_perf.py
  function exam_fwd_bwd (line 26) | def exam_fwd_bwd(model_name: str, memory_budget: float, solver_name: str):
  function run_dist (line 146) | def run_dist(rank, world_size, port):
  function test_perf (line 154) | def test_perf():

FILE: tests/test_auto_parallel/test_offload/test_solver.py
  function solver_test (line 19) | def solver_test(model_name: str, memory_budget: float, solver_name: str):

FILE: tests/test_auto_parallel/test_pass/test_node_converting_pass.py
  class TestModule (line 11) | class TestModule(torch.nn.Module):
    method forward (line 12) | def forward(self, x):
  function insert_narrow (line 17) | def insert_narrow(gm, x_node):
  function test_node_args_converting_pass (line 29) | def test_node_args_converting_pass():

FILE: tests/test_auto_parallel/test_pass/test_size_value_converting_pass.py
  class TestModule (line 13) | class TestModule(torch.nn.Module):
    method forward (line 14) | def forward(self, x):
  function insert_narrow (line 19) | def insert_narrow(gm, x_node):
  function recover_narrow (line 28) | def recover_narrow(gm, narrow_node):
  function test_size_value_converting_pass (line 39) | def test_size_value_converting_pass():

FILE: tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py
  class LinearModel (line 17) | class LinearModel(torch.nn.Module):
    method __init__ (line 18) | def __init__(self, in_features, out_features):
    method forward (line 22) | def forward(self, x):
  class ConvModel (line 29) | class ConvModel(torch.nn.Module):
    method __init__ (line 30) | def __init__(self, in_channels, out_channels, kernel_size, bias=True):
    method forward (line 36) | def forward(self, x):
  function check_linear_module (line 43) | def check_linear_module(rank, world_size, port):
  function check_conv_module (line 60) | def check_conv_module(rank, world_size, port):
  function test_bias_addition_module (line 81) | def test_bias_addition_module():

FILE: tests/test_auto_parallel/test_tensor_shard/test_broadcast.py
  function test_is_broadcastable (line 12) | def test_is_broadcastable():
  function test_get_broadcast_shape (line 26) | def test_get_broadcast_shape():
  function test_recover_sharding_spec_for_broadcast_shape (line 40) | def test_recover_sharding_spec_for_broadcast_shape():

FILE: tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py
  class GPT2MLPWithCkpt (line 24) | class GPT2MLPWithCkpt(nn.Module):
    method __init__ (line 25) | def __init__(self, intermediate_size, hidden_size):
    method forward (line 32) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -...
  function check_act_ckpt (line 40) | def check_act_ckpt(rank, world_size, port):
  function test_mlp_layer (line 69) | def test_mlp_layer():

FILE: tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py
  class MLP (line 20) | class MLP(torch.nn.Module):
    method __init__ (line 21) | def __init__(self, in_features):
    method forward (line 26) | def forward(self, x):
  function check_compatibility_with_ddp (line 33) | def check_compatibility_with_ddp(rank, world_size, port):
  function test_compatibility_with_ddp (line 102) | def test_compatibility_with_ddp():

FILE: tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py
  class MLP (line 22) | class MLP(torch.nn.Module):
    method __init__ (line 23) | def __init__(self, in_features):
    method forward (line 28) | def forward(self, x):
  function check_auto_parallel_with_gemini (line 35) | def check_auto_parallel_with_gemini(rank, world_size, port):
  function test_auto_parallel_with_gemini (line 109) | def test_auto_parallel_with_gemini():

FILE: tests/test_auto_parallel/test_tensor_shard/test_find_repeat_block.py
  class RepeatBlock (line 21) | class RepeatBlock(nn.Module):
    method __init__ (line 22) | def __init__(self, intermediate_size, hidden_size):
    method forward (line 28) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -...
  class RepeatModel (line 36) | class RepeatModel(nn.Module):
    method __init__ (line 37) | def __init__(self, intermediate_size, hidden_size, num_layers):
    method forward (line 41) | def forward(self, x):
  class NonRepeatBlock (line 48) | class NonRepeatBlock(nn.Module):
    method __init__ (line 49) | def __init__(self, intermediate_size, hidden_size, layer_index):
    method forward (line 56) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -...
  class NonRepeatModel (line 64) | class NonRepeatModel(nn.Module):
    method __init__ (line 65) | def __init__(self, intermediate_size, hidden_size, num_layers):
    method forward (line 69) | def forward(self, x):
  function test_repeat_blocks (line 79) | def test_repeat_blocks(model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/gpt_modules.py
  class GPT2MLP (line 10) | class GPT2MLP(nn.Module):
    method __init__ (line 11) | def __init__(self, intermediate_size, config):
    method forward (line 21) | def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -...
  class GPT2Attention (line 35) | class GPT2Attention(nn.Module):
    method __init__ (line 36) | def __init__(self, config, layer_idx=None):
    method _attn (line 66) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _split_heads (line 99) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 104) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 109) | def forward(
  class GPT2Block (line 131) | class GPT2Block(nn.Module):
    method __init__ (line 132) | def __init__(self, config, layer_idx=None):
    method forward (line 141) | def forward(
  class GPT2Model (line 166) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 169) | def __init__(self, config):
    method forward (line 184) | def forward(
  class GPT2LMHeadModel (line 234) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 237) | def __init__(self, config):
    method forward (line 249) | def forward(
  class GPTLMLoss (line 264) | class GPTLMLoss(nn.Module):
    method __init__ (line 265) | def __init__(self):
    method forward (line 269) | def forward(self, logits, labels):

FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py
  function _check_module_grad (line 50) | def _check_module_grad(
  function check_attention_layer (line 74) | def check_attention_layer(rank, model_cls, world_size, port):
  function test_mlp_layer (line 195) | def test_mlp_layer(model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_gpt/test_solver_with_gpt_module.py
  function test_self_attention_block (line 23) | def test_self_attention_block(model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_liveness_analysis.py
  class LinearModel (line 13) | class LinearModel(nn.Module):
    method __init__ (line 14) | def __init__(self):
    method forward (line 20) | def forward(self, x1, x2):
  function test_liveness_analysis (line 31) | def test_liveness_analysis():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_activation_metainfo.py
  function test_activation_meta_info (line 21) | def test_activation_meta_info(func):

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py
  class BinaryElementwiseOpModule (line 13) | class BinaryElementwiseOpModule(nn.Module):
    method __init__ (line 14) | def __init__(self, token=torch.add, shape=64) -> None:
    method forward (line 19) | def forward(self, input):
  function _binary_elementwise_mem_test (line 23) | def _binary_elementwise_mem_test(rank, world_size, port):
  function test_binary_elementwise_meta_concrete_info_match (line 60) | def test_binary_elementwise_meta_concrete_info_match():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py
  class ConvFunctionModule (line 13) | class ConvFunctionModule(nn.Module):
    method __init__ (line 14) | def __init__(self, in_channels=4, out_channels=64, kernel_size=3):
    method forward (line 18) | def forward(self, input):
  function _conv_module_mem_test (line 22) | def _conv_module_mem_test(rank, world_size, port, bias):
  function test_conv_meta_concrete_info_match (line 60) | def test_conv_meta_concrete_info_match(bias=False):
  function _conv_function_mem_test (line 64) | def _conv_function_mem_test(rank, world_size, port):
  function test_conv_function_concrete_info_match (line 101) | def test_conv_function_concrete_info_match():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_embedding_metainfo.py
  function test_embedding_meta_info (line 14) | def test_embedding_meta_info():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py
  class MyModule (line 13) | class MyModule(nn.Module):
    method __init__ (line 14) | def __init__(self, in_features=64, out_features=128):
    method forward (line 18) | def forward(self, input):
  function _linear_module_mem_test (line 22) | def _linear_module_mem_test(rank, world_size, port):
  function test_linear_module_meta_concrete_info_match (line 56) | def test_linear_module_meta_concrete_info_match():
  function _linear_function_mem_test (line 60) | def _linear_function_mem_test(rank, world_size, port):
  function test_linear_function_meta_concrete_info_match (line 94) | def test_linear_function_meta_concrete_info_match():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_matmul_metainfo.py
  function test_matmul_function_meta_info (line 29) | def test_matmul_function_meta_info(tensor_shapes):

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py
  function _batchnorm_module_mem_test (line 17) | def _batchnorm_module_mem_test(rank, world_size, port):
  function test_batchnorm_meta_concrete_info_match (line 54) | def test_batchnorm_meta_concrete_info_match():
  function test_layernorm_meta_info (line 66) | def test_layernorm_meta_info(tensor_shape):

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py
  function _adaptiveavgpool_module_mem_test (line 13) | def _adaptiveavgpool_module_mem_test(rank, world_size, port):
  function test_adaptiveavgpool_meta_concrete_info_match (line 50) | def test_adaptiveavgpool_meta_concrete_info_match():
  function _maxpool_module_mem_test (line 54) | def _maxpool_module_mem_test(rank, world_size, port):
  function test_maxpool_meta_concrete_info_match (line 91) | def test_maxpool_meta_concrete_info_match():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_tensor_metainfo.py
  class SplitModule (line 13) | class SplitModule(nn.Module):
    method __init__ (line 14) | def __init__(self) -> None:
    method forward (line 17) | def forward(self, x):
  function test_tensor_meta_info (line 23) | def test_tensor_meta_info():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_where_metainfo.py
  function test_where_meta_info (line 14) | def test_where_meta_info():

FILE: tests/test_auto_parallel/test_tensor_shard/test_metainfo/utils.py
  function mem_test_for_node_strategy (line 24) | def mem_test_for_node_strategy(
  function print_results (line 154) | def print_results(

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py
  class AddBMMTensorMethodModule (line 15) | class AddBMMTensorMethodModule(nn.Module):
    method __init__ (line 16) | def __init__(self, using_kwargs):
    method forward (line 20) | def forward(self, bias, x1, x2):
  class AddBMMTorchFunctionModule (line 28) | class AddBMMTorchFunctionModule(nn.Module):
    method __init__ (line 29) | def __init__(self, using_kwargs):
    method forward (line 33) | def forward(self, bias, x1, x2):
  function check_2d_device_mesh (line 41) | def check_2d_device_mesh(rank, world_size, port, module, bias_shape, usi...
  function check_1d_device_mesh (line 151) | def check_1d_device_mesh(rank, module, bias_shape, using_kwargs, world_s...
  function test_2d_device_mesh (line 252) | def test_2d_device_mesh(module, bias_shape, using_kwargs):
  function test_1d_device_mesh (line 269) | def test_1d_device_mesh(module, bias_shape, using_kwargs):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py
  class AddmmModel (line 21) | class AddmmModel(nn.Module):
    method __init__ (line 22) | def __init__(self):
    method forward (line 25) | def forward(self, input, m1, m2):
  class AddmmModel_with_param (line 30) | class AddmmModel_with_param(nn.Module):
    method __init__ (line 31) | def __init__(self, weight_shape, bias_shape):
    method forward (line 36) | def forward(self, m1):
  function check_addmm_function_handler (line 41) | def check_addmm_function_handler(rank, world_size, port, input_shape, mo...
  function test_addmm_handler (line 186) | def test_addmm_handler(input_shape, model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py
  function check_bn_module_handler (line 17) | def check_bn_module_handler(rank, world_size, port):
  function test_bn_module_handler (line 113) | def test_bn_module_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py
  class LinearModule (line 24) | class LinearModule(torch.nn.Module):
    method __init__ (line 25) | def __init__(self, weight_shape):
    method forward (line 30) | def forward(self, x):
  function check_linear_module_handler (line 35) | def check_linear_module_handler(rank, world_size, port):
  function test_linear_handler (line 165) | def test_linear_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py
  class LinearModule (line 21) | class LinearModule(torch.nn.Module):
    method __init__ (line 22) | def __init__(self, in_features, out_features, bias):
    method forward (line 26) | def forward(self, x):
  function check_linear_module_handler (line 31) | def check_linear_module_handler(rank, world_size, port, bias):
  function test_linear_handler (line 154) | def test_linear_handler(bias=True):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py
  function check_binary_elementwise_handler_with_tensor (line 17) | def check_binary_elementwise_handler_with_tensor(rank, world_size, port,...
  class BEOpModelWithNodeConst (line 124) | class BEOpModelWithNodeConst(nn.Module):
    method __init__ (line 125) | def __init__(self, op):
    method forward (line 129) | def forward(self, x1):
  class BEOpModelWithIntConst (line 135) | class BEOpModelWithIntConst(nn.Module):
    method __init__ (line 136) | def __init__(self, op, const):
    method forward (line 141) | def forward(self, x1):
  function check_binary_elementwise_handler_with_int (line 146) | def check_binary_elementwise_handler_with_int(rank, world_size, port, op...
  function test_binary_elementwise_handler_with_tensor (line 234) | def test_binary_elementwise_handler_with_tensor(op, other_dim):
  function test_binary_elementwise_handler_with_int (line 249) | def test_binary_elementwise_handler_with_int(op, model_cls, other_dim):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py
  class BMMTensorMethodModule (line 17) | class BMMTensorMethodModule(nn.Module):
    method forward (line 18) | def forward(self, x1, x2):
  class BMMTorchFunctionModule (line 22) | class BMMTorchFunctionModule(nn.Module):
    method forward (line 23) | def forward(self, x1, x2):
  function check_2d_device_mesh (line 27) | def check_2d_device_mesh(rank, module, world_size, port):
  function check_1d_device_mesh (line 122) | def check_1d_device_mesh(rank, module, world_size, port):
  function test_bmm_handler (line 206) | def test_bmm_handler(module):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py
  function check_conv_module_handler (line 17) | def check_conv_module_handler(rank, world_size, port, bias):
  class ConvModel (line 145) | class ConvModel(nn.Module):
    method __init__ (line 146) | def __init__(self):
    method forward (line 149) | def forward(self, input, others, bias=None):
  function check_conv_function_handler (line 154) | def check_conv_function_handler(rank, world_size, port, bias):
  function test_conv_module_handler (line 302) | def test_conv_module_handler(bias=False):
  function test_conv_function_handler (line 312) | def test_conv_function_handler(bias=False):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_default_reshape_handler.py
  class ReshapeModel (line 14) | class ReshapeModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, input, other):
  function test_reshape_handler (line 26) | def test_reshape_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py
  class EmbeddingModule (line 24) | class EmbeddingModule(nn.Module):
    method __init__ (line 25) | def __init__(self, num_embeddings, embedding_dims):
    method forward (line 29) | def forward(self, input):
  function check_embedding_module_handler (line 34) | def check_embedding_module_handler(rank, world_size, port):
  class EmbeddingFunction (line 142) | class EmbeddingFunction(nn.Module):
    method __init__ (line 143) | def __init__(self):
    method forward (line 146) | def forward(self, input, others):
  function check_embedding_function_handler (line 151) | def check_embedding_function_handler(rank, world_size, port):
  function test_embedding_module_handler (line 272) | def test_embedding_module_handler():
  function test_embedding_function_handler (line 279) | def test_embedding_function_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getattr_handler.py
  class GetattrModel (line 14) | class GetattrModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 19) | def forward(self, input):
  function test_getattr_handler (line 26) | def test_getattr_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py
  class GetItemFromTensorModel (line 21) | class GetItemFromTensorModel(nn.Module):
    method __init__ (line 22) | def __init__(self, getitem_index):
    method forward (line 26) | def forward(self, input, other):
  function check_getitem_from_tensor_handler (line 32) | def check_getitem_from_tensor_handler(rank, getitem_index, world_size, p...
  function test_getitem_from_tensor_handler (line 101) | def test_getitem_from_tensor_handler(getitem_index):
  class GetItemFromTupleModel (line 105) | class GetItemFromTupleModel(nn.Module):
    method __init__ (line 106) | def __init__(self):
    method forward (line 109) | def forward(self, input):
  function test_getitem_from_tuple_handler (line 117) | def test_getitem_from_tuple_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py
  function check_ln_module_handler (line 18) | def check_ln_module_handler(rank, world_size, port):
  function test_ln_module_handler (line 103) | def test_ln_module_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py
  function check_linear_module_handler (line 24) | def check_linear_module_handler(rank, world_size, port, bias, input_shape):
  class LinearModel (line 163) | class LinearModel(nn.Module):
    method __init__ (line 164) | def __init__(self):
    method forward (line 167) | def forward(self, input, others, bias=None):
  function check_linear_function_handler (line 172) | def check_linear_function_handler(rank, world_size, port, bias, input_sh...
  function test_linear_handler (line 314) | def test_linear_handler(input_shape, bias=False):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_matmul_handler.py
  class MatMulModule (line 24) | class MatMulModule(nn.Module):
    method forward (line 25) | def forward(self, x1, x2):
  function test_matmul_node_handler (line 50) | def test_matmul_node_handler(tensor_shapes):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_norm_pooling_handler.py
  function test_norm_pool_handler (line 15) | def test_norm_pool_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_output_handler.py
  class OutputModel (line 14) | class OutputModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, x):
  function test_output_handler (line 26) | def test_output_handler(output_option):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py
  class ConvReshapeModel (line 20) | class ConvReshapeModel(nn.Module):
    method __init__ (line 21) | def __init__(self, reshape_dims, call_function):
    method forward (line 26) | def forward(self, input, other):
  class LinearReshapeModel (line 36) | class LinearReshapeModel(nn.Module):
    method __init__ (line 37) | def __init__(self, reshape_dims, call_function):
    method forward (line 42) | def forward(self, input, other):
  function check_view_handler (line 52) | def check_view_handler(rank, world_size, port, call_function, reshape_di...
  function test_view_handler (line 323) | def test_view_handler(call_function, reshape_dims, model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_placeholder_handler.py
  class PlaceholderModel (line 14) | class PlaceholderModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, input):
  function test_placeholder_handler (line 25) | def test_placeholder_handler(placeholder_option):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_shard_option.py
  class LinearModel (line 14) | class LinearModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, input, others, bias=None):
  function check_shard_option (line 23) | def check_shard_option(shard_option):
  function test_shard_option (line 109) | def test_shard_option():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py
  class LinearSplitModel (line 19) | class LinearSplitModel(nn.Module):
    method __init__ (line 20) | def __init__(self, softmax_dim):
    method forward (line 24) | def forward(self, input, other):
  function check_split_handler (line 30) | def check_split_handler(rank, world_size, port, softmax_dim, model_cls):
  function test_split_handler (line 174) | def test_split_handler(softmax_dim, model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py
  class ConvSplitModel (line 19) | class ConvSplitModel(nn.Module):
    method __init__ (line 20) | def __init__(self, split_size, split_dim):
    method forward (line 25) | def forward(self, input, other):
  class LinearSplitModel (line 31) | class LinearSplitModel(nn.Module):
    method __init__ (line 32) | def __init__(self, split_size, split_dim):
    method forward (line 37) | def forward(self, input, other):
  function check_split_handler (line 43) | def check_split_handler(rank, world_size, port, split_size, split_dim, m...
  function test_split_handler (line 252) | def test_split_handler(split_size, split_dim, model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py
  class LinearSumModel (line 18) | class LinearSumModel(nn.Module):
    method __init__ (line 19) | def __init__(self, sum_dims, keepdim):
    method forward (line 24) | def forward(self, input, other):
  function check_sum_handler (line 33) | def check_sum_handler(rank, world_size, port, sum_dims, keepdim):
  function test_sum_handler (line 226) | def test_sum_handler(sum_dims, keepdim):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_tensor_constructor.py
  class TensorConstructorModel (line 13) | class TensorConstructorModel(nn.Module):
    method __init__ (line 14) | def __init__(self):
    method forward (line 17) | def forward(self, x):
  function test_where_handler (line 25) | def test_where_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_unary_element_wise_handler.py
  class ReLuModel (line 14) | class ReLuModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 19) | def forward(self, input, other):
  function test_elementwise_handler (line 27) | def test_elementwise_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py
  class ConvViewModel (line 20) | class ConvViewModel(nn.Module):
    method __init__ (line 21) | def __init__(self, tgt_shape):
    method forward (line 25) | def forward(self, input, other):
  class LinearViewModel (line 31) | class LinearViewModel(nn.Module):
    method __init__ (line 32) | def __init__(self, tgt_shape):
    method forward (line 36) | def forward(self, input, other):
  function check_view_handler (line 42) | def check_view_handler(rank, tgt_shape, model_cls, world_size, port):
  function test_view_handler (line 250) | def test_view_handler(tgt_shape, model_cls):

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_where_handler.py
  class ConvModel (line 14) | class ConvModel(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, condition, x, y):
  function test_where_handler (line 25) | def test_where_handler():

FILE: tests/test_auto_parallel/test_tensor_shard/test_node_handler/utils.py
  function _build_model_to_compare (line 20) | def _build_model_to_compare(
  function numerical_test_for_node_strategy (line 66) | def numerical_test_for_node_strategy(
  function assert_close_helper (line 184) | def assert_close_helper(

FILE: tests/test_auto_parallel/test_tensor_shard/test_solver_with_resnet_v2.py
  function test_cost_graph (line 19) | def test_cost_graph():

FILE: tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py
  function _benchmark_evoformer_stack_gm (line 19) | def _benchmark_evoformer_stack_gm(
  function _benchmark_evoformer_stack_origin (line 67) | def _benchmark_evoformer_stack_origin(
  function _benchmark_memory (line 90) | def _benchmark_memory(model, inputs):
  function _benchmark_speed (line 99) | def _benchmark_speed(model, inputs, loop=5):
  function benchmark_evoformer_stack (line 112) | def benchmark_evoformer_stack(data_args):

FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py
  function assert_codegen_run (line 19) | def assert_codegen_run(
  function run_test (line 89) | def run_test(

FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_evoformer_block.py
  function get_model (line 20) | def get_model():
  function get_data (line 44) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]:
  function get_chunk_target (line 60) | def get_chunk_target() -> Dict:
  function test_evoformer_block (line 88) | def test_evoformer_block(data_args, max_memory):

FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_evoformer_stack.py
  function get_model (line 20) | def get_model():
  function get_data (line 48) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]:
  function test_evoformer_stack (line 71) | def test_evoformer_stack(data_args, max_memory):

FILE: tests/test_autochunk/test_autochunk_alphafold/test_autochunk_extramsa_block.py
  function get_model (line 19) | def get_model():
  function get_data (line 44) | def get_data(msa_len: int, pair_len: int) -> Tuple[List, List]:
  function test_extramsa_block (line 67) | def test_extramsa_block(data_args, max_memory):

FILE: tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py
  function _benchmark_autochunk_unet_gm (line 20) | def _benchmark_autochunk_unet_gm(
  function _benchmark_autochunk_unet_origin (line 73) | def _benchmark_autochunk_unet_origin(
  function _benchmark_memory (line 98) | def _benchmark_memory(model, inputs):
  function _benchmark_speed (line 107) | def _benchmark_speed(model, inputs, loop=5):
  function benchmark_autochunk_unet (line 120) | def benchmark_autochunk_unet(batch=1, height=448, width=448):

FILE: tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py
  function assert_codegen_run (line 18) | def assert_codegen_run(
  function run_test (line 95) | def run_test(

FILE: tests/test_autochunk/test_autochunk_diffuser/test_autochunk_unet.py
  function get_data (line 31) | def get_data(shape: tuple) -> Tuple[List, List]:
  function test_evoformer_block (line 52) | def test_evoformer_block(model, shape, max_memory):

FILE: tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py
  function _benchmark_autochunk_gpt_gm (line 20) | def _benchmark_autochunk_gpt_gm(
  function _benchmark_autochunk_gpt_origin (line 73) | def _benchmark_autochunk_gpt_origin(
  function _benchmark_memory (line 98) | def _benchmark_memory(model, inputs):
  function _benchmark_speed (line 107) | def _benchmark_speed(model, inputs, loop=5):
  function benchmark_autochunk_gpt (line 120) | def benchmark_autochunk_gpt(batch=1, seq=512, n_embd=768, n_head=12):

FILE: tests/test_autochunk/test_autochunk_transformer/test_autochunk_gpt.py
  function get_data (line 24) | def get_data(shape: tuple) -> Tuple[List, List]:
  function test_autochunk_gpt (line 44) | def test_autochunk_gpt(model, shape, max_memory):

FILE: tests/test_autochunk/test_autochunk_transformer/test_autochunk_transformer_utils.py
  function assert_codegen_run (line 17) | def assert_codegen_run(
  function assert_allclose (line 81) | def assert_allclose(out_model: Any, out_gm: Any) -> None:
  function run_test (line 97) | def run_test(

FILE: tests/test_autochunk/test_autochunk_vit/test_autochunk_vit.py
  function get_data (line 21) | def get_data() -> Tuple[List, List]:
  function test_evoformer_block (line 34) | def test_evoformer_block(model, max_memory):

FILE: tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py
  function assert_codegen_run (line 18) | def assert_codegen_run(
  function run_test (line 85) | def run_test(

FILE: tests/test_booster/test_accelerator.py
  function test_accelerator (line 9) | def test_accelerator(device):

FILE: tests/test_booster/test_mixed_precision/test_fp16_torch.py
  function run_torch_amp (line 10) | def run_torch_amp(rank, world_size, port):
  function test_torch_ddp_plugin (line 39) | def test_torch_ddp_plugin():

FILE: tests/test_booster/test_plugin/test_3d_plugin.py
  class RandomDataset (line 22) | class RandomDataset(Dataset):
    method __init__ (line 23) | def __init__(self, num_samples: int = 100, max_length: int = 512, voca...
    method __len__ (line 32) | def __len__(self):
    method __getitem__ (line 35) | def __getitem__(self, idx):
  function move_to_cuda (line 43) | def move_to_cuda(batch):
  function run_fn (line 48) | def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn) -> O...
  function check_3d_plugin (line 87) | def check_3d_plugin(init_method: str = "none", early_stop: bool = True):
  function run_grad_acc_test (line 195) | def run_grad_acc_test(test_args):
  function run_dist (line 268) | def run_dist(rank, world_size, port, early_stop: bool = True):
  function test_3d_plugin (line 276) | def test_3d_plugin(early_stop: bool = True):

FILE: tests/test_booster/test_plugin/test_dp_plugin_base.py
  class DPPluginWrapper (line 17) | class DPPluginWrapper(DPPluginBase):
    method configure (line 20) | def configure(
    method control_checkpoint_io (line 30) | def control_checkpoint_io(self) -> bool:
    method control_device (line 33) | def control_device(self) -> bool:
    method control_precision (line 36) | def control_precision(self) -> bool:
    method get_checkpoint_io (line 39) | def get_checkpoint_io(self) -> CheckpointIO:
    method support_no_sync (line 42) | def support_no_sync(self) -> bool:
    method supported_devices (line 45) | def supported_devices(self) -> List[str]:
    method supported_precisions (line 48) | def supported_precisions(self) -> List[str]:
    method no_sync (line 51) | def no_sync(self, model: nn.Module) -> Iterator[None]:
    method enable_lora (line 54) | def enable_lora(self, model: nn.Module, pretrained_dir: str, lora_conf...
    method support_lora (line 57) | def support_lora(self) -> bool:
  function check_dataloader_sharding (line 61) | def check_dataloader_sharding():
  function run_dist (line 86) | def run_dist(rank, world_size, port):
  function test_dp_plugin_dataloader (line 93) | def test_dp_plugin_dataloader():

FILE: tests/test_booster/test_plugin/test_gemini_plugin.py
  function run_fn (line 19) | def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero...
  function check_gemini_plugin (line 75) | def check_gemini_plugin(
  function run_dist (line 164) | def run_dist(rank, world_size, port, early_stop: bool = True):
  function test_gemini_plugin (line 171) | def test_gemini_plugin(early_stop: bool = True):

FILE: tests/test_booster/test_plugin/test_low_level_zero_plugin.py
  function run_fn (line 26) | def run_fn(stage, model_fn, data_gen_fn, output_transform_fn, lora_confi...
  function check_low_level_zero_plugin (line 62) | def check_low_level_zero_plugin(stage: int, early_stop: bool = True):
  function check_low_level_zero_lora (line 103) | def check_low_level_zero_lora(stage, model_name, early_stop: bool = True):
  function run_dist (line 132) | def run_dist(rank, world_size, port, early_stop: bool = True):
  function test_low_level_zero_plugin (line 140) | def test_low_level_zero_plugin(early_stop: bool = True):

FILE: tests/test_booster/test_plugin/test_torch_ddp_plugin.py
  function run_fn (line 18) | def run_fn(model_fn, data_gen_fn, output_transform_fn):
  function check_torch_ddp_plugin (line 43) | def check_torch_ddp_plugin():
  class DummyModel (line 56) | class DummyModel(nn.Module):
    method __init__ (line 57) | def __init__(self):
    method forward (line 61) | def forward(self, x):
  function check_torch_ddp_no_sync (line 65) | def check_torch_ddp_no_sync():
  function run_dist (line 110) | def run_dist(rank, world_size, port):
  function test_torch_ddp_plugin (line 118) | def test_torch_ddp_plugin():

FILE: tests/test_booster/test_plugin/test_torch_fsdp_plugin.py
  function run_fn (line 20) | def run_fn(model_fn, data_gen_fn, output_transform_fn):
  function check_torch_fsdp_plugin (line 51) | def check_torch_fsdp_plugin():
  function run_dist (line 74) | def run_dist(rank, world_size, port):
  function test_torch_fsdp_plugin (line 82) | def test_torch_fsdp_plugin():

FILE: tests/test_checkpoint_io/test_gemini_checkpoint_io.py
  function exam_state_dict_with_origin (line 39) | def exam_state_dict_with_origin(
  function exam_state_dict (line 94) | def exam_state_dict(
  function exam_lazy_from_pretrained (line 193) | def exam_lazy_from_pretrained():
  function run_dist (line 210) | def run_dist(rank, world_size, port):
  function test_gemini_ckpIO (line 219) | def test_gemini_ckpIO():

FILE: tests/test_checkpoint_io/test_gemini_torch_compability.py
  function exam_torch_load_from_gemini (line 24) | def exam_torch_load_from_gemini(shard: bool, model_name: str):
  function exam_gemini_load_from_torch (line 90) | def exam_gemini_load_from_torch(shard: bool, model_name: str):
  function run_dist (line 165) | def run_dist(rank, world_size, port):
  function test_gemini_ckpIO (line 174) | def test_gemini_ckpIO(world_size):

FILE: tests/test_checkpoint_io/test_general_checkpoint_io.py
  function test_unsharded_checkpoint (line 23) | def test_unsharded_checkpoint(use_safetensors: bool, use_async: bool):
  function test_sharded_model_checkpoint (line 77) | def test_sharded_model_checkpoint(use_safetensors: bool, use_async: bool):
  function test_sharded_optimizer_checkpoint (line 117) | def test_sharded_optimizer_checkpoint(use_async: bool):
  function test_sharded_optimizer_multiple_param_groups (line 186) | def test_sharded_optimizer_multiple_param_groups(use_async: bool):

FILE: tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py
  function exam_state_dict (line 48) | def exam_state_dict(
  function run_dist (line 146) | def run_dist(rank, world_size, port):
  function test_hybrid_ckpIO (line 154) | def test_hybrid_ckpIO(world_size):

FILE: tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py
  function check_low_level_zero_checkpointIO (line 33) | def check_low_level_zero_checkpointIO(stage: int, shard: bool, offload: ...
  function run_fn (line 95) | def run_fn(stage, shard, offload, model_fn, data_gen_fn, output_transfor...
  function check_low_level_zero_lora_checkpointIO (line 159) | def check_low_level_zero_lora_checkpointIO(
  function run_dist (line 192) | def run_dist(rank, world_size, port):
  function test_low_level_zero_checkpointIO (line 201) | def test_low_level_zero_checkpointIO():

FILE: tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py
  function exam_from_pretrained (line 23) | def exam_from_pretrained(plugin_type: str, model_name: str, shard=True, ...
  function run_dist (line 70) | def run_dist(rank, world_size, port):
  function test_huggingface_compatibility (line 78) | def test_huggingface_compatibility(world_size):

FILE: tests/test_checkpoint_io/test_safetensors_async_io.py
  function gen_optim_state_dict (line 13) | def gen_optim_state_dict():
  function gen_model_state_dict (line 108) | def gen_model_state_dict():
  function test_create_pin (line 118) | def test_create_pin(empty: bool, num_threads: int):
  function test_save_load (line 139) | def test_save_load():

FILE: tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py
  function check_torch_ddp_checkpointIO (line 19) | def check_torch_ddp_checkpointIO(shard: bool, size_per_shard: int, use_a...
  function run_dist (line 74) | def run_dist(rank, world_size, port):
  function test_torch_ddp_checkpointIO (line 80) | def test_torch_ddp_checkpointIO():

FILE: tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py
  function compare_nested_dict (line 18) | def compare_nested_dict(dict1, dict2):
  function check_torch_fsdp_ckpt (line 47) | def check_torch_fsdp_ckpt(use_async: bool):
  function run_dist (line 153) | def run_dist(rank, world_size, port):
  function test_torch_fsdp_ckpt (line 161) | def test_torch_fsdp_ckpt():

FILE: tests/test_checkpoint_io/utils.py
  function shared_tempdir (line 9) | def shared_tempdir() -> Iterator[str]:

FILE: tests/test_cluster/test_device_mesh_manager.py
  function check_device_mesh_manager (line 7) | def check_device_mesh_manager(rank, world_size, port):
  function test_device_mesh_manager (line 28) | def test_device_mesh_manager():

FILE: tests/test_cluster/test_process_group_mesh.py
  function check_process_group_mesh_with_cases (line 9) | def check_process_group_mesh_with_cases():
  function run_dist (line 127) | def run_dist(rank, world_size, port):
  function test_process_group_mesh (line 138) | def test_process_group_mesh():

FILE: tests/test_config/test_load_config.py
  function test_load_config (line 9) | def test_load_config():

FILE: tests/test_device/test_alpha_beta.py
  function check_alpha_beta (line 9) | def check_alpha_beta(rank, world_size, port, physical_devices):
  function test_profile_alpha_beta (line 22) | def test_profile_alpha_beta(physical_devices):

FILE: tests/test_device/test_device_mesh.py
  function test_device_mesh (line 10) | def test_device_mesh():
  function check_1d_device_mesh (line 23) | def check_1d_device_mesh():
  function check_2d_device_mesh (line 39) | def check_2d_device_mesh():
  function check_init_from_process_group (line 77) | def check_init_from_process_group(rank, world_size, port):
  function test_device_mesh_from_process_group (line 83) | def test_device_mesh_from_process_group():

FILE: tests/test_device/test_extract_alpha_beta.py
  function check_extract_alpha_beta (line 9) | def check_extract_alpha_beta(rank, world_size, port, physical_devices):
  function test_profile_alpha_beta (line 25) | def test_profile_alpha_beta(physical_devices):

FILE: tests/test_device/test_init_logical_pg.py
  function check_layer (line 11) | def check_layer(rank, world_size, port):
  function test_logical_pg (line 32) | def test_logical_pg():

FILE: tests/test_device/test_search_logical_device_mesh.py
  function check_alpha_beta (line 9) | def check_alpha_beta(rank, world_size, port, physical_devices):
  function test_profile_alpha_beta (line 25) | def test_profile_alpha_beta(physical_devices):

FILE: tests/test_fp8/test_all_to_all_single.py
  function check_all2all (line 16) | def check_all2all(shape, dtype, async_op):
  function check_all2all_uneven (line 32) | def check_all2all_uneven(shape, dtype, async_op):
  function run_dist (line 65) | def run_dist(rank, world_size, port):
  function test_all_to_all_single (line 72) | def test_all_to_all_single():

FILE: tests/test_fp8/test_fp8_all_to_all.py
  function check_4gpu (line 17) | def check_4gpu(shape, scatter_dim, dtype, fp8_format):
  function run_dist (line 29) | def run_dist(rank, world_size, port):
  function test_all_to_all (line 35) | def test_all_to_all():

FILE: tests/test_fp8/test_fp8_all_to_all_single.py
  function check_4gpu (line 18) | def check_4gpu(shape, dtype, fp8_format):
  function run_dist (line 27) | def run_dist(rank, world_size, port):
  function test_all_to_all_single (line 33) | def test_all_to_all_single():

FILE: tests/test_fp8/test_fp8_allgather.py
  function check_4gpu (line 20) | def check_4gpu(shape, dtype, fp8_format, async_op):
  function run_dist (line 35) | def run_dist(rank, world_size, port):
  function test_all_gather (line 41) | def test_all_gather():

FILE: tests/test_fp8/test_fp8_allreduce.py
  function check_4gpu (line 27) | def check_4gpu(shape, dtype, fp8_format, async_op):
  function run_dist (line 45) | def run_dist(rank, world_size, port):
  function test_all_reduce (line 51) | def test_all_reduce():

FILE: tests/test_fp8/test_fp8_cast.py
  function test_fp8_cast (line 13) | def test_fp8_cast(shape, dtype, fp8_format):

FILE: tests/test_fp8/test_fp8_ddp_comm_hook.py
  function setup (line 14) | def setup(rank, world_size):
  function cleanup (line 22) | def cleanup():
  class ToyModel (line 26) | class ToyModel(nn.Module):
    method __init__ (line 27) | def __init__(self):
    method forward (line 33) | def forward(self, x):
  function demo_basic (line 37) | def demo_basic(rank, world_size):
  function run_demo (line 79) | def run_demo(demo_fn, world_size):

FILE: tests/test_fp8/test_fp8_fsdp_comm_hook.py
  function cleanup (line 16) | def cleanup():
  class ToyModel (line 20) | class ToyModel(nn.Module):
    method __init__ (line 21) | def __init__(self):
    method forward (line 27) | def forward(self, x):
  function run_model (line 33) | def run_model(mode):
  function demo_basic (line 92) | def demo_basic(rank, world_size, port):
  function test_fsdp (line 101) | def test_fsdp():

FILE: tests/test_fp8/test_fp8_hook.py
  function new_linear_fp8 (line 17) | def new_linear_fp8(x, w, bias=None):
  class FP8TestHook (line 23) | class FP8TestHook(FP8Hook):
    method rewrite_op (line 24) | def rewrite_op(self, func):
  function test_fp8_hook (line 39) | def test_fp8_hook():

FILE: tests/test_fp8/test_fp8_linear.py
  function test_fp8_linear (line 18) | def test_fp8_linear(use_bias: bool, use_batch: bool):

FILE: tests/test_fp8/test_fp8_reduce_scatter.py
  function check_4gpu (line 18) | def check_4gpu(shape, scatter_dim, dtype, fp8_format, async_op):
  function run_dist (line 34) | def run_dist(rank, world_size, port):
  function test_reduce_scatter (line 40) | def test_reduce_scatter():

FILE: tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py
  class MLP (line 23) | class MLP(torch.nn.Module):
    method __init__ (line 24) | def __init__(self):
    method forward (line 29) | def forward(self, x):
  class relu (line 33) | class relu(torch.nn.Module):
    method __init__ (line 34) | def __init__(self) -> None:
    method forward (line 38) | def forward(self, x):
  class MyModule (line 42) | class MyModule(torch.nn.Module):
    method __init__ (line 43) | def __init__(self):
    method ckpt2 (line 49) | def ckpt2(self, x):
    method ckpt3 (line 52) | def ckpt3(self, x, y):
    method forward (line 55) | def forward(self, x, y):
  function _run_act_ckpt_codegen (line 65) | def _run_act_ckpt_codegen(rank, world_size, port):
  function test_act_ckpt_codegen (line 124) | def test_act_ckpt_codegen():
  function _run_act_ckpt_python_code_torch11 (line 128) | def _run_act_ckpt_python_code_torch11(rank, world_size, port):
  function test_act_ckpt_python_code_torch11 (line 186) | def test_act_ckpt_python_code_torch11():

FILE: tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py
  class MyModule (line 19) | class MyModule(torch.nn.Module):
    method __init__ (line 20) | def __init__(self):
    method forward (line 29) | def forward(self, x):
  function _run_act_ckpt_codegen (line 33) | def _run_act_ckpt_codegen(rank, world_size, port):
  function test_act_ckpt_codegen (line 93) | def test_act_ckpt_codegen():
  function _run_act_ckpt_python_code_torch11 (line 97) | def _run_act_ckpt_python_code_torch11(rank, world_size, port):
  function test_act_ckpt_python_code_torch11 (line 159) | def test_act_ckpt_python_code_torch11():

FILE: tests/test_fx/test_codegen/test_offload_codegen.py
  class MyNet (line 24) | class MyNet(torch.nn.Module):
    method __init__ (line 25) | def __init__(self) -> None:
    method forward (line 35) | def forward(self, x):
  function _is_all_gradient_close (line 46) | def _is_all_gradient_close(m: torch.nn.Module, gm: GraphModule) -> bool:
  function _test_fwd_and_bwd (line 53) | def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data:...
  function _run_offload_codegen (line 67) | def _run_offload_codegen(rank, world_size, port):
  function test_act_ckpt_codegen (line 121) | def test_act_ckpt_codegen():
  function _run_offload_codegen_torch11 (line 125) | def _run_offload_codegen_torch11(rank, world_size, port):
  function test_act_ckpt_python_code_torch11 (line 180) | def test_act_ckpt_python_code_torch11():

FILE: tests/test_fx/test_coloproxy.py
  class Conv1D (line 10) | class Conv1D(nn.Module):
    method __init__ (line 11) | def __init__(self, nf, nx):
    method forward (line 19) | def forward(self, x):
  function test_coloproxy (line 27) | def test_coloproxy():

FILE: tests/test_fx/test_comm_size_compute.py
  class MLP (line 19) | class MLP(torch.nn.Module):
    method __init__ (line 20) | def __init__(self, dim: int):
    method forward (line 27) | def forward(self, x):
  function test_comm_size_compute (line 36) | def test_comm_size_compute():

FILE: tests/test_fx/test_graph_manipulation.py
  class MLP (line 8) | class MLP(torch.nn.Module):
    method __init__ (line 9) | def __init__(self, dim: int):
    method forward (line 17) | def forward(self, x):
  function test_graph_manipulation (line 27) | def test_graph_manipulation():

FILE: tests/test_fx/test_meta/test_aten.py
  function compare_all (line 63) | def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor) -> Any:
  function run_and_compare (line 75) | def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requ...
  function test_meta_aten (line 88) | def test_meta_aten():

FILE: tests/test_fx/test_meta/test_backward.py
  function test_torchvision_models (line 44) | def test_torchvision_models():
  function test_timm_models (line 53) | def test_timm_models():

FILE: tests/test_fx/test_meta/test_meta_trace.py
  function test_torchvision_models_trace (line 44) | def test_torchvision_models_trace():
  function test_timm_models_trace (line 53) | def test_timm_models_trace():

FILE: tests/test_fx/test_meta_info_prop.py
  function meta_check (line 16) | def meta_check(meta_info_spec: TensorMetadata, orig_tensor: torch.Tensor):
  function test_meta_info_prop (line 24) | def test_meta_info_prop():

FILE: tests/test_fx/test_parallel_1d.py
  class MLP (line 15) | class MLP(torch.nn.Module):
    method __init__ (line 16) | def __init__(self, dim: int):
    method forward (line 23) | def forward(self, x):
  function check_layer (line 34) | def check_layer(rank, world_size, port):
  function test_1d (line 53) | def test_1d():

FILE: tests/test_fx/test_pipeline/test_hf_model/hf_utils.py
  function split_model_and_compare_output (line 17) | def split_model_and_compare_output(model, data_gen):

FILE: tests/test_fx/test_pipeline/test_hf_model/test_albert.py
  function test_single_sentence_albert (line 11) | def test_single_sentence_albert():

FILE: tests/test_fx/test_pipeline/test_hf_model/test_bert.py
  function test_single_sentence_bert (line 11) | def test_single_sentence_bert():

FILE: tests/test_fx/test_pipeline/test_hf_model/test_gpt.py
  function test_gpt (line 13) | def test_gpt():

FILE: tests/test_fx/test_pipeline/test_hf_model/test_opt.py
  function test_opt (line 11) | def test_opt():

FILE: tests/test_fx/test_pipeline/test_hf_model/test_t5.py
  function test_t5 (line 11) | def test_t5():

FILE: tests/test_fx/test_pipeline/test_timm_model/test_timm.py
  function test_timm_models_without_control_flow (line 8) | def test_timm_models_without_control_flow():
  function test_timm_models_with_control_flow (line 28) | def test_timm_models_with_control_flow():

FILE: tests/test_fx/test_pipeline/test_timm_model/timm_utils.py
  function split_model_and_compare_output (line 18) | def split_model_and_compare_output(model, data, meta_args=None):

FILE: tests/test_fx/test_pipeline/test_topo/test_topo.py
  function test_opt (line 11) | def test_opt():

FILE: tests/test_fx/test_pipeline/test_topo/topo_utils.py
  class MLP (line 18) | class MLP(torch.nn.Module):
    method __init__ (line 19) | def __init__(self, config={}):
    method forward (line 28) | def forward(self, x):
  function split_model_and_get_DAG (line 34) | def split_model_and_get_DAG(model, data_gen):
  function check_input (line 62) | def check_input(top_module, input_partition: Partition):
  function check_submod (line 75) | def check_submod(top_module, part_id, mid_partition: Partition):
  function check_topo (line 92) | def check_topo(top_module, topo: Topo):

FILE: tests/test_fx/test_pipeline/test_torchvision/test_torchvision.py
  function test_torchvision_models (line 23) | def test_torchvision_models():

FILE: tests/test_fx/test_pipeline_passes.py
  class MLP (line 17) | class MLP(torch.nn.Module):
    method __init__ (line 18) | def __init__(self, dim: int):
    method forward (line 25) | def forward(self, x):
  function pipeline_pass_test_helper (line 33) | def pipeline_pass_test_helper(model, data, pass_func):
  function test_pipeline_passes (line 43) | def test_pipeline_passes():

FILE: tests/test_fx/test_profiler/gpt_utils.py
  class GPTLMModel (line 5) | class GPTLMModel(nn.Module):
    method __init__ (line 6) | def __init__(
    method forward (line 30) | def forward(self, input_ids, attention_mask):
  class GPTLMLoss (line 35) | class GPTLMLoss(nn.Module):
    method __init__ (line 36) | def __init__(self):
    method forward (line 40) | def forward(self, logits, labels):
  function gpt2_medium (line 47) | def gpt2_medium(checkpoint=False):
  function gpt2_xl (line 51) | def gpt2_xl(checkpoint=False):

FILE: tests/test_fx/test_profiler/test_profiler_meta_info_prop.py
  function extract_forward_mem (line 22) | def extract_forward_mem(gm: torch.fx.GraphModule):
  function extract_forward_flops (line 32) | def extract_forward_flops(gm: torch.fx.GraphModule):
  function gen_tm_data (line 41) | def gen_tm_data(batch_size: int, shape: Tuple[int, int, int], device="cu...
  function gen_gpt_data (line 47) | def gen_gpt_data(batch_size, seq_len, vocab_size, device="cpu"):
  function run_tm_forward (line 53) | def run_tm_forward(gm: torch.fx.GraphModule):
  function run_gpt_forward (line 91) | def run_gpt_forward(gm: torch.fx.GraphModule):
  function test_meta_info_prop (line 130) | def test_meta_info_prop():
  function test_gpt_meta_info_prop (line 195) | def test_gpt_meta_info_prop():

FILE: tests/test_fx/test_tracer/test_activation_checkpoint_annotation.py
  class MLP (line 9) | class MLP(torch.nn.Module):
    method __init__ (line 10) | def __init__(self):
    method forward (line 15) | def forward(self, x):
  class MyModule (line 22) | class MyModule(torch.nn.Module):
    method __init__ (line 23) | def __init__(self):
    method forward (line 29) | def forward(self, x):
  function test_activation_checkpoint_annotation (line 37) | def test_activation_checkpoint_annotation():

FILE: tests/test_fx/test_tracer/test_bias_addition_module.py
  class LinearModel (line 7) | class LinearModel(torch.nn.Module):
    method __init__ (line 8) | def __init__(self, in_features, out_features):
    method forward (line 12) | def forward(self, x):
  class ConvModel (line 19) | class ConvModel(torch.nn.Module):
    method __init__ (line 20) | def __init__(self, in_channels, out_channels, kernel_size, bias=True):
    method forward (line 26) | def forward(self, x):
  function test_linear_module (line 34) | def test_linear_module():
  function test_conv_module (line 71) | def test_conv_module():

FILE: tests/test_fx/test_tracer/test_control_flow.py
  class ControlFlowModel (line 9) | class ControlFlowModel(nn.Module):
    method __init__ (line 10) | def __init__(self):
    method forward (line 15) | def forward(self, x, y):
  function test_control_flow (line 26) | def test_control_flow():

FILE: tests/test_fx/test_tracer/test_functional_conv.py
  function test_conv (line 9) | def test_conv():

FILE: tests/test_fx/test_tracer/test_hf_model/hf_tracer_utils.py
  function trace_model_and_compare_output (line 9) | def trace_model_and_compare_output(model, data_gen, ignore_data: List[st...

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_albert.py
  function test_albert (line 15) | def test_albert():

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_bert.py
  function test_bert (line 12) | def test_bert():

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_diffuser.py
  function assert_dict (line 10) | def assert_dict(da, db, assert_fn):
  function trace_and_compare (line 20) | def trace_and_compare(model_cls, data, output_fn):
  function test_diffusers (line 45) | def test_diffusers():
  function test_torch_diffusers (line 58) | def test_torch_diffusers():

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_gpt.py
  function test_gpt (line 12) | def test_gpt():

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_opt.py
  function test_opt (line 12) | def test_opt():

FILE: tests/test_fx/test_tracer/test_hf_model/test_hf_t5.py
  function test_t5 (line 12) | def test_t5():

FILE: tests/test_fx/test_tracer/test_patched_module.py
  function _run (line 7) | def _run(data, module, patch_fn):
  function _assert_output_shape (line 20) | def _assert_output_shape(data, module, patch_fn, expect_exception, outpu...
  function test_linear (line 37) | def test_linear():
  function test_rnn (line 49) | def test_rnn():
  function test_embedding (line 66) | def test_embedding():
  function test_conv1d (line 147) | def test_conv1d():
  function test_conv2d (line 187) | def test_conv2d():
  function test_conv3d (line 234) | def test_conv3d():
  function test_conv_transpose1d (line 281) | def test_conv_transpose1d():
  function test_conv_transpose2d (line 309) | def test_conv_transpose2d():
  function test_conv_transpose3d (line 337) | def test_conv_transpose3d():
  function test_pool1d (line 365) | def test_pool1d():
  function test_pool2d (line 399) | def test_pool2d():
  function test_pool3d (line 436) | def test_pool3d():
  function test_adaptive_pooling_1d (line 474) | def test_adaptive_pooling_1d():
  function test_adaptive_pooling_2d (line 495) | def test_adaptive_pooling_2d():
  function test_adaptive_pooling_3d (line 516) | def test_adaptive_pooling_3d():

FILE: tests/test_fx/test_tracer/test_patched_op.py
  function _run (line 9) | def _run(data, patch_fn):
  function _assert_output_shape (line 17) | def _assert_output_shape(data, patch_fn, expect_exception, output_shape):
  function test_repeat_interleave (line 29) | def test_repeat_interleave():
  function test_torch_max (line 67) | def test_torch_max():

FILE: tests/test_fx/test_tracer/test_timm_model/test_timm_model.py
  function trace_and_compare (line 10) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No...
  function test_timm_models (line 55) | def test_timm_models():

FILE: tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py
  function test_torchaudio_models (line 13) | def test_torchaudio_models():

FILE: tests/test_fx/test_tracer/test_torchaudio_model/torchaudio_utils.py
  function trace_and_compare (line 6) | def trace_and_compare(model, data_gen, output_transform_fn, need_meta=Fa...

FILE: tests/test_fx/test_tracer/test_torchrec_model/test_deepfm_model.py
  function trace_and_compare (line 11) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No...
  function test_torchrec_deepfm_models (line 54) | def test_torchrec_deepfm_models():

FILE: tests/test_fx/test_tracer/test_torchrec_model/test_dlrm_model.py
  function trace_and_compare (line 11) | def trace_and_compare(model_cls, data, output_transform_fn, meta_args=No...
  function test_torchrec_dlrm_models (line 54) | def test_torchrec_dlrm_models():

FILE: tests/test_fx/test_tracer/test_torchvision_model/test_torchvision_model.py
  function test_torchvision_models (line 9) | def test_torchvision_models():

FILE: tests/test_infer/_utils.py
  function build_model (line 6) | def build_model(
  function run_infer (line 29) | def run_infer(original_model, sharded_model, data_gen_fn, output_transfo...

FILE: tests/test_infer/test_async_engine/test_async_engine.py
  class MockSequence (line 10) | class MockSequence:
  class MockEngine (line 14) | class MockEngine:
    method __init__ (line 15) | def __init__(self):
    method async_step (line 21) | async def async_step(self):
    method add_single_request (line 25) | def add_single_request(self, **kwargs):
    method generate (line 29) | def generate(self, request_id):
    method stop_generating (line 32) | def stop_generating(self):
    method add_request (line 35) | def add_request(self, **kwargs):
    method abort_request (line 39) | def abort_request(self, request_id):
  class MockAsyncInferenceEngine (line 44) | class MockAsyncInferenceEngine(AsyncInferenceEngine):
    method _init_engine (line 45) | def _init_engine(self, *args, **kwargs):
  function test_new_requests_event (line 50) | async def test_new_requests_event():

FILE: tests/test_infer/test_async_engine/test_request_tracer.py
  class SampleEvent (line 7) | class SampleEvent:
    method __init__ (line 8) | def __init__(self):
    method set (line 11) | def set(self):
    method clear (line 14) | def clear(self):
  function test_request_tracer (line 18) | def test_request_tracer():

FILE: tests/test_infer/test_batch_bucket.py
  function test_bucket (line 30) | def test_bucket(test_config):

FILE: tests/test_infer/test_config_and_struct.py
  function check_config_and_inference (line 9) | def check_config_and_inference():
  function run_dist (line 34) | def run_dist(rank, world_size, port):
  function test_config_and_inference (line 41) | def test_config_and_inference():

FILE: tests/test_infer/test_continuous_batching.py
  function setup_seed (line 14) | def setup_seed(seed):
  function generate_inputs (line 21) | def generate_inputs(num_sequences, min_length, max_length):
  function check_inference_engine (line 35) | def check_inference_engine(n_multiple, max_batch_size, max_input_len, ma...
  function run_dist (line 59) | def run_dist(rank, world_size, port):
  function test_continuous_batching (line 66) | def test_continuous_batching():

FILE: tests/test_infer/test_cuda_graph.py
  function setup_seed (line 14) | def setup_seed(seed):
  function check_inference_engine (line 21) | def check_inference_engine(use_cuda_graph=False, batch_size=32):
  function check_output_consistency (line 74) | def check_output_consistency(batch_size):
  function run_dist (line 82) | def run_dist(rank, world_size, port):
  function test_cuda_graph_infer (line 91) | def test_cuda_graph_infer():

FILE: tests/test_infer/test_drafter.py
  function tokenizer (line 15) | def tokenizer():
  function test_drafter (line 20) | def test_drafter(tokenizer, spec_num: int):
  function test_spec_dec (line 45) | def test_spec_dec(tokenizer):

FILE: tests/test_infer/test_inference_engine.py
  function setup_seed (line 18) | def setup_seed(seed):
  function check_inference_engine (line 26) | def check_inference_engine(use_engine=False, prompt_template=None, do_sa...
  function run_engine (line 88) | def run_engine(world_size, **kwargs):
  function check_spec_dec (line 96) | def check_spec_dec(num_layers, max_length):
  function run_dist (line 166) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs):
  function test_tp_engine (line 179) | def test_tp_engine(prompt_template, do_sample):
  function test_spec_dec (line 202) | def test_spec_dec(num_layers, max_length):

FILE: tests/test_infer/test_kernels/cuda/test_convert_fp8.py
  function test_fp8_conversion (line 27) | def test_fp8_conversion(

FILE: tests/test_infer/test_kernels/cuda/test_flash_decoding_attention.py
  function prepare_data (line 26) | def prepare_data(
  function numpy_allclose (line 52) | def numpy_allclose(x, y, rtol, atol):
  function test_flash_decoding_attention (line 67) | def test_flash_decoding_attention(
  function test_vllm_flash_decoding_attention (line 209) | def test_vllm_flash_decoding_attention(

FILE: tests/test_infer/test_kernels/cuda/test_get_cos_and_sin.py
  function numpy_equal (line 11) | def numpy_equal(x, y):
  function test_get_cos_and_sin (line 22) | def test_get_cos_and_sin(BATCH_SIZE, MAX_SEQ_LEN, HEAD_DIM, dtype):

FILE: tests/test_infer/test_kernels/cuda/test_kv_cache_memcpy.py
  function prepare_data (line 17) | def prepare_data(
  function run_decode_copy_kv_to_caches (line 46) | def run_decode_copy_kv_to_caches(
  function run_context_copy_kv_to_cache (line 101) | def run_context_copy_kv_to_cache(
  function test_kv_cache_memcopy (line 145) | def test_kv_cache_memcopy(

FILE: tests/test_infer/test_kernels/cuda/test_rms_layernorm.py
  function test_rms_layernorm (line 13) | def test_rms_layernorm(M: int, N: int):

FILE: tests/test_infer/test_kernels/cuda/test_rotary_embdding_unpad.py
  function numpy_allclose (line 14) | def numpy_allclose(x, y, rtol, atol):
  function test_rotary_emb (line 27) | def test_rotary_emb(BATCH_SIZE, SEQ_LEN, H, K_H, D, dtype):

FILE: tests/test_infer/test_kernels/cuda/test_silu_and_mul.py
  function test_silu_and_mul (line 14) | def test_silu_and_mul(SHAPE_X, SHAPE_Y, SHAPE_Z, dtype):

FILE: tests/test_infer/test_kernels/triton/kernel_utils.py
  function repeat_kv (line 10) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function create_attention_mask (line 22) | def create_attention_mask(kv_lengths: torch.Tensor, bsz: int, q_len: int...
  function torch_attn_ref (line 41) | def torch_attn_ref(
  function mock_alloc_block_table_and_kvcache (line 81) | def mock_alloc_block_table_and_kvcache(
  function mock_alloc_block_table_and_kvcache_v2 (line 115) | def mock_alloc_block_table_and_kvcache_v2(
  function mock_alloc_block_table_and_kvcache_v3 (line 149) | def mock_alloc_block_table_and_kvcache_v3(
  function mock_alloc_block_table_and_kvcache_vllm (line 193) | def mock_alloc_block_table_and_kvcache_vllm(
  function mock_alloc_single_token (line 238) | def mock_alloc_single_token(block_tables: torch.Tensor, context_lengths:...
  function generate_caches_and_block_tables (line 262) | def generate_caches_and_block_tables(
  function generate_caches_and_block_tables_v2 (line 278) | def generate_caches_and_block_tables_v2(
  function generate_caches_and_block_tables_v3 (line 294) | def generate_caches_and_block_tables_v3(
  function generate_caches_and_block_tables_vllm (line 314) | def generate_caches_and_block_tables_vllm(
  function convert_kv_unpad_to_padded (line 334) | def convert_kv_unpad_to_padded(

FILE: tests/test_infer/test_kernels/triton/test_context_attn_unpad.py
  function _fill_with_neg_inf (line 27) | def _fill_with_neg_inf(t):
  function generate_alibi_mask (line 32) | def generate_alibi_mask(slopes, num_heads, max_seq_len, device):
  function torch_attn_unpad (line 44) | def torch_attn_unpad(
  function test_context_attention (line 97) | def test_context_attention(

FILE: tests/test_infer/test_kernels/triton/test_decoding_attn.py
  function numpy_allclose (line 31) | def numpy_allclose(x, y, rtol, atol):
  function prepare_data (line 38) | def prepare_data(
  function test_flash_decoding (line 80) | def test_flash_decoding(

FILE: tests/test_infer/test_kernels/triton/test_fused_rotary_embedding.py
  function test_fused_rotary_emb (line 24) | def test_fused_rotary_emb():

FILE: tests/test_infer/test_kernels/triton/test_kvcache_copy.py
  function prepare_data (line 26) | def prepare_data(
  function test_copy_kv_to_caches (line 80) | def test_copy_kv_to_caches(

FILE: tests/test_infer/test_kernels/triton/test_rmsnorm_triton.py
  function test_layer_norm (line 25) | def test_layer_norm(M, N):

FILE: tests/test_infer/test_kernels/triton/test_rotary_embdding_unpad.py
  function torch_rotary_emb (line 23) | def torch_rotary_emb(x, cos, sin):
  function test_rotary_emb (line 43) | def test_rotary_emb(BATCH_SIZE, SEQ_LEN, H, D, dtype, use_new_kcache_lay...

FILE: tests/test_infer/test_kernels/triton/test_xine_copy.py
  function get_cos_sin (line 19) | def get_cos_sin(lengths, cos_cache, sin_cache, is_prompts, dtype):
  function test_get_xine_cache (line 48) | def test_get_xine_cache(BATCH_SIZE, MAX_SEQ_LEN, HEAD_DIM, dtype):

FILE: tests/test_infer/test_kvcache_manager.py
  function test_logical_blocks (line 23) | def test_logical_blocks(test_config):
  function check_cache_manager (line 69) | def check_cache_manager(test_config):
  function run_dist (line 166) | def run_dist(rank, world_size, port):
  function test_cache_manager (line 173) | def test_cache_manager():

FILE: tests/test_infer/test_models/test_attention.py
  function test_copy_to_cache (line 12) | def test_copy_to_cache():
  function test_convert_kvcache (line 30) | def test_convert_kvcache():
  function test_context_attention (line 41) | def test_context_attention():
  function test_decoding_attention (line 94) | def test_decoding_attention():

FILE: tests/test_infer/test_models/test_baichuan.py
  function setup_seed (line 20) | def setup_seed(seed):
  function check_inference_engine (line 28) | def check_inference_engine(use_engine=False, do_sample=False, use_cuda_k...
  function run_engine (line 80) | def run_engine(world_size, **kwargs):
  function run_dist (line 88) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs):
  function check_tp_engine (line 101) | def check_tp_engine(prompt_template, do_sample, use_cuda_kernel):
  function test_inference_engine (line 133) | def test_inference_engine():

FILE: tests/test_infer/test_models/test_custom_model.py
  function test_model (line 43) | def test_model(model, prompt_template, do_sample, use_cuda_kernel):
  function run_engine (line 82) | def run_engine(world_size, **kwargs):
  function run_dist (line 89) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs):
  function _run_engine (line 98) | def _run_engine(model, use_engine=False, do_sample=False, use_cuda_kerne...
  function setup_seed (line 152) | def setup_seed(seed):

FILE: tests/test_infer/test_request_handler.py
  function check_running_list (line 11) | def check_running_list():
  function check_request_handler (line 56) | def check_request_handler():
  function run_dist (line 92) | def run_dist(rank, world_size, port):
  function test_running_list_and_request_handler (line 100) | def test_running_list_and_request_handler():

FILE: tests/test_infer/test_rpc_engine.py
  function setup_seed (line 14) | def setup_seed(seed):
  function check_inference_engine (line 22) | def check_inference_engine(tp_size, use_engine=False, prompt_template=No...
  function run_engine (line 74) | def run_engine(tp_size, **kwargs):
  function test_tp_engine (line 84) | def test_tp_engine(prompt_template, do_sample):

FILE: tests/test_infer/test_streamingllm.py
  function data_gen (line 14) | def data_gen(batch_size: int = 4, seq_len: int = 512):
  function setup_seed (line 19) | def setup_seed(seed):
  function check_streamingllm (line 27) | def check_streamingllm():
  function run_dist (line 103) | def run_dist(rank, world_size, port, func_to_run, ret=None, **kwargs):
  function test_engine (line 113) | def test_engine():

FILE: tests/test_lazy/lazy_init_utils.py
  function set_seed (line 21) | def set_seed(seed: int) -> None:
  function assert_model_equal (line 27) | def assert_model_equal(m1: torch.nn.Module, m2: torch.nn.Module) -> None:
  function assert_forward_equal (line 41) | def assert_forward_equal(
  function check_lazy_init (line 69) | def check_lazy_init(
  function assert_dist_model_equal (line 93) | def assert_dist_model_equal(

FILE: tests/test_lazy/test_from_pretrained.py
  function test_lazy_from_pretrained (line 9) | def test_lazy_from_pretrained():

FILE: tests/test_lazy/test_models.py
  function test_models_lazy_init (line 17) | def test_models_lazy_init(subset, default_device):

FILE: tests/test_lazy/test_ops.py
  function test_lazy_ops (line 13) | def test_lazy_ops():

FILE: tests/test_legacy/test_amp/test_naive_fp16.py
  function check_equal (line 12) | def check_equal(a, b):
  function run_naive_amp (line 19) | def run_naive_amp():
  function run_dist (line 79) | def run_dist(rank, world_size, port):
  function test_naive_amp (line 87) | def test_naive_amp():

FILE: tests/test_legacy/test_amp/test_torch_fp16.py
  function run_torch_amp (line 12) | def run_torch_amp():
  function run_dist (line 78) | def run_dist(rank, world_size, port):
  function test_torch_amp (line 86) | def test_torch_amp():

FILE: tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py
  function check_layer (line 17) | def check_layer(rank, world_size, port):
  function test_object_list_p2p (line 50) | def test_object_list_p2p():

FILE: tests/test_legacy/test_comm/test_comm.py
  function check_all_gather (line 17) | def check_all_gather():
  function check_reduce_scatter (line 28) | def check_reduce_scatter():
  function check_all_reduce (line 39) | def check_all_reduce():
  function check_layer (line 50) | def check_layer(rank, world_size, port):
  function test_comm (line 66) | def test_comm():

FILE: tests/test_legacy/test_comm/test_object_list_p2p.py
  function check_send_recv_forward (line 28) | def check_send_recv_forward():
  function check_send_recv_backward (line 48) | def check_send_recv_backward():
  function check_send_recv_forward_backward (line 68) | def check_send_recv_forward_backward():
  function check_layer (line 90) | def check_layer(rank, world_size, port):
  function test_object_list_p2p (line 101) | def test_object_list_p2p():

FILE: tests/test_legacy/test_comm/test_object_list_p2p_v2.py
  function check_send_recv_forward (line 30) | def check_send_recv_forward():
  function check_send_recv_backward (line 60) | def check_send_recv_backward():
  function check_small_pipeline (line 85) | def check_small_pipeline():
  function check_layer (line 105) | def check_layer(rank, world_size, port):
  function test_object_list_p2p (line 119) | def test_object_list_p2p():

FILE: tests/test_legacy/test_context/test_hybrid_parallel.py
  function check_data_parallel_rank (line 18) | def check_data_parallel_rank(rank):
  function check_pipeline_parallel_rank (line 32) | def check_pipeline_parallel_rank(rank):
  function check_model_parallel_rank (line 44) | def check_model_parallel_rank(rank):
  function check_tensor_parallel_rank (line 51) | def check_tensor_parallel_rank(rank):
  function get_tp_info (line 60) | def get_tp_info():
  function check_2d_tensor_parallel_rank (line 68) | def check_2d_tensor_parallel_rank(rank):
  function check_2p5d_tensor_parallel_rank (line 82) | def check_2p5d_tensor_parallel_rank(rank):
  function check_3d_tensor_parallel_rank (line 100) | def check_3d_tensor_parallel_rank(rank):
  function init_context (line 116) | def init_context(config_path, rank, world_size, backend, port, host):
  function run_dist (line 130) | def run_dist(rank, world_size, port, backend, port_list, host):
  function test_context (line 139) | def test_context():

FILE: tests/test_legacy/test_data/test_cifar10_dataset.py
  function test_cifar10_dataset (line 11) | def test_cifar10_dataset():

FILE: tests/test_legacy/test_data/test_data_parallel_sampler.py
  function run_data_sampler (line 29) | def run_data_sampler(rank, world_size, port):
  function test_data_sampler (line 60) | def test_data_sampler():

FILE: tests/test_legacy/test_data/test_deterministic_dataloader.py
  function run_data_sampler (line 38) | def run_data_sampler(rank, world_size, port):
  function test_data_sampler (line 70) | def test_data_sampler():

FILE: tests/test_legacy/test_engine/test_engine.py
  function run_train (line 17) | def run_train(model_name, amp_mode):
  function run_engine (line 52) | def run_engine(rank, world_size, port):
  function test_engine (line 62) | def test_engine():

FILE: tests/test_legacy/test_engine/test_gradient_accumluation.py
  function run_no_pipeline (line 27) | def run_no_pipeline(rank, world_size, port):
  function test_engine (line 90) | def test_engine():

FILE: tests/test_legacy/test_layers/test_1d/checks_1d/check_layer_1d.py
  function check_linear_col (line 24) | def check_linear_col():
  function check_linear_row (line 93) | def check_linear_row():
  function check_embed (line 161) | def check_embed():
  function check_vocab_parallel_embed (line 203) | def check_vocab_parallel_embed():
  function check_classifier_no_given_weight (line 245) | def check_classifier_no_given_weight():
  function check_vocab_parallel_classifier_no_given_weight (line 311) | def check_vocab_parallel_classifier_no_given_weight():
  function check_classifier_given_embed_weight (line 371) | def check_classifier_given_embed_weight():
  function check_vocab_parallel_classifier_given_embed_weight (line 422) | def check_vocab_parallel_classifier_given_embed_weight():
  function check_vocab_parallel_loss (line 474) | def check_vocab_parallel_loss():
  function check_linear_row_stream_inference (line 510) | def check_linear_row_stream_inference():

FILE: tests/test_legacy/test_layers/test_1d/checks_1d/common.py
  function check_equal (line 15) | def check_equal(A, B):

FILE: tests/test_legacy/test_layers/test_1d/test_1d.py
  function check_layer (line 18) | def check_layer(rank, world_size, port):
  function test_1d (line 40) | def test_1d():

FILE: tests/test_legacy/test_layers/test_2d/checks_2d/check_layer_2d.py
  function check_linear (line 24) | def check_linear():
  function check_layernorm (line 105) | def check_layernorm():
  function check_embed (line 156) | def check_embed():
  function check_patch_embed (line 203) | def check_patch_embed():
  function check_vocab_parallel_embed (line 276) | def check_vocab_parallel_embed():
  function check_classifier_no_given_weight (line 323) | def check_classifier_no_given_weight():
  function check_vocab_parallel_classifier_no_given_weight (line 401) | def check_vocab_parallel_classifier_no_given_weight():
  function check_classifier_given_embed_weight (line 469) | def check_classifier_given_embed_weight():
  function check_vocab_parallel_classifier_given_embed_weight (line 521) | def check_vocab_parallel_classifier_given_embed_weight():
  function check_loss (line 575) | def check_loss():
  function check_vocab_parallel_loss (line 610) | def check_vocab_parallel_loss():

FILE: tests/test_legacy/test_layers/test_2d/checks_2d/check_operation_2d.py
  function check_AB (line 15) | def check_AB():
  function check_ABT (line 97) | def check_ABT():
  function check_ATB (line 177) | def check_ATB():

FILE: tests/test_legacy/test_layers/test_2d/checks_2d/common.py
  function check_equal (line 15) | def check_equal(A, B):

FILE: tests/test_legacy/test_layers/test_2d/test_2d.py
  function check_operations (line 31) | def check_operations():
  function check_layer (line 37) | def check_layer():
  function check_layer_and_operation (line 51) | def check_layer_and_operation(rank, world_size, port):
  function test_2d (line 66) | def test_2d():

FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py
  function check_linear (line 25) | def check_linear():
  function check_layernorm (line 106) | def check_layernorm():
  function check_embed (line 158) | def check_embed():
  function check_patch_embed (line 206) | def check_patch_embed():
  function check_vocab_parallel_embed (line 280) | def check_vocab_parallel_embed():
  function check_classifier_no_given_weight (line 328) | def check_classifier_no_given_weight():
  function check_vocab_parallel_classifier_no_given_weight (line 407) | def check_vocab_parallel_classifier_no_given_weight():
  function check_classifier_given_embed_weight (line 474) | def check_classifier_given_embed_weight():
  function check_vocab_parallel_classifier_given_embed_weight (line 526) | def check_vocab_parallel_classifier_given_embed_weight():
  function check_loss (line 580) | def check_loss():
  function check_vocab_parallel_loss (line 615) | def check_vocab_parallel_loss():

FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py
  function check_AB (line 12) | def check_AB():
  function check_ABT (line 95) | def check_ABT():
  function check_ATB (line 177) | def check_ATB():

FILE: tests/test_legacy/test_layers/test_2p5d/checks_2p5d/common.py
  function check_equal (line 13) | def check_equal(A, B):

FILE: tests/test_legacy/test_layers/test_2p5d/test_2p5d.py
  function check_operations (line 19) | def check_operations():
  function check_layer (line 25) | def check_layer():
  function check_layer_and_operation (line 39) | def check_layer_and_operation(rank, world_size, port):
  function test_2p5d (line 54) | def test_2p5d():

FILE: tests/test_legacy/test_layers/test_3d/checks_3d/check_layer_3d.py
  function check_linear (line 31) | def check_linear():
  function check_layernorm (line 119) | def check_layernorm():
  function check_classifier_no_given_weight (line 206) | def check_classifier_no_given_weight():
  function check_vocab_parallel_classifier_no_given_weight (line 306) | def check_vocab_parallel_classifier_no_given_weight():
  function check_classifier_given_embed_weight (line 413) | def check_classifier_given_embed_weight():
  function check_vocab_parallel_classifier_given_embed_weight (line 497) | def check_vocab_parallel_classifier_given_embed_weight():
  function check_patch_embed (line 581) | def check_patch_embed():
  function check_embed (line 679) | def check_embed():
  function check_vocab_parallel_embed (line 747) | def check_vocab_parallel_embed():
  function check_loss (line 823) | def check_loss():
  function check_vocab_parallel_loss (line 876) | def check_vocab_parallel_loss():

FILE: tests/test_legacy/test_layers/test_3d/checks_3d/common.py
  function check_equal (line 16) | def check_equal(A, B):

FILE: tests/test_legacy/test_layers/test_3d/test_3d.py
  function check_layer (line 32) | def check_layer():
  function check_layer_and_operation (line 45) | def check_layer_and_operation(rank, world_size, port):
  function test_3d (line 59) | def test_3d():

FILE: tests/test_legacy/test_layers/test_cache_embedding.py
  function set_seed (line 25) | def set_seed(seed):
  function synthesize_1d_sparse_feature (line 34) | def synthesize_1d_sparse_feature(
  function test_cachemgr (line 59) | def test_cachemgr():
  function test_reorder_with_freq (line 88) | def test_reorder_with_freq():
  function test_freq_aware_embed (line 119) | def test_freq_aware_embed(use_LFU: bool):
  function test_lfu_strategy (line 168) | def test_lfu_strategy(init_freq: bool):
  function gather_tensor (line 215) | def gather_tensor(tensor, rank, world_size):
  function run_parallel_freq_aware_embed_tablewise (line 224) | def run_parallel_freq_aware_embed_tablewise(rank, world_size):
  function run_parallel_freq_aware_embed_columnwise (line 310) | def run_parallel_freq_aware_embed_columnwise(rank, world_size):
  function run_dist (line 380) | def run_dist(rank, world_size, port):
  function test_parallel_freq_aware_embed (line 389) | def test_parallel_freq_aware_embed(world_size):

FILE: tests/test_legacy/test_layers/test_sequence/checks_seq/check_layer_seq.py
  function check_selfattention (line 9) | def check_selfattention():

FILE: tests/test_legacy/test_layers/test_sequence/test_sequence.py
  function check_ring_qk (line 14) | def check_ring_qk(rank, world_size):
  function check_ring_av (line 66) | def check_ring_av(rank, world_size):
  function run_test (line 120) | def run_test(rank, world_size, port):
  function test_sequence (line 132) | def test_sequence():

FILE: tests/test_legacy/test_moe/moe_utils.py
  function delete_moe_info (line 15) | def delete_moe_info(model):
  class MoeModel (line 21) | class MoeModel(nn.Module):
    method __init__ (line 22) | def __init__(self, ep_group: ProcessGroup = None):
    method forward (line 29) | def forward(self, x):
  class MoeGradientHandler (line 37) | class MoeGradientHandler(BaseGradientHandler):
    method __init__ (line 49) | def __init__(self, model, optimizer=None):
    method handle_gradient (line 52) | def handle_gradient(self):
  function assert_not_equal_in_group (line 73) | def assert_not_equal_in_group(tensor, process_group=None):
  function run_fwd_bwd (line 88) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas...
  function sync_local_from_ep (line 105) | def sync_local_from_ep(local_model, ep_model, assert_grad_flag: bool = F...

FILE: tests/test_legacy/test_moe/test_grad_handler.py
  function run_test (line 18) | def run_test(rank, world_size, port):
  function test_grad_handler (line 76) | def test_grad_handler():

FILE: tests/test_legacy/test_moe/test_moe_group.py
  function run_moe_init (line 17) | def run_moe_init(expert_parallel):
  function _run_test (line 62) | def _run_test(rank, world_size, port, expert_parallel):
  function test_moe_initialization (line 77) | def test_moe_initialization(expert_parallel):

FILE: tests/test_legacy/test_moe/test_moe_hybrid_zero.py
  function run_fwd_bwd (line 15) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas...
  function run_zero_optim_test (line 32) | def run_zero_optim_test(local_rank, world_size, stage=1):
  function run_dist (line 83) | def run_dist(rank, world_size, port):
  function test_moe_zero_optim (line 93) | def test_moe_zero_optim(world_size):

FILE: tests/test_legacy/test_moe/test_moe_load_balance.py
  function split_ddp_grad (line 17) | def split_ddp_grad(grad, world_size):
  function run_fwd_bwd (line 27) | def run_fwd_bwd(model, data, label, criterion, optimizer, enable_autocas...
  function run_zero_optim_test (line 44) | def run_zero_optim_test(local_rank, world_size, stage=1):
  function run_hybrid_zero_optim_test (line 100) | def run_hybrid_zero_optim_test(local_rank, world_size, stage=1):
  function run_dist (line 166) | def run_dist(rank, world_size, port):
  function test_moe_load_balance (line 184) | def test_moe_load_balance(world_size):

FILE: tests/test_legacy/test_pipeline/rpc_test_utils.py
  function color_debug (line 18) | def color_debug(text, prefix=" ", color="blue"):
  class MLP (line 23) | class MLP(nn.Module):
    method __init__ (line 24) | def __init__(self, dim: int, layers: int):
    method forward (line 31) | def forward(self, x):
  class DAG_MLP (line 37) | class DAG_MLP(nn.Module):
    method __init__ (line 38) | def __init__(self, dim: int, layers: int):
    method forward (line 46) | def forward(self, x, y):
  class RpcTestModel (line 53) | class RpcTestModel(nn.Module):
    method __init__ (line 54) | def __init__(self, stage_id, actual_stage_num, feat_num, h) -> None:
    method forward (line 69) | def forward(self, x) -> torch.Tensor:
  function parse_args (line 78) | def parse_args():
  function pg_parse_args (line 96) | def pg_parse_args():
  function run_worker (line 109) | def run_worker(rank, args, master_func):
  function rpc_run (line 144) | def rpc_run(args, master_func):

FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_chimera.py
  function partition (line 13) | def partition(pp_rank: int, chunk: int, stage_num: int):
  function run_master (line 19) | def run_master(args):

FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_optimizer.py
  function partition (line 14) | def partition(pp_rank: int, chunk: int, stage_num: int):
  function run_master (line 20) | def run_master(args):

FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_pipeline.py
  function partition (line 11) | def partition(pp_rank: int, chunk: int, stage_num: int):
  function run_master (line 17) | def run_master(args):

FILE: tests/test_legacy/test_pipeline/test_cuda_rpc_value_correctness.py
  function partition (line 12) | def partition(pp_rank: int, chunk: int, stage_num: int):
  function run_master (line 18) | def run_master(args):

FILE: tests/test_legacy/test_pipeline/test_middleware_1f1b.py
  function create_partition_module (line 25) | def create_partition_module(pp_rank: int, stage_num: int, model, data_kw...
  function partition (line 40) | def partition(model, data_kwargs: dict, pp_rank: int, chunk: int, stage_...
  function run_master (line 46) | def run_master(model_cls, world_size, forward_only):
  function run_worker (line 103) | def run_worker(rank, world_size, port, model_cls, forward_only, master_f...
  function test_pp_middleware_fwd (line 129) | def test_pp_middleware_fwd(model_cls, forward_only):

FILE: tests/test_legacy/test_pipeline/test_pipelinable.py
  class MLP (line 11) | class MLP(torch.nn.Module):
    method __init__ (line 12) | def __init__(self, dim: int = 256):
    method forward (line 20) | def forward(self, x):
  function run_pipelinable (line 28) | def run_pipelinable(rank, world_size, port):
  function test_pipelinable (line 53) | def test_pipelinable():

FILE: tests/test_legacy/test_pipeline/test_pipeline_process_group.py
  function run_worker (line 12) | def run_worker(rank, args):

FILE: tests/test_legacy/test_tensor/common_utils/_utils.py
  function set_seed (line 14) | def set_seed(seed):
  function check_equal (line 24) | def check_equal(A, B):
  function replace_parameter_add_grad (line 28) | def replace_parameter_add_grad(layer, weight=None, bias=None):
  function broadcast_tensor_chunk (line 39) | def broadcast_tensor_chunk(tensor, chunk_size=1, local_rank=0):
  function tensor_equal (line 45) | def tensor_equal(t_a: torch.Tensor, t_b: torch.Tensor, rtol: float = 1e-...
  function tensor_shard_equal (line 50) | def tensor_shard_equal(
  function split_param_single_dim_tp1d (line 70) | def split_param_single_dim_tp1d(dim, param, pg):
  function split_param_row_tp1d (line 77) | def split_param_row_tp1d(param, pg):
  function split_param_col_tp1d (line 81) | def split_param_col_tp1d(param, pg):
  function debug_print (line 85) | def debug_print(ranks, *args):

FILE: tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py
  function run (line 12) | def run():
  function check_mem (line 34) | def check_mem():
  function run_dist (line 50) | def run_dist(rank, world_size, port):
  function test_dist_spec_mgr (line 59) | def test_dist_spec_mgr(world_size):

FILE: tests/test_legacy/test_tensor/test_parameter.py
  function test_multiinheritance (line 11) | def test_multiinheritance():

FILE: tests/test_legacy/test_trainer/test_pipeline/test_p2p.py
  function check_equal (line 30) | def check_equal(A, B):
  function check_forward (line 34) | def check_forward(output_tensor, rank, logger):
  function check_backward (line 46) | def check_backward(output_grad, rank, logger):
  function check_forward_backward (line 58) | def check_forward_backward(output_tensor, output_grad, rank, logger):
  function check_comm (line 74) | def check_comm(size, rank, prev_rank, next_rank, logger):
  function run_check (line 88) | def run_check(rank, world_size, port):
  function test_p2p (line 104) | def test_p2p():

FILE: tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py
  function run_schedule (line 25) | def run_schedule(rank, world_size, port):
  function test_pipeline_schedule (line 84) | def test_pipeline_schedule():

FILE: tests/test_legacy/test_trainer/test_trainer_with_non_pipe_schedule.py
  function run_trainer (line 20) | def run_trainer(model_name):
  function run_dist (line 50) | def run_dist(rank, world_size, port):
  function test_trainer_no_pipeline (line 58) | def test_trainer_no_pipeline():

FILE: tests/test_legacy/test_trainer/test_trainer_with_pipe_schedule.py
  function run_trainer_with_pipeline (line 31) | def run_trainer_with_pipeline(rank, world_size, port):
  function test_trainer_with_pipeline (line 91) | def test_trainer_with_pipeline():

FILE: tests/test_legacy/test_utils/test_activation_checkpointing.py
  function forward (line 13) | def forward(x, weight):
  function forward_inplace_ckpt (line 20) | def forward_inplace_ckpt(x, weight, cpu_offload=False):
  function forward_inplace (line 33) | def forward_inplace(x, weight):
  function test_activation_checkpointing (line 45) | def test_activation_checkpointing(cpu_offload, use_reentrant):

FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py
  function build_pipeline (line 20) | def build_pipeline(model):
  function check_equal (line 36) | def check_equal(A, B):
  function check_checkpoint_1d (line 40) | def check_checkpoint_1d(rank, world_size, port):
  function test_checkpoint_1d (line 74) | def test_checkpoint_1d():

FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py
  function build_pipeline (line 20) | def build_pipeline(model):
  function check_equal (line 36) | def check_equal(A, B):
  function check_checkpoint_2d (line 40) | def check_checkpoint_2d(rank, world_size, port):
  function test_checkpoint_2d (line 74) | def test_checkpoint_2d():

FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py
  function build_pipeline (line 20) | def build_pipeline(model):
  function check_equal (line 36) | def check_equal(A, B):
  function check_checkpoint_2p5d (line 40) | def check_checkpoint_2p5d(rank, world_size, port):
  function test_checkpoint_2p5d (line 74) | def test_checkpoint_2p5d():

FILE: tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py
  function build_pipeline (line 20) | def build_pipeline(model):
  function check_equal (line 36) | def check_equal(A, B):
  function check_checkpoint_3d (line 40) | def check_checkpoint_3d(rank, world_size, port):
  function test_checkpoint_3d (line 74) | def test_checkpoint_3d():

FILE: tests/test_legacy/test_utils/test_memory.py
  function _run_colo_set_process_memory_fraction_and_colo_device_memory_capacity (line 9) | def _run_colo_set_process_memory_fraction_and_colo_device_memory_capacit...
  function run_dist (line 16) | def run_dist(rank, world_size, port):
  function test_memory_utils (line 23) | def test_memory_utils(world_size):

FILE: tests/test_legacy/test_utils/test_norm_gradient_clipping.py
  function close (line 15) | def close(num: float, other: float, rtol: float = 1e-5, atol: float = 1e...
  function shard_param (line 19) | def shard_param(p: ColoParameter) -> None:
  function check_grad_equal (line 25) | def check_grad_equal(p: Parameter, colo_p: ColoParameter) -> None:
  function run_grad_clip_norm (line 37) | def run_grad_clip_norm(world_size: int, dtype: torch.dtype, device: str,...
  function run_dist (line 63) | def run_dist(rank, world_size, port):
  function test_zero_clip_grad (line 73) | def test_zero_clip_grad(world_size: int):

FILE: tests/test_legacy/test_zero/test_commons.py
  function run_tensor_move (line 9) | def run_tensor_move(rank, world_size, port):
  function test_tensor_move (line 36) | def test_tensor_move():

FILE: tests/test_lora/test_lora.py
  function check_fwd_bwd (line 20) | def check_fwd_bwd(model_fn, data_gen_fn, output_transform_fn, loss_fn, t...
  function run_lora_test (line 90) | def run_lora_test():
  function run_dist (line 101) | def run_dist(rank, world_size, port):
  function test_torch_ddp_lora (line 107) | def test_torch_ddp_lora():

FILE: tests/test_moe/moe_utils.py
  function assert_loose_close (line 12) | def assert_loose_close(a, b, dtype: torch.dtype = torch.float32, name=""):
  function loose_close (line 16) | def loose_close(a, b, dtype: torch.dtype = torch.float32):
  function check_model_equal (line 36) | def check_model_equal(model1, model2, dtype):
  function distributed_debug_mode (line 43) | def distributed_debug_mode(num_stacks: int = 1, funcs_to_patch: Optional...

FILE: tests/test_moe/test_deepseek_layer.py
  function check_deepseek_moe_layer (line 19) | def check_deepseek_moe_layer():
  function run_dist (line 66) | def run_dist(rank: int, world_size: int, port: int):
  function test_deepseek_moe_layer (line 73) | def test_deepseek_moe_layer(world_size: int):

FILE: tests/test_moe/test_kernel.py
  function check_equal (line 16) | def check_equal(tensor_a, tensor_b, atol=1e-06):
  function run_moe_cumsum (line 20) | def run_moe_cumsum():
  function run_moe_dispatch_combine_fwd_bwd (line 36) | def run_moe_dispatch_combine_fwd_bwd(data_type=torch.float32, hidden_siz...
  function test_moe_kernel (line 91) | def test_moe_kernel(data_type):

FILE: tests/test_moe/test_mixtral_layer.py
  function check_mixtral_moe_layer (line 20) | def check_mixtral_moe_layer():
  function run_dist (line 61) | def run_dist(rank: int, world_size: int, port: int):
  function test_mixtral_moe_layer (line 68) | def test_mixtral_moe_layer(world_size: int):

FILE: tests/test_moe/test_moe_checkpoint.py
  function get_optimizer_snapshot (line 26) | def get_optimizer_snapshot(optim):
  function check_optimizer_snapshot_equal (line 42) | def check_optimizer_snapshot_equal(snapshot1, snapshot2, param2name, moe...
  function check_moe_checkpoint (line 90) | def check_moe_checkpoint(test_config):
  function run_dist (line 163) | def run_dist(rank: int, world_size: int, port: int):
  function test_mixtral_moe_layer (line 170) | def test_mixtral_moe_layer(world_size: int):

FILE: tests/test_moe/test_moe_ep_tp.py
  function run_zero_with_original_model (line 26) | def run_zero_with_original_model(stage: int, ep_size: int):
  function run_dist (line 118) | def run_dist(rank, world_size, port):
  function test_moe_ep_tp (line 127) | def test_moe_ep_tp(world_size):

FILE: tests/test_moe/test_moe_ep_zero.py
  function run_zero_with_original_model (line 26) | def run_zero_with_original_model(stage: int, ep_size: int):
  function run_dist (line 105) | def run_dist(rank, world_size, port):
  function test_moe_ep_zero (line 114) | def test_moe_ep_zero(world_size):

FILE: tests/test_optimizer/_utils.py
  function force_assign_grad (line 21) | def force_assign_grad(p, g_dtype, grad=None):
  function setup_param_groups (line 29) | def setup_param_groups(model: nn.Module) -> list:
  function setup_flatten_param_groups_sharding_spec_shape (line 45) | def setup_flatten_param_groups_sharding_spec_shape(model: nn.Module) -> ...
  function set_master_param_to_shard_param (line 62) | def set_master_param_to_shard_param(master_param_list) -> dict:
  function set_dist_grad (line 67) | def set_dist_grad(
  function check_optim_states (line 103) | def check_optim_states(org_optim, sharded_optim):
  function check_bert_fwd_bwd (line 112) | def check_bert_fwd_bwd(
  function run_bert_test (line 200) | def run_bert_test(test_config, optim_class, sharded_optim_class):
  function _run_bert_test (line 221) | def _run_bert_test(rank, world_size, port, optim_class, sharded_optim_cl...
  function check_optim_on_bert (line 226) | def check_optim_on_bert(optim_class, sharded_optim_class):
  function check_dist_optim_state (line 230) | def check_dist_optim_state(org_optimizer, sharded_optimizer):
  function check_dist_param (line 307) | def check_dist_param(org_model, sharded_model, weight_layer_for_check, a...
  function check_dist_grad (line 315) | def check_dist_grad(sharded_optimizer, org_model, sharded_model, weight_...

FILE: tests/test_optimizer/test_adam_kernel.py
  class AdamKernel (line 29) | class AdamKernel:
    method __init__ (line 30) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ...
    method update (line 39) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens...
  class TorchAdamKernel (line 43) | class TorchAdamKernel(AdamKernel):
    method update (line 44) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens...
  class FusedAdamKernel (line 65) | class FusedAdamKernel(AdamKernel):
    method __init__ (line 66) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ...
    method update (line 74) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens...
  class CPUAdamKernel (line 91) | class CPUAdamKernel(AdamKernel):
    method __init__ (line 92) | def __init__(self, lr: float, beta1: float, beta2: float, eps: float, ...
    method update (line 100) | def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tens...
  function check_adam_kernel (line 117) | def check_adam_kernel(
  function test_fused_adam_kernel (line 153) | def test_fused_adam_kernel(adamw, weight_decay, p_dtype, g_dtype):
  function test_cpu_adam_kernel (line 167) | def test_cpu_adam_kernel(adamw, weight_decay, p_dtype, g_dtype):

FILE: tests/test_optimizer/test_adam_optim.py
  function set_grad (line 30) | def set_grad(model: nn.Module, torch_model: nn.Module, g_dtype: torch.dt...
  function test_adam_optim_on_bert (line 40) | def test_adam_optim_on_bert(

FILE: tests/test_optimizer/test_dist_adafactor.py
  function correctness_verify (line 52) | def correctness_verify(tensor1: torch.Tensor, tensor2: torch.Tensor, dty...
  class MlpModel (line 68) | class MlpModel(nn.Module):
    method __init__ (line 69) | def __init__(self):
    method forward (line 74) | def forward(self, x):
  class TPModel (line 80) | class TPModel(nn.Module):
    method __init__ (line 81) | def __init__(self, linear1, linear2, tp_group=None):
    method forward (line 88) | def forward(self, x):
  function exam_dist_adafactor_base (line 96) | def exam_dist_adafactor_base(dtype: torch.dtype, tp_zero_size: tuple[int...
  function exam_dist_adafactor_zero (line 196) | def exam_dist_adafactor_zero(dtype: torch.dtype, tp_zero_size: tuple[int...
  function exam_bert_test_on_lowlevelzero_plugin (line 321) | def exam_bert_test_on_lowlevelzero_plugin(test_config):
  function exam_bert_test_on_hybrid_plugin (line 405) | def exam_bert_test_on_hybrid_plugin(test_config):
  function run_dist (line 456) | def run_dist(rank, world_size, port):
  function test_dist_adafactor (line 467) | def test_dist_adafactor():

FILE: tests/test_optimizer/test_dist_came.py
  function correctness_verify (line 44) | def correctness_verify(tensor1: torch.Tensor, tensor2: torch.Tensor, dty...
  function exam_dist_came_base (line 63) | def exam_dist_came_base(dtype: torch.dtype, tp_zero_size: tuple[int, int]):
  function exam_bert_test_on_lowlevelzero_plugin (line 181) | def exam_bert_test_on_lowlevelzero_plugin(test_config):
  function exam_bert_test_on_hybrid_plugin (line 279) | def exam_bert_test_on_hybrid_plugin(test_config):
  function run_dist (line 346) | def run_dist(rank, world_size, port):
  function test_dist_came (line 356) | def test_dist_came():

FILE: tests/test_optimizer/test_dist_galore.py
  function assert_grad_close (line 64) | def assert_grad_close(tp_model, torch_model, tp_group):
  function assert_distributed_close (line 85) | def assert_distributed_close(tp_model, torch_model, rtol, atol, tp_group):
  function force_assign_grad (line 103) | def force_assign_grad(p, g_dtype, grad=None):
  function run_dist_galore_basic (line 113) | def run_dist_galore_basic(p_g_dtype: tuple[torch.dtype, torch.dtype], tp...
  function run_dist_galore_fwd_bwd (line 178) | def run_dist_galore_fwd_bwd(p_g_dtype: tuple[torch.dtype, torch.dtype], ...
  function check_dist_galore (line 269) | def check_dist_galore(rank, world_size, port):
  function test_dist_galore (line 297) | def test_dist_galore():

FILE: tests/test_optimizer/test_dist_lamb.py
  function assert_distributed_close (line 34) | def assert_distributed_close(tp_model, torch_model, rtol, atol, tp_group):
  function set_dist_grad (line 52) | def set_dist_grad(
  function run_dist_lamb_basic (line 89) | def run_dist_lamb_basic(
  function run_dist_lamb_fwd_bwd (line 159) | def run_dist_lamb_fwd_bwd(
  function check_dist_lamb (line 257) | def check_dist_lamb(rank, world_size, port):
  function test_dist_lamb (line 275) | def test_dist_lamb():

FILE: tests/test_optimizer/test_lr_scheduler.py
  function test_lr_scheduler_save_load (line 7) | def test_lr_scheduler_save_load():

FILE: tests/test_optimizer/test_nvme.py
  function move_some_params_to_cuda (line 9) | def move_some_params_to_cuda(model, torch_model):
  function check_params_equal (line 16) | def check_params_equal(model, torch_model):
  function test_nvme_adam (line 27) | def test_nvme_adam(nvme_offload_fraction, nvme_offload_dir, adam_cls):

FILE: tests/test_pipeline/test_p2p_communication.py
  function check_p2p_communication (line 15) | def check_p2p_communication():
  function run_dist (line 71) | def run_dist(rank, world_size, port):
  function test_pipeline_p2p (line 78) | def test_pipeline_p2p():

FILE: tests/test_pipeline/test_pipeline_utils/test_t5_pipeline_utils.py
  class _ShardConfig (line 8) | class _ShardConfig(ShardConfig):
    method __post_init__ (line 9) | def __post_init__(self):
  class _PipelineStageManager (line 13) | class _PipelineStageManager(PipelineStageManager):
    method __init__ (line 14) | def __init__(self):
    method num_stages (line 21) | def num_stages(self):
  function test_t5_pipeline_distribution (line 25) | def test_t5_pipeline_distribution():
  function test_t5_pipeline_layers (line 47) | def test_t5_pipeline_layers():

FILE: tests/test_pipeline/test_pipeline_utils/test_whisper_pipeline_utils.py
  class _ShardConfig (line 8) | class _ShardConfig(ShardConfig):
    method __post_init__ (line 9) | def __post_init__(self):
  class _PipelineStageManager (line 13) | class _PipelineStageManager(PipelineStageManager):
    method __init__ (line 14) | def __init__(self):
    method num_stages (line 21) | def num_stages(self):
  function test_whisper_pipeline_distribution (line 25) | def test_whisper_pipeline_distribution():
  function test_whisper_pipeline_layers (line 47) | def test_whisper_pipeline_layers():

FILE: tests/test_pipeline/test_schedule/test_interleaved.py
  class MlpModel (line 23) | class MlpModel(nn.Module):
    method __init__ (line 24) | def __init__(self):
    method forward (line 28) | def forward(self, x):
  function pp_linear_fwd (line 34) | def pp_linear_fwd(
  function run_pp (line 50) | def run_pp(
  function test_pp (line 154) | def test_pp(num_microbatch: int, batch_size: int, num_model_chunk: int):

FILE: tests/test_pipeline/test_schedule/test_oneF_oneB.py
  class MlpModel (line 23) | class MlpModel(nn.Module):
    method __init__ (line 24) | def __init__(self):
    method forward (line 28) | def forward(self, x):
  function pp_linear_fwd (line 34) | def pp_linear_fwd(
  function examine_pp (line 48) | def examine_pp(num_microbatch: int, batch_size: int):
  function run_dist (line 145) | def run_dist(
  function test_pp (line 161) | def test_pp(num_microbatch: int, batch_size: int, world_size: int):

FILE: tests/test_pipeline/test_schedule/test_pipeline_schedule_utils.py
  function test_get_batch_size (line 6) | def test_get_batch_size():
  function test_get_micro_batch (line 16) | def test_get_micro_batch():
  function test_merge_batch (line 37) | def test_merge_batch():

FILE: tests/test_pipeline/test_schedule/test_zerobubble_pp.py
  class MlpModel (line 39) | class MlpModel(nn.Module):
    method __init__ (line 40) | def __init__(
    method forward (line 51) | def forward(
    method no_sync (line 78) | def no_sync(self):
  function assert_optim_param_groups (line 82) | def assert_optim_param_groups(optim_base_param_groups, optim_pp_param_gr...
  function get_model_numel (line 89) | def get_model_numel(model: torch.nn.Module) -> Tuple[int, int]:
  function run_fwd_bwd_iter_input (line 114) | def run_fwd_bwd_iter_input(test_config):
  function run_fwd_bwd_vschedule_with_optim (line 580) | def run_fwd_bwd_vschedule_with_optim(test_config):
  function run_with_booster_moehybridplugin (line 762) | def run_with_booster_moehybridplugin(config: Tuple[int, ...]):
  function run_with_booster_hybridplugin (line 921) | def run_with_booster_hybridplugin(config: Tuple[int, ...]):
  function run_dist (line 1067) | def run_dist(rank, world_size, port):
  function test_pp (line 1076) | def test_pp():

FILE: tests/test_pipeline/test_stage_manager.py
  function check_stage_manager (line 10) | def check_stage_manager():
  function run_dist (line 66) | def run_dist(rank, world_size, port):
  function test_pipeline_stage_manager (line 73) | def test_pipeline_stage_manager():

FILE: tests/test_shardformer/test_flash_attention.py
  function attention_ref (line 22) | def attention_ref(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, att...
  function gen_padded_kwargs (line 33) | def gen_padded_kwargs(dtype: torch.dtype):
  function gen_padded_causal_kwargs (line 42) | def gen_padded_causal_kwargs(dtype: torch.dtype):
  function gen_causal_kwargs (line 53) | def gen_causal_kwargs(dtype: torch.dtype):
  function gen_custom_kwargs (line 57) | def gen_custom_kwargs(dtype: torch.dtype):
  function post_process_kwargs_for_raw_attn (line 67) | def post_process_kwargs_for_raw_attn(attn_kwargs: dict):
  function check_attn_func (line 75) | def check_attn_func(dtype: torch.dtype, attn_func, attn_kwargs: dict, pa...
  function test_flash_attn_func (line 102) | def test_flash_attn_func(dtype: torch.dtype):

FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py
  function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_test (line 148) | def run_test(test_config):
  function run_3d_test (line 183) | def run_3d_test(test_config):
  function check_grad_clip_norm (line 194) | def check_grad_clip_norm(rank, world_size, port):
  function check_grad_clip_norm_3d (line 200) | def check_grad_clip_norm_3d(rank, world_size, port):
  function test_grad_clip_norm (line 209) | def test_grad_clip_norm():
  function test_grad_clip_norm_3d (line 216) | def test_grad_clip_norm_3d():

FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py
  function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_test (line 116) | def run_test(test_config):
  function run_3d_test (line 141) | def run_3d_test(test_config):
  function check_grad_clip_norm (line 152) | def check_grad_clip_norm(rank, world_size, port):
  function check_grad_clip_norm_3d (line 158) | def check_grad_clip_norm_3d(rank, world_size, port):
  function test_grad_clip_norm (line 167) | def test_grad_clip_norm():
  function test_grad_clip_norm_3d (line 174) | def test_grad_clip_norm_3d():

FILE: tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py
  function check_forward_backward (line 24) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_test (line 137) | def run_test(test_config):
  function run_3d_test (line 174) | def run_3d_test(test_config):
  function check_grad_clip_norm (line 185) | def check_grad_clip_norm(rank, world_size, port):
  function check_grad_clip_norm_3d (line 191) | def check_grad_clip_norm_3d(rank, world_size, port):
  function test_grad_clip_norm (line 200) | def test_grad_clip_norm():
  function test_grad_clip_norm_3d (line 207) | def test_grad_clip_norm_3d():

FILE: tests/test_shardformer/test_layer/test_dist_crossentropy.py
  function check_dist_crossentropy (line 15) | def check_dist_crossentropy(rank, world_size, port, ignore_index):
  function test_dist_crossentropy (line 49) | def test_dist_crossentropy():

FILE: tests/test_shardformer/test_layer/test_dist_log_prob.py
  function log_probs_from_logits (line 14) | def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) ->...
  function check_dist_log_prob (line 30) | def check_dist_log_prob(rank, world_size, port):
  function test_dist_log_prob (line 62) | def test_dist_log_prob():

FILE: tests/test_shardformer/test_layer/test_dropout.py
  function check_dropout_parallel_input (line 10) | def check_dropout_parallel_input():
  function check_dropout_replicated_input (line 42) | def check_dropout_replicated_input():
  function run_dist (line 58) | def run_dist(rank, world_size, port):
  function test_dropout (line 65) | def test_dropout():

FILE: tests/test_shardformer/test_layer/test_embedding.py
  function check_embedding_1d (line 15) | def check_embedding_1d(lazy_init: bool):
  function run_dist (line 45) | def run_dist(rank, world_size, port):
  function test_embedding_1d (line 51) | def test_embedding_1d():

FILE: tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py
  class Conv1D (line 20) | class Conv1D(nn.Module):
    method __init__ (line 31) | def __init__(self, nf, nx):
    method forward (line 38) | def forward(self, x):
  function check_linear_conv_1d_col (line 45) | def check_linear_conv_1d_col(lazy_init: bool, seq_parallel_mode: str):
  function check_linear_conv_1d_row (line 86) | def check_linear_conv_1d_row(lazy_init: bool, seq_parallel_mode: bool):
  function check_linear_conv_1d_without_weight_grad_store (line 122) | def check_linear_conv_1d_without_weight_grad_store(lazy_init: bool, seq_...
  function check_linear_conv_1d_with_weight_grad_store (line 155) | def check_linear_conv_1d_with_weight_grad_store(lazy_init: bool, seq_par...
  function check_gpt2_qkv_fused_linear_1d (line 193) | def check_gpt2_qkv_fused_linear_1d(lazy_init: bool, seq_parallel_mode: b...
  function run_dist (line 200) | def run_dist(rank, world_size, port):
  function test_linearconv (line 208) | def test_linearconv():

FILE: tests/test_shardformer/test_layer/test_layernorm.py
  function check_layernorm (line 14) | def check_layernorm(lazy_init: bool):
  function run_dist (line 43) | def run_dist(rank, world_size, port):
  function test_layernorm (line 49) | def test_layernorm():

FILE: tests/test_shardformer/test_layer/test_linear_1d.py
  function check_linear_1d_col (line 19) | def check_linear_1d_col(lazy_init: bool, seq_parallel_mode: bool, overla...
  function check_linear_1d_row (line 75) | def check_linear_1d_row(lazy_init: bool, seq_parallel_mode: bool):
  function check_linear_without_weight_grad_store (line 121) | def check_linear_without_weight_grad_store(lazy_init: bool, seq_parallel...
  function check_linear_with_weight_grad_store (line 161) | def check_linear_with_weight_grad_store(lazy_init: bool, seq_parallel_mo...
  function check_linear_col_plus_row (line 208) | def check_linear_col_plus_row(lazy_init: bool, seq_parallel_mode: bool, ...
  function run_dist_linear_test (line 269) | def run_dist_linear_test(lazy_init, seq_parallel_mode, overlap):
  function check_dist_linear (line 277) | def check_dist_linear(rank, world_size, port):
  function test_linear (line 283) | def test_linear():

FILE: tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py
  function check_linear_1d_col (line 19) | def check_linear_1d_col(lazy_init: bool):
  function check_linear_1d_row (line 54) | def check_linear_1d_row(lazy_init: bool):
  function check_linear_1d_col_row (line 89) | def check_linear_1d_col_row(lazy_init: bool):
  function check_linear_1d_base (line 124) | def check_linear_1d_base(lazy_init: bool):
  function run_dist (line 155) | def run_dist(rank, world_size, port):
  function test_linearconv (line 165) | def test_linearconv():

FILE: tests/test_shardformer/test_layer/test_ring_attn.py
  function check_ring_attn (line 21) | def check_ring_attn(seq_len, bs, nheads, d, dtype, inner_ring_size):
  function check_packed_seq (line 86) | def check_packed_seq(seqlen, bs, nheads, d, dtype):
  function launch_single_ring (line 167) | def launch_single_ring(rank, world_size, port):
  function launch_double_ring (line 173) | def launch_double_ring(rank, world_size, port):
  function test_ring_attn (line 180) | def test_ring_attn(world_size):
  function test_double_ring (line 186) | def test_double_ring(world_size):

FILE: tests/test_shardformer/test_layer/test_sequence_parallel.py
  class SequenceParallelAttention (line 15) | class SequenceParallelAttention(torch.nn.Module):
    method __init__ (line 25) | def __init__(
    method attn (line 49) | def attn(self, q, k, v):
    method forward (line 60) | def forward(self, x) -> Tensor:
  function seq_parallel_attn (line 94) | def seq_parallel_attn(seq_len, hidden_dim, head_num, batch_size):
  function run_seq_parallel_attn (line 163) | def run_seq_parallel_attn(seq_len, hidden_dim, head_num, batch_size):
  function check_all2all_attn (line 167) | def check_all2all_attn(rank, world_size, port):
  function test_all_to_all_attention (line 173) | def test_all_to_all_attention():

FILE: tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py
  function check_vocab_embedding_1d (line 15) | def check_vocab_embedding_1d(lazy_init: bool):
  function run_dist (line 47) | def run_dist(rank, world_size, port):
  function test_vocab_embedding (line 53) | def test_vocab_embedding():

FILE: tests/test_shardformer/test_model/_utils.py
  function build_model (line 31) | def build_model(
  function build_pipeline_model (line 63) | def build_pipeline_model(
  function run_forward (line 91) | def run_forward(original_model, sharded_model, data_gen_fn, output_trans...
  function check_state_dict (line 109) | def check_state_dict(org_model: Module, sharded_model: Module, name: str...
  function build_model_from_hybrid_plugin (line 120) | def build_model_from_hybrid_plugin(
  function build_model_from_low_level_zero_plugin (line 174) | def build_model_from_low_level_zero_plugin(
  function run_forward_backward_with_hybrid_plugin (line 200) | def run_forward_backward_with_hybrid_plugin(
  function run_forward_backward_with_low_level_zero_plugin (line 263) | def run_forward_backward_with_low_level_zero_plugin(
  function check_output_hidden_state (line 301) | def check_output_hidden_state(
  function check_loss (line 334) | def check_loss(org_loss: Tensor, sharded_loss: Tensor, atol: float = 1e-...
  function check_weight (line 338) | def check_weight(
  function get_grad_tensors_for_check (line 368) | def get_grad_tensors_for_check(
  function check_grad (line 408) | def check_grad(
  function unwrap_model (line 436) | def unwrap_model(
  function check_all_grad_tensors (line 450) | def check_all_grad_tensors(check_tensors):

FILE: tests/test_shardformer/test_model/test_shard_bert.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_bert_test (line 145) | def run_bert_test(test_config):
  function run_bert_3d_test (line 189) | def run_bert_3d_test(test_config):
  function check_bert (line 200) | def check_bert(rank, world_size, port):
  function check_bert_3d (line 206) | def check_bert_3d(rank, world_size, port):
  function test_bert (line 215) | def test_bert():
  function test_bert_3d (line 222) | def test_bert_3d():

FILE: tests/test_shardformer/test_model/test_shard_blip2.py
  function check_forward_backward (line 17) | def check_forward_backward(org_model, sharded_model, data_gen_fn, output...
  function run_blip2_test (line 72) | def run_blip2_test(
  function check_blip2 (line 99) | def check_blip2(rank, world_size, port):
  function test_blip2 (line 114) | def test_blip2():

FILE: tests/test_shardformer/test_model/test_shard_bloom.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_bloom_test (line 143) | def run_bloom_test(test_config):
  function run_bloom_3d_test (line 178) | def run_bloom_3d_test(test_config):
  function check_bloom (line 189) | def check_bloom(rank, world_size, port):
  function check_bloom_3d (line 195) | def check_bloom_3d(rank, world_size, port):
  function test_bloom (line 204) | def test_bloom():
  function test_bloom_3d (line 211) | def test_bloom_3d():

FILE: tests/test_shardformer/test_model/test_shard_chatglm2.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_chatglm_test (line 229) | def run_chatglm_test(test_config):
  function run_chatglm_3d_test (line 273) | def run_chatglm_3d_test(test_config):
  function check_chatglm (line 289) | def check_chatglm(rank, world_size, port):
  function check_chatglm_3d (line 301) | def check_chatglm_3d(rank, world_size, port):
  function test_chatglm (line 316) | def test_chatglm():
  function test_chatglm_3d (line 323) | def test_chatglm_3d():

FILE: tests/test_shardformer/test_model/test_shard_command.py
  function check_forward_backward (line 29) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_command_test (line 280) | def run_command_test(test_config):
  function run_command_3d_test (line 334) | def run_command_3d_test(test_config):
  function check_command (line 345) | def check_command(rank, world_size, port):
  function check_command_3d (line 351) | def check_command_3d(rank, world_size, port):
  function test_command (line 360) | def test_command():
  function test_command_3d (line 367) | def test_command_3d():

FILE: tests/test_shardformer/test_model/test_shard_deepseek.py
  function run_deepseek_commom (line 28) | def run_deepseek_commom(parallel_config: Tuple[int, ...]):
  function run_deepseek_test (line 186) | def run_deepseek_test(config: Tuple[int, ...]):
  function run_deepseek_3d_test (line 211) | def run_deepseek_3d_test(config: Tuple[int, ...]):
  function check_deepseek (line 215) | def check_deepseek(rank, world_size, port):
  function check_deepseek_3d (line 220) | def check_deepseek_3d(rank, world_size, port):
  function test_deepseek (line 228) | def test_deepseek(world_size):
  function test_deepseek_3d (line 235) | def test_deepseek_3d(world_size):

FILE: tests/test_shardformer/test_model/test_shard_deepseek_v3.py
  function check_forward_backward (line 26) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_deepseek_v3_test (line 70) | def run_deepseek_v3_test(config: Tuple[int, ...]):
  function check_deepseek_v3 (line 91) | def check_deepseek_v3(rank, world_size, port):
  function test_deepseek_v3 (line 99) | def test_deepseek_v3(world_size):

FILE: tests/test_shardformer/test_model/test_shard_falcon.py
  function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_falcon_test (line 112) | def run_falcon_test(test_config):
  function run_falcon_3d_test (line 147) | def run_falcon_3d_test(test_config):
  function check_falcon (line 158) | def check_falcon(rank, world_size, port):
  function check_falcon_3d (line 164) | def check_falcon_3d(rank, world_size, port):
  function test_falcon (line 173) | def test_falcon():
  function test_falcon_3d (line 180) | def test_falcon_3d():

FILE: tests/test_shardformer/test_model/test_shard_gpt2.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_gpt2_test (line 209) | def run_gpt2_test(test_config):
  function run_gpt2_3d_test (line 259) | def run_gpt2_3d_test(test_config):
  function check_gpt2 (line 279) | def check_gpt2(rank, world_size, port):
  function check_gpt2_3d (line 291) | def check_gpt2_3d(rank, world_size, port):
  function test_gpt2 (line 306) | def test_gpt2():
  function test_gpt2_3d (line 313) | def test_gpt2_3d():

FILE: tests/test_shardformer/test_model/test_shard_gptj.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_gptj_test (line 183) | def run_gptj_test(test_config):
  function run_gptj_3d_test (line 224) | def run_gptj_3d_test(test_config):
  function check_gptj (line 240) | def check_gptj(rank, world_size, port):
  function check_gptj_3d (line 252) | def check_gptj_3d(rank, world_size, port):
  function test_gptj (line 268) | def test_gptj():
  function test_gptj_3d (line 275) | def test_gptj_3d():

FILE: tests/test_shardformer/test_model/test_shard_llama.py
  function check_forward_backward (line 30) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_llama_test (line 282) | def run_llama_test(test_config):
  function run_llama_3d_test (line 353) | def run_llama_3d_test(test_config):
  function check_llama (line 368) | def check_llama(rank, world_size, port):
  function check_llama_3d (line 374) | def check_llama_3d(rank, world_size, port):
  function test_llama (line 383) | def test_llama():
  function test_llama_3d (line 390) | def test_llama_3d():

FILE: tests/test_shardformer/test_model/test_shard_mistral.py
  function check_forward_backward (line 27) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_mistral_test (line 161) | def run_mistral_test(test_config):
  function check_mistral (line 172) | def check_mistral(rank, world_size, port):
  function test_mistral (line 180) | def test_mistral():

FILE: tests/test_shardformer/test_model/test_shard_mixtral.py
  function run_mixtral_commom (line 29) | def run_mixtral_commom(config: Tuple[int, ...]):
  function run_mixtral_test (line 179) | def run_mixtral_test(config: Tuple[int, ...]):
  function run_mixtral_3d_test (line 203) | def run_mixtral_3d_test(config: Tuple[int, ...]):
  function check_mixtral (line 208) | def check_mixtral(rank, world_size, port):
  function check_mixtral_3d (line 213) | def check_mixtral_3d(rank, world_size, port):
  function test_mixtral (line 221) | def test_mixtral(world_size):
  function test_mixtral_3d (line 228) | def test_mixtral_3d(world_size):

FILE: tests/test_shardformer/test_model/test_shard_opt.py
  function check_forward_backward (line 26) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_opt_test (line 178) | def run_opt_test(test_config):
  function run_opt_3d_test (line 217) | def run_opt_3d_test(test_config):
  function check_OPTModel (line 233) | def check_OPTModel(rank, world_size, port):
  function check_opt_3d (line 245) | def check_opt_3d(rank, world_size, port):
  function test_OPTModel (line 260) | def test_OPTModel():
  function test_opt_3d (line 267) | def test_opt_3d():

FILE: tests/test_shardformer/test_model/test_shard_qwen2.py
  function check_forward_backward (line 27) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_qwen2_test (line 212) | def run_qwen2_test(test_config):
  function run_qwen2_3d_test (line 261) | def run_qwen2_3d_test(test_config):
  function check_qwen2 (line 276) | def check_qwen2(rank, world_size, port):
  function check_qwen2_3d (line 282) | def check_qwen2_3d(rank, world_size, port):
  function test_qwen2 (line 292) | def test_qwen2():
  function test_qwen2_3d (line 300) | def test_qwen2_3d():

FILE: tests/test_shardformer/test_model/test_shard_qwen3.py
  function check_forward_backward (line 23) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_qwen3_test (line 208) | def run_qwen3_test(test_config):
  function run_qwen3_3d_test (line 257) | def run_qwen3_3d_test(test_config):
  function check_qwen3 (line 272) | def check_qwen3(rank, world_size, port):
  function check_qwen3_3d (line 278) | def check_qwen3_3d(rank, world_size, port):
  function test_qwen3 (line 288) | def test_qwen3():
  function test_qwen3_3d (line 296) | def test_qwen3_3d():

FILE: tests/test_shardformer/test_model/test_shard_sam.py
  function check_forward_backward (line 17) | def check_forward_backward(org_model, sharded_model, data_gen_fn, output...
  function run_sam_test (line 47) | def run_sam_test(enable_fused_normalization, enable_tensor_parallelism, ...
  function check_sam (line 58) | def check_sam(rank, world_size, port):
  function test_sam (line 67) | def test_sam():

FILE: tests/test_shardformer/test_model/test_shard_t5.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_t5_test (line 162) | def run_t5_test(test_config):
  function run_t5_3d_test (line 212) | def run_t5_3d_test(test_config):
  function check_t5 (line 228) | def check_t5(rank, world_size, port):
  function check_t5_3d (line 240) | def check_t5_3d(rank, world_size, port):
  function test_t5 (line 255) | def test_t5():
  function test_t5_3d (line 262) | def test_t5_3d():

FILE: tests/test_shardformer/test_model/test_shard_vit.py
  function check_forward_backward (line 22) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_vit_test (line 132) | def run_vit_test(test_config):
  function run_vit_3d_test (line 158) | def run_vit_3d_test(test_config):
  function check_vit (line 168) | def check_vit(rank, world_size, port):
  function check_vit_3d (line 174) | def check_vit_3d(rank, world_size, port):
  function test_vit (line 183) | def test_vit():
  function test_vit_3d (line 190) | def test_vit_3d():

FILE: tests/test_shardformer/test_model/test_shard_whisper.py
  function check_forward_backward (line 21) | def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, l...
  function run_whisper_test (line 150) | def run_whisper_test(test_config):
  function run_whisper_3d_test (line 187) | def run_whisper_3d_test(test_config):
  function check_whisper (line 197) | def check_whisper(rank, world_size, port):
  function check_whisper_3d (line 203) | def check_whisper_3d(rank, world_size, port):
  function test_whisper (line 212) | def test_whisper():
  function test_whisper_3d (line 219) | def test_whisper_3d():

FILE: tests/test_shardformer/test_shard_utils.py
  class Net (line 7) | class Net(nn.Module):
    method __init__ (line 8) | def __init__(self) -> None:
  function test_release_layer (line 14) | def test_release_layer():

FILE: tests/test_shardformer/test_with_torch_ddp.py
  function check_shardformer_with_ddp (line 18) | def check_shardformer_with_ddp(lazy_init: bool):
  function run_dist (line 72) | def run_dist(rank, world_size, port):
  function test_gpt2 (line 81) | def test_gpt2():

FILE: tests/test_smoothquant/test_llama_attention.py
  function torch_context_attention (line 30) | def torch_context_attention(xq, xk, xv, bs, seqlen, num_head, head_dim):
  function test_llama_context_attention (line 56) | def test_llama_context_attention():

FILE: tests/test_smoothquant/test_llama_mlp.py
  function torch_llama_mlp (line 29) | def torch_llama_mlp(gate_proj, up_proj, down_proj, x):
  function test_llama_mlp (line 49) | def test_llama_mlp():

FILE: tests/test_smoothquant/test_smoothquant_linear.py
  function test_linear (line 20) | def test_linear():

FILE: tests/test_smoothquant/test_sq_rotary_embedding.py
  function torch_rotary_emb (line 19) | def torch_rotary_emb(x, cos, sin):
  function test_rotary_emb (line 33) | def test_rotary_emb():

FILE: tests/test_tensor/test_comm_spec_apply.py
  function check_all_gather (line 13) | def check_all_gather(device_mesh, rank):
  function check_shard (line 40) | def check_shard(device_mesh, rank):
  function check_all_to_all (line 66) | def check_all_to_all(device_mesh, rank):
  function check_all_reduce_fwd (line 111) | def check_all_reduce_fwd(device_mesh, rank):
  function check_all_reduce_bwd (line 138) | def check_all_reduce_bwd(device_mesh, rank):
  function check_all_reduce_in_flatten_device_mesh (line 156) | def check_all_reduce_in_flatten_device_mesh(device_mesh, rank):
  function check_comm (line 179) | def check_comm(rank, world_size, port):
  function test_comm_spec (line 209) | def test_comm_spec():

FILE: tests/test_tensor/test_dtensor/test_comm_spec.py
  function check_all_gather (line 12) | def check_all_gather(process_groups_dict, rank):
  function check_shard (line 31) | def check_shard(process_groups_dict, rank):
  function check_all_to_all (line 51) | def check_all_to_all(process_groups_dict, rank):
  function check_all_reduce_fwd (line 92) | def check_all_reduce_fwd(process_groups_dict, rank):
  function check_all_reduce_bwd (line 113) | def check_all_reduce_bwd(process_groups_dict, rank):
  function check_comm (line 125) | def check_comm(rank, world_size, port):
  function test_comm_spec (line 155) | def test_comm_spec():

FILE: tests/test_tensor/test_dtensor/test_dtensor.py
  class TestModel (line 10) | class TestModel(torch.nn.Module):
    method __init__ (line 11) | def __init__(self, in_features, out_features):
    method forward (line 16) | def forward(self, x):
  function check_dtensor (line 22) | def check_dtensor(rank, world_size, port):
  function test_dtensor (line 81) | def test_dtensor():

FILE: tests/test_tensor/test_dtensor/test_dtensor_sharding_spec.py
  function test_dtensor_sharding_spec (line 7) | def test_dtensor_sharding_spec():

FILE: tests/test_tensor/test_dtensor/test_layout_converter.py
  function check_one_step_transform (line 21) | def check_one_step_transform(rank, world_size, port):
  function check_layout_converting (line 83) | def check_layout_converting(rank, world_size, port):
  function check_layout_converting_apply (line 142) | def check_layout_converting_apply(rank, world_size, port):
  function test_layout_converter (line 176) | def test_layout_converter():

FILE: tests/test_tensor/test_mix_gather.py
  function check_mix_gather_S0S1 (line 14) | def check_mix_gather_S0S1(device_mesh, rank):
  function check_two_all_gather_S0S1 (line 48) | def check_two_all_gather_S0S1(device_mesh, rank):
  function check_mix_gather_S1S0 (line 90) | def check_mix_gather_S1S0(device_mesh, rank):
  function check_two_all_gather_S1S0 (line 124) | def check_two_all_gather_S1S0(device_mesh, rank):
  function check_mix_gather_S01R (line 166) | def check_mix_gather_S01R(device_mesh, rank):
  function check_two_all_gather_S01R (line 193) | def check_two_all_gather_S01R(device_mesh, rank):
  function check_mix_gather_RS01 (line 231) | def check_mix_gather_RS01(device_mesh, rank):
  function check_two_all_gather_RS01 (line 259) | def check_two_all_gather_RS01(device_mesh, rank):
  function check_comm (line 297) | def check_comm(rank, world_size, port):
  function test_mix_gather (line 328) | def test_mix_gather():

FILE: tests/test_tensor/test_padded_tensor.py
  function check_padded_tensor (line 11) | def check_padded_tensor(rank, world_size, port):
  function test_padded_tensor (line 40) | def test_padded_tensor():

FILE: tests/test_tensor/test_shape_consistency.py
  function test_one_step_transform (line 18) | def test_one_step_transform():
  function test_shape_consistency (line 94) | def test_shape_consistency():

FILE: tests/test_tensor/test_shape_consistency_apply.py
  function check_apply (line 12) | def check_apply(rank, world_size, port):
  function test_apply (line 70) | def test_apply():

FILE: tests/test_tensor/test_sharding_spec.py
  function test_sharding_spec (line 7) | def test_sharding_spec():

FILE: tests/test_zero/test_gemini/test_chunk_mgrv2.py
  function exam_chunk_memory (line 17) | def exam_chunk_memory(keep_gathered, pin_memory):
  function run_dist (line 51) | def run_dist(rank, world_size, port):
  function test_chunk_manager (line 59) | def test_chunk_manager(world_size):

FILE: tests/test_zero/test_gemini/test_chunkv2.py
  function dist_sum (line 14) | def dist_sum(x):
  function add_param (line 20) | def add_param(param_list, param_cp_list, *args, **kwargs):
  function check_equal (line 26) | def check_equal(param, param_cp):
  function exam_chunk_basic (line 38) | def exam_chunk_basic(init_device, keep_gathered, pin_memory, async_op):
  function run_dist (line 114) | def run_dist(rank, world_size, port):
  function test_chunk_function (line 122) | def test_chunk_function(world_size):

FILE: tests/test_zero/test_gemini/test_gemini_use_rmt.py
  function run_gemini_use_rmt (line 20) | def run_gemini_use_rmt(placement_policy, keep_gather, model_name: str, u...
  function run_dist (line 82) | def run_dist(rank, world_size, port):
  function test_gemini_use_rmt (line 91) | def test_gemini_use_rmt(world_size):

FILE: tests/test_zero/test_gemini/test_grad_accum.py
  function check_grad (line 23) | def check_grad(model: GeminiDDP, torch_model: torch.nn.Module):
  function exam_gemini_grad_acc (line 53) | def exam_gemini_grad_acc(
  function run_dist (line 150) | def run_dist(rank, world_size, port):
  function test_grad_accumulation (line 157) | def test_grad_accumulation():

FILE: tests/test_zero/test_gemini/test_grad_clip.py
  function check_param (line 39) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module):
  function exam_grad_clipping (line 57) | def exam_grad_clipping(
  function run_dist (line 125) | def run_dist(rank, world_size, port):
  function test_grad_clip (line 133) | def test_grad_clip(world_size):

FILE: tests/test_zero/test_gemini/test_inference.py
  function check_param (line 27) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module):
  function multi_chunk_init (line 40) | def multi_chunk_init(model: torch.nn.Module, placement_config: dict):
  function single_chunk_init (line 49) | def single_chunk_init(model: torch.nn.Module, placement_config: dict):
  function exam_inference (line 61) | def exam_inference(placement_config: dict, model_name: str, model_init_f...
  function run_dist (line 112) | def run_dist(rank, world_size, port):
  function test_inference (line 119) | def test_inference(world_size):

FILE: tests/test_zero/test_gemini/test_optim.py
  function check_param (line 38) | def check_param(model: GeminiDDP, torch_model: torch.nn.Module, dtype: t...
  function exam_model_step (line 67) | def exam_model_step(
  function exam_tiny_example (line 132) | def exam_tiny_example(placement_config, model_name: str, mixed_precision...
  function run_dist (line 183) | def run_dist(rank, world_size, port):
  function test_optim (line 192) | def test_optim(world_size):

FILE: tests/test_zero/test_gemini/test_runtime_mem_tracer.py
  function test_runtime_mem_tracer (line 14) | def test_runtime_mem_tracer():

FILE: tests/test_zero/test_gemini/test_search.py
  function exam_search_chunk_size (line 28) | def exam_search_chunk_size():
  function exam_chunk_manager (line 40) | def exam_chunk_manager():
  function run_dist (line 58) | def run_dist(rank, world_size, port):
  function test_search (line 67) | def test_search(world_size):

FILE: tests/test_zero/test_gemini/test_zeroddp_state_dict.py
  function ignore_the_first_parameter (line 18) | def ignore_the_first_parameter(model: torch.nn.Module):
  function exam_state_dict (line 29) | def exam_state_dict(placement_config, keep_gathered, model_name: str, ma...
  function run_dist (line 76) | def run_dist(rank, world_size, port):
  function test_zero_ddp (line 84) | def test_zero_ddp(world_size):

FILE: tests/test_zero/test_gemini/test_zerooptim_state_dict.py
  function exam_zero_optim_state_dict (line 23) | def exam_zero_optim_state_dict(placement_config, keep_gathered):
  function run_dist (line 70) | def run_dist(rank, world_size, port):
  function test_zero_optim (line 79) | def test_zero_optim(world_size):

FILE: tests/test_zero/test_low_level/test_coll_nd.py
  function check_all_gather_2d (line 14) | def check_all_gather_2d():
  function run_dist (line 29) | def run_dist(rank, world_size, port):
  function test_comm_nd (line 37) | def test_comm_nd():

FILE: tests/test_zero/test_low_level/test_grad_acc.py
  class MlpModel (line 17) | class MlpModel(nn.Module):
    method __init__ (line 18) | def __init__(self):
    method forward (line 23) | def forward(self, x):
  function exam_zero_1_2_grad_acc (line 29) | def exam_zero_1_2_grad_acc():
  function exam_zero_1_grad_acc (line 76) | def exam_zero_1_grad_acc(sync):
  function run_dist (line 136) | def run_dist(rank, world_size, port):
  function test_grad_accumulation (line 145) | def test_grad_accumulation():

FILE: tests/test_zero/test_low_level/test_mem_leak.py
  class MlpModel (line 10) | class MlpModel(nn.Module):
    method __init__ (line 11) | def __init__(self):
    method forward (line 15) | def forward(self, x):
  class TestLowLevelZeroOptimizer (line 23) | class TestLowLevelZeroOptimizer(LowLevelZeroOptimizer):
    method __del__ (line 24) | def __del__(self):
  function exam_mem_leak (line 30) | def exam_mem_leak(world_size):
  function run_dist (line 48) | def run_dist(rank, world_size, port):
  function test_zero_1_2 (line 56) | def test_zero_1_2():

FILE: tests/test_zero/test_low_level/test_zero1_2.py
  class MlpModel (line 17) | class MlpModel(nn.Module):
    method __init__ (line 18) | def __init__(self):
    method forward (line 24) | def forward(self, x):
  function loose_close (line 30) | def loose_close(a, b, dtype: torch.dtype = torch.float32):
  function split_ddp_grad (line 46) | def split_ddp_grad(grad, world_size):
  function exam_zero_1_2 (line 57) | def exam_zero_1_2(fp8_communication: bool):
  function exam_zero_1_torch_ddp (line 129) | def exam_zero_1_torch_ddp(dtype: torch.dtype, master_weights: bool, extr...
  function run_dist (line 214) | def run_dist(rank, world_size, port):
  function test_zero_1_2 (line 223) | def test_zero_1_2():

FILE: tests/test_zero/test_low_level/test_zero_ckpt.py
  class MlpModel (line 17) | class MlpModel(nn.Module):
    method __init__ (line 18) | def __init__(self):
    method forward (line 23) | def forward(self, x):
  function loose_close (line 29) | def loose_close(a, b, dtype: torch.dtype = torch.float32):
  function exam_zero_1_torch_ddp_ckpt (line 46) | def exam_zero_1_torch_ddp_ckpt(extra_dp_size: int):
  function run_dist (line 120) | def run_dist(rank, world_size, port):
  function test_zero_ckpt (line 128) | def test_zero_ckpt():