SYMBOL INDEX (175 symbols across 26 files) FILE: tests/test_PII.py function test_PII_make_dataset (line 31) | def test_PII_make_dataset(config_file): FILE: tests/test_full_pipe.py function get_config_files (line 38) | def get_config_files(): function print_test_header (line 47) | def print_test_header(test_name: str, config_file: str = ""): function print_config_header (line 60) | def print_config_header(config_file: str): function setup_data_environment (line 71) | def setup_data_environment(data_folder_name: str = "test_person"): function setup_test_environment (line 106) | def setup_test_environment(): function restore_test_env (line 137) | def restore_test_env(): function run_cli_command (line 163) | def run_cli_command(command: list[str], config_path: str, timeout: int |... function load_config_with_path (line 201) | def load_config_with_path(config_file: str, config_section: str): function run_make_dataset_test (line 215) | def run_make_dataset_test(config_file: str): function run_train_sft_test (line 246) | def run_train_sft_test(config_file: str): function run_webchat_demo_test (line 259) | def run_webchat_demo_test(config_file: str): function run_server_test (line 269) | def run_server_test(config_file: str) -> subprocess.Popen: function run_test_model_test (line 279) | def run_test_model_test(config_file: str, server_process: subprocess.Pop... function clean_model_output (line 294) | def clean_model_output(): function test_full_pipeline_for_config (line 300) | def test_full_pipeline_for_config(config_file): FILE: weclone/cli.py function clear_argv (line 25) | def clear_argv(func): function with_community_info (line 43) | def with_community_info(func): function apply_common_decorators (line 56) | def apply_common_decorators(capture_output_enabled=False): function cli (line 83) | def cli(ctx, config_path): function qa_generator (line 106) | def qa_generator(): function train_sft (line 116) | def train_sft(): function web_demo (line 125) | def web_demo(): function eval_model (line 134) | def eval_model(): function test_model (line 143) | def test_model(): function server (line 152) | def server(): function version (line 161) | def version(): function show_community_info (line 166) | def show_community_info(): function _check_project_root (line 197) | def _check_project_root(): function _check_versions (line 223) | def _check_versions(): FILE: weclone/core/PII/pii_detector.py class PIIResult (line 16) | class PIIResult: class PIIDetector (line 24) | class PIIDetector: method __init__ (line 27) | def __init__(self, language: str = "en", threshold: float = 0.5): method _init_engines (line 41) | def _init_engines(self): method _add_custom_recognizers (line 72) | def _add_custom_recognizers(self, language: str): method has_pii (line 92) | def has_pii(self, text: str, entities: Optional[List[str]] = None) -> ... method batch_has_pii (line 96) | def batch_has_pii(self, texts: List[str]) -> List[bool]: method detect_pii (line 112) | def detect_pii(self, text: str) -> List[PIIResult]: method batch_detect_pii (line 149) | def batch_detect_pii(self, texts: List[str]) -> List[List[PIIResult]]: method anonymize_text (line 209) | def anonymize_text(self, text: str, entities: Optional[List[str]] = No... method get_supported_entities (line 239) | def get_supported_entities(self) -> List[str]: method get_all_entities (line 242) | def get_all_entities(self) -> List[str]: class ChinesePIIDetector (line 256) | class ChinesePIIDetector(PIIDetector): method __init__ (line 259) | def __init__(self, threshold: float = 0.5): method _add_custom_recognizers (line 277) | def _add_custom_recognizers(self, language: str): FILE: weclone/core/inference/offline_infer.py function extract_json_from_text (line 25) | def extract_json_from_text(text: str) -> str: function parse_guided_decoding_results (line 34) | def parse_guided_decoding_results( function vllm_infer (line 76) | def vllm_infer( FILE: weclone/core/inference/online_infer.py class OnlineLLM (line 17) | class OnlineLLM: method __init__ (line 18) | def __init__( method chat (line 39) | def chat( method chat_async (line 73) | def chat_async( method chat_batch (line 84) | def chat_batch( method close (line 159) | def close(self): method __enter__ (line 164) | def __enter__(self): method __exit__ (line 167) | def __exit__(self, exc_type, exc_val, exc_tb): FILE: weclone/data/chat_parsers/telegram_parser.py class TelegramChatParser (line 16) | class TelegramChatParser: method __init__ (line 19) | def __init__(self, config: WCMakeDatasetConfig): method get_message_type_and_content (line 38) | def get_message_type_and_content(self, message: Dict) -> tuple[str, st... method extract_text_content (line 96) | def extract_text_content(self, text_field) -> str: method determine_sender_type (line 109) | def determine_sender_type(self, from_id: str) -> int: method process_message (line 112) | def process_message(self, message: Dict) -> List[ChatMessage]: method process_chat (line 171) | def process_chat(self, jdata: Dict) -> List[ChatMessage]: method to_csv (line 199) | def to_csv(self, chat_messages: List[ChatMessage], output_file: str): method copy_received_images (line 251) | def copy_received_images( function process_telegram_dataset (line 285) | def process_telegram_dataset(config: WCMakeDatasetConfig) -> None: FILE: weclone/data/clean/strategies.py class CleaningStrategy (line 19) | class CleaningStrategy(ABC): method judge (line 25) | def judge(self, data: List[QaPair]) -> None: method clean (line 31) | def clean(self) -> str: class LLMCleaningStrategy (line 73) | class LLMCleaningStrategy(CleaningStrategy): method judge (line 78) | def judge(self, data: List[QaPair]) -> None: class OlineLLMCleaningStrategy (line 154) | class OlineLLMCleaningStrategy(CleaningStrategy): method judge (line 158) | def judge(self, data: List[QaPair]) -> None: FILE: weclone/data/models.py class ChatMessage (line 12) | class ChatMessage: class CutMessage (line 27) | class CutMessage: class Message (line 34) | class Message: class QaPair (line 40) | class QaPair: class QaPairScore (line 49) | class QaPairScore(BaseModel): class QaPairScoreWithId (line 53) | class QaPairScoreWithId(QaPairScore): FILE: weclone/data/qa_generator.py class DataProcessor (line 31) | class DataProcessor: method __init__ (line 32) | def __init__(self): method main (line 120) | def main(self): method pre_parse_chat_dataset (line 156) | def pre_parse_chat_dataset(self): method _execute_length_cdf_script (line 160) | def _execute_length_cdf_script(self): method get_csv_files (line 207) | def get_csv_files(self): method match_qa (line 228) | def match_qa(self, messages: List[ChatMessage]) -> List[Union[QaPair, ... method group_consecutive_messages (line 437) | def group_consecutive_messages(self, messages: List[ChatMessage]) -> L... method process_by_msgtype (line 580) | def process_by_msgtype(self, chat_message: ChatMessage): method load_file (line 586) | def load_file(self, file_path) -> List[ChatMessage]: method process_text (line 678) | def process_text(self, chat_message: ChatMessage): method save_result (line 681) | def save_result(self, qa_res: List[QaPair]): FILE: weclone/data/strategies.py class ConversationStrategy (line 9) | class ConversationStrategy(ABC): method is_same_conversation (line 15) | def is_same_conversation(self, history_msg: List[ChatMessage], current... class TimeWindowStrategy (line 21) | class TimeWindowStrategy(ConversationStrategy): method is_same_conversation (line 26) | def is_same_conversation(self, history_msg: List[ChatMessage], current... class LLMStrategy (line 32) | class LLMStrategy(ConversationStrategy): method is_same_conversation (line 35) | def is_same_conversation(self, history_msg: List[ChatMessage], current... class CompositeStrategy (line 41) | class CompositeStrategy(ConversationStrategy): method is_same_conversation (line 47) | def is_same_conversation(self, history_msg: List[ChatMessage], current... FILE: weclone/data/utils.py function check_image_file_exists (line 13) | def check_image_file_exists(file_path: str) -> str | bool: class ImageToTextProcessor (line 37) | class ImageToTextProcessor: method __init__ (line 40) | def __init__(self, api_url: str, api_key: str, model_name: str, config... method _process_images_in_parallel (line 53) | def _process_images_in_parallel(self, qa_list): method _encode_image_to_base64 (line 102) | def _encode_image_to_base64(self, image_path: str) -> str: method _get_image_format (line 111) | def _get_image_format(self, image_path: str) -> str: method _call_vision_api (line 126) | def _call_vision_api(self, image_path: str) -> str: method describe_image (line 171) | def describe_image(self, image_path: str) -> str: FILE: weclone/eval/cli_demo.py function main (line 5) | def main(): FILE: weclone/eval/eval_model.py function main (line 4) | def main(): FILE: weclone/eval/test_model.py function handler_text (line 26) | def handler_text(content: str, history: list, config): function main (line 49) | def main(): FILE: weclone/eval/web_demo.py function main (line 6) | def main(): FILE: weclone/server/api_service.py function main (line 10) | def main(): FILE: weclone/train/export_model.py function main (line 4) | def main(): FILE: weclone/train/train_sft.py function main (line 14) | def main(): FILE: weclone/utils/config.py function load_base_config (line 19) | def load_base_config() -> WcConfig: function create_config_by_arg_type (line 50) | def create_config_by_arg_type(arg_type: str, wc_config: WcConfig) -> Bas... function process_config_dict_and_argv (line 88) | def process_config_dict_and_argv(arg_type: str, config_pydantic: BaseMod... function load_config (line 95) | def load_config(arg_type: str) -> BaseModel: FILE: weclone/utils/config_models.py class StrEnum (line 11) | class StrEnum(str, Enum): method __str__ (line 18) | def __str__(self) -> str: method _missing_ (line 22) | def _missing_(cls, value): class BaseConfigModel (line 29) | class BaseConfigModel(BaseModel): class PlatformType (line 35) | class PlatformType(StrEnum): class LanguageType (line 42) | class LanguageType(StrEnum): class DataModality (line 49) | class DataModality(StrEnum): class CombineStrategy (line 59) | class CombineStrategy(StrEnum): class CleanStrategy (line 65) | class CleanStrategy(StrEnum): class FinetuningType (line 71) | class FinetuningType(StrEnum): class CommonArgs (line 79) | class CommonArgs(BaseConfigModel): class CliArgs (line 93) | class CliArgs(BaseModel): class LLMCleanConfig (line 99) | class LLMCleanConfig(BaseConfigModel): class CleanDatasetConfig (line 107) | class CleanDatasetConfig(BaseConfigModel): class VisionApiConfig (line 113) | class VisionApiConfig(BaseConfigModel): class TelegramArgs (line 123) | class TelegramArgs(BaseModel): class MakeDatasetArgs (line 128) | class MakeDatasetArgs(BaseConfigModel): class TrainSftArgs (line 168) | class TrainSftArgs(BaseConfigModel): class InferArgs (line 201) | class InferArgs(BaseConfigModel): class VllmArgs (line 208) | class VllmArgs(BaseConfigModel): class TestModelArgs (line 212) | class TestModelArgs(BaseConfigModel): class CommonMethods (line 216) | class CommonMethods: method _parse_dataset_name (line 217) | def _parse_dataset_name(self) -> str: class WcConfig (line 224) | class WcConfig(BaseModel): class WCInferConfig (line 237) | class WCInferConfig(CommonArgs, InferArgs): class WCTrainSftConfig (line 243) | class WCTrainSftConfig(CommonArgs, TrainSftArgs, CommonMethods): method process_config (line 251) | def process_config(self): class WCMakeDatasetConfig (line 267) | class WCMakeDatasetConfig(CommonArgs, MakeDatasetArgs, CommonMethods): method process_config (line 277) | def process_config(self): FILE: weclone/utils/i18n.py class MultiLangList (line 4) | class MultiLangList: method __init__ (line 5) | def __init__(self, translations: Dict[str, List[str]], default_lang="e... method _validate_translations (line 14) | def _validate_translations(self): method _build_reverse_mapping (line 31) | def _build_reverse_mapping(self): method set_language (line 39) | def set_language(self, lang: str): method get_items (line 47) | def get_items(self, lang: Optional[str] = None) -> List[str]: method get_item (line 52) | def get_item(self, index: int, lang: Optional[str] = None) -> str: method translate_text (line 59) | def translate_text(self, text: str, target_lang: Optional[str] = None)... method get_translation_pair (line 95) | def get_translation_pair(self, text: str) -> Dict[str, str]: method translate_batch (line 119) | def translate_batch(self, texts: List[str], target_lang: Optional[str]... method __iter__ (line 132) | def __iter__(self): method __len__ (line 135) | def __len__(self): method __getitem__ (line 138) | def __getitem__(self, index): FILE: weclone/utils/length_cdf.py function calculate_token_length (line 26) | def calculate_token_length( function length_cdf (line 65) | def length_cdf( FILE: weclone/utils/log.py class InterceptHandler (line 21) | class InterceptHandler(logging.Handler): method __init__ (line 22) | def __init__(self, level=logging.INFO): method emit (line 25) | def emit(self, record): function capture_output (line 42) | def capture_output(func): function configure_log_level_from_config (line 110) | def configure_log_level_from_config(): FILE: weclone/utils/retry.py function retry_on_http_error (line 9) | def retry_on_http_error( function retry_openai_api (line 93) | def retry_openai_api( function _calculate_delay (line 147) | def _calculate_delay( class RetryConfig (line 163) | class RetryConfig: method __init__ (line 166) | def __init__( method apply_to_function (line 184) | def apply_to_function(self, func: Callable) -> Callable: FILE: weclone/utils/tools.py function dict_to_argv (line 1) | def dict_to_argv(d):