SYMBOL INDEX (97 symbols across 8 files) FILE: v1.0_initial/advanced_ocr.py class AdvancedOCR (line 17) | class AdvancedOCR: method __init__ (line 18) | def __init__(self, model_path=None, confidence_threshold=0.5, use_cach... method _setup_gemini_api (line 52) | def _setup_gemini_api(self): method _setup_gcs_client (line 64) | def _setup_gcs_client(self): method _calculate_image_hash (line 83) | def _calculate_image_hash(self, image): method _get_cached_result (line 99) | def _get_cached_result(self, image_hash, cache_type): method _save_to_cache (line 123) | def _save_to_cache(self, image_hash, cache_type, result): method _detect_with_doclayout_yolo (line 142) | def _detect_with_doclayout_yolo(self, image_np): method _merge_overlapping_regions (line 210) | def _merge_overlapping_regions(self, regions): method _detect_regions (line 277) | def _detect_regions(self, image_np): method _crop_region (line 304) | def _crop_region(self, image, region): method _process_text_region (line 323) | def _process_text_region(self, region_img, region_info): method _process_table_region (line 434) | def _process_table_region(self, region_img, region_info): method _process_figure_region (line 599) | def _process_figure_region(self, region_img, region_info): method _process_formula_region (line 763) | def _process_formula_region(self, region_img, region_info): method _process_title_region (line 840) | def _process_title_region(self, region_img, region_info): method _process_list_region (line 856) | def _process_list_region(self, region_img, region_info): method _process_regions (line 872) | def _process_regions(self, image_np, regions): method _combine_processed_regions (line 927) | def _combine_processed_regions(self, processed_regions): method _upload_to_gcs (line 945) | def _upload_to_gcs(self, data, gcs_path): method process_image (line 976) | def process_image(self, image_path): method process_pdf (line 1019) | def process_pdf(self, pdf_path, output_folder=None): method save_result (line 1110) | def save_result(self, result, output_path): FILE: v1.0_initial/custom_doclayout_yolo.py class DocLayoutYOLO (line 18) | class DocLayoutYOLO: method __init__ (line 21) | def __init__(self, model_path=None): method init_model (line 34) | def init_model(self): method predict (line 68) | def predict(self, image_path, imgsz=1024, conf=0.25, device=None): FILE: v1.0_initial/ocr_stage1.py function run_docker_container (line 36) | def run_docker_container(input_dir, output_dir, credentials_dir, image_n... function main (line 127) | def main(): FILE: v1.0_initial/ocr_stage2.py function parse_gcs_prefix (line 88) | def parse_gcs_prefix(gcs_url: str) -> Tuple[str, str]: function load_json_from_gcs (line 105) | def load_json_from_gcs(gcs_url: str) -> Optional[Dict]: function save_json_to_gcs (line 137) | def save_json_to_gcs(data: Dict, gcs_path: str) -> Optional[str]: function check_folder_exists (line 164) | def check_folder_exists(folder_path: str) -> bool: function simplify_special_content_tags (line 184) | def simplify_special_content_tags(text: str) -> str: function extract_special_content (line 209) | def extract_special_content(text: str) -> Tuple[str, Dict[str, List[Dict... function restore_special_content (line 248) | def restore_special_content(text: str, special_contents: Dict[str, List[... function chatgpt_correct_text (line 291) | def chatgpt_correct_text(original_text: str) -> Dict[str, Any]: function chatgpt_correct_special_content (line 390) | def chatgpt_correct_special_content(content_type: str, content: str) -> ... function extract_page_number_from_filename (line 406) | def extract_page_number_from_filename(filename: str) -> Optional[int]: function process_page_stage2 (line 422) | def process_page_stage2(page_data: Dict, original_blob_name: str, folder... function list_top_level_folders (line 481) | def list_top_level_folders() -> List[str]: function check_stage1_exists (line 507) | def check_stage1_exists(folder_name: str) -> bool: function check_stage2_exists (line 520) | def check_stage2_exists(folder_name: str) -> bool: function list_stage1_subfolders (line 533) | def list_stage1_subfolders(folder_name: str) -> List[str]: function list_page_blobs (line 564) | def list_page_blobs(folder_name: str, subfolder: str) -> List[Any]: function process_folder (line 595) | def process_folder(folder_name: str) -> Dict[str, Any]: function process_all_folders (line 664) | def process_all_folders() -> Dict[str, Dict[str, Any]]: function main (line 694) | def main(): FILE: v2.0_initial/advanced_ocr.py class AdvancedOCR (line 18) | class AdvancedOCR: method __init__ (line 19) | def __init__(self, model_path=None, confidence_threshold=0.5, use_cach... method _setup_gemini_api (line 53) | def _setup_gemini_api(self): method _setup_gcs_client (line 65) | def _setup_gcs_client(self): method _calculate_image_hash (line 84) | def _calculate_image_hash(self, image): method _get_cached_result (line 104) | def _get_cached_result(self, image_hash, cache_type): method _save_to_cache (line 128) | def _save_to_cache(self, image_hash, cache_type, result): method _detect_with_doclayout_yolo (line 147) | def _detect_with_doclayout_yolo(self, image_np): method _merge_overlapping_regions (line 219) | def _merge_overlapping_regions(self, regions): method _detect_regions (line 286) | def _detect_regions(self, image_np): method _crop_region (line 313) | def _crop_region(self, image, region): method _optimize_image_for_api (line 333) | def _optimize_image_for_api(self, image): method _process_text_region (line 357) | def _process_text_region(self, region_img, region_info): method _process_table_region (line 478) | def _process_table_region(self, region_img, region_info): method _process_figure_region (line 665) | def _process_figure_region(self, region_img, region_info): method _process_formula_region (line 852) | def _process_formula_region(self, region_img, region_info): method _process_title_region (line 953) | def _process_title_region(self, region_img, region_info): method _process_list_region (line 969) | def _process_list_region(self, region_img, region_info): method _process_regions (line 985) | def _process_regions(self, image_np, regions): method _combine_processed_regions (line 1044) | def _combine_processed_regions(self, processed_regions): method _upload_to_gcs (line 1064) | def _upload_to_gcs(self, data, gcs_path): method process_image (line 1100) | def process_image(self, image_path): method process_pdf (line 1149) | def process_pdf(self, pdf_path, output_folder=None): method save_result (line 1260) | def save_result(self, result, output_path): FILE: v2.0_initial/custom_doclayout_yolo.py class DocLayoutYOLO (line 18) | class DocLayoutYOLO: method __init__ (line 21) | def __init__(self, model_path=None): method init_model (line 34) | def init_model(self): method predict (line 68) | def predict(self, image_path, imgsz=1024, conf=0.25, device=None): FILE: v2.0_initial/ocr_stage1.py function run_docker_container (line 36) | def run_docker_container(input_dir, output_dir, credentials_dir, image_n... function main (line 126) | def main(): FILE: v2.0_initial/ocr_stage2.py function parse_gcs_prefix (line 88) | def parse_gcs_prefix(gcs_url: str) -> Tuple[str, str]: function load_json_from_gcs (line 105) | def load_json_from_gcs(gcs_url: str) -> Optional[Dict]: function save_json_to_gcs (line 137) | def save_json_to_gcs(data: Dict, gcs_path: str) -> Optional[str]: function check_folder_exists (line 164) | def check_folder_exists(folder_path: str) -> bool: function simplify_special_content_tags (line 184) | def simplify_special_content_tags(text: str) -> str: function extract_special_content (line 209) | def extract_special_content(text: str) -> Tuple[str, Dict[str, List[Dict... function restore_special_content (line 248) | def restore_special_content(text: str, special_contents: Dict[str, List[... function chatgpt_correct_text (line 291) | def chatgpt_correct_text(original_text: str) -> Dict[str, Any]: function chatgpt_correct_special_content (line 390) | def chatgpt_correct_special_content(content_type: str, content: str) -> ... function extract_page_number_from_filename (line 406) | def extract_page_number_from_filename(filename: str) -> Optional[int]: function process_page_stage2 (line 422) | def process_page_stage2(page_data: Dict, original_blob_name: str, folder... function list_top_level_folders (line 481) | def list_top_level_folders() -> List[str]: function check_stage1_exists (line 507) | def check_stage1_exists(folder_name: str) -> bool: function check_stage2_exists (line 520) | def check_stage2_exists(folder_name: str) -> bool: function list_stage1_subfolders (line 533) | def list_stage1_subfolders(folder_name: str) -> List[str]: function list_page_blobs (line 564) | def list_page_blobs(folder_name: str, subfolder: str) -> List[Any]: function process_folder (line 595) | def process_folder(folder_name: str) -> Dict[str, Any]: function process_all_folders (line 664) | def process_all_folders() -> Dict[str, Dict[str, Any]]: function main (line 694) | def main():