SYMBOL INDEX (1723 symbols across 93 files) FILE: babeldoc/assets/assets.py class ResultContainer (line 38) | class ResultContainer: method __init__ (line 39) | def __init__(self): method set_result (line 42) | def set_result(self, result): function run_in_another_thread (line 46) | def run_in_another_thread(coro): function run_coro (line 58) | def run_coro(coro): function _retry_if_not_cancelled_and_failed (line 62) | def _retry_if_not_cancelled_and_failed(retry_state): function verify_file (line 80) | def verify_file(path: Path, sha3_256: str): function download_file (line 102) | async def download_file( function get_font_metadata (line 131) | async def get_font_metadata( function _get_fastest_upstream_for_font_internal (line 151) | async def _get_fastest_upstream_for_font_internal( function get_fastest_upstream_for_font (line 173) | async def get_fastest_upstream_for_font( function get_fastest_upstream_for_model (line 201) | async def get_fastest_upstream_for_model(client: httpx.AsyncClient | Non... function get_fastest_upstream (line 205) | async def get_fastest_upstream(client: httpx.AsyncClient | None = None): function get_doclayout_onnx_model_path_async (line 226) | async def get_doclayout_onnx_model_path_async(client: httpx.AsyncClient ... function get_table_detection_rapidocr_model_path_async (line 248) | async def get_table_detection_rapidocr_model_path_async( function get_doclayout_onnx_model_path (line 270) | def get_doclayout_onnx_model_path(): function get_table_detection_rapidocr_model_path (line 274) | def get_table_detection_rapidocr_model_path(): function get_font_url_by_name_and_upstream (line 278) | def get_font_url_by_name_and_upstream(font_file_name: str, upstream: str): function get_font_and_metadata_async (line 286) | async def get_font_and_metadata_async( function get_font_and_metadata (line 325) | def get_font_and_metadata(font_file_name: str): function get_cmap_file_path_async (line 329) | async def get_cmap_file_path_async( function download_cmap_file_async (line 355) | async def download_cmap_file_async( function get_cmap_data_async (line 379) | async def get_cmap_data_async( function get_cmap_file_path (line 387) | def get_cmap_file_path(name: str): function get_cmap_data (line 391) | def get_cmap_data(name: str): function get_font_family (line 395) | def get_font_family(lang_code: str): function download_all_fonts_async (line 400) | async def download_all_fonts_async(client: httpx.AsyncClient | None = No... function download_all_cmaps_async (line 428) | async def download_all_cmaps_async(client: httpx.AsyncClient | None = No... function async_warmup (line 453) | async def async_warmup(): function warmup (line 468) | def warmup(): function generate_all_assets_file_list (line 472) | def generate_all_assets_file_list(): function generate_offline_assets_package_async (line 514) | async def generate_offline_assets_package_async(output_directory: Path |... function restore_offline_assets_package_async (line 544) | async def restore_offline_assets_package_async(input_path: Path | None =... function get_offline_assets_tag (line 590) | def get_offline_assets_tag(file_list: dict | None = None): function generate_offline_assets_package (line 607) | def generate_offline_assets_package(output_directory: Path | None = None): function restore_offline_assets_package (line 611) | def restore_offline_assets_package(input_path: Path | None = None): FILE: babeldoc/assets/embedding_assets_metadata.py function __add_fallback_to_font_family (line 1395) | def __add_fallback_to_font_family(): function __cleanup_unused_font_metadata (line 1410) | def __cleanup_unused_font_metadata(): function get_font_family (line 1427) | def get_font_family(lang_code: str): function verify_font_family (line 1447) | def verify_font_family(font_family: str | dict): FILE: babeldoc/asynchronize/__init__.py class Args (line 5) | class Args: method __init__ (line 6) | def __init__(self, args, kwargs): class AsyncCallback (line 11) | class AsyncCallback: method __init__ (line 12) | def __init__(self): method step_callback (line 17) | def step_callback(self, *args, **kwargs): method finished_callback (line 28) | def finished_callback(self, *args, **kwargs): method __await__ (line 36) | def __await__(self): method __aiter__ (line 40) | def __aiter__(self): method __anext__ (line 44) | async def __anext__(self): FILE: babeldoc/babeldoc_exception/BabelDOCException.py class ScannedPDFError (line 1) | class ScannedPDFError(Exception): method __init__ (line 2) | def __init__(self, message): class ExtractTextError (line 6) | class ExtractTextError(Exception): method __init__ (line 7) | def __init__(self, message): class InputFileGeneratedByBabelDOCError (line 11) | class InputFileGeneratedByBabelDOCError(Exception): method __init__ (line 12) | def __init__(self, message): class ContentFilterError (line 16) | class ContentFilterError(Exception): method __init__ (line 17) | def __init__(self, message): FILE: babeldoc/const.py function get_cache_file_path (line 14) | def get_cache_file_path(filename: str, sub_folder: str | None = None) ->... function enable_process_pool (line 52) | def enable_process_pool(): function get_process_pool (line 62) | def get_process_pool(): function close_process_pool (line 76) | def close_process_pool(): function batched (line 87) | def batched(iterable, n, *, strict=False): FILE: babeldoc/docvision/base_doclayout.py class YoloResult (line 12) | class YoloResult: method __init__ (line 15) | def __init__(self, names, boxes=None, boxes_data=None): class YoloBox (line 25) | class YoloBox: method __init__ (line 28) | def __init__(self, data=None, xyxy=None, conf=None, cls=None): class DocLayoutModel (line 40) | class DocLayoutModel(abc.ABC): method load_onnx (line 42) | def load_onnx(): method load_available (line 50) | def load_available(): method stride (line 55) | def stride(self) -> int: method handle_document (line 59) | def handle_document( FILE: babeldoc/docvision/doclayout.py class OnnxModel (line 39) | class OnnxModel(DocLayoutModel): method __init__ (line 40) | def __init__(self, model_path: str): method from_pretrained (line 63) | def from_pretrained(): method stride (line 68) | def stride(self): method resize_and_pad_image (line 71) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 119) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict (line 145) | def predict(self, image, imgsz=800, batch_size=16, **kwargs): method handle_document (line 208) | def handle_document( FILE: babeldoc/docvision/rpc_doclayout.py function encode_image (line 25) | def encode_image(image) -> bytes: function predict_layout (line 59) | def predict_layout( class ResultContainer (line 119) | class ResultContainer: method __init__ (line 120) | def __init__(self): class RpcDocLayoutModel (line 124) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 127) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 135) | def stride(self) -> int: method resize_and_pad_image (line 139) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 180) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 206) | def predict_image( method predict (line 241) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: method predict_page (line 257) | def predict_page( method handle_document (line 273) | def handle_document( method from_host (line 290) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout2.py function encode_image (line 26) | def encode_image(image) -> bytes: function predict_layout (line 59) | def predict_layout( class ResultContainer (line 144) | class ResultContainer: method __init__ (line 145) | def __init__(self): class RpcDocLayoutModel (line 149) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 152) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 160) | def stride(self) -> int: method resize_and_pad_image (line 164) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 205) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 231) | def predict_image( method predict (line 267) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: method predict_page (line 283) | def predict_page( method handle_document (line 299) | def handle_document( method from_host (line 316) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout3.py function encode_image (line 26) | def encode_image(image) -> bytes: function predict_layout (line 59) | def predict_layout( class ResultContainer (line 137) | class ResultContainer: method __init__ (line 138) | def __init__(self): class RpcDocLayoutModel (line 142) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 145) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 153) | def stride(self) -> int: method resize_and_pad_image (line 157) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 198) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 224) | def predict_image( method predict (line 260) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: method predict_page (line 276) | def predict_page( method handle_document (line 292) | def handle_document( method from_host (line 309) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout4.py function encode_image (line 26) | def encode_image(image) -> bytes: function predict_layout (line 59) | def predict_layout( class ResultContainer (line 144) | class ResultContainer: method __init__ (line 145) | def __init__(self): class RpcDocLayoutModel (line 149) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 152) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 160) | def stride(self) -> int: method resize_and_pad_image (line 164) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 205) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 231) | def predict_image( method predict (line 267) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: method predict_page (line 283) | def predict_page( method handle_document (line 299) | def handle_document( method from_host (line 316) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout5.py function encode_image (line 26) | def encode_image(image) -> bytes: function predict_layout (line 59) | def predict_layout( class ResultContainer (line 135) | class ResultContainer: method __init__ (line 136) | def __init__(self): class RpcDocLayoutModel (line 140) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 143) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 151) | def stride(self) -> int: method resize_and_pad_image (line 155) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 196) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 222) | def predict_image( method predict (line 258) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: method predict_page (line 274) | def predict_page( method handle_document (line 290) | def handle_document( method from_host (line 307) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout6.py function encode_image (line 39) | def encode_image(image) -> bytes: function clip_num (line 61) | def clip_num(num: float, min_value: float, max_value: float) -> float: function predict_layout (line 81) | def predict_layout( function predict_layout2 (line 199) | def predict_layout2( class ResultContainer (line 284) | class ResultContainer: method __init__ (line 285) | def __init__(self): function filter_text (line 289) | def filter_text(txt: str, font_mapper: FontMapper): class RpcDocLayoutModel (line 300) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 303) | def __init__(self, host: str = "http://localhost:8000;http://localhost... method init_font_mapper (line 324) | def init_font_mapper(self, translation_config): method stride (line 328) | def stride(self) -> int: method resize_and_pad_image (line 332) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 373) | def scale_boxes(self, img1_shape, boxes, img0_shape): method calculate_iou (line 399) | def calculate_iou(self, box1, box2): method is_subset (line 422) | def is_subset(self, inner_box, outer_box): method expand_box_to_contain (line 434) | def expand_box_to_contain(self, box_to_expand, box_to_contain): method post_process_boxes (line 446) | def post_process_boxes(self, merged_boxes: list[YoloBox], names: dict[... method predict_image (line 477) | def predict_image( method predict (line 560) | def predict(self, image, imgsz=1024, **kwargs) -> list[YoloResult]: #... method predict_page (line 574) | def predict_page(self, page, pdf_bytes: Path, translate_config, save_d... method handle_document (line 593) | def handle_document( # type: ignore[override] method from_host (line 612) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/rpc_doclayout7.py function encode_image (line 34) | def encode_image(image) -> bytes: function predict_layout (line 67) | def predict_layout( class ResultContainer (line 171) | class ResultContainer: method __init__ (line 172) | def __init__(self): class RpcDocLayoutModel (line 176) | class RpcDocLayoutModel(DocLayoutModel): method __init__ (line 179) | def __init__(self, host: str = "http://localhost:8000"): method stride (line 187) | def stride(self) -> int: method resize_and_pad_image (line 191) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 232) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict_image (line 258) | def predict_image( method predict_page (line 299) | def predict_page( method handle_document (line 315) | def handle_document( method from_host (line 332) | def from_host(host: str) -> "RpcDocLayoutModel": FILE: babeldoc/docvision/table_detection/rapidocr.py function convert_to_yolo_result (line 29) | def convert_to_yolo_result(predictions): function create_yolo_result_from_nested_coords (line 66) | def create_yolo_result_from_nested_coords(nested_coords: np.ndarray, nam... class RapidOCRModel (line 85) | class RapidOCRModel: method __init__ (line 86) | def __init__(self): method stride (line 105) | def stride(self): method resize_and_pad_image (line 108) | def resize_and_pad_image(self, image, new_shape): method scale_boxes (line 156) | def scale_boxes(self, img1_shape, boxes, img0_shape): method predict (line 182) | def predict(self, image, imgsz=800, batch_size=16, **kwargs): method handle_document (line 228) | def handle_document( method _is_box_in_table (line 277) | def _is_box_in_table(self, box_xyxy, table_box, page, img_width, img_h... FILE: babeldoc/format/pdf/babelpdf/base14.py function get_cached_bbox (line 3311) | def get_cached_bbox(database, family, encoding): function get_base14_bbox (line 3321) | def get_base14_bbox(family, encoding_name="WinAnsiEncoding"): FILE: babeldoc/format/pdf/babelpdf/cidfont.py function indirect (line 7) | def indirect(obj): function get_xref (line 12) | def get_xref(doc, xref, key): function get_font_file (line 18) | def get_font_file(doc, xref): function get_font_descriptor (line 27) | def get_font_descriptor(doc, xref): function get_descendant_fonts (line 32) | def get_descendant_fonts(doc, xref): function get_glyph_bbox (line 43) | def get_glyph_bbox(face, g): function get_face_bbox (line 56) | def get_face_bbox(blob): function get_cidfont_bbox (line 64) | def get_cidfont_bbox(doc, xref): FILE: babeldoc/format/pdf/babelpdf/cmap.py function parse_blob_value (line 28) | def parse_blob_value(text): function parse_cmap_char (line 32) | def parse_cmap_char(text, store): function parse_cmap_range (line 39) | def parse_cmap_range(text, store): function parse_cmap (line 47) | def parse_cmap(text): function _normalize_cmap_name (line 63) | def _normalize_cmap_name(name: str) -> str: function use_cmap (line 70) | def use_cmap(name: str): function propagation (line 99) | def propagation(r, c): class CharacterMap (line 119) | class CharacterMap: method __init__ (line 120) | def __init__(self, text): method decode_one (line 132) | def decode_one(self, text): method decode (line 139) | def decode(self, text): FILE: babeldoc/format/pdf/babelpdf/encoding.py function get_type1_encoding (line 1038) | def get_type1_encoding(name): FILE: babeldoc/format/pdf/babelpdf/type3.py function merge_bbox (line 7) | def merge_bbox(bbox_list, factor=1): function get_type3_bbox (line 16) | def get_type3_bbox(doc, obj): FILE: babeldoc/format/pdf/babelpdf/utils.py function guarded_bbox (line 4) | def guarded_bbox(bbox): FILE: babeldoc/format/pdf/converter.py class PDFConverterEx (line 32) | class PDFConverterEx(PDFConverter): method __init__ (line 33) | def __init__( method begin_page (line 41) | def begin_page(self, page, ctm) -> None: method end_page (line 56) | def end_page(self, _page) -> None: method begin_figure (line 60) | def begin_figure(self, name, bbox, matrix) -> None: method end_figure (line 66) | def end_figure(self, _: str) -> None: method render_char (line 75) | def render_char( class AWLTChar (line 129) | class AWLTChar(LTChar): method __init__ (line 132) | def __init__( method __repr__ (line 190) | def __repr__(self) -> str: method get_text (line 193) | def get_text(self) -> str: class Paragraph (line 197) | class Paragraph: method __init__ (line 198) | def __init__(self, y, x, x0, x1, size, brk): class TranslateConverter (line 208) | class TranslateConverter(PDFConverterEx): method __init__ (line 209) | def __init__( method receive_layout (line 234) | def receive_layout(self, ltpage: LTPage): FILE: babeldoc/format/pdf/document_il/backend/pdf_creater.py class RenderUnit (line 33) | class RenderUnit(ABC): method __init__ (line 36) | def __init__( method render (line 51) | def render( method get_sort_key (line 59) | def get_sort_key(self) -> tuple[int, int]: class CharacterRenderUnit (line 64) | class CharacterRenderUnit(RenderUnit): method __init__ (line 67) | def __init__( method render (line 76) | def render(self, draw_op: BitStream, context: "RenderContext") -> None: class FormRenderUnit (line 128) | class FormRenderUnit(RenderUnit): method __init__ (line 131) | def __init__( method render (line 140) | def render(self, draw_op: BitStream, context: "RenderContext") -> None: class RectangleRenderUnit (line 218) | class RectangleRenderUnit(RenderUnit): method __init__ (line 221) | def __init__( method render (line 232) | def render(self, draw_op: BitStream, context: "RenderContext") -> None: class CurveRenderUnit (line 261) | class CurveRenderUnit(RenderUnit): method __init__ (line 264) | def __init__( method render (line 273) | def render(self, draw_op: BitStream, context: "RenderContext") -> None: class RenderContext (line 335) | class RenderContext: method __init__ (line 338) | def __init__( function to_int (line 361) | def to_int(src): function parse_mapping (line 365) | def parse_mapping(text): function apply_normalization (line 372) | def apply_normalization(cmap, gid, code): function batched (line 385) | def batched(iterable, n, *, strict=False): function update_tounicode_cmap_pair (line 396) | def update_tounicode_cmap_pair(cmap, data): function update_tounicode_cmap_code (line 403) | def update_tounicode_cmap_code(cmap, data): function parse_tounicode_cmap (line 408) | def parse_tounicode_cmap(data): function parse_truetype_data (line 421) | def parse_truetype_data(data): function make_tounicode (line 448) | def make_tounicode(cmap, used): function reproduce_one_font (line 469) | def reproduce_one_font(doc, index): function reproduce_cmap (line 484) | def reproduce_cmap(doc): function _subset_fonts_process (line 500) | def _subset_fonts_process(pdf_path, output_path): function _save_pdf_clean_process (line 519) | def _save_pdf_clean_process( class PDFCreater (line 557) | class PDFCreater: method __init__ (line 560) | def __init__( method render_graphic_state (line 574) | def render_graphic_state( method render_paragraph_to_char (line 610) | def render_paragraph_to_char( method create_render_units_for_page (line 639) | def create_render_units_for_page( method render_units_to_stream (line 721) | def render_units_to_stream( method get_available_font_list (line 742) | def get_available_font_list(self, pdf, page): method get_xobj_available_fonts (line 746) | def get_xobj_available_fonts(self, page_xref_id, pdf): method _render_rectangle (line 772) | def _render_rectangle( method create_side_by_side_dual_pdf (line 811) | def create_side_by_side_dual_pdf( method create_alternating_pages_dual_pdf (line 895) | def create_alternating_pages_dual_pdf( method write_debug_info (line 925) | def write_debug_info( method subset_fonts_in_subprocess (line 1016) | def subset_fonts_in_subprocess( method save_pdf_with_timeout (line 1090) | def save_pdf_with_timeout( method restore_media_box (line 1231) | def restore_media_box(self, doc: pymupdf.Document, mediabox_data: dict... method write (line 1239) | def write( method update_page_content_stream (line 1425) | def update_page_content_stream( FILE: babeldoc/format/pdf/document_il/frontend/il_creater.py function invert_matrix (line 44) | def invert_matrix( function batched (line 74) | def batched(iterable, n, *, strict=False): function indirect (line 111) | def indirect(obj): function get_char_cbox (line 116) | def get_char_cbox(face, idx): function get_name_cbox (line 121) | def get_name_cbox(face, name): function font_encoding_lookup (line 130) | def font_encoding_lookup(doc, idx, key): function parse_font_encoding (line 138) | def parse_font_encoding(doc, idx): function get_truetype_ansi_bbox_list (line 146) | def get_truetype_ansi_bbox_list(face): function collect_face_cmap (line 153) | def collect_face_cmap(face): function get_truetype_custom_bbox_list (line 164) | def get_truetype_custom_bbox_list(face): function parse_font_file (line 178) | def parse_font_file(doc, idx, encoding, differences): function parse_encoding (line 209) | def parse_encoding(obj_str): function parse_mapping (line 225) | def parse_mapping(text): function update_cmap_pair (line 232) | def update_cmap_pair(cmap, data): function update_cmap_code (line 244) | def update_cmap_code(cmap, data): function parse_cmap (line 254) | def parse_cmap(cmap_str): function get_code (line 267) | def get_code(cmap, c): function get_bbox (line 274) | def get_bbox(bbox, size, c, x, y): function get_rotation_angle (line 319) | def get_rotation_angle(matrix): class ILCreater (line 331) | class ILCreater: method __init__ (line 334) | def __init__(self, translation_config: TranslationConfig): method transform_clip_path (line 365) | def transform_clip_path( method get_render_order_and_increase (line 404) | def get_render_order_and_increase(self): method get_render_order (line 408) | def get_render_order(self): method on_finish (line 411) | def on_finish(self): method is_graphic_operation (line 414) | def is_graphic_operation(self, operator: str): method is_passthrough_per_char_operation (line 423) | def is_passthrough_per_char_operation(self, operator: str): method can_remove_old_passthrough_per_char_instruction (line 429) | def can_remove_old_passthrough_per_char_instruction(self, operator: str): method on_line_dash (line 435) | def on_line_dash(self, dash, phase): method on_passthrough_per_char (line 439) | def on_passthrough_per_char(self, operator: str, args: list[str]): method remove_latest_passthrough_per_char_instruction (line 460) | def remove_latest_passthrough_per_char_instruction(self): method parse_arg (line 464) | def parse_arg(self, arg: str): method pop_passthrough_per_char_instruction (line 473) | def pop_passthrough_per_char_instruction(self): method push_passthrough_per_char_instruction (line 490) | def push_passthrough_per_char_instruction(self): method on_stroking_color_space (line 497) | def on_stroking_color_space(self, color_space_name): method on_non_stroking_color_space (line 500) | def on_non_stroking_color_space(self, color_space_name): method on_new_stream (line 503) | def on_new_stream(self): method push_xobj (line 509) | def push_xobj(self): method pop_xobj (line 519) | def pop_xobj(self): method on_xobj_begin (line 524) | def on_xobj_begin(self, bbox, xref_id): method on_xobj_end (line 546) | def on_xobj_end(self, xobj_id, base_op): method on_page_start (line 554) | def on_page_start(self): method on_page_end (line 577) | def on_page_end(self): method on_page_crop_box (line 602) | def on_page_crop_box( method on_page_media_box (line 612) | def on_page_media_box( method on_page_number (line 622) | def on_page_number(self, page_number: int): method on_page_base_operation (line 627) | def on_page_base_operation(self, operation: str): method on_page_resource_font (line 631) | def on_page_resource_font(self, font: PDFFont, xref_id: int, font_id: ... method parse_font_xobj_id (line 768) | def parse_font_xobj_id(self, xobj_id: int): method create_graphic_state (line 801) | def create_graphic_state( method on_lt_char (line 870) | def on_lt_char(self, char: LTChar): method _collect_valid_char (line 1022) | def _collect_valid_char(self, ch: str): method on_lt_curve (line 1059) | def on_lt_curve(self, curve: babeldoc.pdfminer.layout.LTCurve): method on_xobj_form (line 1170) | def on_xobj_form( method on_pdf_clip_path (line 1225) | def on_pdf_clip_path( method create_il (line 1236) | def create_il(self): method on_total_pages (line 1245) | def on_total_pages(self, total_pages: int): method on_pdf_figure (line 1259) | def on_pdf_figure(self, figure: LTFigure): method on_inline_image_begin (line 1268) | def on_inline_image_begin(self): method on_inline_image_end (line 1276) | def on_inline_image_end(self, stream_obj, ctm): FILE: babeldoc/format/pdf/document_il/il_version_1.py class BaseOperations (line 6) | class BaseOperations: class Meta (line 7) | class Meta: class Box (line 19) | class Box: class Meta (line 20) | class Meta: class GraphicState (line 54) | class GraphicState: class Meta (line 55) | class Meta: class PdfAffineTransform (line 68) | class PdfAffineTransform: class Meta (line 69) | class Meta: class PdfFontCharBoundingBox (line 117) | class PdfFontCharBoundingBox: class Meta (line 118) | class Meta: class PdfInlineForm (line 159) | class PdfInlineForm: class Meta (line 160) | class Meta: class PdfMatrix (line 180) | class PdfMatrix: class Meta (line 181) | class Meta: class PdfPath (line 229) | class PdfPath: class Meta (line 230) | class Meta: class PdfXobjForm (line 263) | class PdfXobjForm: class Meta (line 264) | class Meta: class Cropbox (line 286) | class Cropbox: class Meta (line 287) | class Meta: class Mediabox (line 300) | class Mediabox: class Meta (line 301) | class Meta: class PageLayout (line 314) | class PageLayout: class Meta (line 315) | class Meta: class PdfFigure (line 349) | class PdfFigure: class Meta (line 350) | class Meta: class PdfFont (line 363) | class PdfFont: class Meta (line 364) | class Meta: class PdfFormSubtype (line 444) | class PdfFormSubtype: class Meta (line 445) | class Meta: class PdfOriginalPath (line 465) | class PdfOriginalPath: class Meta (line 466) | class Meta: class PdfRectangle (line 480) | class PdfRectangle: class Meta (line 481) | class Meta: class PdfStyle (line 535) | class PdfStyle: class Meta (line 536) | class Meta: class VisualBbox (line 564) | class VisualBbox: class Meta (line 565) | class Meta: class PdfCharacter (line 578) | class PdfCharacter: class Meta (line 579) | class Meta: class PdfCurve (line 671) | class PdfCurve: class Meta (line 672) | class Meta: class PdfForm (line 761) | class PdfForm: class Meta (line 762) | class Meta: class PdfSameStyleUnicodeCharacters (line 847) | class PdfSameStyleUnicodeCharacters: class Meta (line 848) | class Meta: class PdfXobject (line 874) | class PdfXobject: class Meta (line 875) | class Meta: class PdfFormula (line 919) | class PdfFormula: class Meta (line 920) | class Meta: class PdfLine (line 988) | class PdfLine: class Meta (line 989) | class Meta: class PdfSameStyleCharacters (line 1017) | class PdfSameStyleCharacters: class Meta (line 1018) | class Meta: class PdfParagraphComposition (line 1047) | class PdfParagraphComposition: class Meta (line 1048) | class Meta: class PdfParagraph (line 1089) | class PdfParagraph: class Meta (line 1090) | class Meta: class Page (line 1182) | class Page: class Meta (line 1183) | class Meta: class Document (line 1290) | class Document: class Meta (line 1291) | class Meta: FILE: babeldoc/format/pdf/document_il/midend/add_debug_information.py class AddDebugInformation (line 15) | class AddDebugInformation: method __init__ (line 18) | def __init__(self, translation_config: TranslationConfig): method process (line 22) | def process(self, docs: il_version_1.Document): method _create_rectangle (line 29) | def _create_rectangle( method _create_text (line 43) | def _create_text( method process_page (line 78) | def process_page(self, page: il_version_1.Page): FILE: babeldoc/format/pdf/document_il/midend/automatic_term_extractor.py class BatchParagraph (line 69) | class BatchParagraph: method __init__ (line 70) | def __init__( class DocumentTermExtractTracker (line 79) | class DocumentTermExtractTracker: method __init__ (line 80) | def __init__(self): method new_page (line 83) | def new_page(self): method to_json (line 88) | def to_json(self): class PageTermExtractTracker (line 109) | class PageTermExtractTracker: method __init__ (line 110) | def __init__(self): method new_paragraph (line 113) | def new_paragraph(self): class ParagraphTermExtractTracker (line 119) | class ParagraphTermExtractTracker: method __init__ (line 120) | def __init__(self): method append_paragraph_unicode (line 123) | def append_paragraph_unicode(self, unicode: str): method set_output (line 126) | def set_output(self, output: str): method set_input (line 129) | def set_input(self, _input: str): class AutomaticTermExtractor (line 133) | class AutomaticTermExtractor: method __init__ (line 136) | def __init__( method calc_token_count (line 154) | def calc_token_count(self, text: str) -> int: method _snapshot_token_usage (line 160) | def _snapshot_token_usage(self) -> tuple[int, int, int, int]: method _clean_json_output (line 179) | def _clean_json_output(self, llm_output: str) -> str: method _process_llm_response (line 193) | def _process_llm_response(self, llm_response_text: str, request_id: str): method process_page (line 226) | def process_page( method extract_terms_from_paragraphs (line 274) | def extract_terms_from_paragraphs( method procress (line 357) | def procress(self, doc_il: ILDocument): FILE: babeldoc/format/pdf/document_il/midend/detect_scanned_file.py class DetectScannedFile (line 19) | class DetectScannedFile: method __init__ (line 22) | def __init__(self, translation_config: TranslationConfig): method _save_debug_box_to_page (line 25) | def _save_debug_box_to_page(self, page: il_version_1.Page, similarity:... method fast_check (line 68) | def fast_check(self, doc: pymupdf.Document) -> bool: method process (line 84) | def process( method clean_render_order_for_chars (line 144) | def clean_render_order_for_chars(self, docs: il_version_1.Document): method detect_page_is_scanned (line 151) | def detect_page_is_scanned( FILE: babeldoc/format/pdf/document_il/midend/il_translator.py class RichTextPlaceholder (line 77) | class RichTextPlaceholder: method __init__ (line 78) | def __init__( method to_dict (line 94) | def to_dict(self) -> dict: class FormulaPlaceholder (line 108) | class FormulaPlaceholder: method __init__ (line 109) | def __init__( method to_dict (line 121) | def to_dict(self) -> dict: class PbarContext (line 133) | class PbarContext: method __init__ (line 134) | def __init__(self, pbar): method __enter__ (line 137) | def __enter__(self): method __exit__ (line 140) | def __exit__(self, exc_type, exc_value, traceback): class DocumentTranslateTracker (line 144) | class DocumentTranslateTracker: method __init__ (line 145) | def __init__(self): method new_page (line 151) | def new_page(self): method new_cross_page (line 156) | def new_cross_page(self): method new_cross_column (line 161) | def new_cross_column(self): method to_json (line 167) | def to_json(self): method convert_paragraph (line 190) | def convert_paragraph(self, page): class PageTranslateTracker (line 234) | class PageTranslateTracker: method __init__ (line 235) | def __init__(self): method new_paragraph (line 238) | def new_paragraph(self): class ParagraphTranslateTracker (line 244) | class ParagraphTranslateTracker: method __init__ (line 245) | def __init__(self): method set_pdf_unicode (line 250) | def set_pdf_unicode(self, unicode: str): method set_input (line 253) | def set_input(self, input_text: str): method set_placeholders (line 256) | def set_placeholders( method set_original_placeholders (line 261) | def set_original_placeholders(self, placeholders: dict[str, int] | None): method record_multi_paragraph_id (line 265) | def record_multi_paragraph_id(self, mid): method record_multi_paragraph_index (line 268) | def record_multi_paragraph_index(self, index): method set_output (line 271) | def set_output(self, output: str): method record_removed_hallucinated_placeholder (line 274) | def record_removed_hallucinated_placeholder(self, token: str): method new_llm_translate_tracker (line 282) | def new_llm_translate_tracker(self) -> LLMTranslateTracker: method last_llm_translate_tracker (line 287) | def last_llm_translate_tracker(self) -> LLMTranslateTracker | None: class LLMTranslateTracker (line 293) | class LLMTranslateTracker: method __init__ (line 294) | def __init__(self): method set_input (line 302) | def set_input(self, input_text: str): method set_output (line 305) | def set_output(self, output_text: str): method set_error_message (line 308) | def set_error_message(self, error_message: str): method set_placeholder_full_match (line 312) | def set_placeholder_full_match(self): method set_fallback_to_translate (line 315) | def set_fallback_to_translate(self): method to_dict (line 318) | def to_dict(self): class ILTranslator (line 329) | class ILTranslator: method __init__ (line 332) | def __init__( method calc_token_count (line 382) | def calc_token_count(self, text: str) -> int: method translate (line 388) | def translate(self, docs: Document): method find_title_paragraph (line 426) | def find_title_paragraph(self, docs: Document) -> PdfParagraph | None: method process_page (line 442) | def process_page( class TranslateInput (line 479) | class TranslateInput: method __init__ (line 480) | def __init__( method set_original_placeholder_tokens (line 493) | def set_original_placeholder_tokens(self, tokens: dict[str, int] | N... method get_placeholders_hint (line 497) | def get_placeholders_hint(self) -> dict[str, str] | None: method create_formula_placeholder (line 515) | def create_formula_placeholder( method create_rich_text_placeholder (line 531) | def create_rich_text_placeholder( method get_translate_input (line 571) | def get_translate_input( method process_formula (line 732) | def process_formula( method process_composition (line 744) | def process_composition( method parse_translate_output (line 767) | def parse_translate_output( method pre_translate_paragraph (line 950) | def pre_translate_paragraph( method post_translate_paragraph (line 987) | def post_translate_paragraph( method _build_role_block (line 1017) | def _build_role_block(self) -> str: method _build_context_block (line 1038) | def _build_context_block( method _build_glossary_block (line 1086) | def _build_glossary_block(self, text: str) -> str: method generate_prompt_for_llm (line 1130) | def generate_prompt_for_llm( method add_content_filter_hint (line 1162) | def add_content_filter_hint(self, page: Page, paragraph: PdfParagraph): method _create_text (line 1180) | def _create_text( method translate_paragraph (line 1210) | def translate_paragraph( FILE: babeldoc/format/pdf/document_il/midend/il_translator_llm_only.py class BatchParagraph (line 98) | class BatchParagraph: method __init__ (line 99) | def __init__( class ILTranslatorLLMOnly (line 110) | class ILTranslatorLLMOnly: method __init__ (line 113) | def __init__( method calc_token_count (line 153) | def calc_token_count(self, text: str) -> int: method find_title_paragraph (line 159) | def find_title_paragraph(self, docs: Document) -> PdfParagraph | None: method translate (line 175) | def translate(self, docs: Document) -> None: method _is_body_text_paragraph (line 257) | def _is_body_text_paragraph(self, paragraph: PdfParagraph) -> bool: method _should_translate_paragraph (line 272) | def _should_translate_paragraph( method _filter_paragraphs (line 310) | def _filter_paragraphs( method _build_font_maps (line 334) | def _build_font_maps( method process_cross_page_paragraph (line 357) | def process_cross_page_paragraph( method process_cross_column_paragraph (line 454) | def process_cross_column_paragraph( method process_page (line 526) | def process_page( method translate_paragraph (line 622) | def translate_paragraph( method _build_llm_prompt (line 882) | def _build_llm_prompt( method _clean_json_output (line 981) | def _clean_json_output(self, llm_output: str) -> str: FILE: babeldoc/format/pdf/document_il/midend/layout_parser.py class LayoutParser (line 19) | class LayoutParser: method __init__ (line 22) | def __init__(self, translation_config: TranslationConfig): method _save_debug_image (line 26) | def _save_debug_image(self, image: np.ndarray, layout, page_number: int): method _save_debug_box_to_page (line 61) | def _save_debug_box_to_page(self, page: il_version_1.Page): method process (line 119) | def process(self, docs: il_version_1.Document, mupdf_doc: Document): method generate_fallback_line_layout_for_page (line 178) | def generate_fallback_line_layout_for_page(self, page: il_version_1.Pa... FILE: babeldoc/format/pdf/document_il/midend/paragraph_finder.py function generate_base58_id (line 46) | def generate_base58_id(length: int = 5) -> str: class ParagraphFinder (line 51) | class ParagraphFinder: method __init__ (line 56) | def __init__(self, translation_config: TranslationConfig): method _preprocess_formula_layouts (line 60) | def _preprocess_formula_layouts(self, page: Page): method add_text_fill_background (line 89) | def add_text_fill_background(self, page: Page): method update_paragraph_data (line 124) | def update_paragraph_data(self, paragraph: PdfParagraph, update_unicod... method update_line_data (line 172) | def update_line_data(self, line: PdfLine): method add_debug_info (line 179) | def add_debug_info(self, page: Page): method process (line 196) | def process(self, document): method check_cid_paragraph (line 217) | def check_cid_paragraph(self, doc: Document): method bbox_overlap (line 227) | def bbox_overlap(self, bbox1: Box, bbox2: Box) -> bool: method process_page (line 235) | def process_page(self, page: Page): method _set_paragraph_render_order (line 312) | def _set_paragraph_render_order(self, page: Page): method is_isolated_formula (line 349) | def is_isolated_formula(self, char: PdfCharacter): method _paragraph_text_ascii (line 357) | def _paragraph_text_ascii(self, p: PdfParagraph) -> str: method _is_ascii_digit_or_space_paragraph (line 368) | def _is_ascii_digit_or_space_paragraph(self, p: PdfParagraph) -> bool: method _same_layout_and_xobj (line 383) | def _same_layout_and_xobj(a: PdfParagraph, c: PdfParagraph) -> bool: method merge_alternating_line_number_paragraphs (line 393) | def merge_alternating_line_number_paragraphs(self, paragraphs: list[Pd... method _group_characters_into_paragraphs (line 420) | def _group_characters_into_paragraphs( method _merge_overlapping_clusters (line 514) | def _merge_overlapping_clusters( method _get_effective_y_bounds (line 600) | def _get_effective_y_bounds(self, char: PdfCharacter) -> tuple[float, ... method _compute_collision_counts_histogram (line 616) | def _compute_collision_counts_histogram( method _split_paragraph_into_lines (line 652) | def _split_paragraph_into_lines( method process_paragraph_spacing (line 779) | def process_paragraph_spacing(self, paragraph: PdfParagraph): method create_line (line 815) | def create_line(self, chars: list[PdfCharacter]) -> PdfParagraphCompos... method calculate_median_line_width (line 822) | def calculate_median_line_width(self, paragraphs: list[PdfParagraph]) ... method process_independent_paragraphs (line 841) | def process_independent_paragraphs( method is_bbox_contain_in_vertical (line 931) | def is_bbox_contain_in_vertical(bbox1: Box, bbox2: Box) -> bool: method fix_overlapping_paragraphs (line 939) | def fix_overlapping_paragraphs(self, page: Page): method _sort_characters_in_lines (line 1032) | def _sort_characters_in_lines(self, page: Page): method _get_char_sort_key (line 1040) | def _get_char_sort_key(self, char: PdfCharacter): FILE: babeldoc/format/pdf/document_il/midend/remove_descent.py class RemoveDescent (line 11) | class RemoveDescent: method __init__ (line 14) | def __init__(self, translation_config: TranslationConfig): method _remove_char_descent (line 17) | def _remove_char_descent( method process (line 50) | def process(self, document: il_version_1.Document): method process_page (line 65) | def process_page(self, page: il_version_1.Page): FILE: babeldoc/format/pdf/document_il/midend/styles_and_formulas.py class StylesAndFormulas (line 44) | class StylesAndFormulas: method __init__ (line 47) | def __init__(self, translation_config: TranslationConfig): method update_formula_data (line 51) | def update_formula_data(self, formula: PdfFormula): method process (line 54) | def process(self, document: Document): method update_all_formula_data (line 64) | def update_all_formula_data(self, page: Page): method _calculate_element_formula_iou (line 70) | def _calculate_element_formula_iou( method _is_element_contained_exact (line 96) | def _is_element_contained_exact( method _calculate_element_formula_distance (line 119) | def _calculate_element_formula_distance( method _collect_element_formula_candidates (line 159) | def _collect_element_formula_candidates( method _resolve_assignment_conflicts (line 257) | def _resolve_assignment_conflicts( method collect_contained_elements (line 325) | def collect_contained_elements(self, page: Page): method process_page (line 362) | def process_page(self, page: Page): method update_line_data (line 384) | def update_line_data(self, line: PdfLine): method _classify_characters_in_composition (line 391) | def _classify_characters_in_composition( method _group_classified_characters (line 525) | def _group_classified_characters( method process_page_formulas (line 568) | def process_page_formulas(self, page: Page): method process_translatable_formulas (line 621) | def process_translatable_formulas(self, page: Page): method process_page_styles (line 650) | def process_page_styles(self, page: Page): method _calculate_base_style (line 710) | def _calculate_base_style(self, paragraph) -> PdfStyle: method _get_mode_value (line 738) | def _get_mode_value(self, values): method _merge_styles (line 747) | def _merge_styles(self, style1, style2): method _merge_graphic_states (line 767) | def _merge_graphic_states(self, state1, state2): method _create_same_style_composition (line 783) | def _create_same_style_composition( method process_page_offsets (line 807) | def process_page_offsets(self, page: Page): method calculate_line_spacing (line 905) | def calculate_line_spacing(self, paragraph) -> float: method create_composition (line 933) | def create_composition( method is_translatable_formula (line 950) | def is_translatable_formula(self, formula: PdfFormula) -> bool: method should_split_formula (line 960) | def should_split_formula(self, formula: PdfFormula) -> bool: method split_formula_by_comma (line 974) | def split_formula_by_comma( method merge_formulas (line 1010) | def merge_formulas(self, formula1: PdfFormula, formula2: PdfFormula) -... method is_x_axis_contained (line 1023) | def is_x_axis_contained(self, box1: Box, box2: Box) -> bool: method has_y_intersection (line 1029) | def has_y_intersection(self, box1: Box, box2: Box) -> bool: method is_x_axis_adjacent (line 1034) | def is_x_axis_adjacent(self, box1: Box, box2: Box, tolerance: float = ... method calculate_y_iou (line 1046) | def calculate_y_iou(self, box1: Box, box2: Box) -> float: method merge_overlapping_formulas (line 1064) | def merge_overlapping_formulas(self, page: Page): method _have_same_layout_ids (line 1156) | def _have_same_layout_ids( method process_comma_formulas (line 1185) | def process_comma_formulas(self, page: Page): method remove_non_formula_lines_from_paragraphs (line 1225) | def remove_non_formula_lines_from_paragraphs(self, page: Page): FILE: babeldoc/format/pdf/document_il/midend/table_parser.py class TableParser (line 16) | class TableParser: method __init__ (line 19) | def __init__(self, translation_config: TranslationConfig): method _save_debug_image (line 23) | def _save_debug_image(self, image: np.ndarray, layouts, page_number: i... method _save_debug_box_to_page (line 62) | def _save_debug_box_to_page(self, page: il_version_1.Page): method process (line 116) | def process(self, docs: il_version_1.Document, mupdf_doc: Document): FILE: babeldoc/format/pdf/document_il/midend/typesetting.py class TypesettingUnit (line 90) | class TypesettingUnit: method __str__ (line 91) | def __str__(self): method __init__ (line 94) | def __init__( method try_resue_cache (line 153) | def try_resue_cache(self, old_tu: TypesettingUnit): method try_get_unicode (line 179) | def try_get_unicode(self) -> str | None: method mixed_character_blacklist (line 188) | def mixed_character_blacklist(self): method calc_mixed_character_blacklist (line 194) | def calc_mixed_character_blacklist(self): method can_break_line (line 207) | def can_break_line(self): method calc_can_break_line (line 213) | def calc_can_break_line(self): method is_cjk_char (line 222) | def is_cjk_char(self): method calc_is_cjk_char (line 228) | def calc_is_cjk_char(self): method is_space (line 300) | def is_space(self): method calc_is_space (line 306) | def calc_is_space(self): method is_hung_punctuation (line 313) | def is_hung_punctuation(self): method calc_is_hung_punctuation (line 319) | def calc_is_hung_punctuation(self): method is_cannot_appear_in_line_end_punctuation (line 379) | def is_cannot_appear_in_line_end_punctuation(self): method calc_is_cannot_appear_in_line_end_punctuation (line 387) | def calc_is_cannot_appear_in_line_end_punctuation(self): method passthrough (line 413) | def passthrough( method can_passthrough (line 430) | def can_passthrough(self): method calc_can_passthrough (line 436) | def calc_can_passthrough(self): method calculate_box (line 439) | def calculate_box(self): method box (line 462) | def box(self): method width (line 469) | def width(self): method calc_width (line 475) | def calc_width(self): method height (line 480) | def height(self): method calc_height (line 486) | def calc_height(self): method relocate (line 490) | def relocate( method _transform_curve_for_relocation (line 657) | def _transform_curve_for_relocation( method _transform_form_for_relocation (line 716) | def _transform_form_for_relocation( method render (line 767) | def render( class Typesetting (line 824) | class Typesetting: method __init__ (line 827) | def __init__(self, translation_config: TranslationConfig): method preprocess_document (line 843) | def preprocess_document(self, document: il_version_1.Document, pbar): method _find_optimal_scale_and_layout (line 919) | def _find_optimal_scale_and_layout( method _get_optimal_scale (line 1056) | def _get_optimal_scale( method retypeset_with_precomputed_scale (line 1074) | def retypeset_with_precomputed_scale( method typesetting_document (line 1096) | def typesetting_document(self, document: il_version_1.Document): method render_page (line 1115) | def render_page(self, page: il_version_1.Page): method add_watermark (line 1198) | def add_watermark(self, page: il_version_1.Page): method render_paragraph (line 1232) | def render_paragraph( method _get_width_before_next_break_point (line 1263) | def _get_width_before_next_break_point( method _layout_typesetting_units (line 1278) | def _layout_typesetting_units( method create_typesetting_units (line 1436) | def create_typesetting_units( method create_passthrough_composition (line 1535) | def create_passthrough_composition( method get_max_right_space (line 1560) | def get_max_right_space(self, current_box: Box, page) -> float: method get_max_bottom_space (line 1596) | def get_max_bottom_space(self, current_box: Box, page: il_version_1.Pa... method _update_paragraph_render_order (line 1632) | def _update_paragraph_render_order(self, paragraph: il_version_1.PdfPa... FILE: babeldoc/format/pdf/document_il/utils/extract_char.py function parse_pdf (line 56) | def parse_pdf(pdf_path, page_ranges=None) -> il_version_1.Document: class Line (line 90) | class Line: method __init__ (line 91) | def __init__(self, chars: list[tuple[il_version_1.Box, str, bool]]): function _recalculate_line_text_with_spacing (line 96) | def _recalculate_line_text_with_spacing(line, orientation): function extract_paragraph_line (line 146) | def extract_paragraph_line( function convert_page_to_char_boxes (line 158) | def convert_page_to_char_boxes( function _cluster_by_axis (line 167) | def _cluster_by_axis(chars: list[tuple[il_version_1.Box, str, bool]], or... function _merge_lines_on_page (line 355) | def _merge_lines_on_page(page_lines: list[Line]) -> list[Line]: function process_page_chars_to_lines (line 573) | def process_page_chars_to_lines( function process_page_chars_to_lines_internal (line 582) | def process_page_chars_to_lines_internal( function cluster_chars_to_lines (line 621) | def cluster_chars_to_lines( function draw_clustered_lines_to_image (line 635) | def draw_clustered_lines_to_image(pdf_path, clustered_lines: dict[int, l... function main (line 729) | def main(): FILE: babeldoc/format/pdf/document_il/utils/fontmap.py class PrimaryFontFamily (line 17) | class PrimaryFontFamily(enum.IntEnum): method from_str (line 24) | def from_str(cls, value: str): class FontMapper (line 35) | class FontMapper: method __init__ (line 38) | def __init__(self, translation_config: TranslationConfig): method has_char (line 119) | def has_char(self, char_unicode: str): method map_in_type (line 128) | def map_in_type( method map (line 154) | def map(self, original_font: PdfFont, char_unicode: str): method get_used_font_ids (line 215) | def get_used_font_ids(self, il: il_version_1.Document) -> set[str]: method add_font (line 228) | def add_font(self, doc_zh: pymupdf.Document, il: il_version_1.Document): FILE: babeldoc/format/pdf/document_il/utils/formular_helper.py function is_formulas_start_char (line 16) | def is_formulas_start_char( function is_formulas_middle_char (line 54) | def is_formulas_middle_char( function collect_page_formula_font_ids (line 68) | def collect_page_formula_font_ids( function is_formulas_font (line 111) | def is_formulas_font(font_name: str, formular_font_pattern: str | None) ... function update_formula_data (line 312) | def update_formula_data(formula: PdfFormula): FILE: babeldoc/format/pdf/document_il/utils/layout_helper.py function is_bullet_point (line 55) | def is_bullet_point(char: PdfCharacter) -> bool: function calculate_box_iou (line 68) | def calculate_box_iou(box1: Box, box2: Box) -> float: function formular_height_ignore_char (line 108) | def formular_height_ignore_char(char: PdfCharacter): function box_to_tuple (line 115) | def box_to_tuple(box: Box) -> tuple[float, float, float, float]: class Layout (line 122) | class Layout: method __init__ (line 123) | def __init__(self, layout_id, name): method is_newline (line 128) | def is_newline(prev_char: PdfCharacter, curr_char: PdfCharacter) -> bool: function get_paragraph_length_except (line 159) | def get_paragraph_length_except( function get_paragraph_unicode (line 200) | def get_paragraph_unicode(paragraph: PdfParagraph) -> str: function get_char_unicode_string (line 226) | def get_char_unicode_string(chars: list[PdfCharacter | str]) -> str: function get_paragraph_max_height (line 296) | def get_paragraph_max_height(paragraph: PdfParagraph) -> float: function is_same_style (line 344) | def is_same_style(style1, style2) -> bool: function is_same_style_except_size (line 356) | def is_same_style_except_size(style1, style2) -> bool: function is_same_style_except_font (line 368) | def is_same_style_except_font(style1, style2) -> bool: function is_same_graphic_state (line 378) | def is_same_graphic_state(state1: GraphicState, state2: GraphicState) ->... function add_space_dummy_chars (line 389) | def add_space_dummy_chars(paragraph: PdfParagraph) -> None: function _get_first_char_from_composition (line 458) | def _get_first_char_from_composition( function _get_last_char_from_composition (line 475) | def _get_last_char_from_composition( function _add_space_dummy_chars_to_list (line 492) | def _add_space_dummy_chars_to_list(chars: list[PdfCharacter]) -> None: function build_layout_index (line 553) | def build_layout_index(page): function calculate_iou_for_boxes (line 566) | def calculate_iou_for_boxes(box1: Box, box2: Box) -> float: function calculate_y_iou_for_boxes (line 589) | def calculate_y_iou_for_boxes(box1: Box, box2: Box) -> float: function calculate_y_true_iou_for_boxes (line 618) | def calculate_y_true_iou_for_boxes(box1: Box, box2: Box) -> float: function get_character_layout (line 650) | def get_character_layout( function is_text_layout (line 801) | def is_text_layout(layout: Layout): function is_character_in_formula_layout (line 852) | def is_character_in_formula_layout( function is_curve_in_figure_table_layout (line 883) | def is_curve_in_figure_table_layout( function is_curve_overlapping_with_paragraphs (line 932) | def is_curve_overlapping_with_paragraphs( function get_paragraph_bounding_box (line 958) | def get_paragraph_bounding_box(paragraph) -> Box | None: FILE: babeldoc/format/pdf/document_il/utils/matrix_helper.py function decompose_ctm (line 22) | def decompose_ctm(m: Matrix | PdfMatrix) -> PdfAffineTransform: function compose_ctm (line 125) | def compose_ctm(transform: PdfAffineTransform) -> Matrix: function scale_and_set_translation (line 172) | def scale_and_set_translation( function create_translation_and_scale_matrix (line 224) | def create_translation_and_scale_matrix( function multiply_matrices (line 248) | def multiply_matrices(m1: Matrix | PdfMatrix, m2: Matrix | PdfMatrix) ->... function apply_transform_to_ctm (line 287) | def apply_transform_to_ctm( function matrix_to_bytes (line 329) | def matrix_to_bytes(m: Matrix | PdfMatrix) -> bytes: FILE: babeldoc/format/pdf/document_il/utils/mupdf_helper.py function get_no_rotation_img (line 7) | def get_no_rotation_img(page: pymupdf.Page, dpi: int = 72) -> pymupdf.Pi... function get_no_rotation_img_multiprocess_internal (line 16) | def get_no_rotation_img_multiprocess_internal( function get_no_rotation_img_multiprocess (line 36) | def get_no_rotation_img_multiprocess(pdf_bytes: str, pagenum: int, dpi: ... FILE: babeldoc/format/pdf/document_il/utils/paragraph_helper.py function is_cid_paragraph (line 9) | def is_cid_paragraph(paragraph: il_version_1.PdfParagraph): function is_pure_numeric_paragraph (line 42) | def is_pure_numeric_paragraph(paragraph) -> bool: function is_placeholder_only_paragraph (line 55) | def is_placeholder_only_paragraph(paragraph: il_version_1.PdfParagraph) ... FILE: babeldoc/format/pdf/document_il/utils/spatial_analyzer.py function is_element_contained_in_formula (line 20) | def is_element_contained_in_formula( function find_contained_curves (line 53) | def find_contained_curves( function find_contained_forms (line 81) | def find_contained_forms( function find_all_contained_elements (line 109) | def find_all_contained_elements( function calculate_translation_and_scale (line 128) | def calculate_translation_and_scale( FILE: babeldoc/format/pdf/document_il/utils/style_helper.py function create_pdf_style (line 4) | def create_pdf_style(r, g, b, font_id="base", font_size=6): FILE: babeldoc/format/pdf/document_il/utils/zstd_helper.py function zstd_compress (line 6) | def zstd_compress(data) -> str: function zstd_decompress (line 15) | def zstd_decompress(data) -> str: FILE: babeldoc/format/pdf/document_il/xml_converter.py class XMLConverter (line 13) | class XMLConverter: method __init__ (line 14) | def __init__(self): method write_xml (line 20) | def write_xml(self, document: il_version_1.Document, path: str): method read_xml (line 24) | def read_xml(self, path: str) -> il_version_1.Document: method to_xml (line 28) | def to_xml(self, document: il_version_1.Document) -> str: method from_xml (line 31) | def from_xml(self, xml: str) -> il_version_1.Document: method deepcopy (line 37) | def deepcopy(self, document: il_version_1.Document) -> il_version_1.Do... method to_json (line 41) | def to_json(self, document: il_version_1.Document) -> str: method write_json (line 49) | def write_json(self, document: il_version_1.Document, path: str): FILE: babeldoc/format/pdf/high_level.py function safe_save (line 97) | def safe_save(doc, *args, **kwargs): function check_metadata (line 106) | def check_metadata(pdf: Document): function add_metadata (line 121) | def add_metadata( function fix_cmap (line 165) | def fix_cmap(translate_result: TranslateResult, translate_config: Transl... function verify_file_hash (line 185) | def verify_file_hash(file_path: str, expected_hash: str) -> bool: function translator_supports_llm (line 195) | def translator_supports_llm(translator) -> bool: function start_parse_il (line 208) | def start_parse_il( function translate (line 326) | def translate(translation_config: TranslationConfig) -> TranslateResult: function get_translation_stage (line 331) | def get_translation_stage( function async_translate (line 366) | async def async_translate(translation_config: TranslationConfig): class MemoryMonitor (line 446) | class MemoryMonitor: method __init__ (line 449) | def __init__(self, interval=0.1): method __enter__ (line 461) | def __enter__(self): method __exit__ (line 471) | def __exit__(self, exc_type, exc_val, exc_tb): method _monitor_memory_usage (line 480) | def _monitor_memory_usage(self): method get_peek_memory_psutil (line 503) | def get_peek_memory_psutil(self): function fix_null_page_content (line 508) | def fix_null_page_content(doc: Document) -> list[int]: function fix_null_xref (line 520) | def fix_null_xref(doc: Document) -> None: function fix_filter (line 543) | def fix_filter(doc): function update_page_bbox (line 589) | def update_page_bbox(doc, page, box, key): function do_translate (line 594) | def do_translate( function migrate_toc (line 803) | def migrate_toc( function fix_media_box (line 862) | def fix_media_box(doc: Document) -> None: function check_cid_char (line 890) | def check_cid_char(il: il_version_1.Document): function _do_translate_single (line 903) | def _do_translate_single( function generate_first_page_with_watermark (line 1146) | def generate_first_page_with_watermark( function merge_watermark_doc (line 1197) | def merge_watermark_doc( function download_font_assets (line 1232) | def download_font_assets(): function create_cache_folder (line 1236) | def create_cache_folder(): function init (line 1248) | def init(): FILE: babeldoc/format/pdf/pdfinterp.py function safe_float (line 48) | def safe_float(o: Any) -> float | None: class PDFContentParserEx (line 55) | class PDFContentParserEx(PDFContentParser): method __init__ (line 56) | def __init__(self, streams: Sequence[object]) -> None: method do_keyword (line 59) | def do_keyword(self, pos: int, token: PSKeyword) -> None: class PDFPageInterpreterEx (line 91) | class PDFPageInterpreterEx(PDFPageInterpreter): method __init__ (line 97) | def __init__( method dup (line 109) | def dup(self) -> "PDFPageInterpreterEx": method init_resources (line 117) | def init_resources(self, resources: dict[object, object]) -> None: method do_CS (line 170) | def do_CS(self, name: PDFStackT) -> None: method do_cs (line 183) | def do_cs(self, name: PDFStackT) -> None: method do_SCN (line 195) | def do_SCN(self) -> None: method do_scn (line 209) | def do_scn(self) -> None: method do_SC (line 223) | def do_SC(self) -> None: method do_sc (line 230) | def do_sc(self) -> None: method do_Do (line 239) | def do_Do(self, xobjid_arg: PDFStackT) -> None: method do_W (line 346) | def do_W(self) -> None: method do_W_a (line 350) | def do_W_a(self) -> None: method handle_w (line 354) | def handle_w(self, evenodd: bool): method process_page (line 358) | def process_page(self, page: PDFPage) -> None: method render_contents (line 394) | def render_contents( method do_q (line 415) | def do_q(self) -> None: method do_Q (line 421) | def do_Q(self) -> None: method do_TJ (line 428) | def do_TJ(self, seq: PDFStackT) -> None: method do_d (line 446) | def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None: method do_BI (line 451) | def do_BI(self) -> None: method do_ID (line 455) | def do_ID(self) -> None: method do_EI (line 459) | def do_EI(self, obj: PDFStackT) -> None: method execute (line 466) | def execute(self, streams: Sequence[object]) -> None: FILE: babeldoc/format/pdf/result_merger.py class ResultMerger (line 13) | class ResultMerger: method __init__ (line 16) | def __init__(self, translation_config: TranslationConfig): method merge_results (line 19) | def merge_results( method _merge_pdfs (line 173) | def _merge_pdfs( FILE: babeldoc/format/pdf/split_manager.py class SplitPoint (line 8) | class SplitPoint: class BaseSplitStrategy (line 17) | class BaseSplitStrategy: method determine_split_points (line 20) | def determine_split_points(self, config) -> list[SplitPoint]: class PageCountStrategy (line 24) | class PageCountStrategy(BaseSplitStrategy): method __init__ (line 27) | def __init__(self, max_pages_per_part: int = 20): method determine_split_points (line 30) | def determine_split_points(self, config) -> list[SplitPoint]: class SplitManager (line 52) | class SplitManager: method __init__ (line 55) | def __init__(self, config=None): method determine_split_points (line 58) | def determine_split_points(self, config) -> list[SplitPoint]: method estimate_part_complexity (line 62) | def estimate_part_complexity(self, split_point: SplitPoint) -> float: FILE: babeldoc/format/pdf/translation_config.py class WatermarkOutputMode (line 20) | class WatermarkOutputMode(enum.Enum): class SharedContextCrossSplitPart (line 26) | class SharedContextCrossSplitPart: method __init__ (line 27) | def __init__(self): method initialize_glossaries (line 39) | def initialize_glossaries(self, initial_glossaries: list[Glossary] | N... method add_raw_extracted_term_pair (line 55) | def add_raw_extracted_term_pair(self, src: str, tgt: str): method _generate_unique_auto_glossary_name (line 59) | def _generate_unique_auto_glossary_name(self) -> str: method contains_term (line 75) | def contains_term(self, term: str) -> bool: method finalize_auto_extracted_glossary (line 82) | def finalize_auto_extracted_glossary(self): method get_glossaries (line 106) | def get_glossaries(self) -> list[Glossary]: method get_glossaries_for_translation (line 113) | def get_glossaries_for_translation( method add_valid_counts (line 125) | def add_valid_counts(self, char_count: int, token_count: int): class TranslationConfig (line 136) | class TranslationConfig: method create_max_pages_per_part_split_strategy (line 138) | def create_max_pages_per_part_split_strategy(max_pages_per_part: int): method __init__ (line 143) | def __init__( method parse_pages (line 363) | def parse_pages(self, pages_str: str | None) -> list[tuple[int, int]] ... method should_translate_page (line 388) | def should_translate_page(self, page_number: int) -> bool: method get_output_file_path (line 405) | def get_output_file_path(self, filename: str) -> Path: method get_working_file_path (line 408) | def get_working_file_path(self, filename: str) -> Path: method get_part_working_dir (line 411) | def get_part_working_dir(self, part_index: int) -> Path: method get_part_output_dir (line 422) | def get_part_output_dir(self, part_index: int) -> Path: method cleanup_part_output_dir (line 430) | def cleanup_part_output_dir(self, part_index: int): method cleanup_part_working_dir (line 438) | def cleanup_part_working_dir(self, part_index: int): method cleanup_temp_files (line 446) | def cleanup_temp_files(self): method raise_if_cancelled (line 457) | def raise_if_cancelled(self): method cancel_translation (line 461) | def cancel_translation(self): method get_term_extraction_translator (line 465) | def get_term_extraction_translator(self) -> BaseTranslator: method record_term_extraction_usage (line 469) | def record_term_extraction_usage( class TranslateResult (line 489) | class TranslateResult: method __init__ (line 501) | def __init__( method __str__ (line 519) | def __str__(self): FILE: babeldoc/glossary.py class GlossaryEntry (line 16) | class GlossaryEntry: method __init__ (line 17) | def __init__(self, source: str, target: str, target_language: str | No... method __repr__ (line 22) | def __repr__(self): function batched (line 26) | def batched(iterable, n, *, strict=False): class Glossary (line 40) | class Glossary: method __init__ (line 41) | def __init__(self, name: str, entries: list[GlossaryEntry]): method normalize_source (line 60) | def normalize_source(source_term: str) -> str: method _build_regex_and_lookup (line 68) | def _build_regex_and_lookup(self): method from_csv (line 124) | def from_csv(cls, file_path: Path, target_lang_out: str) -> "Glossary": method to_csv (line 172) | def to_csv(self) -> str: method __repr__ (line 190) | def __repr__(self): method get_active_entries_for_text (line 193) | def get_active_entries_for_text(self, text: str) -> list[tuple[str, st... FILE: babeldoc/main.py function create_parser (line 32) | def create_parser(): function main (line 461) | async def main(): function create_progress_handler (line 786) | def create_progress_handler( function create_cache_folder (line 869) | def create_cache_folder(): function download_font_assets (line 874) | def download_font_assets(): class EvictQueue (line 878) | class EvictQueue(queue.Queue): method __init__ (line 879) | def __init__(self, maxsize): method put (line 883) | def put(self, item, block=False, timeout=None): function speed_up_logs (line 896) | def speed_up_logs(): function cli (line 907) | def cli(): FILE: babeldoc/pdfminer/_saslprep.py function saslprep (line 46) | def saslprep(data: str, prohibit_unassigned_code_points: bool = True) ->... FILE: babeldoc/pdfminer/arcfour.py class Arcfour (line 10) | class Arcfour: method __init__ (line 11) | def __init__(self, key: Sequence[int]) -> None: method process (line 22) | def process(self, data: bytes) -> bytes: FILE: babeldoc/pdfminer/ascii85.py function ascii85decode (line 11) | def ascii85decode(data: bytes) -> bytes: function asciihexdecode (line 33) | def asciihexdecode(data: bytes) -> bytes: FILE: babeldoc/pdfminer/casting.py function safe_int (line 11) | def safe_int(o: Any) -> int | None: function safe_float (line 18) | def safe_float(o: Any) -> float | None: function safe_matrix (line 25) | def safe_matrix(a: Any, b: Any, c: Any, d: Any, e: Any, f: Any) -> Matri... function safe_rgb (line 46) | def safe_rgb(r: Any, g: Any, b: Any) -> tuple[float, float, float] | None: function safe_cmyk (line 50) | def safe_cmyk( function safe_rect_list (line 56) | def safe_rect_list(value: Any) -> Rect | None: function safe_rect (line 68) | def safe_rect(a: Any, b: Any, c: Any, d: Any) -> Rect | None: function _safe_float_triple (line 72) | def _safe_float_triple(a: Any, b: Any, c: Any) -> _FloatTriple | None: function _safe_float_quadruple (line 83) | def _safe_float_quadruple(a: Any, b: Any, c: Any, d: Any) -> _FloatQuadr... FILE: babeldoc/pdfminer/ccitt.py function get_bytes (line 26) | def get_bytes(data: bytes) -> Iterator[int]: class BitParser (line 36) | class BitParser: method __init__ (line 43) | def __init__(self) -> None: method add (line 47) | def add(cls, root: BitParserState, v: int | str, bits: str) -> None: method feedbytes (line 63) | def feedbytes(self, data: bytes) -> None: method _parse_bit (line 68) | def _parse_bit(self, x: object) -> None: class CCITTG4Parser (line 81) | class CCITTG4Parser(BitParser): class CCITTException (line 330) | class CCITTException(PDFException): class EOFB (line 333) | class EOFB(CCITTException): class InvalidData (line 336) | class InvalidData(CCITTException): class ByteSkip (line 339) | class ByteSkip(CCITTException): method __init__ (line 344) | def __init__(self, width: int, bytealign: bool = False) -> None: method feedbytes (line 350) | def feedbytes(self, data: bytes) -> None: method _parse_mode (line 361) | def _parse_mode(self, mode: object) -> BitParserState: method _parse_horiz1 (line 385) | def _parse_horiz1(self, n: Any) -> BitParserState: method _parse_horiz2 (line 398) | def _parse_horiz2(self, n: Any) -> BitParserState: method _parse_uncompressed (line 413) | def _parse_uncompressed(self, bits: str | None) -> BitParserState: method _get_bits (line 425) | def _get_bits(self) -> str: method _get_refline (line 428) | def _get_refline(self, i: int) -> str: method reset (line 442) | def reset(self) -> None: method output_line (line 449) | def output_line(self, y: int, bits: Sequence[int]) -> None: method _reset_line (line 452) | def _reset_line(self) -> None: method _flush_line (line 458) | def _flush_line(self) -> None: method _do_vertical (line 466) | def _do_vertical(self, dx: int) -> None: method _do_pass (line 490) | def _do_pass(self) -> None: method _do_horizontal (line 516) | def _do_horizontal(self, n1: int, n2: int) -> None: method _do_uncompressed (line 532) | def _do_uncompressed(self, bits: str) -> None: class CCITTFaxDecoder (line 539) | class CCITTFaxDecoder(CCITTG4Parser): method __init__ (line 540) | def __init__( method close (line 550) | def close(self) -> bytes: method output_line (line 553) | def output_line(self, y: int, bits: Sequence[int]) -> None: function ccittfaxdecode (line 563) | def ccittfaxdecode(data: bytes, params: dict[str, object]) -> bytes: function main (line 577) | def main(argv: list[str]) -> None: FILE: babeldoc/pdfminer/cmapdb.py class CMapError (line 43) | class CMapError(PDFException): class CMapBase (line 47) | class CMapBase: method __init__ (line 50) | def __init__(self, **kwargs: object) -> None: method is_vertical (line 53) | def is_vertical(self) -> bool: method set_attr (line 56) | def set_attr(self, k: str, v: object) -> None: method add_code2cid (line 59) | def add_code2cid(self, code: str, cid: int) -> None: method add_cid2unichr (line 62) | def add_cid2unichr(self, cid: int, code: PSLiteral | bytes | int) -> N... method use_cmap (line 65) | def use_cmap(self, cmap: "CMapBase") -> None: method decode (line 68) | def decode(self, code: bytes) -> Iterable[int]: class CMap (line 72) | class CMap(CMapBase): method __init__ (line 73) | def __init__(self, **kwargs: str | int) -> None: method __repr__ (line 77) | def __repr__(self) -> str: method use_cmap (line 80) | def use_cmap(self, cmap: CMapBase) -> None: method decode (line 94) | def decode(self, code: bytes) -> Iterator[int]: method dump (line 108) | def dump( class IdentityCMap (line 125) | class IdentityCMap(CMapBase): method decode (line 126) | def decode(self, code: bytes) -> tuple[int, ...]: class IdentityCMapByte (line 134) | class IdentityCMapByte(IdentityCMap): method decode (line 135) | def decode(self, code: bytes) -> tuple[int, ...]: class UnicodeMap (line 143) | class UnicodeMap(CMapBase): method __init__ (line 144) | def __init__(self, **kwargs: str | int) -> None: method __repr__ (line 148) | def __repr__(self) -> str: method get_unichr (line 151) | def get_unichr(self, cid: int) -> str: method dump (line 155) | def dump(self, out: TextIO = sys.stdout) -> None: class IdentityUnicodeMap (line 160) | class IdentityUnicodeMap(UnicodeMap): method get_unichr (line 161) | def get_unichr(self, cid: int) -> str: class FileCMap (line 167) | class FileCMap(CMap): method add_code2cid (line 168) | def add_code2cid(self, code: str, cid: int) -> None: class FileUnicodeMap (line 185) | class FileUnicodeMap(UnicodeMap): method add_cid2unichr (line 186) | def add_cid2unichr(self, cid: int, code: PSLiteral | bytes | int) -> N... class PyCMap (line 206) | class PyCMap(CMap): method __init__ (line 207) | def __init__(self, name: str, module: Any) -> None: class PyUnicodeMap (line 214) | class PyUnicodeMap(UnicodeMap): method __init__ (line 215) | def __init__(self, name: str, module: Any, vertical: bool) -> None: class CMapDB (line 224) | class CMapDB: class CMapNotFound (line 228) | class CMapNotFound(CMapError): method _load_data (line 232) | def _load_data(cls, name: str) -> Any: method get_cmap (line 251) | def get_cmap(cls, name: str) -> CMapBase: method get_unicode_map (line 269) | def get_unicode_map(cls, name: str, vertical: bool = False) -> Unicode... class CMapParser (line 279) | class CMapParser(PSStackParser[PSKeyword]): method __init__ (line 280) | def __init__(self, cmap: CMapBase, fp: BinaryIO) -> None: method run (line 287) | def run(self) -> None: method do_keyword (line 310) | def do_keyword(self, pos: int, token: PSKeyword) -> None: method _warn_once (line 463) | def _warn_once(self, msg: str) -> None: FILE: babeldoc/pdfminer/converter.py class PDFLayoutAnalyzer (line 56) | class PDFLayoutAnalyzer(PDFTextDevice): method __init__ (line 60) | def __init__( method begin_page (line 71) | def begin_page(self, page: PDFPage, ctm: Matrix) -> None: method end_page (line 78) | def end_page(self, page: PDFPage) -> None: method begin_figure (line 86) | def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: method end_figure (line 90) | def end_figure(self, _: str) -> None: method render_image (line 96) | def render_image(self, name: str, stream: PDFStream) -> None: method paint_path (line 105) | def paint_path( method render_char (line 258) | def render_char( method handle_undefined_char (line 291) | def handle_undefined_char(self, font: PDFFont, cid: int) -> str: method receive_layout (line 295) | def receive_layout(self, ltpage: LTPage) -> None: class PDFPageAggregator (line 299) | class PDFPageAggregator(PDFLayoutAnalyzer): method __init__ (line 300) | def __init__( method receive_layout (line 309) | def receive_layout(self, ltpage: LTPage) -> None: method get_result (line 312) | def get_result(self) -> LTPage: class PDFConverter (line 321) | class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]): method __init__ (line 322) | def __init__( method _is_binary_stream (line 336) | def _is_binary_stream(outfp: AnyIO) -> bool: class TextConverter (line 351) | class TextConverter(PDFConverter[AnyIO]): method __init__ (line 352) | def __init__( method write_text (line 366) | def write_text(self, text: str) -> None: method receive_layout (line 373) | def receive_layout(self, ltpage: LTPage) -> None: method render_image (line 394) | def render_image(self, name: str, stream: PDFStream) -> None: method paint_path (line 398) | def paint_path( class HTMLConverter (line 409) | class HTMLConverter(PDFConverter[AnyIO]): method __init__ (line 424) | def __init__( method write (line 477) | def write(self, text: str) -> None: method write_header (line 483) | def write_header(self) -> None: method write_footer (line 495) | def write_footer(self) -> None: method write_text (line 503) | def write_text(self, text: str) -> None: method place_rect (line 506) | def place_rect( method place_border (line 531) | def place_border(self, color: str, borderwidth: int, item: LTComponent... method place_image (line 534) | def place_image( method place_text (line 559) | def place_text( method begin_div (line 583) | def begin_div( method end_div (line 611) | def end_div(self, color: str) -> None: method put_text (line 617) | def put_text(self, text: str, fontname: str, fontsize: float) -> None: method put_newline (line 631) | def put_newline(self) -> None: method receive_layout (line 634) | def receive_layout(self, ltpage: LTPage) -> None: method close (line 720) | def close(self) -> None: class XMLConverter (line 724) | class XMLConverter(PDFConverter[AnyIO]): method __init__ (line 727) | def __init__( method write (line 754) | def write(self, text: str) -> None: method write_header (line 760) | def write_header(self) -> None: method write_footer (line 767) | def write_footer(self) -> None: method write_text (line 770) | def write_text(self, text: str) -> None: method receive_layout (line 775) | def receive_layout(self, ltpage: LTPage) -> None: method close (line 882) | def close(self) -> None: class HOCRConverter (line 886) | class HOCRConverter(PDFConverter[AnyIO]): method __init__ (line 905) | def __init__( method bbox_repr (line 926) | def bbox_repr(self, bbox: Rect) -> str: method write (line 935) | def write(self, text: str) -> None: method write_header (line 942) | def write_header(self) -> None: method write_footer (line 967) | def write_footer(self) -> None: method write_text (line 973) | def write_text(self, text: str) -> None: method write_word (line 978) | def write_word(self) -> None: method receive_layout (line 1003) | def receive_layout(self, ltpage: LTPage) -> None: method close (line 1061) | def close(self) -> None: FILE: babeldoc/pdfminer/data_structures.py class NumberTree (line 12) | class NumberTree: method __init__ (line 18) | def __init__(self, obj: Any): method _parse (line 31) | def _parse(self) -> list[tuple[int, Any]]: method values (line 46) | def values(self) -> list[tuple[int, Any]]: FILE: babeldoc/pdfminer/encodingdb.py function name2unicode (line 16) | def name2unicode(name: str) -> str: function raise_key_error_for_invalid_unicode (line 72) | def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None: class EncodingDB (line 85) | class EncodingDB: method get_encoding (line 109) | def get_encoding( FILE: babeldoc/pdfminer/fontmetrics.py function convert_font_metrics (line 33) | def convert_font_metrics(path: str) -> None: FILE: babeldoc/pdfminer/glyphlist.py function convert_glyphlist (line 57) | def convert_glyphlist(path: str) -> None: FILE: babeldoc/pdfminer/high_level.py function extract_text_to_fp (line 31) | def extract_text_to_fp( function extract_text (line 153) | def extract_text( function extract_pages (line 196) | def extract_pages( FILE: babeldoc/pdfminer/image.py function align32 (line 29) | def align32(x: int) -> int: class BMPWriter (line 33) | class BMPWriter: method __init__ (line 34) | def __init__(self, fp: BinaryIO, bits: int, width: int, height: int) -... method write_line (line 88) | def write_line(self, y: int, data: bytes) -> None: class ImageWriter (line 93) | class ImageWriter: method __init__ (line 99) | def __init__(self, outdir: str) -> None: method export_image (line 104) | def export_image(self, image: LTImage) -> str: method _save_jpeg (line 142) | def _save_jpeg(self, image: LTImage) -> str: method _save_jpeg2000 (line 165) | def _save_jpeg2000(self, image: LTImage) -> str: method _save_jbig2 (line 185) | def _save_jbig2(self, image: LTImage) -> str: method _save_bmp (line 214) | def _save_bmp( method _save_bytes (line 233) | def _save_bytes(self, image: LTImage) -> str: method _save_raw (line 263) | def _save_raw(self, image: LTImage) -> str: method _is_jbig2_iamge (line 273) | def _is_jbig2_iamge(image: LTImage) -> bool: method _create_unique_image_name (line 280) | def _create_unique_image_name(self, image: LTImage, ext: str) -> tuple... FILE: babeldoc/pdfminer/jbig2.py function bit_set (line 43) | def bit_set(bit_pos: int, value: int) -> bool: function check_flag (line 47) | def check_flag(flag: int, value: int) -> bool: function masked_value (line 51) | def masked_value(mask: int, value: int) -> int: function mask_value (line 59) | def mask_value(mask: int, value: int) -> int: function unpack_int (line 67) | def unpack_int(format: str, buffer: bytes) -> int: class JBIG2StreamReader (line 81) | class JBIG2StreamReader: method __init__ (line 84) | def __init__(self, stream: BinaryIO) -> None: method get_segments (line 87) | def get_segments(self) -> list[JBIG2Segment]: method is_eof (line 107) | def is_eof(self) -> bool: method parse_flags (line 114) | def parse_flags( method parse_retention_flags (line 126) | def parse_retention_flags( method parse_page_assoc (line 171) | def parse_page_assoc(self, segment: JBIG2Segment, page: int, field: by... method parse_data_length (line 177) | def parse_data_length( class JBIG2StreamWriter (line 197) | class JBIG2StreamWriter: method __init__ (line 206) | def __init__(self, stream: BinaryIO) -> None: method write_segments (line 209) | def write_segments( method write_file (line 244) | def write_file( method encode_segment (line 277) | def encode_segment(self, segment: JBIG2Segment) -> bytes: method encode_flags (line 289) | def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment... method encode_retention_flags (line 307) | def encode_retention_flags( method encode_data_length (line 354) | def encode_data_length(self, value: int, segment: JBIG2Segment) -> bytes: method get_eop_segment (line 359) | def get_eop_segment(self, seg_number: int, page_number: int) -> JBIG2S... method get_eof_segment (line 369) | def get_eof_segment(self, seg_number: int) -> JBIG2Segment: FILE: babeldoc/pdfminer/layout.py class IndexAssigner (line 36) | class IndexAssigner: method __init__ (line 37) | def __init__(self, index: int = 0) -> None: method run (line 40) | def run(self, obj: "LTItem") -> None: class LAParams (line 49) | class LAParams: method __init__ (line 77) | def __init__( method _validate (line 97) | def _validate(self) -> None: method __repr__ (line 109) | def __repr__(self) -> str: class LTItem (line 117) | class LTItem: method analyze (line 120) | def analyze(self, laparams: LAParams) -> None: class LTText (line 124) | class LTText: method __repr__ (line 127) | def __repr__(self) -> str: method get_text (line 130) | def get_text(self) -> str: class LTComponent (line 135) | class LTComponent(LTItem): method __init__ (line 138) | def __init__(self, bbox: Rect) -> None: method __repr__ (line 142) | def __repr__(self) -> str: method __lt__ (line 146) | def __lt__(self, _: object) -> bool: method __le__ (line 149) | def __le__(self, _: object) -> bool: method __gt__ (line 152) | def __gt__(self, _: object) -> bool: method __ge__ (line 155) | def __ge__(self, _: object) -> bool: method set_bbox (line 158) | def set_bbox(self, bbox: Rect) -> None: method is_empty (line 168) | def is_empty(self) -> bool: method is_hoverlap (line 171) | def is_hoverlap(self, obj: "LTComponent") -> bool: method hdistance (line 175) | def hdistance(self, obj: "LTComponent") -> float: method hoverlap (line 182) | def hoverlap(self, obj: "LTComponent") -> float: method is_voverlap (line 189) | def is_voverlap(self, obj: "LTComponent") -> bool: method vdistance (line 193) | def vdistance(self, obj: "LTComponent") -> float: method voverlap (line 200) | def voverlap(self, obj: "LTComponent") -> float: class LTCurve (line 208) | class LTCurve(LTComponent): method __init__ (line 217) | def __init__( method get_pts (line 240) | def get_pts(self) -> str: class LTLine (line 244) | class LTLine(LTCurve): method __init__ (line 250) | def __init__( class LTRect (line 277) | class LTRect(LTCurve): method __init__ (line 283) | def __init__( class LTImage (line 310) | class LTImage(LTComponent): method __init__ (line 316) | def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None: method __repr__ (line 327) | def __repr__(self) -> str: class LTAnno (line 331) | class LTAnno(LTItem, LTText): method __init__ (line 339) | def __init__(self, text: str) -> None: method get_text (line 342) | def get_text(self) -> str: class LTChar (line 346) | class LTChar(LTComponent, LTText): method __init__ (line 349) | def __init__( method __repr__ (line 400) | def __repr__(self) -> str: method get_text (line 403) | def get_text(self) -> str: class LTContainer (line 410) | class LTContainer(LTComponent, Generic[LTItemT]): method __init__ (line 413) | def __init__(self, bbox: Rect) -> None: method __iter__ (line 417) | def __iter__(self) -> Iterator[LTItemT]: method __len__ (line 420) | def __len__(self) -> int: method add (line 423) | def add(self, obj: LTItemT) -> None: method extend (line 426) | def extend(self, objs: Iterable[LTItemT]) -> None: method analyze (line 430) | def analyze(self, laparams: LAParams) -> None: class LTExpandableContainer (line 435) | class LTExpandableContainer(LTContainer[LTItemT]): method __init__ (line 436) | def __init__(self) -> None: method add (line 441) | def add(self, obj: LTComponent) -> None: # type: ignore[override] class LTTextContainer (line 453) | class LTTextContainer(LTExpandableContainer[LTItemT], LTText): method __init__ (line 454) | def __init__(self) -> None: method get_text (line 458) | def get_text(self) -> str: class LTTextLine (line 467) | class LTTextLine(LTTextContainer[TextLineElement]): method __init__ (line 474) | def __init__(self, word_margin: float) -> None: method __repr__ (line 478) | def __repr__(self) -> str: method analyze (line 481) | def analyze(self, laparams: LAParams) -> None: method find_neighbors (line 486) | def find_neighbors( method is_empty (line 493) | def is_empty(self) -> bool: class LTTextLineHorizontal (line 497) | class LTTextLineHorizontal(LTTextLine): method __init__ (line 498) | def __init__(self, word_margin: float) -> None: method add (line 504) | def add(self, obj: LTComponent) -> None: # type: ignore[override] method find_neighbors (line 512) | def find_neighbors( method _is_left_aligned_with (line 540) | def _is_left_aligned_with(self, other: LTComponent, tolerance: float =... method _is_right_aligned_with (line 544) | def _is_right_aligned_with(self, other: LTComponent, tolerance: float ... method _is_centrally_aligned_with (line 548) | def _is_centrally_aligned_with( method _is_same_height_as (line 556) | def _is_same_height_as(self, other: LTComponent, tolerance: float = 0)... class LTTextLineVertical (line 560) | class LTTextLineVertical(LTTextLine): method __init__ (line 561) | def __init__(self, word_margin: float) -> None: method add (line 567) | def add(self, obj: LTComponent) -> None: # type: ignore[override] method find_neighbors (line 575) | def find_neighbors( method _is_lower_aligned_with (line 603) | def _is_lower_aligned_with(self, other: LTComponent, tolerance: float ... method _is_upper_aligned_with (line 607) | def _is_upper_aligned_with(self, other: LTComponent, tolerance: float ... method _is_centrally_aligned_with (line 611) | def _is_centrally_aligned_with( method _is_same_width_as (line 619) | def _is_same_width_as(self, other: LTComponent, tolerance: float) -> b... class LTTextBox (line 623) | class LTTextBox(LTTextContainer[LTTextLine]): method __init__ (line 631) | def __init__(self) -> None: method __repr__ (line 635) | def __repr__(self) -> str: method get_writing_mode (line 638) | def get_writing_mode(self) -> str: class LTTextBoxHorizontal (line 642) | class LTTextBoxHorizontal(LTTextBox): method analyze (line 643) | def analyze(self, laparams: LAParams) -> None: method get_writing_mode (line 647) | def get_writing_mode(self) -> str: class LTTextBoxVertical (line 651) | class LTTextBoxVertical(LTTextBox): method analyze (line 652) | def analyze(self, laparams: LAParams) -> None: method get_writing_mode (line 656) | def get_writing_mode(self) -> str: class LTTextGroup (line 663) | class LTTextGroup(LTTextContainer[TextGroupElement]): method __init__ (line 664) | def __init__(self, objs: Iterable[TextGroupElement]) -> None: class LTTextGroupLRTB (line 669) | class LTTextGroupLRTB(LTTextGroup): method analyze (line 670) | def analyze(self, laparams: LAParams) -> None: class LTTextGroupTBRL (line 681) | class LTTextGroupTBRL(LTTextGroup): method analyze (line 682) | def analyze(self, laparams: LAParams) -> None: class LTLayoutContainer (line 693) | class LTLayoutContainer(LTContainer[LTComponent]): method __init__ (line 694) | def __init__(self, bbox: Rect) -> None: method group_objects (line 699) | def group_objects( method group_textlines (line 776) | def group_textlines( method group_textboxes (line 810) | def group_textboxes( method analyze (line 903) | def analyze(self, laparams: LAParams) -> None: class LTFigure (line 941) | class LTFigure(LTLayoutContainer): method __init__ (line 949) | def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None: method __repr__ (line 957) | def __repr__(self) -> str: method analyze (line 960) | def analyze(self, laparams: LAParams) -> None: class LTPage (line 966) | class LTPage(LTLayoutContainer): method __init__ (line 973) | def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None: method __repr__ (line 978) | def __repr__(self) -> str: FILE: babeldoc/pdfminer/lzw.py class CorruptDataError (line 13) | class CorruptDataError(PDFException): class LZWDecoder (line 17) | class LZWDecoder: method __init__ (line 18) | def __init__(self, fp: BinaryIO) -> None: method readbits (line 27) | def readbits(self, bits: int) -> int: method feed (line 52) | def feed(self, code: int) -> bytes: method run (line 83) | def run(self) -> Iterator[bytes]: function lzwdecode (line 105) | def lzwdecode(data: bytes) -> bytes: FILE: babeldoc/pdfminer/pdfcolor.py class PDFColorSpace (line 14) | class PDFColorSpace: method __init__ (line 15) | def __init__(self, name: str, ncomponents: int) -> None: method __repr__ (line 19) | def __repr__(self) -> str: FILE: babeldoc/pdfminer/pdfdevice.py class PDFDevice (line 33) | class PDFDevice: method __init__ (line 36) | def __init__(self, rsrcmgr: "PDFResourceManager") -> None: method __repr__ (line 40) | def __repr__(self) -> str: method __enter__ (line 43) | def __enter__(self) -> "PDFDevice": method __exit__ (line 46) | def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) ... method close (line 49) | def close(self) -> None: method set_ctm (line 52) | def set_ctm(self, ctm: Matrix) -> None: method begin_tag (line 55) | def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = Non... method end_tag (line 58) | def end_tag(self) -> None: method do_tag (line 61) | def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) ... method begin_page (line 64) | def begin_page(self, page: PDFPage, ctm: Matrix) -> None: method end_page (line 67) | def end_page(self, page: PDFPage) -> None: method begin_figure (line 70) | def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: method end_figure (line 73) | def end_figure(self, name: str) -> None: method paint_path (line 76) | def paint_path( method render_image (line 86) | def render_image(self, name: str, stream: PDFStream) -> None: method render_string (line 89) | def render_string( class PDFTextDevice (line 99) | class PDFTextDevice(PDFDevice): method render_string (line 100) | def render_string( method render_string_horizontal (line 151) | def render_string_horizontal( method render_string_vertical (line 195) | def render_string_vertical( method render_char (line 239) | def render_char( class TagExtractor (line 253) | class TagExtractor(PDFDevice): method __init__ (line 254) | def __init__( method render_string (line 266) | def render_string( method begin_page (line 290) | def begin_page(self, page: PDFPage, ctm: Matrix) -> None: method end_page (line 298) | def end_page(self, page: PDFPage) -> None: method begin_tag (line 302) | def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = Non... method end_tag (line 315) | def end_tag(self) -> None: method do_tag (line 321) | def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) ... method _write (line 325) | def _write(self, s: str) -> None: FILE: babeldoc/pdfminer/pdfdocument.py class PDFNoValidXRef (line 55) | class PDFNoValidXRef(PDFSyntaxError): class PDFNoValidXRefWarning (line 59) | class PDFNoValidXRefWarning(SyntaxWarning): class PDFNoOutlines (line 66) | class PDFNoOutlines(PDFException): class PDFNoPageLabels (line 70) | class PDFNoPageLabels(PDFException): class PDFDestinationNotFound (line 74) | class PDFDestinationNotFound(PDFException): class PDFEncryptionError (line 78) | class PDFEncryptionError(PDFException): class PDFPasswordIncorrect (line 82) | class PDFPasswordIncorrect(PDFEncryptionError): class PDFEncryptionWarning (line 86) | class PDFEncryptionWarning(UserWarning): class PDFTextExtractionNotAllowedWarning (line 93) | class PDFTextExtractionNotAllowedWarning(UserWarning): class PDFTextExtractionNotAllowed (line 100) | class PDFTextExtractionNotAllowed(PDFEncryptionError): class PDFBaseXRef (line 110) | class PDFBaseXRef: method get_trailer (line 111) | def get_trailer(self) -> dict[str, Any]: method get_objids (line 114) | def get_objids(self) -> Iterable[int]: method get_pos (line 120) | def get_pos(self, objid: int) -> tuple[int | None, int, int]: method load (line 123) | def load(self, parser: PDFParser) -> None: class PDFXRef (line 127) | class PDFXRef(PDFBaseXRef): method __init__ (line 128) | def __init__(self) -> None: method __repr__ (line 132) | def __repr__(self) -> str: method load (line 135) | def load(self, parser: PDFParser) -> None: method load_trailer (line 183) | def load_trailer(self, parser: PDFParser) -> None: method get_trailer (line 196) | def get_trailer(self) -> dict[str, Any]: method get_objids (line 199) | def get_objids(self) -> KeysView[int]: method get_pos (line 202) | def get_pos(self, objid: int) -> tuple[int | None, int, int]: class PDFXRefFallback (line 206) | class PDFXRefFallback(PDFXRef): method __repr__ (line 207) | def __repr__(self) -> str: method load (line 212) | def load(self, parser: PDFParser) -> None: class PDFXRefStream (line 257) | class PDFXRefStream(PDFBaseXRef): method __init__ (line 258) | def __init__(self) -> None: method __repr__ (line 266) | def __repr__(self) -> str: method load (line 269) | def load(self, parser: PDFParser) -> None: method get_trailer (line 294) | def get_trailer(self) -> dict[str, Any]: method get_objids (line 297) | def get_objids(self) -> Iterator[int]: method get_pos (line 308) | def get_pos(self, objid: int) -> tuple[int | None, int, int]: class PDFStandardSecurityHandler (line 335) | class PDFStandardSecurityHandler: method __init__ (line 341) | def __init__( method init (line 352) | def init(self) -> None: method init_params (line 359) | def init_params(self) -> None: method init_key (line 367) | def init_key(self) -> None: method is_printable (line 372) | def is_printable(self) -> bool: method is_modifiable (line 375) | def is_modifiable(self) -> bool: method is_extractable (line 378) | def is_extractable(self) -> bool: method compute_u (line 381) | def compute_u(self, key: bytes) -> bytes: method compute_encryption_key (line 396) | def compute_encryption_key(self, password: bytes) -> bytes: method authenticate (line 415) | def authenticate(self, password: str) -> bytes | None: method authenticate_user_password (line 422) | def authenticate_user_password(self, password: bytes) -> bytes | None: method verify_encryption_key (line 429) | def verify_encryption_key(self, key: bytes) -> bool: method authenticate_owner_password (line 436) | def authenticate_owner_password(self, password: bytes) -> bytes | None: method decrypt (line 456) | def decrypt( method decrypt_rc4 (line 465) | def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes: class PDFStandardSecurityHandlerV4 (line 473) | class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler): method init_params (line 476) | def init_params(self) -> None: method get_cfm (line 498) | def get_cfm(self, name: str) -> Callable[[int, int, bytes], bytes] | N... method decrypt (line 506) | def decrypt( method decrypt_identity (line 522) | def decrypt_identity(self, objid: int, genno: int, data: bytes) -> bytes: method decrypt_aes128 (line 525) | def decrypt_aes128(self, objid: int, genno: int, data: bytes) -> bytes: class PDFStandardSecurityHandlerV5 (line 545) | class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): method init_params (line 548) | def init_params(self) -> None: method get_cfm (line 560) | def get_cfm(self, name: str) -> Callable[[int, int, bytes], bytes] | N... method authenticate (line 566) | def authenticate(self, password: str) -> bytes | None: method _normalize_password (line 588) | def _normalize_password(self, password: str) -> bytes: method _password_hash (line 598) | def _password_hash( method _r5_password (line 609) | def _r5_password( method _r6_password (line 622) | def _r6_password( method _bytes_mod_3 (line 648) | def _bytes_mod_3(input_bytes: bytes) -> int: method _aes_cbc_encrypt (line 652) | def _aes_cbc_encrypt(self, key: bytes, iv: bytes, data: bytes) -> bytes: method decrypt_aes256 (line 657) | def decrypt_aes256(self, objid: int, genno: int, data: bytes) -> bytes: class PDFDocument (line 669) | class PDFDocument: method __init__ (line 689) | def __init__( method _initialize_password (line 752) | def _initialize_password(self, password: str = "") -> None: method _getobj_objstm (line 769) | def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) ->... method _get_objects (line 784) | def _get_objects(self, stream: PDFStream) -> tuple[list[object], int]: method _getobj_parse (line 805) | def _getobj_parse(self, pos: int, objid: int) -> object: method getobj (line 833) | def getobj(self, objid: int) -> object: method get_outlines (line 873) | def get_outlines(self) -> Iterator[OutlineType]: method get_page_labels (line 893) | def get_page_labels(self) -> Iterator[str]: method lookup_name (line 910) | def lookup_name(self, cat: str, key: str | bytes) -> Any: method get_dest (line 938) | def get_dest(self, name: str | bytes) -> Any: method find_xref (line 953) | def find_xref(self, parser: PDFParser) -> int: method read_xref_from (line 980) | def read_xref_from( class PageLabels (line 1017) | class PageLabels(NumberTree): method labels (line 1024) | def labels(self) -> Iterator[str]: method _format_page_label (line 1055) | def _format_page_label(value: int, style: Any) -> str: FILE: babeldoc/pdfminer/pdfexceptions.py class PDFException (line 4) | class PDFException(PSException): class PDFTypeError (line 8) | class PDFTypeError(PDFException, TypeError): class PDFValueError (line 12) | class PDFValueError(PDFException, ValueError): class PDFObjectNotFound (line 16) | class PDFObjectNotFound(PDFException): class PDFNotImplementedError (line 20) | class PDFNotImplementedError(PDFException, NotImplementedError): class PDFKeyError (line 24) | class PDFKeyError(PDFException, KeyError): class PDFEOFError (line 28) | class PDFEOFError(PDFException, EOFError): class PDFIOError (line 32) | class PDFIOError(PDFException, IOError): FILE: babeldoc/pdfminer/pdffont.py function get_widths (line 58) | def get_widths(seq: Iterable[object]) -> dict[str | int, float]: function get_widths2 (line 89) | def get_widths2(seq: Iterable[object]) -> dict[int, tuple[float, Point]]: class FontMetricsDB (line 110) | class FontMetricsDB: method get_metrics (line 112) | def get_metrics(cls, fontname: str) -> tuple[dict[str, object], dict[s... class Type1FontHeaderParser (line 117) | class Type1FontHeaderParser(PSStackParser[int]): method __init__ (line 127) | def __init__(self, data: BinaryIO) -> None: method get_encoding (line 131) | def get_encoding(self) -> dict[int, str]: method do_keyword (line 156) | def do_keyword(self, pos: int, token: PSKeyword) -> None: function getdict (line 173) | def getdict(data: bytes) -> dict[int, list[float | int]]: class CFFFont (line 219) | class CFFFont: class INDEX (line 614) | class INDEX: method __init__ (line 615) | def __init__(self, fp: BinaryIO) -> None: method __repr__ (line 624) | def __repr__(self) -> str: method __len__ (line 627) | def __len__(self) -> int: method __getitem__ (line 630) | def __getitem__(self, i: int) -> bytes: method __iter__ (line 634) | def __iter__(self) -> Iterator[bytes]: method __init__ (line 637) | def __init__(self, name: str, fp: BinaryIO) -> None: method getstr (line 717) | def getstr(self, sid: int) -> str | bytes: class TrueTypeFont (line 725) | class TrueTypeFont: class CMapNotFound (line 726) | class CMapNotFound(PDFException): method __init__ (line 729) | def __init__(self, name: str, fp: BinaryIO) -> None: method create_unicode_map (line 751) | def create_unicode_map(self) -> FileUnicodeMap: class PDFFontError (line 768) | class PDFFontError(PDFException): class PDFUnicodeNotDefined (line 772) | class PDFUnicodeNotDefined(PDFFontError): class PDFFont (line 784) | class PDFFont: method __init__ (line 785) | def __init__( method __repr__ (line 816) | def __repr__(self) -> str: method is_vertical (line 819) | def is_vertical(self) -> bool: method is_multibyte (line 822) | def is_multibyte(self) -> bool: method decode (line 825) | def decode(self, bytes: bytes) -> Iterable[int]: method get_ascent (line 828) | def get_ascent(self) -> float: method get_descent (line 832) | def get_descent(self) -> float: method get_width (line 836) | def get_width(self) -> float: method get_height (line 842) | def get_height(self) -> float: method char_width (line 848) | def char_width(self, cid: int) -> float: method char_disp (line 866) | def char_disp(self, cid: int) -> float | tuple[float | None, float]: method string_width (line 870) | def string_width(self, s: bytes) -> float: method to_unichr (line 873) | def to_unichr(self, cid: int) -> str: method _parse_bbox (line 877) | def _parse_bbox(descriptor: Mapping[str, Any]) -> Rect: class PDFSimpleFont (line 889) | class PDFSimpleFont(PDFFont): method __init__ (line 890) | def __init__( method to_unichr (line 916) | def to_unichr(self, cid: int) -> str: class PDFType1Font (line 928) | class PDFType1Font(PDFSimpleFont): method __init__ (line 929) | def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, A... method __repr__ (line 960) | def __repr__(self) -> str: class PDFTrueTypeFont (line 964) | class PDFTrueTypeFont(PDFType1Font): method __repr__ (line 965) | def __repr__(self) -> str: class PDFType3Font (line 969) | class PDFType3Font(PDFSimpleFont): method __init__ (line 970) | def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, A... method __repr__ (line 986) | def __repr__(self) -> str: class PDFCIDFont (line 990) | class PDFCIDFont(PDFFont): method __init__ (line 993) | def __init__( method get_cmap_from_spec (line 1088) | def get_cmap_from_spec(self, spec: Mapping[str, Any], strict: bool) ->... method _get_cmap_name (line 1107) | def _get_cmap_name(spec: Mapping[str, Any], strict: bool) -> str: method __repr__ (line 1130) | def __repr__(self) -> str: method is_vertical (line 1133) | def is_vertical(self) -> bool: method is_multibyte (line 1136) | def is_multibyte(self) -> bool: method decode (line 1139) | def decode(self, bytes: bytes) -> Iterable[int]: method char_disp (line 1150) | def char_disp(self, cid: int) -> float | tuple[float | None, float]: method to_unichr (line 1154) | def to_unichr(self, cid: int) -> str: FILE: babeldoc/pdfminer/pdfinterp.py class PDFResourceError (line 59) | class PDFResourceError(PDFException): class PDFInterpreterError (line 63) | class PDFInterpreterError(PDFException): class PDFTextState (line 74) | class PDFTextState: method __init__ (line 78) | def __init__(self) -> None: method __repr__ (line 91) | def __repr__(self) -> str: method copy (line 110) | def copy(self) -> "PDFTextState": method reset (line 125) | def reset(self) -> None: class PDFGraphicState (line 137) | class PDFGraphicState: method __init__ (line 138) | def __init__(self) -> None: method copy (line 153) | def copy(self) -> "PDFGraphicState": method __repr__ (line 166) | def __repr__(self) -> str: class PDFResourceManager (line 185) | class PDFResourceManager: method __init__ (line 193) | def __init__(self, caching: bool = True) -> None: method get_procset (line 197) | def get_procset(self, procs: Sequence[object]) -> None: method get_cmap (line 204) | def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: method get_font (line 212) | def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont: class PDFContentParser (line 257) | class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): method __init__ (line 258) | def __init__(self, streams: Sequence[object]) -> None: method fillfp (line 266) | def fillfp(self) -> None: method seek (line 275) | def seek(self, pos: int) -> None: method fillbuf (line 279) | def fillbuf(self) -> None: method get_inline_data (line 291) | def get_inline_data(self, pos: int, target: bytes = b"EI") -> tuple[in... method flush (line 324) | def flush(self) -> None: method do_keyword (line 331) | def do_keyword(self, pos: int, token: PSKeyword) -> None: class PDFPageInterpreter (line 367) | class PDFPageInterpreter: method __init__ (line 373) | def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> ... method dup (line 377) | def dup(self) -> "PDFPageInterpreter": method init_resources (line 380) | def init_resources(self, resources: dict[object, object]) -> None: method init_state (line 421) | def init_state(self, ctm: Matrix) -> None: method push (line 438) | def push(self, obj: PDFStackT) -> None: method pop (line 441) | def pop(self, n: int) -> list[PDFStackT]: method get_current_state (line 448) | def get_current_state(self) -> tuple[Matrix, PDFTextState, PDFGraphicS... method set_current_state (line 451) | def set_current_state( method do_q (line 458) | def do_q(self) -> None: method do_Q (line 462) | def do_Q(self) -> None: method do_cm (line 467) | def do_cm( method do_w (line 487) | def do_w(self, linewidth: PDFStackT) -> None: method do_J (line 497) | def do_J(self, linecap: PDFStackT) -> None: method do_j (line 501) | def do_j(self, linejoin: PDFStackT) -> None: method do_M (line 505) | def do_M(self, miterlimit: PDFStackT) -> None: method do_d (line 509) | def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None: method do_ri (line 513) | def do_ri(self, intent: PDFStackT) -> None: method do_i (line 517) | def do_i(self, flatness: PDFStackT) -> None: method do_gs (line 521) | def do_gs(self, name: PDFStackT) -> None: method do_m (line 525) | def do_m(self, x: PDFStackT, y: PDFStackT) -> None: method do_l (line 539) | def do_l(self, x: PDFStackT, y: PDFStackT) -> None: method do_c (line 552) | def do_c( method do_v (line 584) | def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFSta... method do_y (line 599) | def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFSta... method do_h (line 614) | def do_h(self) -> None: method do_re (line 618) | def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT... method do_S (line 637) | def do_S(self) -> None: method do_s (line 642) | def do_s(self) -> None: method do_f (line 647) | def do_f(self) -> None: method do_F (line 652) | def do_F(self) -> None: method do_f_a (line 655) | def do_f_a(self) -> None: method do_B (line 660) | def do_B(self) -> None: method do_B_a (line 665) | def do_B_a(self) -> None: method do_b (line 670) | def do_b(self) -> None: method do_b_a (line 675) | def do_b_a(self) -> None: method do_n (line 680) | def do_n(self) -> None: method do_W (line 684) | def do_W(self) -> None: method do_W_a (line 688) | def do_W_a(self) -> None: method do_CS (line 692) | def do_CS(self, name: PDFStackT) -> None: method do_cs (line 703) | def do_cs(self, name: PDFStackT) -> None: method do_G (line 711) | def do_G(self, gray: PDFStackT) -> None: method do_g (line 723) | def do_g(self, gray: PDFStackT) -> None: method do_RG (line 735) | def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: method do_rg (line 747) | def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: method do_K (line 759) | def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT)... method do_k (line 771) | def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT)... method do_SCN (line 783) | def do_SCN(self) -> None: method do_scn (line 828) | def do_scn(self) -> None: method do_SC (line 874) | def do_SC(self) -> None: method do_sc (line 878) | def do_sc(self) -> None: method do_sh (line 882) | def do_sh(self, name: object) -> None: method do_BT (line 885) | def do_BT(self) -> None: method do_ET (line 894) | def do_ET(self) -> None: method do_BX (line 897) | def do_BX(self) -> None: method do_EX (line 900) | def do_EX(self) -> None: method do_MP (line 903) | def do_MP(self, tag: PDFStackT) -> None: method do_DP (line 912) | def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None: method do_BMC (line 921) | def do_BMC(self, tag: PDFStackT) -> None: method do_BDC (line 930) | def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None: method do_EMC (line 939) | def do_EMC(self) -> None: method do_Tc (line 943) | def do_Tc(self, space: PDFStackT) -> None: method do_Tw (line 958) | def do_Tw(self, space: PDFStackT) -> None: method do_Tz (line 973) | def do_Tz(self, scale: PDFStackT) -> None: method do_TL (line 987) | def do_TL(self, leading: PDFStackT) -> None: method do_Tf (line 1002) | def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None: method do_Tr (line 1025) | def do_Tr(self, render: PDFStackT) -> None: method do_Ts (line 1036) | def do_Ts(self, rise: PDFStackT) -> None: method do_Td (line 1050) | def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None: method do_TD (line 1068) | def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None: method do_Tm (line 1091) | def do_Tm( method do_T_a (line 1112) | def do_T_a(self) -> None: method do_TJ (line 1125) | def do_TJ(self, seq: PDFStackT) -> None: method do_Tj (line 1139) | def do_Tj(self, s: PDFStackT) -> None: method do__q (line 1143) | def do__q(self, s: PDFStackT) -> None: method do__w (line 1151) | def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None: method do_BI (line 1160) | def do_BI(self) -> None: method do_ID (line 1163) | def do_ID(self) -> None: method do_EI (line 1166) | def do_EI(self, obj: PDFStackT) -> None: method do_Do (line 1174) | def do_Do(self, xobjid_arg: PDFStackT) -> None: method process_page (line 1212) | def process_page(self, page: PDFPage) -> None: method render_contents (line 1227) | def render_contents( method execute (line 1247) | def execute(self, streams: Sequence[object]) -> None: FILE: babeldoc/pdfminer/pdfpage.py class PDFPage (line 30) | class PDFPage: method __init__ (line 54) | def __init__( method __repr__ (line 93) | def __repr__(self) -> str: method create_pages (line 99) | def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]: method get_pages (line 161) | def get_pages( method _parse_mediabox (line 197) | def _parse_mediabox(self, value: Any) -> Rect: method _parse_cropbox (line 214) | def _parse_cropbox(self, value: Any, mediabox: Rect) -> Rect: method _parse_contents (line 226) | def _parse_contents(self, value: Any) -> list[Any]: FILE: babeldoc/pdfminer/pdfparser.py class PDFSyntaxError (line 25) | class PDFSyntaxError(PDFException): class PDFParser (line 30) | class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, Non... method __init__ (line 46) | def __init__(self, fp: BinaryIO) -> None: method set_document (line 51) | def set_document(self, doc: "PDFDocument") -> None: method do_keyword (line 62) | def do_keyword(self, pos: int, token: PSKeyword) -> None: class PDFStreamParser (line 139) | class PDFStreamParser(PDFParser): method __init__ (line 147) | def __init__(self, data: bytes) -> None: method flush (line 150) | def flush(self) -> None: method do_keyword (line 155) | def do_keyword(self, pos: int, token: PSKeyword) -> None: FILE: babeldoc/pdfminer/pdftypes.py class DecipherCallable (line 42) | class DecipherCallable(Protocol): method __call__ (line 45) | def __call__( class PDFObject (line 55) | class PDFObject(PSObject): class PDFObjRef (line 69) | class PDFObjRef(PDFObject): method __init__ (line 70) | def __init__( method __repr__ (line 96) | def __repr__(self) -> str: method resolve (line 99) | def resolve(self, default: object = None) -> Any: function resolve1 (line 107) | def resolve1(x: object, default: object = None) -> Any: function resolve_all (line 118) | def resolve_all(x: object, default: object = None) -> Any: function decipher_all (line 134) | def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: ... function int_value (line 148) | def int_value(x: object) -> int: function float_value (line 157) | def float_value(x: object) -> float: function num_value (line 166) | def num_value(x: object) -> float: function uint_value (line 175) | def uint_value(x: object, n_bits: int) -> int: function str_value (line 184) | def str_value(x: object) -> bytes: function list_value (line 193) | def list_value(x: object) -> list[Any] | tuple[Any, ...]: function dict_value (line 202) | def dict_value(x: object) -> dict[Any, Any]: function stream_value (line 212) | def stream_value(x: object) -> "PDFStream": function decompress_corrupted (line 221) | def decompress_corrupted(data: bytes) -> bytes: class PDFStream (line 242) | class PDFStream(PDFObject): method __init__ (line 243) | def __init__( method set_objid (line 257) | def set_objid(self, objid: int, genno: int) -> None: method __repr__ (line 261) | def __repr__(self) -> str: method __contains__ (line 277) | def __contains__(self, name: object) -> bool: method __getitem__ (line 280) | def __getitem__(self, name: str) -> Any: method get (line 283) | def get(self, name: str, default: object = None) -> Any: method get_any (line 286) | def get_any(self, names: Iterable[str], default: object = None) -> Any: method get_filters (line 292) | def get_filters(self) -> list[tuple[Any, Any]]: method decode (line 309) | def decode(self) -> None: method get_data (line 387) | def get_data(self) -> bytes: method get_rawdata (line 393) | def get_rawdata(self) -> bytes | None: FILE: babeldoc/pdfminer/psexceptions.py class PSException (line 1) | class PSException(Exception): class PSEOF (line 5) | class PSEOF(PSException): class PSSyntaxError (line 9) | class PSSyntaxError(PSException): class PSTypeError (line 13) | class PSTypeError(PSException): class PSValueError (line 17) | class PSValueError(PSException): FILE: babeldoc/pdfminer/psparser.py class PSObject (line 27) | class PSObject: class PSLiteral (line 31) | class PSLiteral(PSObject): method __init__ (line 45) | def __init__(self, name: NameType) -> None: method __repr__ (line 48) | def __repr__(self) -> str: class PSKeyword (line 53) | class PSKeyword(PSObject): method __init__ (line 64) | def __init__(self, name: bytes) -> None: method __repr__ (line 67) | def __repr__(self) -> str: class PSSymbolTable (line 75) | class PSSymbolTable(Generic[_SymbolT]): method __init__ (line 81) | def __init__(self, klass: type[_SymbolT]) -> None: method intern (line 85) | def intern(self, name: PSLiteral.NameType) -> _SymbolT: function literal_name (line 108) | def literal_name(x: Any) -> str: function keyword_name (line 122) | def keyword_name(x: Any) -> Any: class PSBaseParser (line 159) | class PSBaseParser: method __init__ (line 164) | def __init__(self, fp: BinaryIO) -> None: method __repr__ (line 169) | def __repr__(self) -> str: method flush (line 172) | def flush(self) -> None: method close (line 175) | def close(self) -> None: method tell (line 178) | def tell(self) -> int: method poll (line 181) | def poll(self, pos: int | None = None, n: int = 80) -> None: method seek (line 189) | def seek(self, pos: int) -> None: method fillbuf (line 204) | def fillbuf(self) -> None: method nextline (line 214) | def nextline(self) -> tuple[int, bytes]: method revreadlines (line 243) | def revreadlines(self) -> Iterator[bytes]: method _parse_main (line 267) | def _parse_main(self, s: bytes, i: int) -> int: method _add_token (line 313) | def _add_token(self, obj: PSBaseParserToken) -> None: method _parse_comment (line 316) | def _parse_comment(self, s: bytes, i: int) -> int: method _parse_literal (line 328) | def _parse_literal(self, s: bytes, i: int) -> int: method _parse_literal_hex (line 348) | def _parse_literal_hex(self, s: bytes, i: int) -> int: method _parse_number (line 358) | def _parse_number(self, s: bytes, i: int) -> int: method _parse_float (line 377) | def _parse_float(self, s: bytes, i: int) -> int: method _parse_keyword (line 391) | def _parse_keyword(self, s: bytes, i: int) -> int: method _parse_string (line 409) | def _parse_string(self, s: bytes, i: int) -> int: method _parse_string_1 (line 435) | def _parse_string_1(self, s: bytes, i: int) -> int: method _parse_wopen (line 464) | def _parse_wopen(self, s: bytes, i: int) -> int: method _parse_wclose (line 474) | def _parse_wclose(self, s: bytes, i: int) -> int: method _parse_hexstring (line 482) | def _parse_hexstring(self, s: bytes, i: int) -> int: method nexttoken (line 497) | def nexttoken(self) -> tuple[int, PSBaseParserToken]: class PSStackParser (line 530) | class PSStackParser(PSBaseParser, Generic[ExtraT]): method __init__ (line 531) | def __init__(self, fp: BinaryIO) -> None: method reset (line 535) | def reset(self) -> None: method seek (line 541) | def seek(self, pos: int) -> None: method push (line 545) | def push(self, *objs: PSStackEntry[ExtraT]) -> None: method pop (line 548) | def pop(self, n: int) -> list[PSStackEntry[ExtraT]]: method popall (line 553) | def popall(self) -> list[PSStackEntry[ExtraT]]: method add_results (line 558) | def add_results(self, *objs: PSStackEntry[ExtraT]) -> None: method start_type (line 565) | def start_type(self, pos: int, type: str) -> None: method end_type (line 570) | def end_type(self, type: str) -> tuple[int, list[PSStackType[ExtraT]]]: method do_keyword (line 578) | def do_keyword(self, pos: int, token: PSKeyword) -> None: method nextobject (line 581) | def nextobject(self) -> PSStackEntry[ExtraT]: FILE: babeldoc/pdfminer/runlength.py function rldecode (line 9) | def rldecode(data: bytes) -> bytes: FILE: babeldoc/pdfminer/utils.py class open_filename (line 36) | class open_filename: method __init__ (line 42) | def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) ->... method __enter__ (line 54) | def __enter__(self) -> AnyIO: method __exit__ (line 57) | def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) ... function make_compat_bytes (line 62) | def make_compat_bytes(in_str: str) -> bytes: function make_compat_str (line 68) | def make_compat_str(o: object) -> str: function shorten_str (line 80) | def shorten_str(s: str, size: int) -> str: function compatible_encode_method (line 90) | def compatible_encode_method( function paeth_predictor (line 105) | def paeth_predictor(left: int, above: int, upper_left: int) -> int: function apply_png_predictor (line 123) | def apply_png_predictor( function parse_rect (line 238) | def parse_rect(o: Any) -> Rect: function mult_matrix (line 246) | def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix: function translate_matrix (line 260) | def translate_matrix(m: Matrix, v: Point) -> Matrix: function apply_matrix_pt (line 267) | def apply_matrix_pt(m: Matrix, v: Point) -> Point: function apply_matrix_norm (line 274) | def apply_matrix_norm(m: Matrix, v: Point) -> Point: function isnumber (line 284) | def isnumber(x: object) -> bool: function uniq (line 291) | def uniq(objs: Iterable[_T]) -> Iterator[_T]: function fsplit (line 301) | def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> tuple[list... function drange (line 313) | def drange(v0: float, v1: float, d: int) -> range: function get_bound (line 318) | def get_bound(pts: Iterable[Point]) -> Rect: function pick (line 330) | def pick( function choplist (line 344) | def choplist(n: int, seq: Iterable[_T]) -> Iterator[tuple[_T, ...]]: function nunpack (line 354) | def nunpack(s: bytes, default: int = 0) -> int: function decode_text (line 626) | def decode_text(s: bytes) -> str: function enc (line 634) | def enc(x: str) -> str: function bbox2str (line 641) | def bbox2str(bbox: Rect) -> str: function matrix2str (line 646) | def matrix2str(m: Matrix) -> str: function vecBetweenBoxes (line 651) | def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point: class Plane (line 680) | class Plane(Generic[LTComponentT]): method __init__ (line 688) | def __init__(self, bbox: Rect, gridsize: int = 50) -> None: method __repr__ (line 695) | def __repr__(self) -> str: method __iter__ (line 698) | def __iter__(self) -> Iterator[LTComponentT]: method __len__ (line 701) | def __len__(self) -> int: method __contains__ (line 704) | def __contains__(self, obj: object) -> bool: method _getrange (line 707) | def _getrange(self, bbox: Rect) -> Iterator[Point]: method extend (line 719) | def extend(self, objs: Iterable[LTComponentT]) -> None: method add (line 723) | def add(self, obj: LTComponentT) -> None: method remove (line 735) | def remove(self, obj: LTComponentT) -> None: method find (line 744) | def find(self, bbox: Rect) -> Iterator[LTComponentT]: function format_int_roman (line 764) | def format_int_roman(value: int) -> str: function format_int_alpha (line 789) | def format_int_alpha(value: int) -> str: FILE: babeldoc/progress_monitor.py class ProgressMonitor (line 12) | class ProgressMonitor: method __init__ (line 13) | def __init__( method create_part_monitor (line 72) | def create_part_monitor( method _handle_part_progress (line 88) | def _handle_part_progress(self, **kwargs): method _handle_part_finish (line 96) | def _handle_part_finish(self, **kwargs): method stage_start (line 110) | def stage_start(self, stage_name: str, total: int): method __enter__ (line 133) | def __enter__(self): method __exit__ (line 136) | def __exit__(self, exc_type, exc_val, exc_tb): method on_finish (line 139) | def on_finish(self): method stage_done (line 149) | def stage_done(self, stage): method calculate_current_progress (line 175) | def calculate_current_progress(self, stage=None): method _calculate_current_progress (line 187) | def _calculate_current_progress(self, stage=None): method stage_update (line 214) | def stage_update(self, stage, n: int): method translate_done (line 237) | def translate_done(self, translate_result): method translate_error (line 243) | def translate_error(self, error): method raise_if_cancelled (line 250) | def raise_if_cancelled(self): method cancel (line 254) | def cancel(self): class TranslationStage (line 262) | class TranslationStage: method __init__ (line 263) | def __init__( method __enter__ (line 280) | def __enter__(self): method __exit__ (line 283) | def __exit__(self, exc_type, exc_val, exc_tb): method advance (line 294) | def advance(self, n: int = 1): class DummyTranslationStage (line 300) | class DummyTranslationStage: method __init__ (line 301) | def __init__(self, name: str, total: int, pm: ProgressMonitor, weight:... method __enter__ (line 308) | def __enter__(self): method __exit__ (line 311) | def __exit__(self, exc_type, exc_val, exc_tb): method advance (line 314) | def advance(self, n: int = 1): FILE: babeldoc/tools/generate_cmap_metadata.py function _calc_sha3_256 (line 17) | def _calc_sha3_256(path: Path) -> str: function main (line 30) | def main() -> None: FILE: babeldoc/tools/generate_font_metadata.py function get_font_metadata (line 29) | def get_font_metadata(font_path) -> PdfFont: function main (line 60) | def main(): FILE: babeldoc/tools/italic_assistance.py function find_latest_il_json (line 16) | def find_latest_il_json() -> Path | None: function extract_fonts_from_paragraph (line 34) | def extract_fonts_from_paragraph( function find_fonts_by_debug_id (line 121) | def find_fonts_by_debug_id(json_path: Path, debug_id_regex: str) -> dict... function main (line 163) | def main(): FILE: babeldoc/translator/cache.py class _TranslationCache (line 31) | class _TranslationCache(Model): class Meta (line 38) | class Meta: class TranslationCache (line 54) | class TranslationCache: method _sort_dict_recursively (line 56) | def _sort_dict_recursively(obj): method __init__ (line 67) | def __init__(self, translate_engine: str, translate_engine_params: dic... method replace_params (line 74) | def replace_params(self, params: dict = None): method update_params (line 81) | def update_params(self, params: dict = None): method add_params (line 87) | def add_params(self, k: str, v): method get (line 93) | def get(self, original_text: str) -> str | None: method set (line 111) | def set(self, original_text: str, translation: str): method _cleanup (line 128) | def _cleanup(self) -> None: function init_db (line 148) | def init_db(remove_exists=False): function init_test_db (line 165) | def init_test_db(): function clean_test_db (line 185) | def clean_test_db(test_db): FILE: babeldoc/translator/translator.py function remove_control_characters (line 24) | def remove_control_characters(s): class RateLimiter (line 28) | class RateLimiter: method __init__ (line 34) | def __init__(self, max_qps: int): method wait (line 43) | def wait(self, _rate_limit_params: dict = None): method set_max_qps (line 61) | def set_max_qps(self, max_qps: int): function set_translate_rate_limiter (line 75) | def set_translate_rate_limiter(max_qps): class BaseTranslator (line 79) | class BaseTranslator(ABC): method __init__ (line 85) | def __init__(self, lang_in, lang_out, ignore_cache): method __del__ (line 103) | def __del__(self): method add_cache_impact_parameters (line 112) | def add_cache_impact_parameters(self, k: str, v): method translate (line 120) | def translate(self, text, ignore_cache=False, rate_limit_params: dict ... method llm_translate (line 141) | def llm_translate(self, text, ignore_cache=False, rate_limit_params: d... method do_llm_translate (line 168) | def do_llm_translate(self, text, rate_limit_params: dict = None): method do_translate (line 177) | def do_translate(self, text, rate_limit_params: dict = None): method __str__ (line 190) | def __str__(self): method get_rich_text_left_placeholder (line 193) | def get_rich_text_left_placeholder(self, placeholder_id: int | str): method get_rich_text_right_placeholder (line 196) | def get_rich_text_right_placeholder(self, placeholder_id: int | str): method get_formular_placeholder (line 199) | def get_formular_placeholder(self, placeholder_id: int | str): class OpenAITranslator (line 203) | class OpenAITranslator(BaseTranslator): method __init__ (line 207) | def __init__( method do_translate (line 265) | def do_translate(self, text, rate_limit_params: dict = None) -> str: method prompt (line 279) | def prompt(self, text): method do_llm_translate (line 297) | def do_llm_translate(self, text, rate_limit_params: dict = None): method update_token_count (line 339) | def update_token_count(self, response): method get_formular_placeholder (line 360) | def get_formular_placeholder(self, placeholder_id: int | str): method get_rich_text_left_placeholder (line 364) | def get_rich_text_left_placeholder(self, placeholder_id: int | str): method get_rich_text_right_placeholder (line 370) | def get_rich_text_right_placeholder(self, placeholder_id: int | str): FILE: babeldoc/utils/atomic_integer.py class AtomicInteger (line 4) | class AtomicInteger: method __init__ (line 5) | def __init__(self, value=0): method inc (line 9) | def inc(self, d=1): method dec (line 14) | def dec(self, d=1): method value (line 18) | def value(self): method value (line 23) | def value(self, v): FILE: babeldoc/utils/memory.py function _parse_pss_from_smaps_rollup (line 12) | def _parse_pss_from_smaps_rollup(pid: int) -> int | None: function _parse_pss_from_smaps (line 32) | def _parse_pss_from_smaps(pid: int) -> int | None: function _get_pss_linux (line 54) | def _get_pss_linux(pid: int) -> int | None: function _get_rss_psutil (line 73) | def _get_rss_psutil(pid: int) -> int | None: function _get_single_process_memory (line 88) | def _get_single_process_memory( function get_memory_usage_bytes (line 119) | def get_memory_usage_bytes( function get_memory_usage_with_throttle (line 192) | def get_memory_usage_with_throttle( FILE: babeldoc/utils/priority_thread_pool_executor.py function python_exit (line 36) | def python_exit(): class PriorityQueue (line 58) | class PriorityQueue(queue.Queue): method _init (line 67) | def _init(self, maxsize): method _qsize (line 72) | def _qsize(self): method _put (line 75) | def _put(self, item): method remove (line 87) | def remove(self, task): method _get (line 95) | def _get(self): function _worker (line 104) | def _worker(executor_reference, work_queue, initializer, initargs): class PriorityThreadPoolExecutor (line 150) | class PriorityThreadPoolExecutor(ThreadPoolExecutor): method __init__ (line 155) | def __init__(self, *args, **kwargs): method submit (line 162) | def submit(self, fn, *args, **kwargs): method _adjust_thread_count (line 202) | def _adjust_thread_count(self): method shutdown (line 229) | def shutdown(self, wait=True, *, cancel_futures=False): method __del__ (line 263) | def __del__(self): FILE: tests/test_translation_cache_cleanup.py function _prepare_records (line 9) | def _prepare_records(cache: TranslationCache, num_records: int) -> None: function test_cleanup_under_limit (line 15) | def test_cleanup_under_limit(monkeypatch): function test_cleanup_over_limit (line 33) | def test_cleanup_over_limit(monkeypatch): function test_cleanup_thread_safety (line 50) | def test_cleanup_thread_safety(monkeypatch):