SYMBOL INDEX (1474 symbols across 128 files) FILE: bin/bump_version.py function validate_release_notes (line 42) | def validate_release_notes(new_version: str) -> bool: function get_github_client (line 69) | def get_github_client(): function wait_for_ci_completion (line 101) | def wait_for_ci_completion(commit_sha: str, timeout_minutes: int = 30) -... function push_and_wait_for_ci (line 178) | def push_and_wait_for_ci(branch: str) -> bool: function push_tag (line 206) | def push_tag(tag: str) -> bool: function bump_version (line 224) | def bump_version() -> None: FILE: docs/conf.py class Mock (line 94) | class Mock(MagicMock): method __getattr__ (line 96) | def __getattr__(cls, name): FILE: misc/_webservice.py function get_host_url_with_port (line 29) | def get_host_url_with_port(port: int) -> str: FILE: misc/batch.py function filecompare (line 31) | def filecompare(a, b): FILE: misc/example_plugin.py function add_options (line 34) | def add_options(parser): function prepare (line 40) | def prepare(options): function validate (line 45) | def validate(pdfinfo, options): function filter_ocr_image (line 50) | def filter_ocr_image(page, image): function filter_page_image (line 58) | def filter_page_image(page, image_filename): FILE: misc/ocrmypdf_compare.py function do_column (line 22) | def do_column(label, suffix, d): function main (line 50) | def main(): FILE: misc/pdf_compare.py function do_metadata (line 20) | def do_metadata(pdf): function main (line 34) | def main(): FILE: misc/pdf_text_diff.py function main (line 19) | def main( FILE: misc/watcher.py class LoggingLevelEnum (line 38) | class LoggingLevelEnum(str, Enum): function get_output_path (line 48) | def get_output_path(root: Path, basename: str, output_dir_year_month: bo... function wait_for_file_ready (line 63) | def wait_for_file_ready( function execute_ocrmypdf (line 91) | def execute_ocrmypdf( class HandleObserverEvent (line 136) | class HandleObserverEvent(PatternMatchingEventHandler): method __init__ (line 137) | def __init__( # noqa: D107 method on_any_event (line 153) | def on_any_event(self, event): function main (line 159) | def main( FILE: scripts/generate_glyphless_font.py function classify_codepoint (line 78) | def classify_codepoint(codepoint: int) -> str: function build_cmap (line 117) | def build_cmap() -> dict[int, str]: function create_font (line 126) | def create_font() -> TTFont: function main (line 198) | def main() -> None: FILE: src/ocrmypdf/__main__.py function sigbus (line 31) | def sigbus(*args): function run (line 40) | def run(args=None): FILE: src/ocrmypdf/_annots.py function remove_broken_goto_annotations (line 15) | def remove_broken_goto_annotations(pdf: Pdf) -> bool: FILE: src/ocrmypdf/_concurrent.py function _task_noop (line 18) | def _task_noop(*_args, **_kwargs) -> None: function _task_finished_noop (line 22) | def _task_finished_noop(_result: Any, pbar: ProgressBar): class Executor (line 26) | class Executor(ABC): method __init__ (line 32) | def __init__(self, *, pbar_class=None): method __call__ (line 36) | def __call__( method _execute (line 89) | def _execute( function setup_executor (line 103) | def setup_executor(plugin_manager) -> Executor: class SerialExecutor (line 108) | class SerialExecutor(Executor): method _execute (line 115) | def _execute( FILE: src/ocrmypdf/_exec/ghostscript.py class DuplicateFilter (line 44) | class DuplicateFilter(logging.Filter): method __init__ (line 51) | def __init__(self, logger: logging.Logger, context_window=5): method filter (line 57) | def filter(self, record): function version (line 75) | def version() -> Version: function _gs_error_reported (line 79) | def _gs_error_reported(stream) -> bool: function _gs_devicen_reported (line 84) | def _gs_devicen_reported(stream) -> bool: function rasterize_pdf (line 98) | def rasterize_pdf( class GhostscriptFollower (line 226) | class GhostscriptFollower: method __init__ (line 232) | def __init__(self, progressbar_class): method __enter__ (line 237) | def __enter__(self): method __exit__ (line 243) | def __exit__(self, exc_type, exc_value, traceback): method __call__ (line 248) | def __call__(self, line): function generate_pdfa (line 265) | def generate_pdfa( FILE: src/ocrmypdf/_exec/jbig2enc.py function version (line 16) | def version() -> Version: function available (line 26) | def available(): function convert_single (line 34) | def convert_single(cwd, infile, outfile, threshold): FILE: src/ocrmypdf/_exec/pngquant.py function version (line 17) | def version() -> Version: function available (line 21) | def available(): function quantize (line 29) | def quantize(input_file: Path, output_file: Path, quality_min: int, qual... FILE: src/ocrmypdf/_exec/tesseract.py function _tesseract_env (line 31) | def _tesseract_env(omp_thread_limit: int | None) -> dict[str, str] | None: class ThresholdingMethod (line 40) | class ThresholdingMethod(IntEnum): class TesseractLoggerAdapter (line 58) | class TesseractLoggerAdapter(logging.LoggerAdapter): method process (line 61) | def process(self, msg, kwargs): class TesseractVersion (line 110) | class TesseractVersion(Version): function version (line 118) | def version() -> Version: function has_thresholding (line 122) | def has_thresholding() -> bool: function get_languages (line 127) | def get_languages() -> set[str]: function tess_base_args (line 158) | def tess_base_args(langs: list[str], engine_mode: int | None) -> list[str]: function _parse_tesseract_output (line 167) | def _parse_tesseract_output(binary_output: bytes) -> dict[str, str]: function get_orientation (line 178) | def get_orientation( function _is_empty_page_error (line 222) | def _is_empty_page_error(exc): function get_deskew (line 234) | def get_deskew( function tesseract_log_output (line 275) | def tesseract_log_output(stream: bytes) -> None: function page_timedout (line 316) | def page_timedout(timeout: float) -> None: function _generate_null_hocr (line 322) | def _generate_null_hocr(output_hocr: Path, output_text: Path, image: Pat... function generate_hocr (line 331) | def generate_hocr( function use_skip_page (line 398) | def use_skip_page(output_pdf: Path, output_text: Path) -> None: function generate_pdf (line 405) | def generate_pdf( FILE: src/ocrmypdf/_exec/unpaper.py class UnpaperImageTooLargeError (line 34) | class UnpaperImageTooLargeError(Exception): method __init__ (line 37) | def __init__( function version (line 49) | def version() -> Version: function _setup_unpaper_io (line 54) | def _setup_unpaper_io(input_file: Path) -> Iterator[tuple[Path, Path, Pa... function run_unpaper (line 69) | def run_unpaper( function clean (line 103) | def clean( FILE: src/ocrmypdf/_exec/verapdf.py class ValidationResult (line 22) | class ValidationResult(NamedTuple): function version (line 30) | def version() -> Version: function available (line 35) | def available() -> bool: function output_type_to_flavour (line 44) | def output_type_to_flavour(output_type: str) -> str: function validate (line 62) | def validate(input_file: Path, flavour: str) -> ValidationResult: FILE: src/ocrmypdf/_graft.py class RenderMode (line 34) | class RenderMode(Enum): class Fpdf2PageInfo (line 46) | class Fpdf2PageInfo: class Fpdf2ParsedPage (line 57) | class Fpdf2ParsedPage: function _compute_text_misalignment (line 71) | def _compute_text_misalignment( function _compute_page_rotation (line 91) | def _compute_page_rotation( function _build_text_layer_ctm (line 109) | def _build_text_layer_ctm( function _ensure_dictionary (line 171) | def _ensure_dictionary(obj: Dictionary | Stream, name: Name): function strip_invisible_text (line 177) | def strip_invisible_text(pdf: Pdf, page: Page): class OcrGrafter (line 214) | class OcrGrafter: method __init__ (line 217) | def __init__(self, context: PdfContext): method graft_page (line 237) | def graft_page( method finalize (line 320) | def finalize(self): method _parse_hocr_pages (line 338) | def _parse_hocr_pages(self): method _render_and_graft_fpdf2_pages (line 372) | def _render_and_graft_fpdf2_pages(self): method _graft_fpdf2_text_layer (line 421) | def _graft_fpdf2_text_layer(self, pageno: int, text_page: Page, text_r... method _graft_sandwich_text_layer (line 512) | def _graft_sandwich_text_layer( FILE: src/ocrmypdf/_jobcontext.py class PdfContext (line 20) | class PdfContext: method __init__ (line 30) | def __init__( method get_path (line 44) | def get_path(self, name: str) -> Path: method get_page_contexts (line 52) | def get_page_contexts(self) -> Iterator[PageContext]: method get_page_context_args (line 58) | def get_page_context_args(self) -> Iterator[tuple[PageContext]]: class PageContext (line 65) | class PageContext: method __init__ (line 81) | def __init__(self, pdf_context: PdfContext, pageno): method get_path (line 92) | def get_path(self, name: str) -> Path: method __getstate__ (line 100) | def __getstate__(self): method __setstate__ (line 112) | def __setstate__(self, state): FILE: src/ocrmypdf/_logging.py class PageNumberFilter (line 14) | class PageNumberFilter(logging.Filter): method filter (line 17) | def filter(self, record): class RichLoggingHandler (line 26) | class RichLoggingHandler(RichHandler): method __init__ (line 27) | def __init__(self, console: Console, **kwargs): FILE: src/ocrmypdf/_metadata.py function get_docinfo (line 26) | def get_docinfo(base_pdf: Pdf, context: PdfContext) -> dict[str, str]: function report_on_metadata (line 60) | def report_on_metadata(options, missing): function repair_docinfo_nuls (line 78) | def repair_docinfo_nuls(pdf): function should_linearize (line 97) | def should_linearize(working_file: Path, context: PdfContext) -> bool: function _fix_metadata (line 106) | def _fix_metadata(meta_original: PdfMetadata, meta_pdf: PdfMetadata): function _unset_empty_metadata (line 118) | def _unset_empty_metadata(meta: PdfMetadata, options): function _set_language (line 142) | def _set_language(pdf: Pdf, languages: list[str]): class MetadataProgress (line 155) | class MetadataProgress: method __init__ (line 156) | def __init__(self, progressbar_class, enable: bool = True): method __enter__ (line 162) | def __enter__(self): method __exit__ (line 166) | def __exit__(self, exc_type, exc_value, traceback): method __call__ (line 169) | def __call__(self, percent: int): function metadata_fixup (line 175) | def metadata_fixup( FILE: src/ocrmypdf/_options.py class ProcessingMode (line 37) | class ProcessingMode(StrEnum): class TaggedPdfMode (line 54) | class TaggedPdfMode(StrEnum): function _pages_from_ranges (line 68) | def _pages_from_ranges(ranges: str) -> set[int]: class OcrOptions (line 108) | class OcrOptions(BaseModel): method force_ocr (line 129) | def force_ocr(self) -> bool: method skip_text (line 134) | def skip_text(self) -> bool: method redo_ocr (line 139) | def redo_ocr(self) -> bool: method jpeg_quality (line 182) | def jpeg_quality(self): method jpeg_quality (line 187) | def jpeg_quality(self, value): method validate_languages (line 232) | def validate_languages(cls, v): method validate_output_type (line 240) | def validate_output_type(cls, v): method validate_pdf_renderer (line 249) | def validate_pdf_renderer(cls, v): method validate_rasterizer (line 261) | def validate_rasterizer(cls, v): method validate_clean_final (line 270) | def validate_clean_final(cls, v, info): method validate_jobs (line 278) | def validate_jobs(cls, v): method validate_verbose (line 286) | def validate_verbose(cls, v): method validate_oversample (line 294) | def validate_oversample(cls, v): method validate_max_image_mpixels (line 302) | def validate_max_image_mpixels(cls, v): method validate_rotate_pages_threshold (line 310) | def validate_rotate_pages_threshold(cls, v): method validate_metadata_unicode (line 318) | def validate_metadata_unicode(cls, v): method validate_pages_format (line 334) | def validate_pages_format(cls, v): method validate_unpaper_args (line 346) | def validate_unpaper_args(cls, v): method handle_special_cases (line 360) | def handle_special_cases(cls, data): method validate_redo_ocr_options (line 406) | def validate_redo_ocr_options(self): method validate_output_type_compatibility (line 418) | def validate_output_type_compatibility(self): method lossless_reconstruction (line 432) | def lossless_reconstruction(self): method model_dump_json_safe (line 444) | def model_dump_json_safe(self) -> str: method model_validate_json_safe (line 493) | def model_validate_json_safe(cls, json_str: str) -> OcrOptions: method register_plugin_models (line 537) | def register_plugin_models(cls, models: dict[str, type]) -> None: method _get_plugin_options (line 546) | def _get_plugin_options(self, namespace: str) -> Any: method __getattr__ (line 609) | def __getattr__(self, name: str) -> Any: FILE: src/ocrmypdf/_pipeline.py function register_heif_opener (line 54) | def register_heif_opener(): function triage_image_file (line 67) | def triage_image_file(input_file: Path, output_file: Path, options: OcrO... function _pdf_guess_version (line 150) | def _pdf_guess_version(input_file: Path, search_window=1024) -> str: function triage (line 165) | def triage( function get_pdfinfo (line 193) | def get_pdfinfo( function validate_pdfinfo_options (line 220) | def validate_pdfinfo_options(context: PdfContext) -> None: function _vector_page_dpi (line 269) | def _vector_page_dpi(pageinfo: PageInfo) -> int: function get_page_square_dpi (line 274) | def get_page_square_dpi( function get_canvas_square_dpi (line 299) | def get_canvas_square_dpi( function is_ocr_required (line 323) | def is_ocr_required(page_context: PageContext) -> bool: function rasterize_preview (line 399) | def rasterize_preview(input_file: Path, page_context: PageContext) -> Path: function describe_rotation (line 422) | def describe_rotation( function get_orientation_correction (line 448) | def get_orientation_correction(preview: Path, page_context: PageContext)... function calculate_image_dpi (line 479) | def calculate_image_dpi(page_context: PageContext) -> Resolution: function calculate_raster_dpi (line 490) | def calculate_raster_dpi(page_context: PageContext): function rasterize (line 511) | def rasterize( function preprocess_remove_background (line 588) | def preprocess_remove_background(input_file: Path, page_context: PageCon... function preprocess_deskew (line 599) | def preprocess_deskew(input_file: Path, page_context: PageContext) -> Path: function preprocess_clean (line 630) | def preprocess_clean(input_file: Path, page_context: PageContext) -> Path: function create_ocr_image (line 642) | def create_ocr_image(image: Path, page_context: PageContext) -> Path: function ocr_engine_hocr (line 691) | def ocr_engine_hocr(input_file: Path, page_context: PageContext) -> tupl... function ocr_engine_direct (line 707) | def ocr_engine_direct( function should_visible_page_image_use_jpg (line 738) | def should_visible_page_image_use_jpg(pageinfo: PageInfo) -> bool: function create_visible_page_jpg (line 755) | def create_visible_page_jpg(image: Path, page_context: PageContext) -> P... function create_pdf_page_from_image (line 778) | def create_pdf_page_from_image( function ocr_engine_textonly_pdf (line 822) | def ocr_engine_textonly_pdf( function _offset_rect (line 840) | def _offset_rect(rect: tuple[float, float, float, float], offset: tuple[... function _adjust_pagebox (line 850) | def _adjust_pagebox( function fix_pagepdf_boxes (line 867) | def fix_pagepdf_boxes( function generate_postscript_stub (line 914) | def generate_postscript_stub(context: PdfContext) -> Path: function convert_to_pdfa (line 928) | def convert_to_pdfa(input_pdf: Path, input_ps_stub: Path, context: PdfCo... function try_speculative_pdfa (line 982) | def try_speculative_pdfa(input_pdf: Path, context: PdfContext) -> Path |... function try_auto_pdfa (line 1041) | def try_auto_pdfa(input_pdf: Path, context: PdfContext) -> tuple[Path, s... function _is_safe_pdfa (line 1082) | def _is_safe_pdfa(input_pdf: Path, options) -> bool: function should_linearize (line 1105) | def should_linearize(working_file: Path, context: PdfContext) -> bool: function get_pdf_save_settings (line 1114) | def get_pdf_save_settings(output_type: str) -> dict[str, Any]: function _file_size_ratio (line 1137) | def _file_size_ratio( function optimize_pdf (line 1160) | def optimize_pdf( function enumerate_compress_ranges (line 1182) | def enumerate_compress_ranges( function merge_sidecars (line 1211) | def merge_sidecars(txt_files: Iterable[Path | None], context: PdfContext... function copy_final (line 1234) | def copy_final( FILE: src/ocrmypdf/_pipelines/_common.py function _set_logging_tls (line 74) | def _set_logging_tls(tls): function set_thread_pageno (line 90) | def set_thread_pageno(pageno: int | None): class PageResult (line 95) | class PageResult(NamedTuple): class HOCRResultEncoder (line 117) | class HOCRResultEncoder(json.JSONEncoder): method default (line 118) | def default(self, obj): class HOCRResultDecoder (line 124) | class HOCRResultDecoder(json.JSONDecoder): method __init__ (line 125) | def __init__(self, *args, **kwargs): method dict_to_object (line 129) | def dict_to_object(self, d): class HOCRResult (line 136) | class HOCRResult: method from_json (line 158) | def from_json(cls, json_str: str) -> HOCRResult: method to_json (line 162) | def to_json(self) -> str: function configure_debug_logging (line 167) | def configure_debug_logging( function worker_init (line 197) | def worker_init(max_pixels: int | None) -> None: function manage_debug_log_handler (line 207) | def manage_debug_log_handler( function _print_temp_folder_location (line 229) | def _print_temp_folder_location(work_folder: Path): function manage_work_folder (line 246) | def manage_work_folder(*, work_folder: Path, retain: bool, print_locatio... function cli_exception_handler (line 257) | def cli_exception_handler( function setup_pipeline (line 322) | def setup_pipeline( function do_get_pdfinfo (line 342) | def do_get_pdfinfo(pdf_path: Path, executor: Executor, options) -> PdfInfo: function preprocess (line 361) | def preprocess( function make_intermediate_images (line 378) | def make_intermediate_images( function process_page (line 438) | def process_page(page_context: PageContext) -> tuple[Path, Path | None, ... function postprocess (line 471) | def postprocess( function report_output_pdf (line 506) | def report_output_pdf(options, start_input_file, optimize_messages) -> E... FILE: src/ocrmypdf/_pipelines/hocr_to_ocr_pdf.py function _exec_hocrtransform_sync (line 39) | def _exec_hocrtransform_sync(page_context: PageContext) -> HOCRResult: function exec_hocr_to_ocr_pdf (line 51) | def exec_hocr_to_ocr_pdf(context: PdfContext, executor: Executor) -> Seq... function run_hocr_to_ocr_pdf_pipeline (line 106) | def run_hocr_to_ocr_pdf_pipeline( FILE: src/ocrmypdf/_pipelines/ocr.py function _image_to_ocr_text (line 58) | def _image_to_ocr_text( function _exec_page_sync (line 80) | def _exec_page_sync(page_context: PageContext) -> PageResult: function exec_concurrent (line 101) | def exec_concurrent(context: PdfContext, executor: Executor) -> Sequence... function _run_pipeline (line 164) | def _run_pipeline( function run_pipeline_cli (line 199) | def run_pipeline_cli( function run_pipeline (line 214) | def run_pipeline( FILE: src/ocrmypdf/_pipelines/pdf_to_hocr.py function _exec_page_hocr_sync (line 39) | def _exec_page_hocr_sync(page_context: PageContext) -> HOCRResult: function exec_pdf_to_hocr (line 61) | def exec_pdf_to_hocr(context: PdfContext, executor: Executor) -> None: function run_hocr_pipeline (line 86) | def run_hocr_pipeline( FILE: src/ocrmypdf/_plugin_manager.py class OcrmypdfPluginManager (line 35) | class OcrmypdfPluginManager: method __init__ (line 44) | def __init__( method pluggy (line 59) | def pluggy(self) -> pluggy.PluginManager: method __getstate__ (line 67) | def __getstate__(self): method __setstate__ (line 76) | def __setstate__(self, state): method _setup_plugins (line 84) | def _setup_plugins(self): method get_logging_console (line 119) | def get_logging_console(self) -> Handler | None: method get_executor (line 123) | def get_executor(self, *, progressbar_class: type[ProgressBar]) -> Exe... method get_progressbar_class (line 127) | def get_progressbar_class(self) -> type[ProgressBar] | None: method rasterize_pdf_page (line 131) | def rasterize_pdf_page( method filter_ocr_image (line 161) | def filter_ocr_image( method filter_page_image (line 167) | def filter_page_image( method filter_pdf_page (line 173) | def filter_pdf_page( method get_ocr_engine (line 186) | def get_ocr_engine(self, *, options: OcrOptions | None = None) -> OcrE... method generate_pdfa (line 197) | def generate_pdfa( method optimize_pdf (line 221) | def optimize_pdf( method is_optimization_enabled (line 242) | def is_optimization_enabled(self, *, context: PdfContext) -> bool | None: method initialize (line 248) | def initialize(self, *, plugin_manager: pluggy.PluginManager) -> list[... method add_options (line 257) | def add_options(self, *, parser: ArgumentParser) -> list[None]: method register_options (line 261) | def register_options(self) -> list[dict[str, type[BaseModel]]]: method check_options (line 265) | def check_options(self, *, options: OcrOptions) -> list[None]: method validate (line 269) | def validate(self, *, pdfinfo: PdfInfo, options: OcrOptions) -> list[N... function get_plugin_manager (line 274) | def get_plugin_manager( FILE: src/ocrmypdf/_plugin_registry.py class PluginOptionRegistry (line 15) | class PluginOptionRegistry: method __init__ (line 24) | def __init__(self): method register_option_model (line 27) | def register_option_model( method get_registered_models (line 48) | def get_registered_models(self) -> dict[str, type[BaseModel]]: FILE: src/ocrmypdf/_progressbar.py class ProgressBar (line 22) | class ProgressBar(Protocol): method __init__ (line 115) | def __init__( method __enter__ (line 144) | def __enter__(self): method __exit__ (line 147) | def __exit__(self, *args): method update (line 150) | def update(self, n: float = 1, *, completed: float | None = None): class NullProgressBar (line 179) | class NullProgressBar: method __init__ (line 182) | def __init__(self, **kwargs): method __enter__ (line 185) | def __enter__(self): method __exit__ (line 188) | def __exit__(self, exc_type, exc_value, traceback): method update (line 191) | def update(self, _arg=None, *, completed=None): class RichProgressBar (line 195) | class RichProgressBar: method __init__ (line 198) | def __init__( method __enter__ (line 235) | def __enter__(self): method __exit__ (line 240) | def __exit__(self, exc_type, exc_value, traceback): method update (line 245) | def update(self, n=1, *, completed=None): FILE: src/ocrmypdf/_validation.py function check_platform (line 39) | def check_platform() -> None: function check_options_languages (line 49) | def check_options_languages( function check_options_sidecar (line 86) | def check_options_sidecar(options: OcrOptions) -> None: function check_options_preprocessing (line 101) | def check_options_preprocessing(options: OcrOptions) -> None: function _check_plugin_invariant_options (line 121) | def _check_plugin_invariant_options(options: OcrOptions) -> None: function _check_plugin_options (line 127) | def _check_plugin_options( function check_options (line 146) | def check_options(options: OcrOptions, plugin_manager: OcrmypdfPluginMan... function create_input_file (line 159) | def create_input_file(options: OcrOptions, work_folder: Path) -> tuple[P... function check_requested_output_file (line 204) | def check_requested_output_file(options: OcrOptions) -> None: function report_output_file_size (line 232) | def report_output_file_size( FILE: src/ocrmypdf/_validation_coordinator.py class ValidationCoordinator (line 20) | class ValidationCoordinator: method __init__ (line 23) | def __init__(self, plugin_manager: pluggy.PluginManager): method validate_all_options (line 27) | def validate_all_options(self, options: OcrOptions) -> None: method _validate_plugin_contexts (line 44) | def _validate_plugin_contexts(self, options: OcrOptions) -> None: method _validate_tesseract_options (line 56) | def _validate_tesseract_options(self, options: OcrOptions) -> None: method _validate_optimize_options (line 79) | def _validate_optimize_options(self, options: OcrOptions) -> None: method _validate_cross_cutting_concerns (line 93) | def _validate_cross_cutting_concerns(self, options: OcrOptions) -> None: method _handle_deprecated_pdf_renderer (line 134) | def _handle_deprecated_pdf_renderer(self, options: OcrOptions) -> None: FILE: src/ocrmypdf/api.py function setup_plugin_infrastructure (line 72) | def setup_plugin_infrastructure( class Verbosity (line 133) | class Verbosity(IntEnum): function configure_logging (line 143) | def configure_logging( function _check_no_conflicting_ocr_params (line 236) | def _check_no_conflicting_ocr_params( function _remap_language_to_languages (line 286) | def _remap_language_to_languages(options_kwargs: dict) -> None: function create_options (line 314) | def create_options( function ocr (line 379) | def ocr( function ocr (line 389) | def ocr( function ocr (line 452) | def ocr( # noqa: D417 function _pdf_to_hocr (line 706) | def _pdf_to_hocr( # noqa: D417 function _hocr_to_ocr_pdf (line 845) | def _hocr_to_ocr_pdf( # noqa: D417 FILE: src/ocrmypdf/builtin_plugins/concurrency.py function log_listener (line 39) | def log_listener(q: Queue): function process_sigbus (line 63) | def process_sigbus(*args): function process_init (line 68) | def process_init(q: Queue, user_init: UserInit, loglevel) -> None: function thread_init (line 90) | def thread_init(q: Queue, user_init: UserInit, loglevel) -> None: function setup_executor (line 102) | def setup_executor(use_threads: bool) -> tuple[Queue, Executor, WorkerIn... class StandardExecutor (line 127) | class StandardExecutor(Executor): method _execute (line 130) | def _execute( function get_executor (line 190) | def get_executor(progressbar_class): function get_progressbar_class (line 199) | def get_progressbar_class(): function get_logging_console (line 209) | def get_logging_console(): FILE: src/ocrmypdf/builtin_plugins/default_filters.py function filter_pdf_page (line 11) | def filter_pdf_page(page, image_filename, output_pdf): # pylint: disabl... FILE: src/ocrmypdf/builtin_plugins/ghostscript.py class ColorConversionStrategy (line 29) | class ColorConversionStrategy(StrEnum): class PdfaImageCompression (line 39) | class PdfaImageCompression(StrEnum): class GhostscriptOptions (line 47) | class GhostscriptOptions(BaseModel): method add_arguments_to_parser (line 59) | def add_arguments_to_parser(cls, parser, namespace: str = 'ghostscript'): function register_options (line 92) | def register_options(): function add_options (line 98) | def add_options(parser): function check_options (line 104) | def check_options(options): function rasterize_pdf_page (line 161) | def rasterize_pdf_page( function _collect_dctdecode_images (line 195) | def _collect_dctdecode_images(pdf: Pdf) -> dict[tuple, list[tuple[Stream... function _repair_gs106_jpeg_corruption (line 260) | def _repair_gs106_jpeg_corruption( function generate_pdfa (line 334) | def generate_pdfa( FILE: src/ocrmypdf/builtin_plugins/null_ocr.py class NullOcrEngine (line 29) | class NullOcrEngine(OcrEngine): method version (line 37) | def version() -> str: method creator_tag (line 42) | def creator_tag(options: OcrOptions) -> str: method __str__ (line 46) | def __str__(self) -> str: method languages (line 51) | def languages(options: OcrOptions) -> set[str]: method get_orientation (line 56) | def get_orientation(input_file: Path, options: OcrOptions) -> Orientat... method get_deskew (line 61) | def get_deskew(input_file: Path, options: OcrOptions) -> float: method supports_generate_ocr (line 66) | def supports_generate_ocr() -> bool: method generate_ocr (line 71) | def generate_ocr( method generate_hocr (line 103) | def generate_hocr( method generate_pdf (line 136) | def generate_pdf( function get_ocr_engine (line 153) | def get_ocr_engine(options): FILE: src/ocrmypdf/builtin_plugins/optimize.py class OptimizeOptions (line 25) | class OptimizeOptions(BaseModel): method add_arguments_to_parser (line 48) | def add_arguments_to_parser(cls, parser, namespace: str = 'optimize'): method validate_optimization_consistency (line 129) | def validate_optimization_consistency(self): method validate_with_context (line 138) | def validate_with_context( function register_options (line 158) | def register_options(): function add_options (line 164) | def add_options(parser): function check_options (line 170) | def check_options(options): function optimize_pdf (line 207) | def optimize_pdf( function is_optimization_enabled (line 237) | def is_optimization_enabled(context: PdfContext) -> bool: FILE: src/ocrmypdf/builtin_plugins/pypdfium.py function check_options (line 36) | def check_options(options): function _open_pdf_document (line 45) | def _open_pdf_document(input_file: Path): function _calculate_mediabox_crop (line 51) | def _calculate_mediabox_crop(page) -> tuple[float, float, float, float]: function _render_page_to_bitmap (line 74) | def _render_page_to_bitmap( function _process_image_for_output (line 127) | def _process_image_for_output( function _save_image (line 215) | def _save_image(pil_image: Image.Image, output_file: Path, format_name: ... function rasterize_pdf_page (line 230) | def rasterize_pdf_page( FILE: src/ocrmypdf/builtin_plugins/tesseract_ocr.py function _thresholding_method_converter (line 29) | def _thresholding_method_converter(value: str) -> ThresholdingMethod: class TesseractOptions (line 57) | class TesseractOptions(BaseModel): method add_arguments_to_parser (line 106) | def add_arguments_to_parser(cls, parser, namespace: str = 'tesseract'): method validate_timeout_reasonable (line 250) | def validate_timeout_reasonable(cls, v): method validate_pagesegmode_warning (line 258) | def validate_pagesegmode_warning(cls, v): method validate_downsample_consistency (line 268) | def validate_downsample_consistency(self): method validate_with_context (line 277) | def validate_with_context(self, languages: list[str]) -> None: function register_options (line 295) | def register_options(): function add_options (line 301) | def add_options(parser): function check_options (line 307) | def check_options(options): function validate (line 336) | def validate(pdfinfo, options): function filter_ocr_image (line 365) | def filter_ocr_image(page: PageContext, image: Image.Image) -> Image.Image: class TesseractOcrEngine (line 385) | class TesseractOcrEngine(OcrEngine): method version (line 389) | def version(): method _determine_renderer (line 393) | def _determine_renderer(options): method creator_tag (line 400) | def creator_tag(options): method __str__ (line 412) | def __str__(self): method languages (line 416) | def languages(options): method get_orientation (line 420) | def get_orientation(input_file, options): method get_deskew (line 429) | def get_deskew(input_file, options) -> float: method generate_hocr (line 439) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 456) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 474) | def get_ocr_engine(options): FILE: src/ocrmypdf/cli.py function numeric (line 22) | def numeric(basetype: Callable[[Any], T], min_: T | None = None, max_: T... function str_to_int (line 43) | def str_to_int(mapping: Mapping[str, int]): class LanguageSetAction (line 57) | class LanguageSetAction(argparse.Action): method __init__ (line 60) | def __init__(self, option_strings, dest, default=None, **kwargs): method __call__ (line 66) | def __call__(self, parser, namespace, values, option_string=None): function get_parser (line 75) | def get_parser(): function namespace_to_options (line 494) | def namespace_to_options(ns) -> OcrOptions: function get_options_and_plugins (line 529) | def get_options_and_plugins( FILE: src/ocrmypdf/exceptions.py class ExitCode (line 12) | class ExitCode(IntEnum): class ExitCodeException (line 31) | class ExitCodeException(Exception): method __str__ (line 37) | def __str__(self): class BadArgsError (line 45) | class BadArgsError(ExitCodeException): class MissingDependencyError (line 51) | class MissingDependencyError(ExitCodeException): class UnsupportedImageFormatError (line 57) | class UnsupportedImageFormatError(ExitCodeException): class DpiError (line 63) | class DpiError(ExitCodeException): class OutputFileAccessError (line 69) | class OutputFileAccessError(ExitCodeException): class PriorOcrFoundError (line 75) | class PriorOcrFoundError(ExitCodeException): class InputFileError (line 81) | class InputFileError(ExitCodeException): class SubprocessOutputError (line 87) | class SubprocessOutputError(ExitCodeException): class EncryptedPdfError (line 93) | class EncryptedPdfError(ExitCodeException): class TesseractConfigError (line 111) | class TesseractConfigError(ExitCodeException): class DigitalSignatureError (line 118) | class DigitalSignatureError(InputFileError): class TaggedPDFError (line 129) | class TaggedPDFError(InputFileError): class ColorConversionNeededError (line 142) | class ColorConversionNeededError(BadArgsError): FILE: src/ocrmypdf/extra_plugins/semfree.py class MessageType (line 45) | class MessageType(Enum): function split_every (line 53) | def split_every(n: int, iterable: Iterable) -> Iterator: function process_sigbus (line 65) | def process_sigbus(*args): class ConnectionLogHandler (line 70) | class ConnectionLogHandler(logging.handlers.QueueHandler): method __init__ (line 73) | def __init__(self, conn: Connection) -> None: method enqueue (line 80) | def enqueue(self, record): function process_loop (line 85) | def process_loop( class LambdaExecutor (line 117) | class LambdaExecutor(Executor): method _execute (line 120) | def _execute( function get_executor (line 197) | def get_executor(progressbar_class): function get_logging_console (line 203) | def get_logging_console(): function get_progressbar_class (line 209) | def get_progressbar_class(): FILE: src/ocrmypdf/font/font_manager.py class FontManager (line 17) | class FontManager: method __init__ (line 32) | def __init__(self, font_path: Path, font_index: int = 0): method get_hb_font (line 51) | def get_hb_font(self) -> hb.Font: method has_glyph (line 59) | def has_glyph(self, codepoint: int) -> bool: method get_font_metrics (line 71) | def get_font_metrics(self) -> tuple[float, float, float]: method get_left_side_bearing (line 83) | def get_left_side_bearing(self, char: str, font_size: float) -> float: FILE: src/ocrmypdf/font/font_provider.py class FontProvider (line 17) | class FontProvider(Protocol): method get_font (line 25) | def get_font(self, font_name: str) -> FontManager | None: method get_available_fonts (line 36) | def get_available_fonts(self) -> list[str]: method get_fallback_font (line 44) | def get_fallback_font(self) -> FontManager: class BuiltinFontProvider (line 55) | class BuiltinFontProvider: method __init__ (line 67) | def __init__(self, font_dir: Path | None = None): method _load_fonts (line 80) | def _load_fonts(self) -> None: method get_font (line 110) | def get_font(self, font_name: str) -> FontManager | None: method get_available_fonts (line 114) | def get_available_fonts(self) -> list[str]: method get_fallback_font (line 118) | def get_fallback_font(self) -> FontManager: class ChainedFontProvider (line 123) | class ChainedFontProvider: method __init__ (line 131) | def __init__(self, providers: list[FontProvider]): method get_font (line 142) | def get_font(self, font_name: str) -> FontManager | None: method get_available_fonts (line 158) | def get_available_fonts(self) -> list[str]: method get_fallback_font (line 173) | def get_fallback_font(self) -> FontManager: FILE: src/ocrmypdf/font/multi_font_manager.py class MultiFontManager (line 26) | class MultiFontManager: method __init__ (line 137) | def __init__( method fonts (line 169) | def fonts(self) -> dict[str, FontManager]: method _try_font (line 173) | def _try_font( method select_font_for_word (line 194) | def select_font_for_word( method _warn_missing_font (line 240) | def _warn_missing_font(self, word_text: str, line_language: str | None... method _has_all_glyphs (line 274) | def _has_all_glyphs(self, font: FontManager, text: str) -> bool: method has_font (line 297) | def has_font(self, font_name: str) -> bool: method has_all_glyphs (line 308) | def has_all_glyphs(self, font_name: str, text: str) -> bool: method get_all_fonts (line 323) | def get_all_fonts(self) -> dict[str, FontManager]: FILE: src/ocrmypdf/font/system_font_provider.py class SystemFontProvider (line 22) | class SystemFontProvider: method __init__ (line 152) | def __init__(self) -> None: method _get_platform (line 161) | def _get_platform(self) -> str: method _get_font_dirs (line 176) | def _get_font_dirs(self) -> list[Path]: method _find_font_file (line 202) | def _find_font_file(self, font_name: str) -> Path | None: method get_font (line 235) | def get_font(self, font_name: str) -> FontManager | None: method get_available_fonts (line 275) | def get_available_fonts(self) -> list[str]: method get_fallback_font (line 287) | def get_fallback_font(self) -> FontManager: FILE: src/ocrmypdf/fpdf_renderer/renderer.py function transform_point (line 27) | def transform_point(matrix: Matrix, x: float, y: float) -> tuple[float, ... function transform_box (line 44) | def transform_box( class DebugRenderOptions (line 70) | class DebugRenderOptions: class CoordinateTransform (line 81) | class CoordinateTransform: method __init__ (line 88) | def __init__(self, dpi: float, page_width_px: float, page_height_px: f... method page_width_pt (line 95) | def page_width_pt(self) -> float: method page_height_pt (line 100) | def page_height_pt(self) -> float: method px_to_pt (line 104) | def px_to_pt(self, value: float) -> float: method bbox_to_pt (line 108) | def bbox_to_pt(self, bbox) -> tuple[float, float, float, float]: class Fpdf2PdfRenderer (line 118) | class Fpdf2PdfRenderer: method __init__ (line 125) | def __init__( method render (line 172) | def render(self, output_path: Path) -> None: method render_to_pdf (line 207) | def render_to_pdf(self, pdf: FPDF) -> None: method _register_font (line 243) | def _register_font(self, pdf: FPDF, font_manager: FontManager) -> str: method _render_paragraph (line 263) | def _render_paragraph(self, pdf: FPDF, para: OcrElement) -> None: method _render_line (line 274) | def _render_line(self, pdf: FPDF, line: OcrElement) -> None: method _check_aspect_ratio_plausible (line 487) | def _check_aspect_ratio_plausible( method _emit_line_bt_block (line 564) | def _emit_line_bt_block( method _encode_shaped_text (line 720) | def _encode_shaped_text(self, pdf: FPDF, text: str) -> str: method _is_cjk_only (line 741) | def _is_cjk_only(self, text: str) -> bool: method _render_debug_line_bbox (line 779) | def _render_debug_line_bbox( method _render_debug_baseline (line 792) | def _render_debug_baseline( method _render_debug_word_bbox (line 810) | def _render_debug_word_bbox( class Fpdf2MultiPageRenderer (line 824) | class Fpdf2MultiPageRenderer: method __init__ (line 831) | def __init__( method render (line 851) | def render(self, output_path: Path) -> None: FILE: src/ocrmypdf/helpers.py class Resolution (line 37) | class Resolution(Generic[T]): method __init__ (line 49) | def __init__(self, x: T, y: T): method round (line 58) | def round(self, ndigits: int) -> Resolution: method to_int (line 62) | def to_int(self) -> Resolution[int]: method _isclose (line 67) | def _isclose(cls, a, b): method is_square (line 71) | def is_square(self) -> bool: method is_finite (line 76) | def is_finite(self) -> bool: method to_scalar (line 80) | def to_scalar(self) -> float: method _take_minmax (line 89) | def _take_minmax( method take_max (line 101) | def take_max( method take_min (line 107) | def take_min( method flip_axis (line 113) | def flip_axis(self) -> Resolution[T]: method __getitem__ (line 117) | def __getitem__(self, idx: int | slice) -> T: method __str__ (line 121) | def __str__(self): method __repr__ (line 125) | def __repr__(self): # pragma: no cover method __eq__ (line 129) | def __eq__(self, other): function safe_symlink (line 138) | def safe_symlink(input_file: os.PathLike, soft_link_name: os.PathLike) -... function samefile (line 179) | def samefile(file1: os.PathLike, file2: os.PathLike) -> bool: function is_iterable_notstr (line 190) | def is_iterable_notstr(thing: Any) -> bool: function monotonic (line 195) | def monotonic(seq: Sequence) -> bool: function page_number (line 200) | def page_number(input_file: os.PathLike) -> int: function available_cpu_count (line 205) | def available_cpu_count() -> int: function is_file_writable (line 217) | def is_file_writable(test_file: os.PathLike) -> bool: function check_pdf (line 252) | def check_pdf(input_file: Path) -> bool: function clamp (line 298) | def clamp(n: T, smallest: T, largest: T) -> T: function remove_all_log_handlers (line 303) | def remove_all_log_handlers(logger: logging.Logger) -> None: function pikepdf_enable_mmap (line 316) | def pikepdf_enable_mmap() -> None: function running_in_docker (line 332) | def running_in_docker() -> bool: function running_in_snap (line 337) | def running_in_snap() -> bool: FILE: src/ocrmypdf/hocrtransform/hocr_parser.py class HocrParseError (line 40) | class HocrParseError(Exception): class HocrParser (line 44) | class HocrParser: method __init__ (line 124) | def __init__(self, hocr_file: str | Path): method parse (line 144) | def parse(self) -> OcrElement: method _xpath (line 160) | def _xpath(self, html_tag: str, html_class: str | None = None) -> str: method _parse_page (line 175) | def _parse_page(self, page_elem: Element) -> OcrElement: method _parse_paragraph (line 217) | def _parse_paragraph(self, par_elem: Element) -> OcrElement | None: method _parse_line (line 265) | def _parse_line( method _parse_word (line 322) | def _parse_word(self, word_elem: Element) -> OcrElement | None: method _get_element_text (line 357) | def _get_element_text(self, element: Element) -> str: method _normalize_text (line 373) | def _normalize_text(text: str) -> str: method _parse_bbox (line 386) | def _parse_bbox(self, title: str) -> BoundingBox | None: method _parse_baseline (line 409) | def _parse_baseline(self, title: str) -> Baseline | None: method _parse_textangle (line 430) | def _parse_textangle(self, title: str) -> float | None: method _parse_x_wconf (line 448) | def _parse_x_wconf(self, title: str) -> float | None: method _parse_ppageno (line 466) | def _parse_ppageno(self, title: str) -> int | None: method _parse_scan_res (line 484) | def _parse_scan_res(self, title: str) -> float | None: method _parse_font_info (line 503) | def _parse_font_info(self, title: str) -> FontInfo | None: FILE: src/ocrmypdf/imageops.py function bytes_per_pixel (line 16) | def bytes_per_pixel(mode: str) -> int: function _calculate_downsample (line 29) | def _calculate_downsample( function calculate_downsample (line 89) | def calculate_downsample( function downsample_image (line 117) | def downsample_image( FILE: src/ocrmypdf/languages.py class ISOCodeData (line 14) | class ISOCodeData(NamedTuple): function iso_639_2_from_3 (line 847) | def iso_639_2_from_3(iso3: str) -> str: FILE: src/ocrmypdf/models/ocr_element.py class BoundingBox (line 18) | class BoundingBox: method width (line 36) | def width(self) -> float: method height (line 41) | def height(self) -> float: method __post_init__ (line 45) | def __post_init__(self): class Baseline (line 58) | class Baseline: class FontInfo (line 79) | class FontInfo: class OcrElement (line 104) | class OcrElement: method iter_by_class (line 176) | def iter_by_class(self, *ocr_classes: str) -> list[OcrElement]: method find_by_class (line 192) | def find_by_class(self, *ocr_classes: str) -> OcrElement | None: method get_text_recursive (line 209) | def get_text_recursive(self) -> str: method words (line 221) | def words(self) -> list[OcrElement]: method lines (line 226) | def lines(self) -> list[OcrElement]: method paragraphs (line 233) | def paragraphs(self) -> list[OcrElement]: class OcrClass (line 243) | class OcrClass: FILE: src/ocrmypdf/optimize.py class XrefExt (line 52) | class XrefExt(NamedTuple): function img_name (line 59) | def img_name(root: Path, xref: Xref, ext: str) -> Path: function png_name (line 64) | def png_name(root: Path, xref: Xref) -> Path: function jpg_name (line 69) | def jpg_name(root: Path, xref: Xref) -> Path: function extract_image_filter (line 74) | def extract_image_filter( function extract_image_jbig2 (line 142) | def extract_image_jbig2( function _should_optimize_jpeg (line 194) | def _should_optimize_jpeg(options, filtdp): function extract_image_generic (line 202) | def extract_image_generic( function _find_image_xrefs_container (line 255) | def _find_image_xrefs_container( function _find_image_xrefs (line 302) | def _find_image_xrefs(pdf: Pdf): function extract_images (line 316) | def extract_images( function extract_images_generic (line 355) | def extract_images_generic( function extract_images_jbig2 (line 371) | def extract_images_jbig2(pdf: Pdf, root: Path, options) -> list[XrefExt]: function _produce_jbig2_images (line 381) | def _produce_jbig2_images( function convert_to_jbig2 (line 410) | def convert_to_jbig2( function _optimize_jpeg (line 432) | def _optimize_jpeg( function transcode_jpegs (line 448) | def transcode_jpegs( function _already_flate_encoded (line 482) | def _already_flate_encoded(image: Stream) -> bool: function _find_deflatable_jpeg (line 492) | def _find_deflatable_jpeg( function _deflate_jpeg (line 522) | def _deflate_jpeg( function deflate_jpegs (line 537) | def deflate_jpegs(pdf: Pdf, root: Path, options, executor: Executor) -> ... function _transcode_png (line 582) | def _transcode_png(pdf: Pdf, filename: Path, xref: Xref) -> bool: function transcode_pngs (line 623) | def transcode_pngs( function optimize (line 671) | def optimize( function main (line 732) | def main(infile, outfile, level, jobs=1): FILE: src/ocrmypdf/pdfa.py function _postscript_objdef (line 22) | def _postscript_objdef( function _make_postscript (line 51) | def _make_postscript(icc_name: str, icc_data: bytes, colors: int) -> Ite... function generate_pdfa_ps (line 75) | def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'): function file_claims_pdfa (line 113) | def file_claims_pdfa(filename: Path): function _load_srgb_icc_profile (line 140) | def _load_srgb_icc_profile() -> bytes: function _pdfa_part_conformance (line 145) | def _pdfa_part_conformance(output_type: str) -> tuple[str, str]: function add_pdfa_metadata (line 163) | def add_pdfa_metadata(pdf: Pdf, part: str, conformance: str) -> None: function add_srgb_output_intent (line 176) | def add_srgb_output_intent(pdf: Pdf) -> None: function speculative_pdfa_conversion (line 214) | def speculative_pdfa_conversion( FILE: src/ocrmypdf/pdfinfo/_contentstream.py class XobjectSettings (line 21) | class XobjectSettings(NamedTuple): class InlineSettings (line 29) | class InlineSettings(NamedTuple): class ContentsInfo (line 37) | class ContentsInfo(NamedTuple): class TextboxInfo (line 47) | class TextboxInfo(NamedTuple): class VectorMarker (line 55) | class VectorMarker: class TextMarker (line 59) | class TextMarker: function _is_unit_square (line 63) | def _is_unit_square(shorthand): function _normalize_stack (line 70) | def _normalize_stack(graphobjs): function _interpret_contents (line 81) | def _interpret_contents(contentstream: Object, initial_shorthand=UNIT_SQ... function _get_dpi (line 173) | def _get_dpi(ctm_shorthand, image_size) -> Resolution: FILE: src/ocrmypdf/pdfinfo/_image.py class ImageInfo (line 44) | class ImageInfo: method __init__ (line 57) | def __init__( method _init_icc (line 142) | def _init_icc(self, pim: PdfImage): method name (line 169) | def name(self): method type_ (line 174) | def type_(self): method width (line 179) | def width(self) -> int: method height (line 184) | def height(self) -> int: method bpc (line 189) | def bpc(self): method color (line 194) | def color(self): method comp (line 199) | def comp(self): method enc (line 204) | def enc(self): method renderable (line 209) | def renderable(self) -> bool: method dpi (line 226) | def dpi(self) -> Resolution: method printed_area (line 234) | def printed_area(self) -> float: method __repr__ (line 240) | def __repr__(self): function _find_inline_images (line 248) | def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]: function _image_xobjects (line 256) | def _image_xobjects(container) -> Iterator[tuple[Object, str]]: function _find_regular_images (line 281) | def _find_regular_images( function _find_form_xobject_images (line 306) | def _find_form_xobject_images(pdf: Pdf, container: Object, contentsinfo:... function _process_content_streams (line 338) | def _process_content_streams( FILE: src/ocrmypdf/pdfinfo/_types.py class Colorspace (line 10) | class Colorspace(Enum): class Encoding (line 26) | class Encoding(Enum): FILE: src/ocrmypdf/pdfinfo/_worker.py function _pdf_pageinfo_sync_init (line 31) | def _pdf_pageinfo_sync_init(pdf: Pdf, infile: Path, pdfminer_loglevel): function _pdf_pageinfo_sync_pdf (line 49) | def _pdf_pageinfo_sync_pdf(thread_pdf: Pdf | None, infile: Path): function _pdf_pageinfo_sync (line 59) | def _pdf_pageinfo_sync( function _pdf_pageinfo_concurrent (line 77) | def _pdf_pageinfo_concurrent( FILE: src/ocrmypdf/pdfinfo/info.py function _page_has_text (line 37) | def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_he... function simplify_textboxes (line 66) | def simplify_textboxes( class PageResolutionProfile (line 83) | class PageResolutionProfile(NamedTuple): class PageInfo (line 109) | class PageInfo: method __init__ (line 116) | def __init__( method _gather_pageinfo (line 133) | def _gather_pageinfo( method pageno (line 210) | def pageno(self) -> int: method has_text (line 215) | def has_text(self) -> bool: method has_corrupt_text (line 220) | def has_corrupt_text(self) -> bool: method has_vector (line 227) | def has_vector(self) -> bool: method width_inches (line 236) | def width_inches(self) -> Decimal: method height_inches (line 241) | def height_inches(self) -> Decimal: method width_pixels (line 246) | def width_pixels(self) -> int: method height_pixels (line 251) | def height_pixels(self) -> int: method rotation (line 256) | def rotation(self) -> int: method rotation (line 264) | def rotation(self, value): method cropbox (line 271) | def cropbox(self) -> FloatRect: method mediabox (line 276) | def mediabox(self) -> FloatRect: method trimbox (line 281) | def trimbox(self) -> FloatRect: method artbox (line 286) | def artbox(self) -> FloatRect: method bleedbox (line 291) | def bleedbox(self) -> FloatRect: method images (line 296) | def images(self) -> list[ImageInfo]: method get_textareas (line 300) | def get_textareas(self, visible: bool | None = None, corrupt: bool | N... method dpi (line 321) | def dpi(self) -> Resolution: method userunit (line 328) | def userunit(self) -> Decimal: method min_version (line 333) | def min_version(self) -> str: method page_dpi_profile (line 340) | def page_dpi_profile(self) -> PageResolutionProfile | None: method __repr__ (line 379) | def __repr__(self): class PdfInfo (line 391) | class PdfInfo: method __init__ (line 402) | def __init__( method pages (line 454) | def pages(self) -> list[PageInfo | None]: method min_version (line 459) | def min_version(self) -> str: method has_userunit (line 465) | def has_userunit(self) -> bool: method has_acroform (line 470) | def has_acroform(self) -> bool: method has_signature (line 475) | def has_signature(self) -> bool: method is_tagged (line 480) | def is_tagged(self) -> bool: method filename (line 485) | def filename(self) -> str | Path: method needs_rendering (line 492) | def needs_rendering(self) -> bool: method __getitem__ (line 500) | def __getitem__(self, item) -> PageInfo: method __len__ (line 504) | def __len__(self): method __repr__ (line 508) | def __repr__(self): function main (line 513) | def main(): # pragma: no cover FILE: src/ocrmypdf/pdfinfo/layout.py function pdfsimplefont__init__ (line 40) | def pdfsimplefont__init__( function pdftype3font__pscript5_get_height (line 69) | def pdftype3font__pscript5_get_height(self): function pdftype3font__pscript5_get_descent (line 83) | def pdftype3font__pscript5_get_descent(self): function pdftype3font__pscript5_get_ascent (line 93) | def pdftype3font__pscript5_get_ascent(self): function _is_undefined_char (line 103) | def _is_undefined_char(s: str) -> bool: class LTStateAwareChar (line 108) | class LTStateAwareChar(LTChar): method __init__ (line 128) | def __init__( method is_compatible (line 157) | def is_compatible(self, obj: object) -> bool: method get_text (line 174) | def get_text(self) -> str: method __repr__ (line 180) | def __repr__(self) -> str: class TextPositionTracker (line 193) | class TextPositionTracker(PDFLayoutAnalyzer): method __init__ (line 198) | def __init__( method begin_page (line 208) | def begin_page(self, page: PDFPage, ctm: Matrix) -> None: method end_page (line 213) | def end_page(self, page: PDFPage) -> None: method render_string (line 222) | def render_string( method render_char (line 233) | def render_char( method receive_layout (line 268) | def receive_layout(self, ltpage: LTPage) -> None: method get_result (line 272) | def get_result(self) -> LTPage | None: function patch_pdfminer (line 278) | def patch_pdfminer(pscript5_mode: bool): function get_page_analysis (line 294) | def get_page_analysis( class PdfMinerState (line 324) | class PdfMinerState: method __init__ (line 331) | def __init__(self, infile: Path, pscript5_mode: bool) -> None: method __enter__ (line 346) | def __enter__(self): method __exit__ (line 352) | def __exit__(self, exc_type, exc_value, traceback): method get_page_analysis (line 358) | def get_page_analysis(self, pageno: int): function get_text_boxes (line 386) | def get_text_boxes(obj) -> Iterator[LTTextBox]: FILE: src/ocrmypdf/pluginspec.py class GhostscriptRasterDevice (line 35) | class GhostscriptRasterDevice(StrEnum): function get_logging_console (line 53) | def get_logging_console() -> Handler: # type: ignore[return-value] function initialize (line 65) | def initialize(plugin_manager: pluggy.PluginManager) -> None: function add_options (line 92) | def add_options(parser: ArgumentParser) -> None: function register_options (line 106) | def register_options() -> dict[str, type[BaseModel]]: function check_options (line 128) | def check_options(options: OcrOptions) -> None: function get_executor (line 151) | def get_executor(progressbar_class: type[ProgressBar]) -> Executor: # t... function get_progressbar_class (line 179) | def get_progressbar_class() -> type[ProgressBar]: # type: ignore[return... function validate (line 199) | def validate(pdfinfo: PdfInfo, options: OcrOptions) -> None: function rasterize_pdf_page (line 220) | def rasterize_pdf_page( function filter_ocr_image (line 275) | def filter_ocr_image(page: PageContext, image: Image.Image) -> Image.Ima... function filter_page_image (line 310) | def filter_page_image(page: PageContext, image_filename: Path) -> Path: ... function filter_pdf_page (line 347) | def filter_pdf_page(page: PageContext, image_filename: Path, output_pdf:... class OrientationConfidence (line 389) | class OrientationConfidence(NamedTuple): class OcrEngine (line 403) | class OcrEngine(ABC): method version (line 412) | def version() -> str: method creator_tag (line 417) | def creator_tag(options: OcrOptions) -> str: method __str__ (line 429) | def __str__(self) -> str: method languages (line 438) | def languages(options: OcrOptions) -> Set[str]: method get_orientation (line 447) | def get_orientation(input_file: Path, options: OcrOptions) -> Orientat... method get_deskew (line 451) | def get_deskew(input_file: Path, options: OcrOptions) -> float: method generate_hocr (line 457) | def generate_hocr( method generate_pdf (line 480) | def generate_pdf( method supports_generate_ocr (line 502) | def supports_generate_ocr() -> bool: method generate_ocr (line 515) | def generate_ocr( function get_ocr_engine (line 547) | def get_ocr_engine(options: OcrOptions | None) -> OcrEngine: # type: ig... function generate_pdfa (line 568) | def generate_pdfa( function optimize_pdf (line 618) | def optimize_pdf( function is_optimization_enabled (line 661) | def is_optimization_enabled(context: PdfContext) -> bool: # type: ignor... FILE: src/ocrmypdf/quality.py class OcrQualityDictionary (line 12) | class OcrQualityDictionary: method __init__ (line 15) | def __init__(self, *, wordlist: Iterable[str]): method measure_words_matched (line 23) | def measure_words_matched(self, ocr_text: str) -> float: FILE: src/ocrmypdf/subprocess/__init__.py function run (line 29) | def run( function run_polling_stderr (line 79) | def run_polling_stderr( function _fix_process_args (line 118) | def _fix_process_args( function get_version (line 140) | def get_version( function _get_platform (line 247) | def _get_platform() -> str: function _error_trailer (line 257) | def _error_trailer(program: str, package: str | Mapping[str, str], **kwa... function _error_missing_program (line 270) | def _error_missing_program( function _error_old_version (line 283) | def _error_old_version( function check_external_program (line 298) | def check_external_program( FILE: src/ocrmypdf/subprocess/_windows.py function ghostscript_version_key (line 40) | def ghostscript_version_key(s: str) -> tuple[int, int, int]: function registry_enum (line 51) | def registry_enum(key: HKEYType, enum_fn: Callable[[HKEYType, int], T]) ... function registry_subkeys (line 64) | def registry_subkeys(key: HKEYType) -> Iterator[str]: function registry_values (line 68) | def registry_values(key: HKEYType) -> Iterator[tuple[str, Any, int]]: function registry_path_ghostscript (line 72) | def registry_path_ghostscript(env=None) -> Iterator[Path]: function registry_path_tesseract (line 90) | def registry_path_tesseract(env=None) -> Iterator[Path]: function _gs_version_in_path_key (line 102) | def _gs_version_in_path_key(path: Path) -> tuple[str, Version | None]: function program_files_paths (line 126) | def program_files_paths(env=None) -> Iterator[Path]: function paths_from_env (line 149) | def paths_from_env(env=None) -> Iterator[Path]: function shim_path (line 153) | def shim_path(new_paths: Callable[[Any], Iterator[Path]], env=None) -> str: function fix_windows_args (line 167) | def fix_windows_args(program: str, args, env): function unique_everseen (line 185) | def unique_everseen(iterable: Iterable[T], key: Callable[[T], Tkey]) -> ... function _casefold_path (line 198) | def _casefold_path(path: Path) -> str: function shim_env_path (line 202) | def shim_env_path(env=None): FILE: tests/conftest.py class Gs106WarningFilter (line 21) | class Gs106WarningFilter(logging.Filter): method filter (line 24) | def filter(self, record: logging.LogRecord) -> bool: function suppress_gs106_warning (line 33) | def suppress_gs106_warning(): function is_linux (line 43) | def is_linux(): function is_macos (line 47) | def is_macos(): function have_unpaper (line 51) | def have_unpaper(): function resources (line 64) | def resources() -> Path: function ocrmypdf_exec (line 69) | def ocrmypdf_exec() -> list[str]: function outdir (line 74) | def outdir(tmp_path) -> Path: function outpdf (line 79) | def outpdf(tmp_path) -> Path: function outtxt (line 84) | def outtxt(tmp_path) -> Path: function no_outpdf (line 89) | def no_outpdf(tmp_path) -> Path: function multipage (line 101) | def multipage(resources): function check_ocrmypdf (line 105) | def check_ocrmypdf(input_file: Path, output_file: Path, *args) -> Path: function run_ocrmypdf_api (line 122) | def run_ocrmypdf_api(input_file: Path, output_file: Path, *args) -> Exit... function run_ocrmypdf (line 141) | def run_ocrmypdf( function first_page_dimensions (line 165) | def first_page_dimensions(pdf: Path): function pytest_addoption (line 171) | def pytest_addoption(parser): function pytest_collection_modifyitems (line 183) | def pytest_collection_modifyitems(config, items): function get_test_plugin_manager (line 193) | def get_test_plugin_manager(plugins=None): FILE: tests/plugins/gs_feature_elision.py function run_append_stderr (line 16) | def run_append_stderr(*args, **kwargs): function generate_pdfa (line 23) | def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version,... FILE: tests/plugins/gs_pdfa_failure.py function run_rig_args (line 13) | def run_rig_args(args, **kwargs): function generate_pdfa (line 25) | def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version,... FILE: tests/plugins/gs_raster_failure.py function raise_gs_fail (line 14) | def raise_gs_fail(*args, **kwargs): function rasterize_pdf_page (line 21) | def rasterize_pdf_page( FILE: tests/plugins/gs_raster_soft_error.py function fail_if_stoponerror (line 15) | def fail_if_stoponerror(args, **kwargs): function rasterize_pdf_page (line 22) | def rasterize_pdf_page( FILE: tests/plugins/gs_render_failure.py function raise_gs_fail (line 13) | def raise_gs_fail(*args, **kwargs): function generate_pdfa (line 20) | def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version,... FILE: tests/plugins/gs_render_soft_error.py function fail_if_stoponerror (line 14) | def fail_if_stoponerror(args, **kwargs): function generate_pdfa (line 21) | def generate_pdfa( FILE: tests/plugins/tesseract_badutf8.py function bad_utf8 (line 21) | def bad_utf8(*args, **kwargs): function patch_tesseract_run (line 31) | def patch_tesseract_run(): class BadUtf8OcrEngine (line 38) | class BadUtf8OcrEngine(TesseractOcrEngine): method generate_hocr (line 40) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 47) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 55) | def get_ocr_engine(): FILE: tests/plugins/tesseract_big_image_error.py function raise_size_exception (line 13) | def raise_size_exception(*args, **kwargs): function patch_tesseract_run (line 23) | def patch_tesseract_run(): class BigImageErrorOcrEngine (line 30) | class BigImageErrorOcrEngine(TesseractOcrEngine): method get_orientation (line 32) | def get_orientation(input_file, options): method generate_hocr (line 37) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 44) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 52) | def get_ocr_engine(): FILE: tests/plugins/tesseract_cache.py function get_cache_folder (line 66) | def get_cache_folder(source_pdf, run_args, parsed_args): function cached_run (line 85) | def cached_run(options, run_args, **run_kwargs): class CacheOcrEngine (line 178) | class CacheOcrEngine(TesseractOcrEngine): method get_orientation (line 186) | def get_orientation(input_file, options): method get_deskew (line 194) | def get_deskew(input_file, options) -> float: method generate_hocr (line 202) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 212) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 223) | def get_ocr_engine(): FILE: tests/plugins/tesseract_crash.py function raise_crash (line 14) | def raise_crash(*args, **kwargs): function patch_tesseract_run (line 25) | def patch_tesseract_run(): class CrashOcrEngine (line 32) | class CrashOcrEngine(TesseractOcrEngine): method get_orientation (line 34) | def get_orientation(input_file, options): method generate_hocr (line 39) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 46) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 54) | def get_ocr_engine(): FILE: tests/plugins/tesseract_debug_rotate.py class FixedRotateNoopOcrEngine (line 48) | class FixedRotateNoopOcrEngine(OcrEngine): method version (line 50) | def version(): method creator_tag (line 54) | def creator_tag(options): method __str__ (line 58) | def __str__(self): method languages (line 62) | def languages(options): method get_orientation (line 66) | def get_orientation(input_file, options): method generate_hocr (line 74) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 85) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 97) | def get_ocr_engine(): FILE: tests/plugins/tesseract_noop.py class NoopOcrEngine (line 46) | class NoopOcrEngine(OcrEngine): method version (line 48) | def version(): method creator_tag (line 52) | def creator_tag(options): method __str__ (line 56) | def __str__(self): method languages (line 60) | def languages(options): method get_orientation (line 64) | def get_orientation(input_file, options): method get_deskew (line 68) | def get_deskew(input_file, options): method generate_hocr (line 72) | def generate_hocr(input_file, output_hocr, output_text, options): method generate_pdf (line 83) | def generate_pdf(input_file, output_pdf, output_text, options): function get_ocr_engine (line 95) | def get_ocr_engine(): FILE: tests/plugins/tesseract_simulate_oom_killer.py class Page4Engine (line 34) | class Page4Engine(NoopOcrEngine): # type: ignore method __str__ (line 35) | def __str__(self): method generate_hocr (line 39) | def generate_hocr(input_file: Path, output_hocr, output_text, options): method generate_pdf (line 49) | def generate_pdf(input_file, output_pdf, output_text, options): function check_options (line 60) | def check_options(options): function get_ocr_engine (line 66) | def get_ocr_engine(): FILE: tests/test_acroform.py function acroform (line 19) | def acroform(resources): function test_acroform_and_redo (line 23) | def test_acroform_and_redo(acroform, no_outpdf): function test_acroform_message (line 31) | def test_acroform_message(acroform, caplog, outpdf): function digitally_signed (line 39) | def digitally_signed(acroform, outdir): function test_digital_signature (line 47) | def test_digital_signature(digitally_signed, no_outpdf): function test_digital_signature_invalidate (line 52) | def test_digital_signature_invalidate(digitally_signed, no_outpdf): FILE: tests/test_annots.py function test_remove_broken_goto_annotations (line 11) | def test_remove_broken_goto_annotations(resources): FILE: tests/test_api.py function test_language_list (line 18) | def test_language_list(): function test_language_parameter_mapped_to_languages (line 25) | def test_language_parameter_mapped_to_languages(): function test_stream_api (line 83) | def test_stream_api(resources: Path): function test_sidecar_stringio (line 92) | def test_sidecar_stringio(resources: Path, outdir: Path, outpdf: Path): function test_hocr_api_multipage (line 104) | def test_hocr_api_multipage(resources: Path, outdir: Path, outpdf: Path): function test_hocr_to_pdf_api (line 120) | def test_hocr_to_pdf_api(resources: Path, outdir: Path, outpdf: Path): function test_hocr_result_json (line 139) | def test_hocr_result_json(): function test_hocr_result_pickle (line 155) | def test_hocr_result_pickle(): function test_nested_plugin_option_access (line 166) | def test_nested_plugin_option_access(): function test_default_tesseract_timeout (line 203) | def test_default_tesseract_timeout(): FILE: tests/test_check_pdf.py function test_pdf_error (line 9) | def test_pdf_error(resources): FILE: tests/test_completion.py function test_fish (line 19) | def test_fish(): function test_bash (line 35) | def test_bash(): FILE: tests/test_concurrency.py function test_simulate_oom_killer (line 20) | def test_simulate_oom_killer(multipage, no_outpdf): FILE: tests/test_fpdf_renderer.py function font_dir (line 23) | def font_dir(): function multi_font_manager (line 29) | def multi_font_manager(font_dir): function resources (line 35) | def resources(): class TestFpdf2RendererImports (line 40) | class TestFpdf2RendererImports: method test_imports (line 43) | def test_imports(self): class TestDebugRenderOptions (line 56) | class TestDebugRenderOptions: method test_defaults (line 59) | def test_defaults(self): method test_custom_values (line 66) | def test_custom_values(self): class TestFpdf2PdfRenderer (line 78) | class TestFpdf2PdfRenderer: method test_requires_page_element (line 81) | def test_requires_page_element(self, multi_font_manager): method test_requires_bbox (line 99) | def test_requires_bbox(self, multi_font_manager): method test_render_simple_page (line 112) | def test_render_simple_page(self, multi_font_manager, tmp_path): method test_render_invisible_text (line 146) | def test_render_invisible_text(self, multi_font_manager, tmp_path): class TestFpdf2MultiPageRenderer (line 180) | class TestFpdf2MultiPageRenderer: method test_requires_pages (line 183) | def test_requires_pages(self, multi_font_manager): method test_render_multiple_pages (line 192) | def test_render_multiple_pages(self, multi_font_manager, tmp_path): class TestFpdf2RendererWithHocr (line 228) | class TestFpdf2RendererWithHocr: method test_render_latin_hocr (line 231) | def test_render_latin_hocr(self, resources, multi_font_manager, tmp_pa... method test_render_cjk_hocr (line 257) | def test_render_cjk_hocr(self, resources, multi_font_manager, tmp_path): method test_render_arabic_hocr (line 279) | def test_render_arabic_hocr(self, resources, multi_font_manager, tmp_p... method test_render_hello_world_scripts_hocr (line 301) | def test_render_hello_world_scripts_hocr( method test_render_hello_world_scripts_multipage (line 345) | def test_render_hello_world_scripts_multipage( class TestWordSegmentation (line 376) | class TestWordSegmentation: method test_word_segmentation_with_pdfminer (line 379) | def test_word_segmentation_with_pdfminer(self, multi_font_manager, tmp... method test_cjk_no_spurious_spaces (line 446) | def test_cjk_no_spurious_spaces(self, multi_font_manager, tmp_path): method test_latin_hocr_word_segmentation (line 504) | def test_latin_hocr_word_segmentation( FILE: tests/test_ghostscript.py function francais (line 31) | def francais(resources): function test_rasterize_size (line 36) | def test_rasterize_size(francais, outdir): function test_rasterize_rotated (line 59) | def test_rasterize_rotated(francais, outdir, caplog): function test_rasterize_low_dpi (line 84) | def test_rasterize_low_dpi(francais, outdir): function test_rasterize_low_dpi_one_axis (line 114) | def test_rasterize_low_dpi_one_axis(francais, outdir): function test_gs_render_failure (line 140) | def test_gs_render_failure(resources, outpdf, caplog): function test_gs_raster_failure (line 155) | def test_gs_raster_failure(resources, outpdf, caplog): function test_ghostscript_pdfa_failure (line 168) | def test_ghostscript_pdfa_failure(resources, outpdf, caplog): function test_ghostscript_feature_elision (line 184) | def test_ghostscript_feature_elision(resources, outpdf): function test_ghostscript_mandatory_color_conversion (line 195) | def test_ghostscript_mandatory_color_conversion(resources, outpdf): function test_rasterize_pdf_errors (line 207) | def test_rasterize_pdf_errors(resources, no_outpdf, caplog): class TestDuplicateFilter (line 224) | class TestDuplicateFilter: method duplicate_filter_logger (line 226) | def duplicate_filter_logger(self): method test_filter_duplicate_messages (line 237) | def test_filter_duplicate_messages(self, duplicate_filter_logger, capl... method test_filter_does_not_affect_unique_messages (line 253) | def test_filter_does_not_affect_unique_messages( method test_filter_alt_messages (line 270) | def test_filter_alt_messages(self, duplicate_filter_logger, caplog): function pdf_with_invalid_image (line 289) | def pdf_with_invalid_image(outdir): function test_recoverable_image_error (line 321) | def test_recoverable_image_error(pdf_with_invalid_image, outdir, caplog): function test_recoverable_image_error_with_stop (line 338) | def test_recoverable_image_error_with_stop(pdf_with_invalid_image, outdi... class TestGs106JpegCorruptionRepair (line 356) | class TestGs106JpegCorruptionRepair: method create_damaged_pdf (line 360) | def create_damaged_pdf(self, resources, outdir): method test_repair_truncated_jpeg (line 394) | def test_repair_truncated_jpeg(self, create_damaged_pdf, caplog): method test_no_repair_when_not_truncated (line 448) | def test_no_repair_when_not_truncated(self, resources, outdir, caplog): method test_no_repair_when_truncation_too_large (line 463) | def test_no_repair_when_truncation_too_large(self, create_damaged_pdf,... FILE: tests/test_graft.py function test_no_glyphless_graft (line 13) | def test_no_glyphless_graft(resources, outdir): function test_links (line 34) | def test_links(resources, outpdf): function test_redo_ocr_with_offset_mediabox (line 45) | def test_redo_ocr_with_offset_mediabox(resources, outdir): function test_strip_invisble_text (line 93) | def test_strip_invisble_text(): FILE: tests/test_helpers.py class TestSafeSymlink (line 22) | class TestSafeSymlink: method test_safe_symlink_link_self (line 23) | def test_safe_symlink_link_self(self, tmp_path, caplog): method test_safe_symlink_overwrite (line 27) | def test_safe_symlink_overwrite(self, tmp_path): method test_safe_symlink_relink (line 33) | def test_safe_symlink_relink(self, tmp_path): function test_no_cpu_count (line 43) | def test_no_cpu_count(monkeypatch): class TestFileIsWritable (line 60) | class TestFileIsWritable: method non_existent (line 62) | def non_existent(self, tmp_path): method basic_file (line 66) | def basic_file(self, tmp_path): method test_plain (line 71) | def test_plain(self, non_existent): method test_symlink_loop (line 75) | def test_symlink_loop(self, tmp_path): method test_chmod (line 81) | def test_chmod(self, basic_file): method test_permission_error (line 88) | def test_permission_error(self, basic_file): function test_gs_install_locations (line 97) | def test_gs_install_locations(): function test_shim_paths (line 108) | def test_shim_paths(tmp_path): function test_resolution (line 128) | def test_resolution(): FILE: tests/test_hocr_parser.py function simple_hocr (line 21) | def simple_hocr(tmp_path) -> Path: function multiline_hocr (line 49) | def multiline_hocr(tmp_path) -> Path: function rtl_hocr (line 82) | def rtl_hocr(tmp_path) -> Path: function rotated_hocr (line 104) | def rotated_hocr(tmp_path) -> Path: function header_hocr (line 126) | def header_hocr(tmp_path) -> Path: function font_info_hocr (line 157) | def font_info_hocr(tmp_path) -> Path: class TestHocrParserBasic (line 178) | class TestHocrParserBasic: method test_parse_simple_hocr (line 181) | def test_parse_simple_hocr(self, simple_hocr): method test_parse_page_number (line 190) | def test_parse_page_number(self, simple_hocr): method test_parse_paragraphs (line 196) | def test_parse_paragraphs(self, simple_hocr): method test_parse_lines (line 206) | def test_parse_lines(self, simple_hocr): method test_parse_words (line 219) | def test_parse_words(self, simple_hocr): method test_parse_word_confidence (line 228) | def test_parse_word_confidence(self, simple_hocr): method test_parse_word_bbox (line 236) | def test_parse_word_bbox(self, simple_hocr): class TestHocrParserMultiline (line 248) | class TestHocrParserMultiline: method test_multiple_lines (line 251) | def test_multiple_lines(self, multiline_hocr): method test_multiple_paragraphs_languages (line 258) | def test_multiple_paragraphs_languages(self, multiline_hocr): method test_word_count (line 266) | def test_word_count(self, multiline_hocr): class TestHocrParserRTL (line 273) | class TestHocrParserRTL: method test_rtl_direction (line 276) | def test_rtl_direction(self, rtl_hocr): method test_rtl_line_inherits_direction (line 284) | def test_rtl_line_inherits_direction(self, rtl_hocr): class TestHocrParserRotation (line 292) | class TestHocrParserRotation: method test_textangle (line 295) | def test_textangle(self, rotated_hocr): class TestHocrParserLineTypes (line 303) | class TestHocrParserLineTypes: method test_header_line (line 306) | def test_header_line(self, header_hocr): method test_all_line_types_have_words (line 319) | def test_all_line_types_have_words(self, header_hocr): class TestHocrParserFontInfo (line 327) | class TestHocrParserFontInfo: method test_font_name_and_size (line 330) | def test_font_name_and_size(self, font_info_hocr): class TestHocrParserErrors (line 340) | class TestHocrParserErrors: method test_missing_file (line 343) | def test_missing_file(self, tmp_path): method test_invalid_xml (line 347) | def test_invalid_xml(self, tmp_path): method test_missing_ocr_page (line 354) | def test_missing_ocr_page(self, tmp_path): method test_missing_page_bbox (line 364) | def test_missing_page_bbox(self, tmp_path): class TestHocrParserEdgeCases (line 376) | class TestHocrParserEdgeCases: method test_empty_word_text (line 379) | def test_empty_word_text(self, tmp_path): method test_whitespace_only_word (line 406) | def test_whitespace_only_word(self, tmp_path): method test_line_without_bbox (line 432) | def test_line_without_bbox(self, tmp_path): method test_unicode_normalization (line 461) | def test_unicode_normalization(self, tmp_path): method test_words_directly_under_page (line 487) | def test_words_directly_under_page(self, tmp_path): method test_no_namespace (line 511) | def test_no_namespace(self, tmp_path): FILE: tests/test_hocrtransform.py function text_from_pdf (line 28) | def text_from_pdf(filename): function font_dir (line 45) | def font_dir(): function multi_font_manager (line 51) | def multi_font_manager(font_dir): function blank_hocr (line 57) | def blank_hocr(tmp_path): function test_mono_image (line 76) | def test_mono_image(blank_hocr, outdir, multi_font_manager): function test_fpdf2_matches_sandwich (line 102) | def test_fpdf2_matches_sandwich(resources, outdir): FILE: tests/test_image_input.py function baiona (line 21) | def baiona(resources): function test_image_to_pdf (line 25) | def test_image_to_pdf(resources, outpdf): function test_no_dpi_info (line 36) | def test_no_dpi_info(caplog, baiona, outdir, no_outpdf): function test_dpi_not_credible (line 47) | def test_dpi_not_credible(caplog, baiona, outdir, no_outpdf): function test_cmyk_no_icc (line 58) | def test_cmyk_no_icc(caplog, resources, no_outpdf): function test_img2pdf_fails (line 64) | def test_img2pdf_fails(resources, no_outpdf): function test_jpeg_in_jpeg_out (line 76) | def test_jpeg_in_jpeg_out(resources, outpdf): FILE: tests/test_imageops.py function test_bytes_per_pixel (line 18) | def test_bytes_per_pixel(): function test_calculate_downsample (line 25) | def test_calculate_downsample(): function test_calculate_downsample_hypothesis (line 41) | def test_calculate_downsample_hypothesis(mode, im_w, im_h, max_x, max_y,... function test_downsample_image (line 53) | def test_downsample_image(): FILE: tests/test_json_serialization.py function register_plugin_models (line 15) | def register_plugin_models(): function worker_function (line 22) | def worker_function(options_json: str) -> str: function test_json_serialization_multiprocessing (line 54) | def test_json_serialization_multiprocessing(): function test_json_serialization_with_streams (line 111) | def test_json_serialization_with_streams(): function test_json_serialization_with_none_values (line 136) | def test_json_serialization_with_none_values(): FILE: tests/test_logging.py function test_debug_logging (line 11) | def test_debug_logging(tmp_path): FILE: tests/test_main.py function test_quick (line 41) | def test_quick(resources, outpdf): function test_oversample (line 48) | def test_oversample(renderer, resources, outpdf): function test_repeat_ocr (line 67) | def test_repeat_ocr(resources, no_outpdf): function test_force_ocr (line 72) | def test_force_ocr(resources, outpdf): function test_skip_ocr (line 84) | def test_skip_ocr(resources, outpdf): function test_redo_ocr (line 96) | def test_redo_ocr(resources, outpdf): function test_argsfile (line 108) | def test_argsfile(resources, outdir): function test_ocr_timeout (line 128) | def test_ocr_timeout(renderer, resources, outpdf): function test_skip_big (line 141) | def test_skip_big(resources, outpdf): function test_maximum_options (line 156) | def test_maximum_options(renderer, output_type, multipage, outpdf): function test_tesseract_missing_tessdata (line 185) | def test_tesseract_missing_tessdata(monkeypatch, resources, no_outpdf, t... function test_invalid_input_pdf (line 191) | def test_invalid_input_pdf(resources, no_outpdf): function test_blank_input_pdf (line 196) | def test_blank_input_pdf(resources, outpdf): function test_force_ocr_on_pdf_with_no_images (line 201) | def test_force_ocr_on_pdf_with_no_images(resources, no_outpdf): function test_german (line 220) | def test_german(resources, outdir): function test_klingon (line 242) | def test_klingon(resources, outpdf): function test_missing_docinfo (line 247) | def test_missing_docinfo(resources, outpdf): function test_uppercase_extension (line 260) | def test_uppercase_extension(resources, outdir): function test_input_file_not_found (line 271) | def test_input_file_not_found(caplog, no_outpdf): function test_input_file_not_readable (line 279) | def test_input_file_not_readable(caplog, resources, outdir, no_outpdf): function test_input_file_not_a_pdf (line 288) | def test_input_file_not_a_pdf(caplog, no_outpdf): function test_pagesegmode (line 297) | def test_pagesegmode(renderer, resources, outpdf): function test_tesseract_oem (line 312) | def test_tesseract_oem(resources, outpdf): function test_tesseract_thresholding (line 324) | def test_tesseract_thresholding(value, resources, outpdf): function test_tesseract_thresholding_invalid (line 336) | def test_tesseract_thresholding_invalid(value, resources, no_outpdf): function test_tesseract_crash (line 349) | def test_tesseract_crash(renderer, resources, no_outpdf, caplog): function test_tesseract_crash_autorotate (line 365) | def test_tesseract_crash_autorotate(resources, no_outpdf, caplog): function test_tesseract_image_too_big (line 380) | def test_tesseract_image_too_big(renderer, resources, outpdf): function test_encrypted (line 395) | def test_encrypted(resources, outpdf, encryption_level, caplog): function test_jbig2_passthrough (line 431) | def test_jbig2_passthrough(resources, outpdf): function test_masks (line 446) | def test_masks(resources, outpdf): function test_linearized_pdf_and_indirect_object (line 455) | def test_linearized_pdf_and_indirect_object(resources, outpdf): function test_very_high_dpi (line 461) | def test_very_high_dpi(resources, outpdf): function test_overlay (line 476) | def test_overlay(resources, outpdf): function protected_file (line 487) | def protected_file(outdir): function test_destination_not_writable (line 497) | def test_destination_not_writable(resources, protected_file): function valid_tess_config (line 508) | def valid_tess_config(outdir): function test_tesseract_config_valid (line 521) | def test_tesseract_config_valid(resources, valid_tess_config, outpdf): function invalid_tess_config (line 533) | def invalid_tess_config(outdir): function test_tesseract_config_invalid (line 546) | def test_tesseract_config_invalid(renderer, resources, invalid_tess_conf... function test_user_words_ocr (line 562) | def test_user_words_ocr(resources, outdir): function test_form_xobject (line 582) | def test_form_xobject(resources, outpdf): function test_pagesize_consistency (line 593) | def test_pagesize_consistency(renderer, resources, outpdf): function test_skip_big_with_no_images (line 618) | def test_skip_big_with_no_images(resources, outpdf): function test_no_contents (line 630) | def test_no_contents(resources, outpdf): function test_compression_preserved (line 643) | def test_compression_preserved(ocrmypdf_exec, resources, image, outpdf): function test_compression_changed (line 700) | def test_compression_changed(ocrmypdf_exec, resources, image, compressio... function test_sidecar_pagecount (line 751) | def test_sidecar_pagecount(resources, outpdf): function test_sidecar_nonempty (line 776) | def test_sidecar_nonempty(resources, outpdf): function test_pdfa_n (line 793) | def test_pdfa_n(pdfa_level, resources, outpdf): function test_decompression_bomb_error (line 807) | def test_decompression_bomb_error(resources, outpdf, caplog): function test_decompression_bomb_succeeds (line 814) | def test_decompression_bomb_succeeds(resources, outpdf): function test_text_curves (line 821) | def test_text_curves(resources, outpdf): function test_text_curves_force (line 834) | def test_text_curves_force(resources, outpdf): function test_output_is_dir (line 848) | def test_output_is_dir(resources, outdir, caplog): function test_output_is_symlink (line 861) | def test_output_is_symlink(resources, outdir): function test_livecycle (line 875) | def test_livecycle(resources, no_outpdf, caplog): function test_version_check (line 881) | def test_version_check(): function test_fast_web_view (line 905) | def test_fast_web_view(resources, outpdf, threshold, optimize, output_ty... function test_image_dpi_not_image (line 922) | def test_image_dpi_not_image(caplog, resources, outpdf): function test_outputtype_none_bad_setup (line 934) | def test_outputtype_none_bad_setup(resources, outpdf): function test_outputtype_none (line 946) | def test_outputtype_none(resources, outtxt): function graph_bad_icc (line 961) | def graph_bad_icc(resources, outdir): function test_corrupt_icc (line 974) | def test_corrupt_icc(graph_bad_icc, outpdf, caplog): FILE: tests/test_metadata.py function test_preserve_docinfo (line 27) | def test_preserve_docinfo(output_type, resources, outpdf): function test_override_metadata (line 47) | def test_override_metadata(output_type, resources, outpdf, caplog): function test_unset_metadata (line 82) | def test_unset_metadata(output_type, field, resources, outpdf, caplog): function test_high_unicode (line 125) | def test_high_unicode(resources, no_outpdf): function test_bookmarks_preserved (line 147) | def test_bookmarks_preserved(output_type, ocr_option, resources, outpdf): function seconds_between_dates (line 168) | def seconds_between_dates(date1, date2): function test_creation_date_preserved (line 174) | def test_creation_date_preserved(output_type, resources, infile, outpdf): function libxmp_file_to_dict (line 204) | def libxmp_file_to_dict(): function test_xml_metadata_preserved (line 230) | def test_xml_metadata_preserved( function test_kodak_toc (line 307) | def test_kodak_toc(resources, outpdf): function test_metadata_fixup_warning (line 322) | def test_metadata_fixup_warning(resources, outdir, caplog): function test_prevent_gs_invalid_xml (line 356) | def test_prevent_gs_invalid_xml(resources, outdir): FILE: tests/test_multi_font_manager.py function font_dir (line 17) | def font_dir(): function multi_font_manager (line 23) | def multi_font_manager(font_dir): function has_cjk_font (line 28) | def has_cjk_font(manager: MultiFontManager) -> bool: function has_arabic_font (line 33) | def has_arabic_font(manager: MultiFontManager) -> bool: function has_devanagari_font (line 38) | def has_devanagari_font(manager: MultiFontManager) -> bool: function test_init_loads_builtin_fonts (line 53) | def test_init_loads_builtin_fonts(multi_font_manager): function test_missing_font_directory (line 65) | def test_missing_font_directory(): function test_select_font_for_arabic_language (line 75) | def test_select_font_for_arabic_language(multi_font_manager): function test_select_font_for_persian_language (line 83) | def test_select_font_for_persian_language(multi_font_manager): function test_select_font_for_urdu_language (line 91) | def test_select_font_for_urdu_language(multi_font_manager): function test_farsi_language_code (line 99) | def test_farsi_language_code(multi_font_manager): function test_select_font_for_hindi_language (line 111) | def test_select_font_for_hindi_language(multi_font_manager): function test_select_font_for_sanskrit_language (line 119) | def test_select_font_for_sanskrit_language(multi_font_manager): function test_select_font_for_marathi_language (line 127) | def test_select_font_for_marathi_language(multi_font_manager): function test_select_font_for_nepali_language (line 135) | def test_select_font_for_nepali_language(multi_font_manager): function test_select_font_for_chinese_language (line 147) | def test_select_font_for_chinese_language(multi_font_manager): function test_select_font_for_chinese_generic (line 155) | def test_select_font_for_chinese_generic(multi_font_manager): function test_select_font_for_chinese_simplified (line 163) | def test_select_font_for_chinese_simplified(multi_font_manager): function test_select_font_for_chinese_traditional (line 171) | def test_select_font_for_chinese_traditional(multi_font_manager): function test_select_font_for_japanese_language (line 179) | def test_select_font_for_japanese_language(multi_font_manager): function test_select_font_for_korean_language (line 187) | def test_select_font_for_korean_language(multi_font_manager): function test_select_font_for_english_text (line 198) | def test_select_font_for_english_text(multi_font_manager): function test_select_font_without_language_hint (line 204) | def test_select_font_without_language_hint(multi_font_manager): function test_select_font_arabic_text_without_language_hint (line 213) | def test_select_font_arabic_text_without_language_hint(multi_font_manager): function test_devanagari_text_without_language_hint (line 222) | def test_devanagari_text_without_language_hint(multi_font_manager): function test_cjk_text_without_language_hint (line 230) | def test_cjk_text_without_language_hint(multi_font_manager): function test_fallback_to_occulta_font (line 238) | def test_fallback_to_occulta_font(multi_font_manager): function test_fallback_fonts_constant (line 246) | def test_fallback_fonts_constant(multi_font_manager): function test_has_all_glyphs_for_english (line 261) | def test_has_all_glyphs_for_english(multi_font_manager): function test_has_all_glyphs_for_arabic (line 267) | def test_has_all_glyphs_for_arabic(multi_font_manager): function test_has_all_glyphs_for_devanagari (line 274) | def test_has_all_glyphs_for_devanagari(multi_font_manager): function test_has_all_glyphs_for_cjk (line 281) | def test_has_all_glyphs_for_cjk(multi_font_manager): function test_empty_text_has_all_glyphs (line 288) | def test_empty_text_has_all_glyphs(multi_font_manager): function test_has_all_glyphs_missing_font (line 293) | def test_has_all_glyphs_missing_font(multi_font_manager): function test_font_selection_caching (line 301) | def test_font_selection_caching(multi_font_manager): function test_language_font_map_coverage (line 315) | def test_language_font_map_coverage(): function test_get_all_fonts (line 328) | def test_get_all_fonts(multi_font_manager): class MockFontProvider (line 342) | class MockFontProvider: method __init__ (line 345) | def __init__( method get_font (line 352) | def get_font(self, font_name: str) -> FontManager | None: method get_available_fonts (line 355) | def get_available_fonts(self) -> list[str]: method get_fallback_font (line 358) | def get_fallback_font(self) -> FontManager: function test_custom_font_provider (line 362) | def test_custom_font_provider(font_dir): function test_missing_font_uses_fallback (line 378) | def test_missing_font_uses_fallback(font_dir): function test_builtin_font_provider_loads_expected_fonts (line 393) | def test_builtin_font_provider_loads_expected_fonts(font_dir): function test_builtin_font_provider_get_font (line 406) | def test_builtin_font_provider_get_font(font_dir): function test_builtin_font_provider_get_fallback (line 418) | def test_builtin_font_provider_get_fallback(font_dir): function test_builtin_font_provider_missing_font_logs_warning (line 427) | def test_builtin_font_provider_missing_font_logs_warning(tmp_path, font_... function test_builtin_font_provider_missing_occulta_raises (line 443) | def test_builtin_font_provider_missing_occulta_raises(tmp_path): FILE: tests/test_multilingual_direct.py function pdftotext (line 29) | def pdftotext(): function font_dir (line 49) | def font_dir(): function multi_font_manager (line 55) | def multi_font_manager(font_dir): function multi_font_manager_arabic (line 61) | def multi_font_manager_arabic(font_dir): class TestLatinScript (line 74) | class TestLatinScript: method latin_hocr (line 78) | def latin_hocr(self): method test_render_latin_basic (line 82) | def test_render_latin_basic( method test_latin_font_selection (line 123) | def test_latin_font_selection(self, latin_hocr, multi_font_manager): class TestArabicScript (line 146) | class TestArabicScript: method arabic_hocr (line 150) | def arabic_hocr(self): method test_render_arabic_basic (line 154) | def test_render_arabic_basic( method test_arabic_font_selection (line 186) | def test_arabic_font_selection(self, arabic_hocr, multi_font_manager_a... method test_arabic_rtl_handling (line 203) | def test_arabic_rtl_handling(self, arabic_hocr): function _latin_font_works (line 221) | def _latin_font_works(multi_font_manager) -> bool: function _arabic_font_works (line 226) | def _arabic_font_works(multi_font_manager) -> bool: function _devanagari_font_works (line 231) | def _devanagari_font_works(multi_font_manager) -> bool: function _cjk_font_works (line 236) | def _cjk_font_works(multi_font_manager) -> bool: class TestCJKScript (line 241) | class TestCJKScript: method cjk_hocr (line 245) | def cjk_hocr(self): method test_render_cjk_basic (line 249) | def test_render_cjk_basic(self, cjk_hocr, multi_font_manager, tmp_path... method test_cjk_font_selection (line 291) | def test_cjk_font_selection(self, cjk_hocr, multi_font_manager): class TestDevanagariScript (line 319) | class TestDevanagariScript: method devanagari_hocr (line 323) | def devanagari_hocr(self): method test_render_devanagari_basic (line 327) | def test_render_devanagari_basic( method test_devanagari_font_selection (line 359) | def test_devanagari_font_selection(self, devanagari_hocr, multi_font_m... class TestMultilingual (line 386) | class TestMultilingual: method multilingual_hocr (line 390) | def multilingual_hocr(self): method test_render_multilingual_hocr_basic (line 394) | def test_render_multilingual_hocr_basic( method test_render_multilingual_with_debug_options (line 430) | def test_render_multilingual_with_debug_options( method test_multilingual_invisible_text (line 456) | def test_multilingual_invisible_text( method test_multilingual_font_selection (line 479) | def test_multilingual_font_selection( class TestBaselineHandling (line 516) | class TestBaselineHandling: method multilingual_hocr (line 520) | def multilingual_hocr(self): method test_multilingual_baseline_handling (line 524) | def test_multilingual_baseline_handling(self, multilingual_hocr): class TestFontCoverage (line 542) | class TestFontCoverage: method test_noto_sans_latin_coverage (line 545) | def test_noto_sans_latin_coverage(self, multi_font_manager): method test_noto_sans_arabic_coverage (line 563) | def test_noto_sans_arabic_coverage(self, multi_font_manager_arabic): method test_noto_sans_devanagari_coverage (line 576) | def test_noto_sans_devanagari_coverage(self, multi_font_manager): method test_noto_sans_cjk_coverage (line 592) | def test_noto_sans_cjk_coverage(self, multi_font_manager): FILE: tests/test_null_ocr_engine.py class TestNullOcrEngineExists (line 18) | class TestNullOcrEngineExists: method test_null_ocr_module_importable (line 21) | def test_null_ocr_module_importable(self): method test_null_ocr_engine_class_exists (line 27) | def test_null_ocr_engine_class_exists(self): class TestNullOcrEngineInterface (line 34) | class TestNullOcrEngineInterface: method test_version_returns_none (line 37) | def test_version_returns_none(self): method test_creator_tag (line 43) | def test_creator_tag(self): method test_languages_returns_empty_set (line 51) | def test_languages_returns_empty_set(self): method test_supports_generate_ocr_returns_true (line 58) | def test_supports_generate_ocr_returns_true(self): method test_get_orientation_returns_zero (line 64) | def test_get_orientation_returns_zero(self): method test_get_deskew_returns_zero (line 71) | def test_get_deskew_returns_zero(self): class TestNullOcrEngineGenerateOcr (line 79) | class TestNullOcrEngineGenerateOcr: method sample_image (line 83) | def sample_image(self, tmp_path): method test_generate_ocr_returns_tuple (line 92) | def test_generate_ocr_returns_tuple(self, sample_image): method test_generate_ocr_returns_empty_text (line 104) | def test_generate_ocr_returns_empty_text(self, sample_image): method test_generate_ocr_returns_page_element (line 112) | def test_generate_ocr_returns_page_element(self, sample_image): method test_generate_ocr_page_has_correct_dimensions (line 121) | def test_generate_ocr_page_has_correct_dimensions(self, sample_image): class TestOcrEngineOption (line 132) | class TestOcrEngineOption: method test_ocr_engine_option_accepted (line 135) | def test_ocr_engine_option_accepted(self): method test_ocr_engine_choices_include_none (line 145) | def test_ocr_engine_choices_include_none(self): method test_ocr_engine_choices_include_auto (line 159) | def test_ocr_engine_choices_include_auto(self): FILE: tests/test_ocr_element.py class TestBoundingBox (line 19) | class TestBoundingBox: method test_basic_creation (line 22) | def test_basic_creation(self): method test_width_height (line 29) | def test_width_height(self): method test_zero_size_box (line 34) | def test_zero_size_box(self): method test_invalid_left_right (line 39) | def test_invalid_left_right(self): method test_invalid_top_bottom (line 43) | def test_invalid_top_bottom(self): class TestBaseline (line 48) | class TestBaseline: method test_defaults (line 51) | def test_defaults(self): method test_with_values (line 56) | def test_with_values(self): class TestFontInfo (line 62) | class TestFontInfo: method test_defaults (line 65) | def test_defaults(self): method test_with_values (line 72) | def test_with_values(self): class TestOcrElement (line 80) | class TestOcrElement: method test_minimal_element (line 83) | def test_minimal_element(self): method test_element_with_bbox (line 90) | def test_element_with_bbox(self): method test_element_hierarchy (line 96) | def test_element_hierarchy(self): method test_iter_by_class_single (line 107) | def test_iter_by_class_single(self): method test_iter_by_class_multiple (line 116) | def test_iter_by_class_multiple(self): method test_iter_by_class_multiple_types (line 129) | def test_iter_by_class_multiple_types(self): method test_find_by_class (line 138) | def test_find_by_class(self): method test_find_by_class_not_found (line 147) | def test_find_by_class_not_found(self): method test_get_text_recursive_leaf (line 154) | def test_get_text_recursive_leaf(self): method test_get_text_recursive_nested (line 158) | def test_get_text_recursive_nested(self): method test_words_property (line 165) | def test_words_property(self): method test_lines_property (line 176) | def test_lines_property(self): method test_paragraphs_property (line 184) | def test_paragraphs_property(self): method test_direction_ltr (line 191) | def test_direction_ltr(self): method test_direction_rtl (line 195) | def test_direction_rtl(self): method test_language (line 199) | def test_language(self): method test_baseline (line 203) | def test_baseline(self): method test_textangle (line 209) | def test_textangle(self): method test_confidence (line 213) | def test_confidence(self): method test_page_properties (line 217) | def test_page_properties(self): class TestOcrClass (line 229) | class TestOcrClass: method test_class_values (line 232) | def test_class_values(self): method test_line_types_frozenset (line 240) | def test_line_types_frozenset(self): FILE: tests/test_ocr_engine_interface.py class TestOcrEngineInterface (line 20) | class TestOcrEngineInterface: method test_generate_ocr_method_exists (line 23) | def test_generate_ocr_method_exists(self): method test_supports_generate_ocr_method_exists (line 27) | def test_supports_generate_ocr_method_exists(self): method test_supports_generate_ocr_default_false (line 31) | def test_supports_generate_ocr_default_false(self): method test_generate_ocr_raises_not_implemented_by_default (line 71) | def test_generate_ocr_raises_not_implemented_by_default(self): class TestOcrElementExport (line 112) | class TestOcrElementExport: method test_ocrelement_importable_from_ocrmypdf (line 115) | def test_ocrelement_importable_from_ocrmypdf(self): method test_ocrclass_importable_from_ocrmypdf (line 121) | def test_ocrclass_importable_from_ocrmypdf(self): method test_boundingbox_importable_from_ocrmypdf (line 127) | def test_boundingbox_importable_from_ocrmypdf(self): FILE: tests/test_ocr_engine_selection.py class TestOcrEngineCliOption (line 15) | class TestOcrEngineCliOption: method test_ocr_engine_option_exists (line 18) | def test_ocr_engine_option_exists(self): method test_ocr_engine_accepts_tesseract (line 30) | def test_ocr_engine_accepts_tesseract(self): method test_ocr_engine_accepts_auto (line 39) | def test_ocr_engine_accepts_auto(self): method test_ocr_engine_accepts_none (line 48) | def test_ocr_engine_accepts_none(self): method test_ocr_engine_default_is_auto (line 57) | def test_ocr_engine_default_is_auto(self): method test_ocr_engine_rejects_invalid (line 66) | def test_ocr_engine_rejects_invalid(self): class TestOcrEngineOptionsModel (line 76) | class TestOcrEngineOptionsModel: method test_ocr_options_has_ocr_engine_field (line 79) | def test_ocr_options_has_ocr_engine_field(self): class TestOcrEnginePluginSelection (line 87) | class TestOcrEnginePluginSelection: method test_tesseract_selected_when_auto (line 90) | def test_tesseract_selected_when_auto(self): method test_tesseract_selected_when_tesseract (line 103) | def test_tesseract_selected_when_tesseract(self): method test_null_selected_when_none (line 116) | def test_null_selected_when_none(self): method test_null_returns_none_when_auto (line 129) | def test_null_returns_none_when_auto(self): FILE: tests/test_optimize.py function palette (line 37) | def palette(resources): function test_basic (line 43) | def test_basic(multipage, palette, pdf, outpdf): function test_mono_not_inverted (line 51) | def test_mono_not_inverted(resources, outdir): function test_jpg_png_params (line 67) | def test_jpg_png_params(resources, outpdf): function test_jbig2_lossless (line 85) | def test_jbig2_lossless(resources, outpdf): function test_flate_to_jbig2 (line 115) | def test_flate_to_jbig2(resources, outdir): function test_multiple_pngs (line 143) | def test_multiple_pngs(resources, outdir): function test_optimize_off (line 185) | def test_optimize_off(resources, outpdf): function test_group3 (line 197) | def test_group3(resources): function test_find_formx (line 210) | def test_find_formx(resources): function test_extract_image_filter_with_pdf_image (line 218) | def test_extract_image_filter_with_pdf_image(): function test_extract_image_filter_with_non_image (line 233) | def test_extract_image_filter_with_non_image(): function test_extract_image_filter_with_small_stream_size (line 239) | def test_extract_image_filter_with_small_stream_size(): function test_extract_image_filter_with_small_dimensions (line 246) | def test_extract_image_filter_with_small_dimensions(): function test_extract_image_filter_with_multiple_compression_filters (line 255) | def test_extract_image_filter_with_multiple_compression_filters(): function test_extract_image_filter_with_wide_gamut_image (line 266) | def test_extract_image_filter_with_wide_gamut_image(): function test_extract_image_filter_with_jpeg2000_image (line 277) | def test_extract_image_filter_with_jpeg2000_image(): function test_extract_image_filter_with_ccitt_group_3_image (line 294) | def test_extract_image_filter_with_ccitt_group_3_image(): function test_extract_image_filter_with_rgb_smask_matte (line 320) | def test_extract_image_filter_with_rgb_smask_matte(): FILE: tests/test_page_boxes.py function test_media_box (line 53) | def test_media_box( function test_crop_box (line 105) | def test_crop_box( FILE: tests/test_page_numbers.py function test_pages (line 37) | def test_pages(pages, result): function test_nonmonotonic_warning (line 45) | def test_nonmonotonic_warning(caplog): function test_limited_pages (line 51) | def test_limited_pages(multipage, outpdf): FILE: tests/test_pdf_renderer.py function text_from_pdf (line 30) | def text_from_pdf(filename: Path) -> str: function font_dir (line 45) | def font_dir(): function multi_font_manager (line 51) | def multi_font_manager(font_dir): function create_simple_page (line 56) | def create_simple_page( class TestFpdf2PdfRendererBasic (line 107) | class TestFpdf2PdfRendererBasic: method test_render_simple_page (line 110) | def test_render_simple_page(self, tmp_path, multi_font_manager): method test_rendered_text_extractable (line 123) | def test_rendered_text_extractable(self, tmp_path, multi_font_manager): method test_invisible_text_mode (line 137) | def test_invisible_text_mode(self, tmp_path, multi_font_manager): method test_visible_text_mode (line 154) | def test_visible_text_mode(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererPageSize (line 172) | class TestFpdf2PdfRendererPageSize: method test_page_dimensions (line 175) | def test_page_dimensions(self, tmp_path, multi_font_manager): method test_high_dpi_page (line 189) | def test_high_dpi_page(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererMultiLine (line 205) | class TestFpdf2PdfRendererMultiLine: method test_multiple_lines (line 208) | def test_multiple_lines(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererTextDirection (line 274) | class TestFpdf2PdfRendererTextDirection: method test_ltr_text (line 277) | def test_ltr_text(self, tmp_path, multi_font_manager): method test_rtl_text (line 289) | def test_rtl_text(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererBaseline (line 325) | class TestFpdf2PdfRendererBaseline: method test_sloped_baseline (line 328) | def test_sloped_baseline(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererTextangle (line 365) | class TestFpdf2PdfRendererTextangle: method test_rotated_text (line 368) | def test_rotated_text(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererWordBreaks (line 406) | class TestFpdf2PdfRendererWordBreaks: method test_word_breaks_english (line 409) | def test_word_breaks_english(self, tmp_path, multi_font_manager): method test_cjk_text (line 424) | def test_cjk_text(self, tmp_path, multi_font_manager): class TestFpdf2PdfRendererDebugOptions (line 466) | class TestFpdf2PdfRendererDebugOptions: method test_debug_render_options_default (line 469) | def test_debug_render_options_default(self, multi_font_manager): method test_debug_render_options_enabled (line 480) | def test_debug_render_options_enabled(self, tmp_path, multi_font_manag... class TestFpdf2PdfRendererErrors (line 506) | class TestFpdf2PdfRendererErrors: method test_invalid_ocr_class (line 509) | def test_invalid_ocr_class(self, multi_font_manager): method test_page_without_bbox (line 518) | def test_page_without_bbox(self, multi_font_manager): class TestFpdf2PdfRendererLineTypes (line 526) | class TestFpdf2PdfRendererLineTypes: method test_header_line (line 529) | def test_header_line(self, tmp_path, multi_font_manager): method test_caption_line (line 565) | def test_caption_line(self, tmp_path, multi_font_manager): FILE: tests/test_pdfa.py function test_pdfa (line 18) | def test_pdfa(resources, outpdf, optimize, pdfa_level): FILE: tests/test_pdfinfo.py function single_page_text (line 33) | def single_page_text(outdir): function test_single_page_text (line 48) | def test_single_page_text(single_page_text): function eight_by_eight (line 59) | def eight_by_eight(): function eight_by_eight_regular_image (line 67) | def eight_by_eight_regular_image(eight_by_eight, outpdf): function test_single_page_image (line 87) | def test_single_page_image(eight_by_eight_regular_image): function eight_by_eight_inline_image (line 106) | def eight_by_eight_inline_image(eight_by_eight, outpdf): function test_single_page_inline_image (line 115) | def test_single_page_inline_image(eight_by_eight_inline_image): function test_jpeg (line 124) | def test_jpeg(resources): function flate_jpeg_pdf (line 135) | def flate_jpeg_pdf(outpdf): function test_flate_jpeg (line 175) | def test_flate_jpeg(flate_jpeg_pdf): function test_form_xobject (line 183) | def test_form_xobject(resources): function test_no_contents (line 191) | def test_no_contents(resources): function test_oversized_page (line 199) | def test_oversized_page(resources): function test_pickle (line 205) | def test_pickle(resources): function test_vector (line 214) | def test_vector(resources): function test_ocr_detection (line 221) | def test_ocr_detection(resources): function test_corrupt_font_detection (line 231) | def test_corrupt_font_detection(resources, testfile): function test_stack_abuse (line 237) | def test_stack_abuse(): function test_pages_issue700 (line 253) | def test_pages_issue700(monkeypatch, resources): function image_scale0 (line 270) | def image_scale0(resources, outpdf): function test_image_scale0 (line 287) | def test_image_scale0(image_scale0): FILE: tests/test_pipeline.py function rgb_image (line 25) | def rgb_image(): function test_dpi_needed (line 50) | def test_dpi_needed(image, text, vector, result, rgb_image, outdir): function test_enumerate_compress_ranges (line 152) | def test_enumerate_compress_ranges(name, input, output): function test_should_visible_page_image_use_jpg (line 174) | def test_should_visible_page_image_use_jpg(encodings, expected): FILE: tests/test_pipeline_generate_ocr.py class TestOcrEngineDirect (line 19) | class TestOcrEngineDirect: method test_ocr_engine_direct_function_exists (line 22) | def test_ocr_engine_direct_function_exists(self): method test_ocr_engine_direct_returns_tuple (line 28) | def test_ocr_engine_direct_returns_tuple(self, tmp_path): class TestPageResultExtension (line 51) | class TestPageResultExtension: method test_page_result_has_ocr_tree_field (line 54) | def test_page_result_has_ocr_tree_field(self): method test_page_result_ocr_tree_default_none (line 61) | def test_page_result_ocr_tree_default_none(self): class TestFpdf2DirectPage (line 69) | class TestFpdf2DirectPage: method test_fpdf2_direct_page_exists (line 72) | def test_fpdf2_direct_page_exists(self): method test_fpdf2_direct_page_has_ocr_tree (line 78) | def test_fpdf2_direct_page_has_ocr_tree(self): class TestHOCRResultExtension (line 86) | class TestHOCRResultExtension: method test_hocr_result_has_ocr_tree_field (line 89) | def test_hocr_result_has_ocr_tree_field(self): method test_hocr_result_ocr_tree_default_none (line 96) | def test_hocr_result_ocr_tree_default_none(self): FILE: tests/test_preprocessing.py function test_deskew (line 22) | def test_deskew(resources, outdir): function test_deskew_blank_page (line 43) | def test_deskew_blank_page(resources, outpdf): function test_remove_background (line 49) | def test_remove_background(resources, outdir): function test_exotic_image (line 85) | def test_exotic_image(pdf, renderer, output_type, resources, outdir): function test_non_square_resolution (line 107) | def test_non_square_resolution(renderer, resources, outpdf): function test_convert_to_square_resolution (line 132) | def test_convert_to_square_resolution(renderer, resources, outpdf): FILE: tests/test_quality.py function test_quality_measurement (line 9) | def test_quality_measurement(): FILE: tests/test_rasterizer.py class TestRasterizerOption (line 30) | class TestRasterizerOption: method test_rasterizer_auto_default (line 33) | def test_rasterizer_auto_default(self, resources, outpdf): method test_rasterizer_ghostscript (line 44) | def test_rasterizer_ghostscript(self, resources, outpdf): method test_rasterizer_pypdfium (line 56) | def test_rasterizer_pypdfium(self, resources, outpdf): method test_rasterizer_invalid (line 67) | def test_rasterizer_invalid(self): class TestRasterizerWithRotation (line 75) | class TestRasterizerWithRotation: method test_ghostscript_with_rotation (line 78) | def test_ghostscript_with_rotation(self, resources, outpdf): method test_pypdfium_with_rotation (line 93) | def test_pypdfium_with_rotation(self, resources, outpdf): method test_auto_with_rotation (line 107) | def test_auto_with_rotation(self, resources, outpdf): class TestRasterizerHookDirect (line 122) | class TestRasterizerHookDirect: method test_ghostscript_hook_respects_option (line 125) | def test_ghostscript_hook_respects_option(self, resources, tmp_path): method test_pypdfium_hook_respects_option (line 160) | def test_pypdfium_hook_respects_option(self, resources, tmp_path): method test_auto_uses_pypdfium_when_available (line 189) | def test_auto_uses_pypdfium_when_available(self, resources, tmp_path): function _create_gradient_image (line 217) | def _create_gradient_image(width: int, height: int) -> Image.Image: function pdf_with_nonstandard_boxes (line 250) | def pdf_with_nonstandard_boxes(tmp_path): function pdf_with_negative_mediabox (line 284) | def pdf_with_negative_mediabox(tmp_path): class TestRasterizerWithNonStandardBoxes (line 311) | class TestRasterizerWithNonStandardBoxes: method test_ghostscript_nonstandard_boxes (line 314) | def test_ghostscript_nonstandard_boxes(self, pdf_with_nonstandard_boxe... method test_pypdfium_nonstandard_boxes (line 326) | def test_pypdfium_nonstandard_boxes(self, pdf_with_nonstandard_boxes, ... method test_ghostscript_negative_mediabox (line 337) | def test_ghostscript_negative_mediabox(self, pdf_with_negative_mediabo... method test_pypdfium_negative_mediabox (line 349) | def test_pypdfium_negative_mediabox(self, pdf_with_negative_mediabox, ... method test_compare_rasterizers_nonstandard_boxes (line 360) | def test_compare_rasterizers_nonstandard_boxes( class TestRasterizerWithRotationAndBoxes (line 421) | class TestRasterizerWithRotationAndBoxes: method _get_expected_size (line 433) | def _get_expected_size(self, rotation: int) -> tuple[int, int]: method test_ghostscript_rotation_dimensions (line 442) | def test_ghostscript_rotation_dimensions( method test_pypdfium_rotation_dimensions (line 484) | def test_pypdfium_rotation_dimensions(self, pdf_with_nonstandard_boxes... method test_rasterizers_produce_same_dimensions (line 524) | def test_rasterizers_produce_same_dimensions( FILE: tests/test_rotation.py function compare_images_monochrome (line 31) | def compare_images_monochrome( function test_monochrome_comparison (line 71) | def test_monochrome_comparison(resources, outdir): function test_autorotate (line 94) | def test_autorotate(renderer, resources, outdir): function test_autorotate_threshold (line 126) | def test_autorotate_threshold(threshold, op, comparison_threshold, resou... function test_rotated_skew_timeout (line 151) | def test_rotated_skew_timeout(resources, outpdf, rasterizer): function test_rotate_deskew_ocr_timeout (line 193) | def test_rotate_deskew_ocr_timeout(resources, outdir, rasterizer): function make_rotate_test (line 223) | def make_rotate_test(imagefile, outdir, prefix, image_angle, page_angle,... function test_rotate_page_level (line 251) | def test_rotate_page_level(image_angle, page_angle, resources, outdir, c... function test_page_rotate_tag (line 273) | def test_page_rotate_tag(page_rotate_angle, resources, outdir, caplog): function test_rotate_and_crop (line 301) | def test_rotate_and_crop( function test_rasterize_rotates (line 337) | def test_rasterize_rotates(resources, tmp_path, rasterizer): function test_simulated_scan (line 382) | def test_simulated_scan(outdir): FILE: tests/test_semfree.py function test_semfree (line 20) | def test_semfree(resources, outpdf): FILE: tests/test_soft_error.py function test_raster_continue_on_soft_error (line 15) | def test_raster_continue_on_soft_error(resources, outpdf): function test_raster_stop_on_soft_error (line 28) | def test_raster_stop_on_soft_error(resources, outpdf): function test_render_continue_on_soft_error (line 40) | def test_render_continue_on_soft_error(resources, outpdf): function test_render_stop_on_soft_error (line 56) | def test_render_stop_on_soft_error(resources, outpdf): FILE: tests/test_stdio.py function test_stdin (line 16) | def test_stdin(ocrmypdf_exec, resources, outpdf): function test_stdout (line 31) | def test_stdout(ocrmypdf_exec, resources, outpdf): function test_dev_null (line 52) | def test_dev_null(resources): FILE: tests/test_system_font_provider.py class TestSystemFontProviderPlatform (line 23) | class TestSystemFontProviderPlatform: method test_get_platform_linux (line 26) | def test_get_platform_linux(self): method test_get_platform_darwin (line 32) | def test_get_platform_darwin(self): method test_get_platform_windows (line 38) | def test_get_platform_windows(self): method test_get_platform_freebsd (line 44) | def test_get_platform_freebsd(self): class TestSystemFontProviderDirectories (line 51) | class TestSystemFontProviderDirectories: method test_linux_font_dirs (line 54) | def test_linux_font_dirs(self): method test_darwin_font_dirs (line 63) | def test_darwin_font_dirs(self): method test_windows_font_dirs_with_windir (line 72) | def test_windows_font_dirs_with_windir(self): method test_windows_font_dirs_default (line 86) | def test_windows_font_dirs_default(self): method test_windows_font_dirs_with_localappdata (line 99) | def test_windows_font_dirs_with_localappdata(self): method test_font_dirs_cached (line 120) | def test_font_dirs_cached(self): class TestSystemFontProviderLazyLoading (line 128) | class TestSystemFontProviderLazyLoading: method test_no_scanning_on_init (line 131) | def test_no_scanning_on_init(self): method test_get_font_unknown_name_returns_none (line 138) | def test_get_font_unknown_name_returns_none(self): method test_negative_cache (line 146) | def test_negative_cache(self): method test_positive_cache (line 161) | def test_positive_cache(self): class TestSystemFontProviderAvailableFonts (line 182) | class TestSystemFontProviderAvailableFonts: method test_returns_all_patterns (line 185) | def test_returns_all_patterns(self): method test_fallback_font_raises (line 194) | def test_fallback_font_raises(self): class TestChainedFontProvider (line 204) | class TestChainedFontProvider: method test_requires_at_least_one_provider (line 207) | def test_requires_at_least_one_provider(self): method test_get_font_tries_providers_in_order (line 212) | def test_get_font_tries_providers_in_order(self): method test_get_font_stops_on_first_match (line 228) | def test_get_font_stops_on_first_match(self): method test_get_font_returns_none_if_all_fail (line 243) | def test_get_font_returns_none_if_all_fail(self): method test_get_available_fonts_combines_providers (line 256) | def test_get_available_fonts_combines_providers(self): method test_get_fallback_font_from_first_provider (line 269) | def test_get_fallback_font_from_first_provider(self): method test_get_fallback_font_skips_not_implemented (line 283) | def test_get_fallback_font_skips_not_implemented(self): method test_get_fallback_font_raises_if_none_available (line 297) | def test_get_fallback_font_raises_if_none_available(self): class TestChainedFontProviderIntegration (line 310) | class TestChainedFontProviderIntegration: method font_dir (line 314) | def font_dir(self): method test_builtin_then_system_chain (line 318) | def test_builtin_then_system_chain(self, font_dir): method test_system_fonts_extend_builtin (line 333) | def test_system_fonts_extend_builtin(self, font_dir): FILE: tests/test_tagged.py function test_block_tagged (line 11) | def test_block_tagged(resources): function test_force_tagged_warns (line 16) | def test_force_tagged_warns(resources, outpdf, caplog): function test_tagged_pdf_mode_ignore_with_skip_text (line 27) | def test_tagged_pdf_mode_ignore_with_skip_text(resources, outpdf, caplog): function test_tagged_pdf_mode_ignore_with_force (line 40) | def test_tagged_pdf_mode_ignore_with_force(resources, outpdf, caplog): FILE: tests/test_tesseract.py function test_skip_pages_does_not_replicate (line 24) | def test_skip_pages_does_not_replicate(resources, basename, outdir): function test_content_preservation (line 49) | def test_content_preservation(resources, outpdf): function test_no_languages (line 64) | def test_no_languages(tmp_path, monkeypatch): function test_image_too_large_hocr (line 71) | def test_image_too_large_hocr(monkeypatch, resources, outdir): function test_image_too_large_pdf (line 92) | def test_image_too_large_pdf(monkeypatch, resources, outdir): function test_timeout (line 115) | def test_timeout(caplog): function test_tesseract_log_output (line 136) | def test_tesseract_log_output(caplog, in_, logged): function test_tesseract_log_output_raises (line 145) | def test_tesseract_log_output_raises(caplog): function test_blocked_language (line 151) | def test_blocked_language(resources, no_outpdf): FILE: tests/test_unpaper.py function test_no_unpaper (line 26) | def test_no_unpaper(resources, no_outpdf): function test_old_unpaper (line 39) | def test_old_unpaper(resources, no_outpdf): function test_unpaper_version_chatter (line 52) | def test_unpaper_version_chatter(resources, no_outpdf): function test_clean (line 66) | def test_clean(resources, outpdf): function test_unpaper_args_valid (line 77) | def test_unpaper_args_valid(resources, outpdf): function test_unpaper_args_invalid_filename (line 90) | def test_unpaper_args_invalid_filename(resources, outpdf, caplog): function test_unpaper_args_invalid (line 104) | def test_unpaper_args_invalid(resources, outpdf): function test_unpaper_image_too_big (line 120) | def test_unpaper_image_too_big(resources, outdir, caplog): function test_palette_image (line 133) | def test_palette_image(resources, outpdf): FILE: tests/test_userunit.py function poster (line 18) | def poster(resources): function test_userunit_pdf_passes (line 23) | def test_userunit_pdf_passes(mode, poster, outpdf): function test_rotate_interaction (line 37) | def test_rotate_interaction(poster, outpdf): FILE: tests/test_validation.py function make_opts_pm (line 25) | def make_opts_pm(input_file='a.pdf', output_file='b.pdf', language='eng'... function make_opts (line 39) | def make_opts(*args, **kwargs): function make_ocr_opts (line 44) | def make_ocr_opts(input_file='a.pdf', output_file='b.pdf', **kwargs): function test_old_tesseract_error (line 49) | def test_old_tesseract_error(): function test_tesseract_not_installed (line 57) | def test_tesseract_not_installed(caplog): function test_lossless_redo (line 69) | def test_lossless_redo(): function test_mutex_options (line 74) | def test_mutex_options(): function test_optimizing (line 89) | def test_optimizing(caplog): function test_pillow_options (line 96) | def test_pillow_options(): function test_output_tty (line 106) | def test_output_tty(): function test_report_file_size (line 111) | def test_report_file_size(tmp_path, caplog): function test_false_action_store_true (line 144) | def test_false_action_store_true(): function test_no_progress_bar (line 152) | def test_no_progress_bar(progress_bar, resources): function make_version (line 173) | def make_version(version): function test_version_comparison (line 180) | def test_version_comparison(): function test_optional_program_recommended (line 245) | def test_optional_program_recommended(caplog): function test_pagesegmode_warning (line 266) | def test_pagesegmode_warning(caplog): function test_two_languages (line 273) | def test_two_languages(): function test_sidecar_equals_output (line 285) | def test_sidecar_equals_output(resources, no_outpdf): function test_devnull_sidecar (line 291) | def test_devnull_sidecar(resources): FILE: tests/test_verapdf.py class TestVerapdfModule (line 21) | class TestVerapdfModule: method test_output_type_to_flavour (line 24) | def test_output_type_to_flavour(self): method test_version (line 33) | def test_version(self): method test_validate_non_pdfa (line 38) | def test_validate_non_pdfa(self, tmp_path): class TestPdfaPartConformance (line 50) | class TestPdfaPartConformance: method test_pdfa_part_conformance (line 53) | def test_pdfa_part_conformance(self): class TestAddPdfaMetadata (line 62) | class TestAddPdfaMetadata: method test_add_pdfa_metadata (line 65) | def test_add_pdfa_metadata(self, tmp_path): class TestAddSrgbOutputIntent (line 83) | class TestAddSrgbOutputIntent: method test_add_srgb_output_intent (line 86) | def test_add_srgb_output_intent(self, tmp_path): method test_add_srgb_output_intent_idempotent (line 101) | def test_add_srgb_output_intent_idempotent(self, tmp_path): class TestSpeculativePdfaConversion (line 115) | class TestSpeculativePdfaConversion: method test_speculative_conversion_creates_pdfa_structures (line 118) | def test_speculative_conversion_creates_pdfa_structures(self, tmp_path... method test_speculative_conversion_different_parts (line 131) | def test_speculative_conversion_different_parts(self, tmp_path, resour... class TestVerapdfIntegration (line 148) | class TestVerapdfIntegration: method test_speculative_conversion_validation (line 151) | def test_speculative_conversion_validation(self, tmp_path, resources): FILE: tests/test_watcher.py function test_watcher (line 17) | def test_watcher(tmp_path, resources, year_month):