SYMBOL INDEX (866 symbols across 190 files) FILE: benchmarks/overall/display/dataset.py function build_dataset (line 11) | def build_dataset(bench_dataset: datasets.Dataset, result: FullResult, s... FILE: benchmarks/overall/display/table.py function write_table (line 8) | def write_table(title: str, rows: list, headers: list, out_path: Path, f... function print_scores (line 17) | def print_scores(result: FullResult, out_path: Path, methods: List[str],... FILE: benchmarks/overall/download/base.py class Downloader (line 9) | class Downloader: method __init__ (line 13) | def __init__(self, api_key, app_id, max_rows: int = 2200): method get_html (line 20) | def get_html(self, pdf_bytes): method upload_ds (line 23) | def upload_ds(self): method generate_data (line 37) | def generate_data(self): method __call__ (line 61) | def __call__(self): FILE: benchmarks/overall/download/llamaparse.py class LlamaParseDownloader (line 9) | class LlamaParseDownloader(Downloader): method get_html (line 12) | def get_html(self, pdf_bytes): function upload_and_parse_file (line 27) | def upload_and_parse_file(api_key: str, fname: str, buff, max_retries: i... FILE: benchmarks/overall/download/main.py function main (line 13) | def main(service: str, max_rows: int, api_key: str, app_id: str): FILE: benchmarks/overall/download/mathpix.py class MathpixDownloader (line 9) | class MathpixDownloader(Downloader): method get_html (line 12) | def get_html(self, pdf_bytes): function mathpix_request (line 33) | def mathpix_request(buffer, headers): function mathpix_status (line 54) | def mathpix_status(pdf_id, headers): function mathpix_results (line 76) | def mathpix_results(pdf_id, headers, ext="md"): FILE: benchmarks/overall/download/mistral.py class MistralDownloader (line 8) | class MistralDownloader(Downloader): method get_html (line 11) | def get_html(self, pdf_bytes): function upload_and_process_file (line 26) | def upload_and_process_file(api_key: str, fname: str, buff): FILE: benchmarks/overall/elo.py class ComparerSchema (line 93) | class ComparerSchema(BaseModel): class Comparer (line 101) | class Comparer: method __init__ (line 102) | def __init__(self): method __call__ (line 105) | def __call__( method llm_rater (line 125) | def llm_rater(self, img: Image.Image, prompt: str): method llm_response_wrapper (line 133) | def llm_response_wrapper( function display_win_rates_table (line 164) | def display_win_rates_table(win_rates: dict): function main (line 180) | def main( FILE: benchmarks/overall/methods/__init__.py class BaseMethod (line 14) | class BaseMethod: method __init__ (line 15) | def __init__(self, **kwargs): method convert_to_md (line 21) | def convert_to_md(html: str): method __call__ (line 26) | def __call__(self, sample) -> BenchmarkResult: method render (line 29) | def render(self, markdown: str): method convert_to_html (line 33) | def convert_to_html(md: str): method html_to_image (line 63) | def html_to_image(self, html: str) -> Image.Image: FILE: benchmarks/overall/methods/docling.py class DoclingMethod (line 7) | class DoclingMethod(BaseMethod): method __call__ (line 11) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/gt.py class GTMethod (line 9) | class GTMethod(BaseMethod): method __call__ (line 10) | def __call__(self, sample) -> BenchmarkResult: method render (line 19) | def render(self, html: List[str]) -> Image.Image: FILE: benchmarks/overall/methods/llamaparse.py class LlamaParseMethod (line 6) | class LlamaParseMethod(BaseMethod): method __call__ (line 9) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/marker.py class MarkerMethod (line 10) | class MarkerMethod(BaseMethod): method __call__ (line 14) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/mathpix.py class MathpixMethod (line 6) | class MathpixMethod(BaseMethod): method __call__ (line 9) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/mistral.py class MistralMethod (line 6) | class MistralMethod(BaseMethod): method __call__ (line 9) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/olmocr.py function convert_single_page (line 13) | def convert_single_page(filename: str, model, processor, device): class OlmOCRMethod (line 75) | class OlmOCRMethod(BaseMethod): method __call__ (line 79) | def __call__(self, sample) -> BenchmarkResult: FILE: benchmarks/overall/methods/schema.py class BenchmarkResult (line 4) | class BenchmarkResult(TypedDict): FILE: benchmarks/overall/overall.py function get_method_scores (line 24) | def get_method_scores(benchmark_dataset: datasets.Dataset, methods: List... function main (line 98) | def main( FILE: benchmarks/overall/schema.py class FullResult (line 7) | class FullResult(TypedDict): FILE: benchmarks/overall/scorers/__init__.py class BaseScorer (line 6) | class BaseScorer: method __init__ (line 7) | def __init__(self): method __call__ (line 10) | def __call__(self, sample, gt_markdown: List[str], method_markdown: st... FILE: benchmarks/overall/scorers/clean.py class MarkdownCleaner (line 8) | class MarkdownCleaner: method __init__ (line 9) | def __init__(self): method __call__ (line 12) | def __call__(self, markdown): method normalize_markdown (line 39) | def normalize_markdown(md_text: str) -> str: method standardize_math (line 78) | def standardize_math(self, match): method clean_latex (line 92) | def clean_latex(latex_str): FILE: benchmarks/overall/scorers/heuristic.py class HeuristicScorer (line 10) | class HeuristicScorer(BaseScorer): method __call__ (line 11) | def __call__(self, sample, gt_markdown: List[str], method_markdown: st... method kendall_tau (line 50) | def kendall_tau(correct_order: List[int], actual_order: List[int]) -> ... method find_fuzzy_alignments (line 74) | def find_fuzzy_alignments( method clean_input (line 103) | def clean_input(md: str): FILE: benchmarks/overall/scorers/llm.py class LLMScorer (line 94) | class LLMScorer(BaseScorer): method __call__ (line 95) | def __call__(self, sample, gt_markdown: List[str], markdown: str) -> B... method llm_rater (line 108) | def llm_rater(self, img: Image.Image, markdown: str) -> BlockScores: method llm_response_wrapper (line 136) | def llm_response_wrapper(self, prompt, response_schema, depth=0): FILE: benchmarks/overall/scorers/schema.py class BlockScores (line 4) | class BlockScores(TypedDict): FILE: benchmarks/table/gemini.py class TableSchema (line 25) | class TableSchema(BaseModel): function gemini_table_rec (line 28) | def gemini_table_rec(image: Image.Image): FILE: benchmarks/table/inference.py function extract_tables (line 21) | def extract_tables(children: List[JSONBlockOutput]): function fix_table_html (line 30) | def fix_table_html(table_html: str) -> str: function inference_tables (line 45) | def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int |... FILE: benchmarks/table/scoring.py function wrap_table_html (line 11) | def wrap_table_html(table_html:str)->str: class TableTree (line 14) | class TableTree(Tree): method __init__ (line 15) | def __init__(self, tag, colspan=None, rowspan=None, content=None, *chi... method bracket (line 24) | def bracket(self): class CustomConfig (line 35) | class CustomConfig(Config): method maximum (line 37) | def maximum(*sequences): method normalized_distance (line 40) | def normalized_distance(self, *sequences): method rename (line 43) | def rename(self, node1, node2): function tokenize (line 51) | def tokenize(node): function tree_convert_html (line 66) | def tree_convert_html(node, convert_cell=False, parent=None): function similarity_eval_html (line 92) | def similarity_eval_html(pred, true, structure_only=False): FILE: benchmarks/table/table.py function update_teds_score (line 21) | def update_teds_score(result, prefix: str = "marker"): function main (line 37) | def main( FILE: benchmarks/throughput/main.py function get_next_pdf (line 15) | def get_next_pdf(ds: datasets.Dataset, i: int): function single_batch (line 26) | def single_batch( function main (line 108) | def main( FILE: benchmarks/verify_scores.py function verify_scores (line 5) | def verify_scores(file_path): function verify_table_scores (line 16) | def verify_table_scores(file_path): FILE: examples/marker_modal_deployment.py function setup_models_with_cache_check (line 33) | def setup_models_with_cache_check(logger, commit_volume=False): function download_models (line 77) | def download_models(): class MarkerModalDemoService (line 104) | class MarkerModalDemoService: method load_models (line 106) | def load_models(self): method marker_api (line 123) | def marker_api(self): function invoke_conversion (line 308) | async def invoke_conversion( FILE: marker/builders/__init__.py class BaseBuilder (line 8) | class BaseBuilder: method __init__ (line 9) | def __init__(self, config: Optional[BaseModel | dict] = None): method __call__ (line 12) | def __call__(self, data, *args, **kwargs): FILE: marker/builders/document.py class DocumentBuilder (line 14) | class DocumentBuilder(BaseBuilder): method __call__ (line 31) | def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilde... method build_document (line 39) | def build_document(self, provider: PdfProvider): FILE: marker/builders/layout.py class LayoutBuilder (line 16) | class LayoutBuilder(BaseBuilder): method __init__ (line 46) | def __init__(self, layout_model: LayoutPredictor, config=None): method __call__ (line 51) | def __call__(self, document: Document, provider: PdfProvider): method get_batch_size (line 60) | def get_batch_size(self): method forced_layout (line 67) | def forced_layout(self, pages: List[PageGroup]) -> List[LayoutResult]: method surya_layout (line 86) | def surya_layout(self, pages: List[PageGroup]) -> List[LayoutResult]: method expand_layout_blocks (line 94) | def expand_layout_blocks(self, document: Document): method add_blocks_to_pages (line 131) | def add_blocks_to_pages( FILE: marker/builders/line.py class LineBuilder (line 25) | class LineBuilder(BaseBuilder): method __init__ (line 85) | def __init__( method __call__ (line 96) | def __call__(self, document: Document, provider: PdfProvider): method get_detection_batch_size (line 102) | def get_detection_batch_size(self): method get_ocr_error_batch_size (line 109) | def get_ocr_error_batch_size(self): method get_detection_results (line 116) | def get_detection_results( method get_all_lines (line 138) | def get_all_lines(self, document: Document, provider: PdfProvider): method ocr_error_detection (line 234) | def ocr_error_detection( method check_line_overlaps (line 251) | def check_line_overlaps( method check_layout_coverage (line 281) | def check_layout_coverage( method filter_blank_lines (line 329) | def filter_blank_lines(self, page: PageGroup, lines: List[ProviderOutp... method merge_blocks (line 346) | def merge_blocks( FILE: marker/builders/ocr.py class OcrBuilder (line 25) | class OcrBuilder(BaseBuilder): method __init__ (line 77) | def __init__(self, recognition_model: RecognitionPredictor, config=None): method __call__ (line 82) | def __call__(self, document: Document, provider: PdfProvider): method get_recognition_batch_size (line 96) | def get_recognition_batch_size(self): method select_ocr_blocks_by_mode (line 105) | def select_ocr_blocks_by_mode( method get_ocr_images_polygons_ids (line 120) | def get_ocr_images_polygons_ids( method ocr_extraction (line 165) | def ocr_extraction( method link_and_break_span (line 233) | def link_and_break_span(self, span: Span, text: str, match_text, url: ... method replace_line_spans (line 252) | def replace_line_spans( method assign_chars (line 294) | def assign_chars(self, span: Span, current_chars: List[Char]): method store_char (line 300) | def store_char(self, char: Char, current_chars: List[Char], page: Page... method spans_from_html_chars (line 305) | def spans_from_html_chars( FILE: marker/builders/structure.py class StructureBuilder (line 12) | class StructureBuilder(BaseBuilder): method __init__ (line 25) | def __init__(self, config=None): method __call__ (line 28) | def __call__(self, document: Document): method group_caption_blocks (line 34) | def group_caption_blocks(self, page: PageGroup): method group_lists (line 79) | def group_lists(self, page: PageGroup): method unmark_lists (line 116) | def unmark_lists(self, page: PageGroup): FILE: marker/config/crawler.py class ConfigCrawler (line 16) | class ConfigCrawler: method __init__ (line 17) | def __init__( method _crawl_config (line 34) | def _crawl_config(self): method _gather_super_annotations (line 64) | def _gather_super_annotations(cls: Type) -> Dict[str, Type]: method attr_counts (line 81) | def attr_counts(self) -> Dict[str, int]: method attr_set (line 90) | def attr_set(self) -> Set[str]: method _find_subclasses (line 99) | def _find_subclasses(self, base_class): method _format_type (line 116) | def _format_type(self, t: Type) -> str: FILE: marker/config/parser.py class ConfigParser (line 19) | class ConfigParser: method __init__ (line 20) | def __init__(self, cli_options: dict): method common_options (line 24) | def common_options(fn): method generate_config_dict (line 86) | def generate_config_dict(self) -> Dict[str, any]: method get_llm_service (line 117) | def get_llm_service(self): method get_renderer (line 127) | def get_renderer(self): method get_processors (line 141) | def get_processors(self): method get_converter_cls (line 154) | def get_converter_cls(self): method get_output_folder (line 167) | def get_output_folder(self, filepath: str): method get_base_filename (line 174) | def get_base_filename(self, filepath: str): FILE: marker/config/printer.py class CustomClickPrinter (line 8) | class CustomClickPrinter(click.Command): method parse_args (line 9) | def parse_args(self, ctx, args): FILE: marker/converters/__init__.py class BaseConverter (line 12) | class BaseConverter: method __init__ (line 13) | def __init__(self, config: Optional[BaseModel | dict] = None): method __call__ (line 21) | def __call__(self, *args, **kwargs): method resolve_dependencies (line 24) | def resolve_dependencies(self, cls): method initialize_processors (line 43) | def initialize_processors(self, processor_cls_lst: List[Type[BaseProce... FILE: marker/converters/extraction.py class ExtractionConverter (line 21) | class ExtractionConverter(PdfConverter): method build_document (line 27) | def build_document(self, filepath: str): method __call__ (line 44) | def __call__(self, filepath: str) -> ExtractionOutput: FILE: marker/converters/ocr.py class OCRConverter (line 13) | class OCRConverter(PdfConverter): method __init__ (line 16) | def __init__(self, *args, **kwargs): method build_document (line 25) | def build_document(self, filepath: str): method __call__ (line 40) | def __call__(self, filepath: str): FILE: marker/converters/pdf.py class PdfConverter (line 58) | class PdfConverter(BaseConverter): method __init__ (line 106) | def __init__( method filepath_to_str (line 154) | def filepath_to_str(self, file_input: Union[str, io.BytesIO]): method build_document (line 176) | def build_document(self, filepath: str) -> Document: method __call__ (line 193) | def __call__(self, filepath: str | io.BytesIO): FILE: marker/converters/table.py class TableConverter (line 17) | class TableConverter(PdfConverter): method build_document (line 31) | def build_document(self, filepath: str): method __call__ (line 52) | def __call__(self, filepath: str): FILE: marker/extractors/__init__.py class BaseExtractor (line 12) | class BaseExtractor: method __init__ (line 26) | def __init__(self, llm_service: BaseService, config=None): method extract_image (line 30) | def extract_image( method __call__ (line 43) | def __call__(self, document: Document, *args, **kwargs): FILE: marker/extractors/document.py class DocumentExtractionSchema (line 13) | class DocumentExtractionSchema(BaseModel): class DocumentExtractor (line 18) | class DocumentExtractor(BaseExtractor): method assemble_document_notes (line 106) | def assemble_document_notes(self, page_notes: List[PageExtractionSchem... method __call__ (line 114) | def __call__( FILE: marker/extractors/page.py class PageExtractionSchema (line 15) | class PageExtractionSchema(BaseModel): class PageExtractor (line 20) | class PageExtractor(BaseExtractor): method chunk_page_markdown (line 101) | def chunk_page_markdown(self, page_markdown: List[str]) -> List[str]: method inference_single_chunk (line 113) | def inference_single_chunk( method __call__ (line 138) | def __call__( FILE: marker/logger.py function configure_logging (line 7) | def configure_logging(): function get_logger (line 31) | def get_logger(): FILE: marker/models.py function create_model_dict (line 16) | def create_model_dict( FILE: marker/output.py function unwrap_outer_tag (line 17) | def unwrap_outer_tag(html: str): function json_to_html (line 27) | def json_to_html(block: JSONBlockOutput | BlockOutput): function output_exists (line 47) | def output_exists(output_dir: str, fname_base: str): function text_from_rendered (line 55) | def text_from_rendered(rendered: BaseModel): function convert_if_not_rgb (line 74) | def convert_if_not_rgb(image: Image.Image) -> Image.Image: function save_output (line 80) | def save_output(rendered: BaseModel, output_dir: str, fname_base: str): FILE: marker/processors/__init__.py class BaseProcessor (line 10) | class BaseProcessor: method __init__ (line 13) | def __init__(self, config: Optional[BaseModel | dict] = None): method __call__ (line 16) | def __call__(self, document: Document, *args, **kwargs): FILE: marker/processors/blank_page.py class BlankPageProcessor (line 17) | class BlankPageProcessor(BaseProcessor): method is_blank (line 29) | def is_blank(self, image: Image.Image): method __call__ (line 55) | def __call__(self, document: Document): FILE: marker/processors/block_relabel.py class BlockRelabelProcessor (line 13) | class BlockRelabelProcessor(BaseProcessor): method __init__ (line 28) | def __init__(self, config=None): method __call__ (line 58) | def __call__(self, document: Document): FILE: marker/processors/blockquote.py class BlockquoteProcessor (line 8) | class BlockquoteProcessor(BaseProcessor): method __init__ (line 32) | def __init__(self, config): method __call__ (line 35) | def __call__(self, document: Document): FILE: marker/processors/code.py class CodeProcessor (line 7) | class CodeProcessor(BaseProcessor): method __call__ (line 13) | def __call__(self, document: Document): method format_block (line 19) | def format_block(self, document: Document, block: Code): FILE: marker/processors/debug.py class DebugProcessor (line 16) | class DebugProcessor(BaseProcessor): method __call__ (line 41) | def __call__(self, document: Document): method draw_pdf_debug_images (line 62) | def draw_pdf_debug_images(self, document: Document): method draw_layout_debug_images (line 96) | def draw_layout_debug_images(self, document: Document, pdf_mode=False): method render_layout_boxes (line 130) | def render_layout_boxes(self, page, png_image): method dump_block_debug_data (line 162) | def dump_block_debug_data(self, document: Document): method get_text_size (line 180) | def get_text_size(self, text, font): method render_on_image (line 186) | def render_on_image( FILE: marker/processors/document_toc.py class DocumentTOCProcessor (line 6) | class DocumentTOCProcessor(BaseProcessor): method __call__ (line 12) | def __call__(self, document: Document): FILE: marker/processors/equation.py class EquationProcessor (line 17) | class EquationProcessor(BaseProcessor): method __init__ (line 41) | def __init__(self, recognition_model: RecognitionPredictor, config=None): method get_batch_size (line 46) | def get_batch_size(self): method __call__ (line 56) | def __call__(self, document: Document): method fix_latex (line 97) | def fix_latex(self, math_html: str): method get_latex_batched (line 123) | def get_latex_batched( FILE: marker/processors/footnote.py class FootnoteProcessor (line 9) | class FootnoteProcessor(BaseProcessor): method __call__ (line 15) | def __call__(self, document: Document): method push_footnotes_to_bottom (line 20) | def push_footnotes_to_bottom(self, page: PageGroup, document: Document): method assign_superscripts (line 31) | def assign_superscripts(self, page: PageGroup, document: Document): FILE: marker/processors/ignoretext.py class IgnoreTextProcessor (line 14) | class IgnoreTextProcessor(BaseProcessor): method __call__ (line 44) | def __call__(self, document: Document): method clean_text (line 66) | def clean_text(text): method filter_common_elements (line 72) | def filter_common_elements(self, document, blocks: List[Block]): FILE: marker/processors/line_merge.py class LineMergeProcessor (line 11) | class LineMergeProcessor(BaseProcessor): method __init__ (line 41) | def __init__(self, config): method merge_lines (line 44) | def merge_lines(self, lines: List[Line], block: Block): method __call__ (line 116) | def __call__(self, document: Document): FILE: marker/processors/line_numbers.py class LineNumbersProcessor (line 8) | class LineNumbersProcessor(BaseProcessor): method __init__ (line 32) | def __init__(self, config): method __call__ (line 35) | def __call__(self, document: Document): method ignore_line_number_spans (line 40) | def ignore_line_number_spans(self, document: Document): method ignore_line_number_blocks (line 61) | def ignore_line_number_blocks(self, document: Document): method ignore_line_starts_ends (line 76) | def ignore_line_starts_ends(self, document: Document): FILE: marker/processors/list.py class ListProcessor (line 9) | class ListProcessor(BaseProcessor): method __init__ (line 23) | def __init__(self, config): method __call__ (line 26) | def __call__(self, document: Document): method list_group_continuation (line 30) | def list_group_continuation(self, document: Document): method list_group_indentation (line 57) | def list_group_indentation(self, document: Document): FILE: marker/processors/llm/__init__.py class PromptData (line 23) | class PromptData(TypedDict): class BlockData (line 32) | class BlockData(TypedDict): class BaseLLMProcessor (line 37) | class BaseLLMProcessor(BaseProcessor): method __init__ (line 60) | def __init__(self, llm_service: BaseService, config=None): method extract_image (line 69) | def extract_image( method normalize_block_json (line 82) | def normalize_block_json(self, block: Block, document: Document, page:... method load_blocks (line 107) | def load_blocks(self, response: dict): method handle_rewrites (line 110) | def handle_rewrites(self, blocks: list, document: Document): class BaseLLMComplexBlockProcessor (line 132) | class BaseLLMComplexBlockProcessor(BaseLLMProcessor): method __call__ (line 137) | def __call__(self, document: Document): method process_rewriting (line 146) | def process_rewriting(self, document: Document, page: PageGroup, block... method rewrite_blocks (line 149) | def rewrite_blocks(self, document: Document): class BaseLLMSimpleBlockProcessor (line 177) | class BaseLLMSimpleBlockProcessor(BaseLLMProcessor): method __init__ (line 183) | def __init__(self, config=None): method __call__ (line 186) | def __call__(self, result: dict, prompt_data: PromptData, document: Do... method inference_blocks (line 193) | def inference_blocks(self, document: Document) -> List[BlockData]: method block_prompts (line 200) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 203) | def rewrite_block( FILE: marker/processors/llm/llm_complex.py class LLMComplexRegionProcessor (line 12) | class LLMComplexRegionProcessor(BaseLLMSimpleBlockProcessor): method block_prompts (line 54) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 69) | def rewrite_block(self, response: dict, prompt_data: PromptData, docum... class ComplexSchema (line 92) | class ComplexSchema(BaseModel): FILE: marker/processors/llm/llm_equation.py class LLMEquationProcessor (line 10) | class LLMEquationProcessor(BaseLLMSimpleBlockProcessor): method inference_blocks (line 71) | def inference_blocks(self, document: Document) -> List[BlockData]: method block_prompts (line 87) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 106) | def rewrite_block(self, response: dict, prompt_data: PromptData, docum... class EquationSchema (line 130) | class EquationSchema(BaseModel): FILE: marker/processors/llm/llm_form.py class LLMFormProcessor (line 12) | class LLMFormProcessor(BaseLLMSimpleBlockProcessor): method inference_blocks (line 65) | def inference_blocks(self, document: Document) -> List[BlockData]: method block_prompts (line 77) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 94) | def rewrite_block(self, response: dict, prompt_data: PromptData, docum... class FormSchema (line 116) | class FormSchema(BaseModel): FILE: marker/processors/llm/llm_handwriting.py class LLMHandwritingProcessor (line 11) | class LLMHandwritingProcessor(BaseLLMSimpleBlockProcessor): method inference_blocks (line 36) | def inference_blocks(self, document: Document) -> List[BlockData]: method block_prompts (line 52) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 68) | def rewrite_block(self, response: dict, prompt_data: PromptData, docum... class HandwritingSchema (line 84) | class HandwritingSchema(BaseModel): FILE: marker/processors/llm/llm_image_description.py class LLMImageDescriptionProcessor (line 11) | class LLMImageDescriptionProcessor(BaseLLMSimpleBlockProcessor): method inference_blocks (line 42) | def inference_blocks(self, document: Document) -> List[BlockData]: method block_prompts (line 48) | def block_prompts(self, document: Document) -> List[PromptData]: method rewrite_block (line 69) | def rewrite_block( class ImageSchema (line 86) | class ImageSchema(BaseModel): FILE: marker/processors/llm/llm_mathblock.py class LLMMathBlockProcessor (line 16) | class LLMMathBlockProcessor(BaseLLMComplexBlockProcessor): method rewrite_blocks (line 76) | def rewrite_blocks(self, document: Document): method get_block_text (line 161) | def get_block_text(self, block: Block, document: Document) -> str: method get_block_lines (line 166) | def get_block_lines(self, block: Block, document: Document) -> Tuple[l... method process_rewriting (line 171) | def process_rewriting(self, document: Document, page: PageGroup, block... class LLMTextSchema (line 198) | class LLMTextSchema(BaseModel): FILE: marker/processors/llm/llm_meta.py class LLMSimpleBlockMetaProcessor (line 14) | class LLMSimpleBlockMetaProcessor(BaseLLMProcessor): method __init__ (line 19) | def __init__( method __call__ (line 28) | def __call__(self, document: Document): method get_response (line 67) | def get_response(self, prompt_data: Dict[str, Any]): FILE: marker/processors/llm/llm_page_correction.py class LLMPageCorrectionProcessor (line 32) | class LLMPageCorrectionProcessor(BaseLLMComplexBlockProcessor): method get_selected_blocks (line 138) | def get_selected_blocks( method process_rewriting (line 150) | def process_rewriting(self, document: Document, page1: PageGroup): method load_blocks (line 183) | def load_blocks(self, response): method handle_reorder (line 187) | def handle_reorder(self, blocks: list, page1: PageGroup): method handle_rewrites (line 247) | def handle_rewrites(self, blocks: list, document: Document): method rewrite_blocks (line 268) | def rewrite_blocks(self, document: Document): class BlockSchema (line 296) | class BlockSchema(BaseModel): class PageSchema (line 302) | class PageSchema(BaseModel): FILE: marker/processors/llm/llm_sectionheader.py class LLMSectionHeaderProcessor (line 17) | class LLMSectionHeaderProcessor(BaseLLMComplexBlockProcessor): method get_selected_blocks (line 99) | def get_selected_blocks( method process_rewriting (line 111) | def process_rewriting( method load_blocks (line 141) | def load_blocks(self, response): method rewrite_blocks (line 145) | def rewrite_blocks(self, document: Document): class BlockSchema (line 167) | class BlockSchema(BaseModel): class SectionHeaderSchema (line 172) | class SectionHeaderSchema(BaseModel): FILE: marker/processors/llm/llm_table.py class LLMTableProcessor (line 18) | class LLMTableProcessor(BaseLLMComplexBlockProcessor): method handle_image_rotation (line 94) | def handle_image_rotation(self, children: List[TableCell], image: Imag... method process_rewriting (line 121) | def process_rewriting(self, document: Document, page: PageGroup, block... method rewrite_single_chunk (line 188) | def rewrite_single_chunk( method get_cell_text (line 243) | def get_cell_text(element, keep_tags=("br", "i", "b", "span", "math"))... method parse_html_table (line 249) | def parse_html_table( class TableSchema (line 323) | class TableSchema(BaseModel): FILE: marker/processors/llm/llm_table_merge.py class LLMTableMergeProcessor (line 17) | class LLMTableMergeProcessor(BaseLLMComplexBlockProcessor): method get_row_count (line 127) | def get_row_count(cells: List[TableCell]): method get_column_count (line 142) | def get_column_count(cells: List[TableCell]): method rewrite_blocks (line 156) | def rewrite_blocks(self, document: Document): method process_rewriting (line 244) | def process_rewriting(self, document: Document, blocks: List[Block]): method validate_merge (line 295) | def validate_merge(self, cells1: List[TableCell], cells2: List[TableCe... method join_cells (line 308) | def join_cells(self, cells1: List[TableCell], cells2: List[TableCell],... method join_images (line 324) | def join_images(image1: Image.Image, image2: Image.Image, direction: L... class MergeSchema (line 344) | class MergeSchema(BaseModel): FILE: marker/processors/order.py class OrderProcessor (line 9) | class OrderProcessor(BaseProcessor): method __call__ (line 15) | def __call__(self, document: Document): FILE: marker/processors/page_header.py class PageHeaderProcessor (line 7) | class PageHeaderProcessor(BaseProcessor): method __call__ (line 13) | def __call__(self, document: Document): method move_page_header_to_top (line 17) | def move_page_header_to_top(self, page: PageGroup, document: Document): FILE: marker/processors/reference.py class ReferenceProcessor (line 13) | class ReferenceProcessor(BaseProcessor): method __init__ (line 18) | def __init__(self, config): method __call__ (line 21) | def __call__(self, document: Document): FILE: marker/processors/sectionheader.py class SectionHeaderProcessor (line 16) | class SectionHeaderProcessor(BaseProcessor): method __call__ (line 38) | def __call__(self, document: Document): method bucket_headings (line 69) | def bucket_headings(self, line_heights: List[float], num_levels=4): FILE: marker/processors/table.py class TableProcessor (line 28) | class TableProcessor(BaseProcessor): method __init__ (line 72) | def __init__( method __call__ (line 85) | def __call__(self, document: Document): method finalize_cell_text (line 187) | def finalize_cell_text(self, cell: SuryaTableCell): method normalize_spaces (line 222) | def normalize_spaces(text): method combine_dollar_column (line 234) | def combine_dollar_column(self, tables: List[TableResult]): method split_combined_rows (line 312) | def split_combined_rows(self, tables: List[TableResult]): method assign_text_to_cells (line 427) | def assign_text_to_cells(self, tables: List[TableResult], table_data: ... method assign_pdftext_lines (line 457) | def assign_pdftext_lines(self, extract_blocks: list, filepath: str): method align_table_cells (line 498) | def align_table_cells( method needs_ocr (line 569) | def needs_ocr(self, tables: List[TableResult], table_blocks: List[dict]): method get_ocr_results (line 604) | def get_ocr_results( method assign_ocr_lines (line 671) | def assign_ocr_lines(self, tables: List[TableResult], table_blocks: li... method get_table_rec_batch_size (line 696) | def get_table_rec_batch_size(self): method get_recognition_batch_size (line 705) | def get_recognition_batch_size(self): method get_detection_batch_size (line 714) | def get_detection_batch_size(self): FILE: marker/processors/text.py class TextProcessor (line 12) | class TextProcessor(BaseProcessor): method __init__ (line 24) | def __init__(self, config): method __call__ (line 27) | def __call__(self, document: Document): FILE: marker/processors/util.py function escape_latex_commands (line 11) | def escape_latex_commands(text: str): function add_math_spans_to_line (line 19) | def add_math_spans_to_line(corrected_text: str, text_line: Line, page: P... function text_to_spans (line 47) | def text_to_spans(text): FILE: marker/providers/__init__.py class ProviderOutput (line 20) | class ProviderOutput(BaseModel): method raw_text (line 26) | def raw_text(self): method __hash__ (line 29) | def __hash__(self): method merge (line 32) | def merge(self, other: "ProviderOutput"): class BaseProvider (line 51) | class BaseProvider: method __init__ (line 52) | def __init__(self, filepath: str, config: Optional[BaseModel | dict] =... method __len__ (line 56) | def __len__(self): method get_images (line 59) | def get_images(self, idxs: List[int], dpi: int) -> List[Image.Image]: method get_page_bbox (line 62) | def get_page_bbox(self, idx: int) -> PolygonBox | None: method get_page_lines (line 65) | def get_page_lines(self, idx: int) -> List[Line]: method get_page_refs (line 68) | def get_page_refs(self, idx: int) -> List[Reference]: method __enter__ (line 71) | def __enter__(self): method get_font_css (line 75) | def get_font_css(): FILE: marker/providers/document.py class DocumentProvider (line 52) | class DocumentProvider(PdfProvider): method __init__ (line 53) | def __init__(self, filepath: str, config=None): method __del__ (line 67) | def __del__(self): method convert_docx_to_pdf (line 71) | def convert_docx_to_pdf(self, filepath: str): method _preprocess_base64_images (line 86) | def _preprocess_base64_images(html_content): FILE: marker/providers/epub.py class EpubProvider (line 47) | class EpubProvider(PdfProvider): method __init__ (line 48) | def __init__(self, filepath: str, config=None): method __del__ (line 62) | def __del__(self): method convert_epub_to_pdf (line 66) | def convert_epub_to_pdf(self, filepath): FILE: marker/providers/html.py class HTMLProvider (line 7) | class HTMLProvider(PdfProvider): method __init__ (line 8) | def __init__(self, filepath: str, config=None): method __del__ (line 22) | def __del__(self): method convert_html_to_pdf (line 26) | def convert_html_to_pdf(self, filepath: str): FILE: marker/providers/image.py class ImageProvider (line 10) | class ImageProvider(BaseProvider): method __init__ (line 19) | def __init__(self, filepath: str, config=None): method __len__ (line 37) | def __len__(self): method get_images (line 40) | def get_images(self, idxs: List[int], dpi: int) -> List[Image.Image]: method get_page_bbox (line 43) | def get_page_bbox(self, idx: int) -> PolygonBox | None: method get_page_lines (line 48) | def get_page_lines(self, idx: int) -> List[Line]: method get_page_refs (line 51) | def get_page_refs(self, idx: int) -> List[Reference]: FILE: marker/providers/pdf.py class PdfProvider (line 29) | class PdfProvider(BaseProvider): method __init__ (line 84) | def __init__(self, filepath: str, config=None): method get_doc (line 110) | def get_doc(self): method __len__ (line 124) | def __len__(self) -> int: method font_flags_to_format (line 127) | def font_flags_to_format(self, flags: Optional[int]) -> Set[str]: method font_names_to_format (line 178) | def font_names_to_format(self, font_name: str | None) -> Set[str]: method normalize_spaces (line 190) | def normalize_spaces(text): method pdftext_extraction (line 202) | def pdftext_extraction(self, doc: PdfDocument) -> ProviderPageLines: method check_line_spans (line 306) | def check_line_spans(self, page_lines: List[ProviderOutput]) -> bool: method check_page (line 321) | def check_page(self, page_id: int, doc: PdfDocument) -> bool: method detect_bad_ocr (line 379) | def detect_bad_ocr(self, text): method _render_image (line 404) | def _render_image( method get_images (line 415) | def get_images(self, idxs: List[int], dpi: int) -> List[Image.Image]: method get_page_bbox (line 422) | def get_page_bbox(self, idx: int) -> PolygonBox | None: method get_page_lines (line 427) | def get_page_lines(self, idx: int) -> List[ProviderOutput]: method get_page_refs (line 430) | def get_page_refs(self, idx: int) -> List[Reference]: method _get_fontname (line 434) | def _get_fontname(font) -> str: FILE: marker/providers/powerpoint.py class PowerPointProvider (line 42) | class PowerPointProvider(PdfProvider): method __init__ (line 45) | def __init__(self, filepath: str, config=None): method __del__ (line 60) | def __del__(self): method convert_pptx_to_pdf (line 64) | def convert_pptx_to_pdf(self, filepath): method _handle_group (line 112) | def _handle_group(self, group_shape) -> str: method _handle_text (line 140) | def _handle_text(self, shape) -> str: method _handle_image (line 213) | def _handle_image(self, shape) -> str: method _handle_table (line 227) | def _handle_table(self, shape) -> str: method _escape_html (line 244) | def _escape_html(self, text: str) -> str: FILE: marker/providers/registry.py function load_matchers (line 28) | def load_matchers(doctype: str): function load_extensions (line 32) | def load_extensions(doctype: str): function provider_from_ext (line 36) | def provider_from_ext(filepath: str): function provider_from_filepath (line 59) | def provider_from_filepath(filepath: str): FILE: marker/providers/spreadsheet.py class SpreadSheetProvider (line 31) | class SpreadSheetProvider(PdfProvider): method __init__ (line 32) | def __init__(self, filepath: str, config=None): method __del__ (line 46) | def __del__(self): method convert_xlsx_to_pdf (line 50) | def convert_xlsx_to_pdf(self, filepath: str): method _get_merged_cell_ranges (line 70) | def _get_merged_cell_ranges(sheet): method _excel_to_html_table (line 81) | def _excel_to_html_table(self, sheet): FILE: marker/providers/utils.py function alphanum_ratio (line 1) | def alphanum_ratio(text): FILE: marker/renderers/__init__.py class BaseRenderer (line 17) | class BaseRenderer: method __init__ (line 36) | def __init__(self, config: Optional[BaseModel | dict] = None): method __call__ (line 45) | def __call__(self, document): method extract_image (line 49) | def extract_image(self, document: Document, image_id, to_base64=False): method merge_consecutive_math (line 68) | def merge_consecutive_math(html, tag="math"): method merge_consecutive_tags (line 79) | def merge_consecutive_tags(html, tag): method generate_page_stats (line 100) | def generate_page_stats(self, document: Document, document_output): method generate_document_metadata (line 117) | def generate_document_metadata(self, document: Document, document_outp... method extract_block_html (line 127) | def extract_block_html(self, document: Document, block_output: BlockOu... FILE: marker/renderers/chunk.py class FlatBlockOutput (line 11) | class FlatBlockOutput(BaseModel): class ChunkOutput (line 22) | class ChunkOutput(BaseModel): function collect_images (line 27) | def collect_images(block: JSONBlockOutput) -> dict[str, str]: function assemble_html_with_images (line 36) | def assemble_html_with_images(block: JSONBlockOutput, image_blocks: set[... function json_to_chunks (line 55) | def json_to_chunks( class ChunkRenderer (line 74) | class ChunkRenderer(JSONRenderer): method __call__ (line 76) | def __call__(self, document: Document) -> ChunkOutput: FILE: marker/renderers/extraction.py class ExtractionOutput (line 7) | class ExtractionOutput(BaseModel): class ExtractionRenderer (line 13) | class ExtractionRenderer(BaseRenderer): method __call__ (line 14) | def __call__( FILE: marker/renderers/html.py class HTMLOutput (line 23) | class HTMLOutput(BaseModel): class HTMLRenderer (line 29) | class HTMLRenderer(BaseRenderer): method extract_image (line 43) | def extract_image(self, document, image_id): method insert_block_id (line 50) | def insert_block_id(self, soup, block_id: BlockId): method extract_html (line 81) | def extract_html(self, document, document_output, level=0): method __call__ (line 143) | def __call__(self, document) -> HTMLOutput: FILE: marker/renderers/json.py class JSONBlockOutput (line 12) | class JSONBlockOutput(BaseModel): class JSONOutput (line 23) | class JSONOutput(BaseModel): function reformat_section_hierarchy (line 29) | def reformat_section_hierarchy(section_hierarchy): class JSONRenderer (line 36) | class JSONRenderer(BaseRenderer): method extract_json (line 50) | def extract_json(self, document: Document, block_output: BlockOutput): method __call__ (line 83) | def __call__(self, document: Document) -> JSONOutput: FILE: marker/renderers/markdown.py function escape_dollars (line 19) | def escape_dollars(text): function cleanup_text (line 23) | def cleanup_text(full_text): function get_formatted_table_text (line 29) | def get_formatted_table_text(element): class Markdownify (line 58) | class Markdownify(MarkdownConverter): method __init__ (line 59) | def __init__( method convert_div (line 75) | def convert_div(self, el, text, parent_tags): method convert_p (line 86) | def convert_p(self, el, text, parent_tags): method convert_math (line 101) | def convert_math(self, el, text, parent_tags): method convert_table (line 120) | def convert_table(self, el, text, parent_tags): method convert_a (line 220) | def convert_a(self, el, text, parent_tags): method convert_span (line 226) | def convert_span(self, el, text, parent_tags): method escape (line 232) | def escape(self, text, parent_tags=None): method process_text (line 238) | def process_text(self, el, parent_tags=None): class MarkdownOutput (line 260) | class MarkdownOutput(BaseModel): class MarkdownRenderer (line 266) | class MarkdownRenderer(HTMLRenderer): method md_cls (line 281) | def md_cls(self): method __call__ (line 298) | def __call__(self, document: Document) -> MarkdownOutput: FILE: marker/renderers/ocr_json.py class OCRJSONCharOutput (line 10) | class OCRJSONCharOutput(BaseModel): class OCRJSONLineOutput (line 18) | class OCRJSONLineOutput(BaseModel): class OCRJSONPageOutput (line 27) | class OCRJSONPageOutput(BaseModel): class OCRJSONOutput (line 35) | class OCRJSONOutput(BaseModel): class OCRJSONRenderer (line 41) | class OCRJSONRenderer(BaseRenderer): method extract_json (line 55) | def extract_json(self, document: Document) -> List[OCRJSONPageOutput]: method __call__ (line 133) | def __call__(self, document: Document) -> OCRJSONOutput: FILE: marker/schema/__init__.py class BlockTypes (line 4) | class BlockTypes(str, Enum): method __str__ (line 34) | def __str__(self): FILE: marker/schema/blocks/base.py class BlockMetadata (line 16) | class BlockMetadata(BaseModel): method merge (line 24) | def merge(self, model2): class BlockOutput (line 33) | class BlockOutput(BaseModel): class BlockId (line 41) | class BlockId(BaseModel): method __str__ (line 46) | def __str__(self): method __hash__ (line 51) | def __hash__(self): method __repr__ (line 54) | def __repr__(self): method __eq__ (line 57) | def __eq__(self, other): method validate_block_type (line 72) | def validate_block_type(cls, v): method to_path (line 79) | def to_path(self): class Block (line 83) | class Block(BaseModel): method id (line 108) | def id(self) -> BlockId: method from_block (line 114) | def from_block(cls, block: Block) -> Block: method set_internal_metadata (line 118) | def set_internal_metadata(self, key, data): method get_internal_metadata (line 123) | def get_internal_metadata(self, key): method get_image (line 128) | def get_image( method structure_blocks (line 150) | def structure_blocks(self, document_page: Document | PageGroup) -> Lis... method get_prev_block (line 155) | def get_prev_block( method get_next_block (line 172) | def get_next_block( method add_structure (line 191) | def add_structure(self, block: Block): method update_structure_item (line 197) | def update_structure_item(self, old_id: BlockId, new_id: BlockId): method remove_structure_items (line 204) | def remove_structure_items(self, block_ids: List[BlockId]): method raw_text (line 208) | def raw_text(self, document: Document) -> str: method assemble_html (line 227) | def assemble_html( method assign_section_hierarchy (line 247) | def assign_section_hierarchy(self, section_hierarchy): method contained_blocks (line 257) | def contained_blocks( method replace_block (line 275) | def replace_block(self, block: Block, new_block: Block): method render (line 282) | def render( method line_height (line 318) | def line_height(self, document: Document) -> float: method update_metadata (line 324) | def update_metadata(self, **kwargs): method handle_html_output (line 337) | def handle_html_output( FILE: marker/schema/blocks/basetable.py class BaseTable (line 8) | class BaseTable(Block): method format_cells (line 13) | def format_cells( method assemble_html (line 38) | def assemble_html( FILE: marker/schema/blocks/caption.py class Caption (line 5) | class Caption(Block): method assemble_html (line 11) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/schema/blocks/code.py class Code (line 7) | class Code(Block): method assemble_html (line 13) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/schema/blocks/complexregion.py class ComplexRegion (line 5) | class ComplexRegion(Block): method assemble_html (line 10) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/schema/blocks/equation.py class Equation (line 5) | class Equation(Block): method assemble_html (line 10) | def assemble_html( FILE: marker/schema/blocks/figure.py class Figure (line 5) | class Figure(Block): method assemble_html (line 11) | def assemble_html( FILE: marker/schema/blocks/footnote.py class Footnote (line 5) | class Footnote(Block): method assemble_html (line 13) | def assemble_html( FILE: marker/schema/blocks/form.py class Form (line 7) | class Form(BaseTable): FILE: marker/schema/blocks/handwriting.py class Handwriting (line 5) | class Handwriting(Block): method assemble_html (line 11) | def assemble_html( FILE: marker/schema/blocks/inlinemath.py class InlineMath (line 5) | class InlineMath(Block): method assemble_html (line 13) | def assemble_html( FILE: marker/schema/blocks/listitem.py function replace_bullets (line 7) | def replace_bullets(child_blocks): class ListItem (line 19) | class ListItem(Block): method assemble_html (line 25) | def assemble_html( FILE: marker/schema/blocks/pagefooter.py class PageFooter (line 5) | class PageFooter(Block): method assemble_html (line 14) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/schema/blocks/pageheader.py class PageHeader (line 5) | class PageHeader(Block): method assemble_html (line 14) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/schema/blocks/picture.py class Picture (line 5) | class Picture(Block): method assemble_html (line 11) | def assemble_html( FILE: marker/schema/blocks/reference.py class Reference (line 5) | class Reference(Block): method assemble_html (line 10) | def assemble_html( FILE: marker/schema/blocks/sectionheader.py class SectionHeader (line 7) | class SectionHeader(Block): method assemble_html (line 13) | def assemble_html( FILE: marker/schema/blocks/table.py class Table (line 5) | class Table(BaseTable): FILE: marker/schema/blocks/tablecell.py class TableCell (line 7) | class TableCell(Block): method text (line 18) | def text(self): method assemble_html (line 21) | def assemble_html( FILE: marker/schema/blocks/text.py class Text (line 5) | class Text(Block): method assemble_html (line 13) | def assemble_html( FILE: marker/schema/blocks/toc.py class TableOfContents (line 5) | class TableOfContents(BaseTable): FILE: marker/schema/document.py class DocumentOutput (line 12) | class DocumentOutput(BaseModel): class TocItem (line 18) | class TocItem(BaseModel): class Document (line 25) | class Document(BaseModel): method get_block (line 32) | def get_block(self, block_id: BlockId): method get_page (line 39) | def get_page(self, page_id): method get_next_block (line 45) | def get_next_block( method get_next_page (line 65) | def get_next_page(self, page: PageGroup): method get_prev_block (line 71) | def get_prev_block(self, block: Block): method get_prev_page (line 81) | def get_prev_page(self, page: PageGroup): method assemble_html (line 87) | def assemble_html( method render (line 95) | def render(self, block_config: Optional[dict] = None): method contained_blocks (line 108) | def contained_blocks(self, block_types: Sequence[BlockTypes] = None) -... FILE: marker/schema/groups/base.py class Group (line 4) | class Group(Block): FILE: marker/schema/groups/figure.py class FigureGroup (line 5) | class FigureGroup(Group): method assemble_html (line 10) | def assemble_html( FILE: marker/schema/groups/list.py class ListGroup (line 5) | class ListGroup(Group): method assemble_html (line 11) | def assemble_html( FILE: marker/schema/groups/page.py class PageGroup (line 21) | class PageGroup(Group): method incr_block_id (line 39) | def incr_block_id(self): method add_child (line 45) | def add_child(self, block: Block): method get_image (line 51) | def get_image( method current_children (line 82) | def current_children(self) -> List[Block]: method get_next_block (line 85) | def get_next_block( method get_prev_block (line 104) | def get_prev_block(self, block: Block): method add_block (line 110) | def add_block(self, block_cls: type[Block], polygon: PolygonBox) -> Bl... method add_full_block (line 120) | def add_full_block(self, block: Block) -> Block: method get_block (line 126) | def get_block(self, block_id: BlockId) -> Block | None: method assemble_html (line 131) | def assemble_html( method compute_line_block_intersections (line 139) | def compute_line_block_intersections( method compute_max_structure_block_intersection_pct (line 163) | def compute_max_structure_block_intersection_pct(self): method replace_block (line 178) | def replace_block(self, block: Block, new_block: Block): method identify_missing_blocks (line 192) | def identify_missing_blocks( method create_missing_blocks (line 232) | def create_missing_blocks( method add_initial_blocks (line 262) | def add_initial_blocks( method merge_blocks (line 308) | def merge_blocks( method aggregate_block_metadata (line 360) | def aggregate_block_metadata(self) -> BlockMetadata: FILE: marker/schema/groups/picture.py class PictureGroup (line 5) | class PictureGroup(Group): method assemble_html (line 10) | def assemble_html( FILE: marker/schema/groups/table.py class TableGroup (line 8) | class TableGroup(Group): method assemble_html (line 13) | def assemble_html( FILE: marker/schema/polygon.py class PolygonBox (line 9) | class PolygonBox(BaseModel): method check_elements (line 14) | def check_elements(cls, v: List[List[float]]) -> List[List[float]]: method height (line 34) | def height(self): method width (line 38) | def width(self): method area (line 42) | def area(self): method center (line 46) | def center(self): method size (line 50) | def size(self): method x_start (line 54) | def x_start(self): method y_start (line 58) | def y_start(self): method x_end (line 62) | def x_end(self): method y_end (line 66) | def y_end(self): method bbox (line 71) | def bbox(self) -> List[float]: method expand (line 78) | def expand(self, x_margin: float, y_margin: float) -> PolygonBox: method expand_y2 (line 93) | def expand_y2(self, y_margin: float) -> PolygonBox: method expand_y1 (line 105) | def expand_y1(self, y_margin: float) -> PolygonBox: method minimum_gap (line 117) | def minimum_gap(self, other: PolygonBox): method center_distance (line 147) | def center_distance(self, other: PolygonBox, x_weight: float = 1, y_we... method tl_distance (line 153) | def tl_distance(self, other: PolygonBox): method rescale (line 156) | def rescale(self, old_size, new_size): method fit_to_bounds (line 170) | def fit_to_bounds(self, bounds): method overlap_x (line 177) | def overlap_x(self, other: PolygonBox): method overlap_y (line 180) | def overlap_y(self, other: PolygonBox): method intersection_area (line 183) | def intersection_area(self, other: PolygonBox): method intersection_pct (line 186) | def intersection_pct(self, other: PolygonBox): method merge (line 193) | def merge(self, others: List[PolygonBox]) -> PolygonBox: method from_bbox (line 214) | def from_bbox(cls, bbox: List[float], ensure_nonzero_area=False): FILE: marker/schema/registry.py function register_block_class (line 41) | def register_block_class(block_type: BlockTypes, block_cls: Type[Block]): function get_block_class (line 45) | def get_block_class(block_type: BlockTypes) -> Type[Block]: FILE: marker/schema/text/char.py class Char (line 5) | class Char(Block): FILE: marker/schema/text/line.py function remove_tags (line 13) | def remove_tags(text): function replace_last (line 17) | def replace_last(string, old, new): function strip_trailing_hyphens (line 25) | def strip_trailing_hyphens(line_text, next_line_text, line_html) -> str: class Line (line 39) | class Line(Block): method ocr_input_text (line 46) | def ocr_input_text(self, document): method formatted_text (line 60) | def formatted_text(self, document, skip_urls=False): method assemble_html (line 84) | def assemble_html(self, document, child_blocks, parent_structure, bloc... method render (line 102) | def render( method merge (line 125) | def merge(self, other: "Line"): FILE: marker/schema/text/span.py function cleanup_text (line 10) | def cleanup_text(full_text): class Span (line 16) | class Span(Block): method bold (line 47) | def bold(self): method italic (line 51) | def italic(self): method math (line 55) | def math(self): method highlight (line 59) | def highlight(self): method superscript (line 63) | def superscript(self): method subscript (line 67) | def subscript(self): method small (line 71) | def small(self): method code (line 75) | def code(self): method underline (line 79) | def underline(self): method assemble_html (line 82) | def assemble_html(self, document, child_blocks, parent_structure, bloc... FILE: marker/scripts/chunk_convert.py function chunk_convert_cli (line 7) | def chunk_convert_cli(): FILE: marker/scripts/common.py function parse_args (line 22) | def parse_args(): function load_models (line 46) | def load_models(): function open_pdf (line 50) | def open_pdf(pdf_file): function img_to_html (line 55) | def img_to_html(img, img_alt): function get_page_image (line 65) | def get_page_image(pdf_file, page_num, dpi=96): function page_count (line 82) | def page_count(pdf_file: UploadedFile): function pillow_image_to_base64_string (line 90) | def pillow_image_to_base64_string(img: Image) -> str: function extract_root_pydantic_class (line 96) | def extract_root_pydantic_class(schema_code: str) -> Optional[str]: function get_root_class (line 161) | def get_root_class(schema_code: str) -> Optional[BaseModel]: FILE: marker/scripts/convert.py function worker_init (line 42) | def worker_init(): function worker_exit (line 52) | def worker_exit(): function process_single_pdf (line 60) | def process_single_pdf(args): function convert_cli (line 139) | def convert_cli(in_folder: str, **kwargs): FILE: marker/scripts/convert_single.py function convert_single_cli (line 25) | def convert_single_cli(fpath: str, **kwargs): FILE: marker/scripts/extraction_app.py function extract_data (line 29) | def extract_data( FILE: marker/scripts/file_to_s3.py function main (line 19) | def main(filepath: str, s3_path: str, bucket_name: str, access_key_id: s... FILE: marker/scripts/run_streamlit_app.py function streamlit_app_cli (line 6) | def streamlit_app_cli(app_name: str = "streamlit_app.py"): function extraction_app_cli (line 24) | def extraction_app_cli(): FILE: marker/scripts/server.py function lifespan (line 30) | async def lifespan(app: FastAPI): function root (line 43) | async def root(): class CommonParams (line 55) | class CommonParams(BaseModel): function _convert_pdf (line 86) | async def _convert_pdf(params: CommonParams): function convert_pdf (line 131) | async def convert_pdf(params: CommonParams): function convert_pdf_upload (line 136) | async def convert_pdf_upload( function server_cli (line 165) | def server_cli(port: int, host: str): FILE: marker/scripts/streamlit_app.py function convert_pdf (line 29) | def convert_pdf(fname: str, config_parser: ConfigParser) -> (str, Dict[s... function markdown_insert_images (line 43) | def markdown_insert_images(markdown, images): FILE: marker/services/__init__.py class BaseService (line 12) | class BaseService: method img_to_base64 (line 22) | def img_to_base64(self, img: PIL.Image.Image, format: str = "WEBP"): method process_images (line 27) | def process_images(self, images: List[PIL.Image.Image]) -> list: method format_image_for_llm (line 30) | def format_image_for_llm(self, image): method __init__ (line 40) | def __init__(self, config: Optional[BaseModel | dict] = None): method __call__ (line 46) | def __call__( FILE: marker/services/azure_openai.py class AzureOpenAIService (line 17) | class AzureOpenAIService(BaseService): method process_images (line 29) | def process_images(self, images: List[PIL.Image.Image]) -> list: method __call__ (line 43) | def __call__( method get_client (line 111) | def get_client(self) -> AzureOpenAI: FILE: marker/services/claude.py class ClaudeService (line 18) | class ClaudeService(BaseService): method process_images (line 27) | def process_images(self, images: List[Image.Image]) -> List[dict]: method validate_response (line 40) | def validate_response(self, response_text: str, schema: type[T]) -> T: method get_client (line 61) | def get_client(self): method __call__ (line 66) | def __call__( FILE: marker/services/gemini.py class BaseGeminiService (line 20) | class BaseGeminiService(BaseService): method img_to_bytes (line 28) | def img_to_bytes(self, img: PIL.Image.Image): method get_google_client (line 33) | def get_google_client(self, timeout: int): method process_images (line 36) | def process_images(self, images): method __call__ (line 43) | def __call__( class GoogleGeminiService (line 134) | class GoogleGeminiService(BaseGeminiService): method get_google_client (line 137) | def get_google_client(self, timeout: int): FILE: marker/services/ollama.py class OllamaService (line 15) | class OllamaService(BaseService): method process_images (line 23) | def process_images(self, images): method __call__ (line 27) | def __call__( FILE: marker/services/openai.py class OpenAIService (line 18) | class OpenAIService(BaseService): method process_images (line 33) | def process_images(self, images: List[Image.Image]) -> List[dict]: method __call__ (line 61) | def __call__( method get_client (line 129) | def get_client(self) -> openai.OpenAI: FILE: marker/services/vertex.py class GoogleVertexService (line 7) | class GoogleVertexService(BaseGeminiService): method get_google_client (line 25) | def get_google_client(self, timeout: int): FILE: marker/settings.py class Settings (line 10) | class Settings(BaseSettings): method TORCH_DEVICE_MODEL (line 35) | def TORCH_DEVICE_MODEL(self) -> str: method MODEL_DTYPE (line 49) | def MODEL_DTYPE(self) -> torch.dtype: class Config (line 55) | class Config: FILE: marker/util.py function strings_to_classes (line 28) | def strings_to_classes(items: List[str]) -> List[type]: function classes_to_strings (line 37) | def classes_to_strings(items: List[type]) -> List[str]: function verify_config_keys (line 45) | def verify_config_keys(obj): function assign_config (line 58) | def assign_config(cls, config: BaseModel | dict | None): function parse_range_str (line 82) | def parse_range_str(range_str: str) -> List[int]: function matrix_intersection_area (line 95) | def matrix_intersection_area(boxes1: List[List[float]], boxes2: List[Lis... function matrix_distance (line 116) | def matrix_distance(boxes1: List[List[float]], boxes2: List[List[float]]... function sort_text_lines (line 135) | def sort_text_lines(lines: List[PolygonBox], tolerance=1.25): function download_font (line 153) | def download_font(): function get_opening_tag_type (line 162) | def get_opening_tag_type(tag): function get_closing_tag_type (line 181) | def get_closing_tag_type(tag): function normalize_latex_escapes (line 211) | def normalize_latex_escapes(s: str) -> str: function unwrap_math (line 216) | def unwrap_math(text: str, math_symbols: List[str] = MATH_SYMBOLS) -> str: FILE: marker/utils/batch.py function get_batch_sizes_worker_counts (line 4) | def get_batch_sizes_worker_counts(gpu_manager: GPUManager, peak_worker_v... FILE: marker/utils/gpu.py class GPUManager (line 11) | class GPUManager: method __init__ (line 14) | def __init__(self, device_idx: int): method __enter__ (line 19) | def __enter__(self): method __exit__ (line 24) | def __exit__(self, exc_type, exc_val, exc_tb): method using_cuda (line 29) | def using_cuda(): method check_cuda_available (line 32) | def check_cuda_available(self) -> bool: method get_gpu_vram (line 41) | def get_gpu_vram(self): method start_mps_server (line 66) | def start_mps_server(self) -> bool: method stop_mps_server (line 98) | def stop_mps_server(self) -> None: method cleanup (line 127) | def cleanup(self) -> None: FILE: marker/utils/image.py function is_blank_image (line 6) | def is_blank_image(image: Image.Image, polygon: Optional[List[List[int]]... FILE: tests/builders/test_blank_page.py function test_blank_page (line 8) | def test_blank_page(config, doc_provider, layout_model, ocr_error_model,... FILE: tests/builders/test_document_builder.py function test_document_builder (line 9) | def test_document_builder(pdf_document): function test_document_builder_inline_eq (line 28) | def test_document_builder_inline_eq(pdf_document): FILE: tests/builders/test_garbled_pdf.py function test_garbled_pdf (line 10) | def test_garbled_pdf(pdf_document, recognition_model, table_rec_model, d... function test_garbled_builder (line 33) | def test_garbled_builder(config, doc_provider, detection_model, ocr_erro... function test_nongarbled_builder (line 47) | def test_nongarbled_builder(config, doc_provider, detection_model, ocr_e... FILE: tests/builders/test_layout_replace.py function test_layout_replace (line 13) | def test_layout_replace( FILE: tests/builders/test_ocr_builder.py function test_blank_char_builder (line 6) | def test_blank_char_builder(recognition_model): FILE: tests/builders/test_ocr_pipeline.py function _ocr_pipeline_test (line 7) | def _ocr_pipeline_test(pdf_document): function test_ocr_pipeline (line 39) | def test_ocr_pipeline(pdf_document): function test_ocr_with_inline_pipeline (line 44) | def test_ocr_with_inline_pipeline(pdf_document): FILE: tests/builders/test_overriding.py class NewSectionHeader (line 14) | class NewSectionHeader(SectionHeader): class NewLine (line 18) | class NewLine(Line): function test_overriding (line 26) | def test_overriding(pdf_document: Document): function get_lines (line 31) | def get_lines(pdf: str, config=None): function test_overriding_mp (line 39) | def test_overriding_mp(): FILE: tests/builders/test_pdf_links.py function test_pdf_links (line 15) | def test_pdf_links(pdf_document: Document, config, renderer, model_dict,... FILE: tests/builders/test_rotated_bboxes.py function test_rotated_bboxes (line 8) | def test_rotated_bboxes(pdf_document): FILE: tests/builders/test_strip_existing_ocr.py function test_strip_ocr (line 6) | def test_strip_ocr(doc_provider): function test_keep_ocr (line 13) | def test_keep_ocr(doc_provider): FILE: tests/builders/test_structure.py function test_structure_builder (line 7) | def test_structure_builder(pdf_document): FILE: tests/config/test_config.py function capture_kwargs (line 10) | def capture_kwargs(argv): function test_config_parser (line 29) | def test_config_parser(): function test_config_none (line 50) | def test_config_none(): function test_config_llm (line 59) | def test_config_llm(): function test_config_force_ocr (line 68) | def test_config_force_ocr(): FILE: tests/conftest.py function model_dict (line 28) | def model_dict(): function layout_model (line 35) | def layout_model(model_dict): function detection_model (line 40) | def detection_model(model_dict): function recognition_model (line 45) | def recognition_model(model_dict): function table_rec_model (line 50) | def table_rec_model(model_dict): function ocr_error_model (line 55) | def ocr_error_model(model_dict): function config (line 60) | def config(request): function pdf_dataset (line 72) | def pdf_dataset(): function temp_doc (line 77) | def temp_doc(request, pdf_dataset): function doc_provider (line 91) | def doc_provider(request, config, temp_doc): function pdf_document (line 97) | def pdf_document( function pdf_converter (line 117) | def pdf_converter(request, config, model_dict, renderer, llm_service): function renderer (line 130) | def renderer(request, config): function llm_service (line 148) | def llm_service(request, config): function temp_image (line 157) | def temp_image(): FILE: tests/converters/test_extraction_converter.py class MockLLMService (line 10) | class MockLLMService(BaseService): method __call__ (line 11) | def __call__(self, prompt, image=None, page=None, response_schema=None... function mock_llm_service (line 26) | def mock_llm_service(): function extraction_converter (line 31) | def extraction_converter(config, model_dict, mock_llm_service): function test_extraction_converter (line 52) | def test_extraction_converter(config, model_dict, mock_llm_service, temp... function test_extraction_converter_multiple_pages (line 66) | def test_extraction_converter_multiple_pages(extraction_converter, temp_... FILE: tests/converters/test_ocr_converter.py function _ocr_converter (line 7) | def _ocr_converter(config, model_dict, temp_pdf, line_count: int, eq_cou... function check_bboxes (line 20) | def check_bboxes(page: OCRJSONPageOutput, lines): function test_ocr_converter (line 37) | def test_ocr_converter(config, model_dict, temp_doc): function test_ocr_converter_force (line 43) | def test_ocr_converter_force(config, model_dict, temp_doc): function test_ocr_converter_keep (line 51) | def test_ocr_converter_keep(config, model_dict, temp_doc): FILE: tests/converters/test_pdf_converter.py function test_pdf_converter (line 10) | def test_pdf_converter(pdf_converter: PdfConverter, temp_doc): function test_epub_converter (line 33) | def test_epub_converter(pdf_converter: PdfConverter, temp_doc): function test_xlsx_converter (line 43) | def test_xlsx_converter(pdf_converter: PdfConverter, temp_doc): function test_html_converter (line 53) | def test_html_converter(pdf_converter: PdfConverter, temp_doc): function test_docx_converter (line 63) | def test_docx_converter(pdf_converter: PdfConverter, temp_doc): function test_pptx_converter (line 73) | def test_pptx_converter(pdf_converter: PdfConverter, temp_doc): function test_pdf_converter_bytes (line 83) | def test_pdf_converter_bytes(pdf_converter: PdfConverter, temp_doc): FILE: tests/converters/test_table_converter.py function _table_converter (line 6) | def _table_converter(config, model_dict, renderer, temp_pdf): function test_table_converter (line 23) | def test_table_converter(config, model_dict, renderer, temp_doc): function test_table_converter_ocr (line 28) | def test_table_converter_ocr(config, model_dict, renderer, temp_doc): FILE: tests/processors/test_document_toc_processor.py function test_document_toc_processor (line 7) | def test_document_toc_processor(pdf_document, detection_model, recogniti... FILE: tests/processors/test_equation_processor.py function test_equation_processor (line 8) | def test_equation_processor(pdf_document, recognition_model): FILE: tests/processors/test_footnote_processor.py function test_footnote_processor (line 9) | def test_footnote_processor(pdf_document): FILE: tests/processors/test_ignoretext.py function test_ignoretext_processor (line 10) | def test_ignoretext_processor(pdf_document): FILE: tests/processors/test_llm_processors.py function test_llm_form_processor_no_config (line 19) | def test_llm_form_processor_no_config(pdf_document, llm_service): function test_llm_form_processor_no_cells (line 30) | def test_llm_form_processor_no_cells(pdf_document, llm_service): function test_llm_form_processor (line 42) | def test_llm_form_processor(pdf_document, table_rec_model, recognition_m... function test_llm_table_processor (line 64) | def test_llm_table_processor(pdf_document, table_rec_model, recognition_... function test_llm_caption_processor_disabled (line 107) | def test_llm_caption_processor_disabled(pdf_document): function test_llm_caption_processor (line 119) | def test_llm_caption_processor(pdf_document): function test_llm_complex_region_processor (line 141) | def test_llm_complex_region_processor(pdf_document): function test_multi_llm_processors (line 167) | def test_multi_llm_processors(pdf_document): FILE: tests/processors/test_table_merge.py function test_llm_table_processor_nomerge (line 11) | def test_llm_table_processor_nomerge(pdf_document, table_rec_model, reco... FILE: tests/processors/test_table_processor.py function test_table_processor (line 12) | def test_table_processor( function test_avoid_double_ocr (line 34) | def test_avoid_double_ocr( function test_overlap_blocks (line 53) | def test_overlap_blocks( function test_ocr_table (line 71) | def test_ocr_table(pdf_document, recognition_model, table_rec_model, det... function test_split_rows (line 81) | def test_split_rows(pdf_document, recognition_model, table_rec_model, de... FILE: tests/providers/test_document_providers.py function test_pptx_provider (line 6) | def test_pptx_provider(doc_provider): function test_epub_provider (line 20) | def test_epub_provider(doc_provider): function test_html_provider (line 31) | def test_html_provider(doc_provider): function test_docx_provider (line 41) | def test_docx_provider(doc_provider): function test_xlsx_provider (line 52) | def test_xlsx_provider(doc_provider): FILE: tests/providers/test_image_provider.py function test_image_provider (line 5) | def test_image_provider(config, temp_image): function test_image_provider_conversion (line 13) | def test_image_provider_conversion(pdf_converter, temp_image): FILE: tests/providers/test_pdf_provider.py function test_pdf_provider (line 5) | def test_pdf_provider(doc_provider): FILE: tests/renderers/test_chunk_renderer.py function test_chunk_renderer (line 7) | def test_chunk_renderer(pdf_document): FILE: tests/renderers/test_extract_images.py function test_disable_extract_images (line 8) | def test_disable_extract_images(pdf_document): function test_extract_images (line 18) | def test_extract_images(pdf_document): FILE: tests/renderers/test_html_renderer.py function test_html_renderer_block_ids (line 14) | def test_html_renderer_block_ids(pdf_document, config): FILE: tests/renderers/test_json_renderer.py function test_markdown_renderer_pagination (line 7) | def test_markdown_renderer_pagination(pdf_document): FILE: tests/renderers/test_markdown_renderer.py function test_markdown_renderer (line 9) | def test_markdown_renderer(pdf_document): function test_markdown_renderer_auto_ocr (line 18) | def test_markdown_renderer_auto_ocr(pdf_document): function test_markdown_renderer_pagination (line 27) | def test_markdown_renderer_pagination(pdf_document): function test_markdown_renderer_pagination_blank_last_page (line 36) | def test_markdown_renderer_pagination_blank_last_page(pdf_document): function test_markdown_renderer_metadata (line 52) | def test_markdown_renderer_metadata(pdf_document): function test_markdown_renderer_images (line 59) | def test_markdown_renderer_images(pdf_document): function test_markdown_renderer_tables (line 68) | def test_markdown_renderer_tables(pdf_document): FILE: tests/schema/groups/test_list_grouping.py function test_list_grouping (line 9) | def test_list_grouping(pdf_document): FILE: tests/services/test_service_init.py function test_empty_llm (line 13) | def test_empty_llm(pdf_converter: PdfConverter, temp_doc): function test_llm_no_keys (line 18) | def test_llm_no_keys(model_dict, config): function test_llm_gemini (line 25) | def test_llm_gemini(pdf_converter: PdfConverter, temp_doc): function test_llm_vertex (line 39) | def test_llm_vertex(pdf_converter: PdfConverter, temp_doc): function test_llm_ollama (line 52) | def test_llm_ollama(pdf_converter: PdfConverter, temp_doc): function test_llm_openai (line 66) | def test_llm_openai(pdf_converter: PdfConverter, temp_doc): function test_llm_azure_openai (line 83) | def test_llm_azure_openai(pdf_converter: PdfConverter, temp_doc): FILE: tests/utils.py function setup_pdf_provider (line 7) | def setup_pdf_provider(