SYMBOL INDEX (270 symbols across 56 files) FILE: benchmark/process_single_doc.py function process_file (line 11) | async def process_file(megaparse: MegaParse, file_path: str | Path): function test_process_file (line 24) | async def test_process_file(file: str | Path): FILE: benchmark/test_quality_sim.py function jaccard_similarity (line 9) | def jaccard_similarity(str1, str2): function compare_files (line 24) | def compare_files(file_name): function main (line 43) | def main(): FILE: libs/megaparse/examples/parse_file_fast.py class File (line 9) | class File: function list_files_in_directory (line 15) | def list_files_in_directory(directory_path: str) -> dict[str, list[File]]: function main (line 36) | def main(): FILE: libs/megaparse/examples/parse_file_mp.py class File (line 10) | class File: function list_files_in_directory (line 16) | def list_files_in_directory(directory_path: str) -> dict[str, list[File]]: function main (line 37) | def main(): FILE: libs/megaparse/examples/parse_file_unstructured.py class File (line 9) | class File: function list_files_in_directory (line 15) | def list_files_in_directory(directory_path: str) -> dict[str, list[File]]: function main (line 36) | def main(): FILE: libs/megaparse/src/megaparse/api/app.py function parser_builder_dep (line 36) | def parser_builder_dep(): function get_playwright_loader (line 40) | def get_playwright_loader(): function healthz (line 45) | def healthz(): function _check_free_memory (line 49) | def _check_free_memory() -> bool: function parse_file (line 62) | async def parse_file( function upload_url (line 122) | async def upload_url( FILE: libs/megaparse/src/megaparse/api/exceptions/megaparse_exceptions.py class HTTPModelNotSupported (line 4) | class HTTPModelNotSupported(HTTPException): method __init__ (line 5) | def __init__( class HTTPFileNotFound (line 13) | class HTTPFileNotFound(HTTPException): method __init__ (line 14) | def __init__( class HTTPDownloadError (line 21) | class HTTPDownloadError(HTTPException): method __init__ (line 22) | def __init__(self, file_name, message="Failed to download the file"): class HTTPParsingException (line 27) | class HTTPParsingException(HTTPException): method __init__ (line 28) | def __init__(self, file_name, message="Failed to parse the file"): class ParsingException (line 33) | class ParsingException(Exception): method __init__ (line 36) | def __init__(self, message="An error occurred during parsing"): FILE: libs/megaparse/src/megaparse/api/models/base.py class MarkDownType (line 4) | class MarkDownType(str, Enum): FILE: libs/megaparse/src/megaparse/configs/auto.py class TextDetConfig (line 7) | class TextDetConfig(BaseModel): class AutoStrategyConfig (line 16) | class AutoStrategyConfig(BaseModel): class TextRecoConfig (line 21) | class TextRecoConfig(BaseModel): class DeviceEnum (line 26) | class DeviceEnum(str, Enum): class DoctrConfig (line 32) | class DoctrConfig(BaseModel): class MegaParseConfig (line 40) | class MegaParseConfig(BaseSettings): FILE: libs/megaparse/src/megaparse/examples/parse_file.py class MyCustomFormat (line 7) | class MyCustomFormat(BaseModel): function main (line 13) | def main(): FILE: libs/megaparse/src/megaparse/examples/parsing_process.py function get_strategy_page (line 30) | def get_strategy_page( function validate_input (line 86) | def validate_input( function _generate_crops (line 118) | def _generate_crops( function _prepare_crops (line 143) | def _prepare_crops( function _process_predictions (line 169) | def _process_predictions( function main (line 189) | def main(): FILE: libs/megaparse/src/megaparse/exceptions/base.py class ParsingException (line 1) | class ParsingException(Exception): method __init__ (line 4) | def __init__(self, message="An error occurred during parsing"): FILE: libs/megaparse/src/megaparse/formatter/base.py class BaseFormatter (line 9) | class BaseFormatter(ABC): method __init__ (line 22) | def __init__(self, model: BaseChatModel | None = None): method format (line 25) | def format( method aformat (line 30) | async def aformat( FILE: libs/megaparse/src/megaparse/formatter/structured_formatter/__init__.py class StructuredFormatter (line 9) | class StructuredFormatter(BaseFormatter): method __init__ (line 10) | def __init__(self, model: BaseChatModel, output_model: type[BaseModel]): method aformat (line 14) | async def aformat( method format (line 21) | def format( FILE: libs/megaparse/src/megaparse/formatter/structured_formatter/custom_structured_formatter.py class CustomStructuredFormatter (line 8) | class CustomStructuredFormatter(StructuredFormatter): method format (line 9) | def format( method aformat (line 45) | async def aformat( FILE: libs/megaparse/src/megaparse/formatter/table_formatter/__init__.py class TableFormatter (line 7) | class TableFormatter(BaseFormatter): method format (line 8) | def format( method aformat (line 13) | async def aformat( FILE: libs/megaparse/src/megaparse/formatter/table_formatter/llm_table_formatter.py class SimpleMDTableFormatter (line 12) | class SimpleMDTableFormatter(TableFormatter): method __init__ (line 21) | def __init__(self, model: Optional[BaseChatModel] = None): method aformat (line 24) | async def aformat( method format (line 34) | def format( method format_table (line 62) | def format_table( FILE: libs/megaparse/src/megaparse/formatter/table_formatter/vision_table_formatter.py class VisionMDTableFormatter (line 19) | class VisionMDTableFormatter(TableFormatter): method __init__ (line 28) | def __init__(self, model: Optional[BaseChatModel] = None): method _crop_table_image (line 31) | def _crop_table_image(self, table_element: TableBlock, file_path: str)... method aformat (line 55) | async def aformat( method format (line 80) | def format( method aformat_table (line 105) | async def aformat_table( method format_table (line 123) | def format_table(self, table_element: TableBlock, file_path: str) -> T... method process_file (line 139) | def process_file(self, images: List[Image.Image], image_format="PNG") ... method avision_extract (line 154) | async def avision_extract(self, table_image: str) -> str: method vision_extract (line 175) | def vision_extract(self, table_image: str) -> str: FILE: libs/megaparse/src/megaparse/layout_detection/layout_detector.py class LayoutDetector (line 45) | class LayoutDetector: method __init__ (line 46) | def __init__( method __call__ (line 81) | def __call__( method extract_bboxes_from_page (line 115) | def extract_bboxes_from_page( method nms (line 165) | def nms( method topK (line 199) | def topK( method _save_layout (line 211) | def _save_layout( FILE: libs/megaparse/src/megaparse/layout_detection/output.py class LayoutDetectionOutput (line 7) | class LayoutDetectionOutput(BaseModel): FILE: libs/megaparse/src/megaparse/megaparse.py class MegaParse (line 26) | class MegaParse: method __init__ (line 27) | def __init__( method validate_input (line 48) | def validate_input( method extract_page_strategies (line 80) | def extract_page_strategies( method load (line 145) | def load( method aload (line 223) | async def aload( FILE: libs/megaparse/src/megaparse/models/page.py class PageDimension (line 10) | class PageDimension(BaseModel): class Page (line 19) | class Page(BaseModel): class GatewayDocument (line 34) | class GatewayDocument(BaseModel): FILE: libs/megaparse/src/megaparse/parser/base.py class BaseParser (line 9) | class BaseParser(ABC): method check_supported_extension (line 14) | def check_supported_extension( method aconvert (line 30) | async def aconvert( method convert (line 53) | def convert( FILE: libs/megaparse/src/megaparse/parser/builder.py class ParserBuilder (line 15) | class ParserBuilder: method build (line 16) | def build(self, config: ParseFileConfig) -> BaseParser: FILE: libs/megaparse/src/megaparse/parser/doctr_parser.py class DoctrParser (line 58) | class DoctrParser(NestedObject, _OCRPredictor): method __init__ (line 61) | def __init__( method get_text_detections (line 110) | def get_text_detections(self, pages: list[Page], **kwargs) -> List[Page]: method get_text_recognition (line 188) | def get_text_recognition( method _get_block_cls (line 261) | def _get_block_cls( method __to_elements_list (line 291) | def __to_elements_list( FILE: libs/megaparse/src/megaparse/parser/entity.py class TagEnum (line 5) | class TagEnum(str, Enum): class SupportedModel (line 14) | class SupportedModel(Enum): method __init__ (line 20) | def __init__(self, model_name: str, supported_releases: Optional[List[... method is_supported (line 25) | def is_supported(cls, model_name: str) -> bool: FILE: libs/megaparse/src/megaparse/parser/llama.py class LlamaParser (line 14) | class LlamaParser(BaseParser): method __init__ (line 17) | def __init__( method aconvert (line 34) | async def aconvert( method convert (line 58) | def convert( method __to_elements_list__ (line 82) | def __to_elements_list__(self, llama_doc: List[LlamaDocument]) -> MPDo... FILE: libs/megaparse/src/megaparse/parser/megaparse_vision.py class MegaParseVision (line 57) | class MegaParseVision(BaseParser): method __init__ (line 60) | def __init__(self, model: BaseChatModel, **kwargs): method process_file (line 71) | def process_file(self, file_path: str, image_format: str = "PNG") -> L... method get_element (line 91) | def get_element(self, tag: TagEnum, chunk: str): method asend_to_mlm (line 99) | async def asend_to_mlm(self, images_data: List[str]) -> str: method send_to_mlm (line 122) | def send_to_mlm(self, images_data: List[str]) -> str: method aconvert (line 145) | async def aconvert( method convert (line 178) | def convert( method get_cleaned_content (line 214) | def get_cleaned_content(self, parsed_file: str) -> str: method __to_elements_list__ (line 253) | def __to_elements_list__(self, mpv_doc: str, n_pages: int) -> MPDocument: FILE: libs/megaparse/src/megaparse/parser/unstructured_parser.py class UnstructuredParser (line 32) | class UnstructuredParser(BaseParser): method __init__ (line 49) | def __init__( method convert (line 55) | def convert( method aconvert (line 73) | async def aconvert( method __to_mp_document (line 88) | def __to_mp_document(self, elements: List[Element]) -> MPDocument: method __convert_element_to_block (line 98) | def __convert_element_to_block(self, element: Element) -> Block | None: FILE: libs/megaparse/src/megaparse/predictor/layout_predictor.py function extract_layout (line 7) | def extract_layout( FILE: libs/megaparse/src/megaparse/utils/extract_metadata.py function get_doc_metdata (line 6) | def get_doc_metdata(pdfium_document: pdfium.PdfDocument) -> Dict[str, Any]: FILE: libs/megaparse/src/megaparse/utils/onnx.py function get_providers (line 10) | def get_providers(device: DeviceEnum) -> List[str]: FILE: libs/megaparse/src/megaparse/utils/strategy.py function get_page_strategy (line 10) | def get_page_strategy( function determine_global_strategy (line 66) | def determine_global_strategy(pages: List[Page], threshold: float) -> St... FILE: libs/megaparse/tests/conftest.py class FakeParserBuilder (line 15) | class FakeParserBuilder: method build (line 16) | def build(self, *args, **kwargs) -> BaseParser: function test_client (line 62) | async def test_client(): FILE: libs/megaparse/tests/pdf/test_detect_ocr.py function test_hi_res_strategy (line 16) | def test_hi_res_strategy(hi_res_pdf): function test_fast_strategy (line 31) | def test_fast_strategy(native_pdf): FILE: libs/megaparse/tests/pdf/test_pdf_processing.py function native_pdf (line 16) | def native_pdf() -> Path: function scanned_pdf (line 22) | def scanned_pdf() -> Path: function test_async_megaparse_pdf_processor_file_path (line 34) | async def test_async_megaparse_pdf_processor_file_path(pdf_name, request): function test_sync_megaparse_pdf_processor_file_path (line 42) | def test_sync_megaparse_pdf_processor_file_path(pdf_name, request): function test_megaparse_pdf_processor_file (line 51) | async def test_megaparse_pdf_processor_file(pdf_name, request): function test_strategy_native (line 59) | def test_strategy_native(native_pdf): function test_strategy_scanned (line 74) | def test_strategy_scanned(scanned_pdf): FILE: libs/megaparse/tests/pdf/test_pdfium_parser.py function test_pdfium (line 6) | def test_pdfium(): FILE: libs/megaparse/tests/test_endpoints.py function test_parse_file_endpoint (line 5) | async def test_parse_file_endpoint(test_client): function test_parse_url_endpoint (line 23) | async def test_parse_url_endpoint(test_client): FILE: libs/megaparse/tests/test_import.py function test_load (line 6) | def test_load(): FILE: libs/megaparse/tests/test_parsers.py function test_sync_parser (line 18) | def test_sync_parser(parser, extension): FILE: libs/megaparse_sdk/examples/usage_example.py function main (line 7) | async def main(): FILE: libs/megaparse_sdk/megaparse_sdk/__init__.py class MegaParseSDK (line 6) | class MegaParseSDK: method __init__ (line 7) | def __init__(self, api_key: str | None = None, base_url: str | None = ... method close (line 12) | async def close(self): FILE: libs/megaparse_sdk/megaparse_sdk/client.py class MegaParseClient (line 40) | class MegaParseClient: method __init__ (line 41) | def __init__( method request (line 57) | async def request(self, method: str, endpoint: str, **kwargs: Any) -> ... method close (line 71) | async def close(self): class ClientState (line 75) | class ClientState(enum.Enum): class MegaParseNATSClient (line 84) | class MegaParseNATSClient: method __init__ (line 85) | def __init__(self, config: ClientNATSConfig): method _get_nc (line 97) | async def _get_nc(self): method __aenter__ (line 109) | async def __aenter__(self: Self) -> Self: method __aexit__ (line 124) | async def __aexit__( method parse_url (line 133) | async def parse_url(self, url: str): method parse_file (line 137) | async def parse_file( method _send_req (line 158) | async def _send_req(self, inp: MPInput) -> str | Document: method _send_req_inner (line 171) | async def _send_req_inner(self, inp: MPInput): method _handle_mp_output (line 181) | def _handle_mp_output(self, response: MPOutput) -> str | Document: method aclose (line 200) | async def aclose(self): FILE: libs/megaparse_sdk/megaparse_sdk/config.py class MegaParseSDKConfig (line 5) | class MegaParseSDKConfig(BaseSettings): class SSLConfig (line 17) | class SSLConfig(BaseModel): class ClientNATSConfig (line 23) | class ClientNATSConfig(BaseSettings): FILE: libs/megaparse_sdk/megaparse_sdk/endpoints/file_upload.py class UploadFileConfig (line 11) | class UploadFileConfig(BaseModel): class FileUpload (line 20) | class FileUpload: method __init__ (line 21) | def __init__(self, client: MegaParseClient): method upload (line 24) | async def upload( FILE: libs/megaparse_sdk/megaparse_sdk/endpoints/url_upload.py class URLUpload (line 6) | class URLUpload: method __init__ (line 7) | def __init__(self, client: MegaParseClient): method upload (line 10) | async def upload(self, url: str, max_retries: int = 3) -> Response: FILE: libs/megaparse_sdk/megaparse_sdk/schema/document.py class Point2D (line 10) | class Point2D(NamedTuple): class BlockType (line 15) | class BlockType(str, Enum): class BBOX (line 19) | class BBOX(NamedTuple): method to_numpy (line 23) | def to_numpy(self): method iou (line 28) | def iou(self, other: Self): class BlockLayout (line 44) | class BlockLayout(BaseModel): class TextDetection (line 50) | class TextDetection: method __init__ (line 59) | def __init__( method __repr__ (line 73) | def __repr__(self) -> str: method render (line 76) | def render( method get_loc_preds (line 104) | def get_loc_preds(self) -> np.ndarray: method get_objectness_scores (line 114) | def get_objectness_scores(self) -> np.ndarray: method get_origin_page_shapes (line 124) | def get_origin_page_shapes(self) -> np.ndarray: method get_orientations (line 134) | def get_orientations(self) -> np.ndarray: class Block (line 145) | class Block(BaseModel): method validate_range (line 161) | def validate_range(cls, value): class TextBlock (line 172) | class TextBlock(Block): method __str__ (line 180) | def __str__(self): class UndefinedBlock (line 184) | class UndefinedBlock(TextBlock): class TitleBlock (line 193) | class TitleBlock(TextBlock): method __str__ (line 199) | def __str__(self): class SubTitleBlock (line 203) | class SubTitleBlock(TextBlock): method __str__ (line 210) | def __str__(self): class CaptionBlock (line 215) | class CaptionBlock(TextBlock): class ImageBlock (line 223) | class ImageBlock(Block): method __str__ (line 231) | def __str__(self) -> str: class TableBlock (line 235) | class TableBlock(ImageBlock): method __str__ (line 241) | def __str__(self): class ListElementBlock (line 245) | class ListElementBlock(TextBlock): class ListBlock (line 254) | class ListBlock(Block): method __str__ (line 264) | def __str__(self): class HeaderBlock (line 271) | class HeaderBlock(TextBlock): method __str__ (line 277) | def __str__(self): class FooterBlock (line 281) | class FooterBlock(TextBlock): method __str__ (line 287) | def __str__(self): class SectionBlock (line 291) | class SectionBlock(Block): method __str__ (line 301) | def __str__(self): class TOCItem (line 307) | class TOCItem(BaseModel): method validate_range (line 313) | def validate_range(cls, value): method __str__ (line 321) | def __str__(self): class TOC (line 331) | class TOC(BaseModel): method text (line 335) | def text(self) -> str: method __str__ (line 338) | def __str__(self): class Document (line 342) | class Document(BaseModel): method __str__ (line 355) | def __str__(self) -> str: method clean (line 369) | def clean(self): FILE: libs/megaparse_sdk/megaparse_sdk/schema/extensions.py class FileExtension (line 4) | class FileExtension(str, Enum): method __new__ (line 9) | def __new__(cls, value: str, mimetype: str): method mimetype (line 39) | def mimetype(self) -> str: FILE: libs/megaparse_sdk/megaparse_sdk/schema/languages.py class Language (line 4) | class Language(str, Enum): FILE: libs/megaparse_sdk/megaparse_sdk/schema/mp_exceptions.py class ModelNotSupported (line 1) | class ModelNotSupported(Exception): method __init__ (line 2) | def __init__( class MemoryLimitExceeded (line 9) | class MemoryLimitExceeded(Exception): method __init__ (line 10) | def __init__(self, message="The service is under high memory pressure"): class InternalServiceError (line 14) | class InternalServiceError(Exception): method __init__ (line 15) | def __init__(self, message="Internal service error occured"): class DownloadError (line 19) | class DownloadError(Exception): method __init__ (line 20) | def __init__(self, message="Failed to download the file"): class ParsingException (line 24) | class ParsingException(Exception): method __init__ (line 25) | def __init__(self, message="An error occurred during parsing"): FILE: libs/megaparse_sdk/megaparse_sdk/schema/mp_inputs.py class FileInput (line 10) | class FileInput(BaseModel): method decode_data (line 16) | def decode_data(cls, value): method serialize_data (line 27) | def serialize_data(self, data: bytes, _info): class MPParseType (line 31) | class MPParseType(str, Enum): class ParseFileInput (line 36) | class ParseFileInput(BaseModel): class ParseUrlInput (line 42) | class ParseUrlInput(BaseModel): class MPInput (line 47) | class MPInput(BaseModel): FILE: libs/megaparse_sdk/megaparse_sdk/schema/mp_outputs.py class MPErrorType (line 9) | class MPErrorType(Enum): class ParseError (line 17) | class ParseError(BaseModel): class MPOutputType (line 22) | class MPOutputType(str, Enum): class MPOutput (line 27) | class MPOutput(BaseModel): FILE: libs/megaparse_sdk/megaparse_sdk/schema/parser_config.py class ParserType (line 10) | class ParserType(str, Enum): class StrategyEnum (line 18) | class StrategyEnum(str, Enum): class ParseFileConfig (line 26) | class ParseFileConfig(BaseModel): FILE: libs/megaparse_sdk/megaparse_sdk/schema/supported_models.py class SupportedModel (line 4) | class SupportedModel(str, Enum): method __str__ (line 24) | def __str__(self): method is_supported (line 28) | def is_supported(cls, model_name: str) -> bool: method get_supported_models (line 33) | def get_supported_models(cls) -> list[str]: FILE: libs/megaparse_sdk/megaparse_sdk/utils/load_ssl.py function load_ssl_cxt (line 6) | def load_ssl_cxt(ssl_config: SSLConfig): FILE: libs/megaparse_sdk/tests/test_nats_client.py function ssl_config (line 36) | def ssl_config() -> SSLConfig: function nc_config (line 45) | def nc_config(ssl_config: SSLConfig) -> ClientNATSConfig: function nats_service (line 61) | async def nats_service(nc_config: ClientNATSConfig): function test_client_state_transition (line 76) | async def test_client_state_transition(nc_config: ClientNATSConfig): function test_client_parse_file (line 89) | async def test_client_parse_file(nats_service: Client, nc_config: Client... function test_client_parse_url (line 105) | async def test_client_parse_url(nats_service: Client, nc_config: ClientN... function test_client_parse_timeout (line 120) | async def test_client_parse_timeout(nats_service: Client, ssl_config: SS... function test_client_parse_timeout_retry (line 142) | async def test_client_parse_timeout_retry(nats_service: Client, ssl_conf... function test_client_parse_file_excp (line 178) | async def test_client_parse_file_excp(