SYMBOL INDEX (1511 symbols across 161 files) FILE: build-scripts/fetch_shaded_jar.py function find_latest_jar_by_semver (line 19) | def find_latest_jar_by_semver(target_dir: Path) -> Optional[Path]: function main (line 50) | def main(): FILE: build-scripts/set_version.py function set_version (line 7) | def set_version(version_file, pom_file, pyproject_toml_file): FILE: docs/hybrid/experiments/chunking_strategy/docling_page_range_benchmark.py class BenchmarkResult (line 31) | class BenchmarkResult: function get_project_root (line 40) | def get_project_root() -> Path: function create_converter (line 45) | def create_converter() -> DocumentConverter: function convert_with_page_range (line 55) | def convert_with_page_range( function pages_to_ranges (line 64) | def pages_to_ranges(pages: list[int]) -> list[tuple[int, int]]: function run_benchmark_for_ranges (line 85) | def run_benchmark_for_ranges( function get_chunks_for_pages (line 121) | def get_chunks_for_pages( function run_scenario_benchmark (line 134) | def run_scenario_benchmark( function main (line 210) | def main(): FILE: examples/python/batch/batch_processing.py function batch_convert (line 26) | def batch_convert(pdf_paths: list[str], output_dir: str) -> list[Path]: function convert_directory (line 38) | def convert_directory(directory: str, output_dir: str) -> list[Path]: function summarize_results (line 49) | def summarize_results(json_files: list[Path]) -> None: function main (line 71) | def main(): FILE: examples/python/rag/basic_chunking.py function convert_pdf_to_json (line 22) | def convert_pdf_to_json(pdf_path: str, output_dir: str) -> Path: function load_document (line 35) | def load_document(json_path: Path) -> dict: function chunk_by_element (line 41) | def chunk_by_element(doc: dict) -> list[dict]: function chunk_by_section (line 63) | def chunk_by_section(doc: dict) -> list[dict]: function chunk_with_min_size (line 111) | def chunk_with_min_size(doc: dict, min_chars: int = 200) -> list[dict]: function format_citation (line 155) | def format_citation(metadata: dict) -> str: function main (line 170) | def main(): FILE: examples/python/rag/langchain_example.py function main (line 18) | def main(): FILE: java/opendataloader-pdf-cli/src/main/java/org/opendataloader/pdf/cli/CLIMain.java class CLIMain (line 29) | public class CLIMain { method main (line 35) | public static void main(String[] args) { method run (line 48) | static int run(String[] args) { method configureLogging (line 97) | private static void configureLogging(boolean quiet) { method processPath (line 112) | private static boolean processPath(File file, Config config) { method processDirectory (line 125) | private static boolean processDirectory(File file, Config config) { method processFile (line 145) | private static boolean processFile(File file, Config config) { method isPdfFile (line 162) | private static boolean isPdfFile(File file) { FILE: java/opendataloader-pdf-cli/src/main/java/org/opendataloader/pdf/cli/CLIOptions.java class CLIOptions (line 32) | public class CLIOptions { method defineOptions (line 195) | public static Options defineOptions() { method createConfigFromCommandLine (line 204) | public static Config createConfigFromCommandLine(CommandLine commandLi... method applyImageOptions (line 272) | private static void applyImageOptions(Config config, CommandLine comma... method applyPagesOption (line 306) | private static void applyPagesOption(Config config, CommandLine comman... method applyTableMethodOption (line 312) | private static void applyTableMethodOption(Config config, CommandLine ... method applyContentSafetyOption (line 330) | private static void applyContentSafetyOption(Config config, CommandLin... method applySanitizeOption (line 380) | private static void applySanitizeOption(Config config, CommandLine com... method applyFormatOption (line 386) | private static void applyFormatOption(Config config, CommandLine comma... method parseOptionValues (line 436) | private static Set parseOptionValues(String[] optionValues) { method applyHybridOptions (line 453) | private static void applyHybridOptions(Config config, CommandLine comm... method exportOptionsAsJson (line 519) | public static void exportOptionsAsJson(PrintStream out) { method escapeJson (line 558) | private static String escapeJson(String value) { class OptionDefinition (line 574) | private static class OptionDefinition { method OptionDefinition (line 582) | OptionDefinition(String longName, String shortName, String type, Obj... method toOption (line 593) | Option toOption() { FILE: java/opendataloader-pdf-cli/src/test/java/org/opendataloader/pdf/cli/CLIMainTest.java class CLIMainTest (line 27) | class CLIMainTest { method testProcessingFailureReturnsNonZeroExitCode (line 42) | @Test method testDirectoryWithFailingFileReturnsNonZeroExitCode (line 66) | @Test method testNoArgumentsReturnsZero (line 86) | @Test method testInvalidArgumentsReturnsExitCode2 (line 96) | @Test method testNonExistentFileReturnsNonZeroExitCode (line 105) | @Test FILE: java/opendataloader-pdf-cli/src/test/java/org/opendataloader/pdf/cli/CLIOptionsContentSafetyTest.java class CLIOptionsContentSafetyTest (line 30) | class CLIOptionsContentSafetyTest { method parseArgs (line 32) | private Config parseArgs(String... args) throws Exception { method sanitizeFlagEnablesSensitiveDataFilter (line 39) | @Test method defaultDoesNotEnableSensitiveDataFilter (line 46) | @Test method sanitizeWithContentSafetyOffAllStillEnablesSanitize (line 53) | @Test method contentSafetyOffAllDoesNotTouchSensitiveData (line 68) | @Test method deprecatedSensitiveDataValueIsAccepted (line 75) | @Test method deprecatedSensitiveDataValuePrintsWarning (line 82) | @Test method sanitizeWithDeprecatedSensitiveDataStillEnablesSanitize (line 96) | @Test FILE: java/opendataloader-pdf-cli/src/test/java/org/opendataloader/pdf/cli/CLIOptionsTest.java class CLIOptionsTest (line 38) | class CLIOptionsTest { method setUp (line 47) | @BeforeEach method testDefineOptions_containsImageOutputOption (line 55) | @Test method testDefineOptions_containsImageFormatOption (line 60) | @Test method testCreateConfig_withImageOutputEmbedded (line 65) | @Test method testCreateConfig_withImageOutputExternal (line 76) | @Test method testCreateConfig_defaultImageOutput (line 87) | @Test method testCreateConfig_withImageOutputOff (line 100) | @Test method testCreateConfig_withValidImageFormat (line 112) | @ParameterizedTest method testCreateConfig_withUppercaseImageFormat (line 123) | @Test method testCreateConfig_withInvalidImageFormat (line 133) | @Test method testCreateConfig_withEmptyImageFormat (line 143) | @Test method testCreateConfig_withImageOutputAndImageFormat (line 153) | @Test method testCreateConfig_imageFormatWithExternalOutput (line 164) | @Test method testCreateConfig_withWebpImageFormat_shouldFail (line 175) | @Test method testDefaultImageFormat (line 186) | @Test method testCreateConfig_withInvalidImageOutput (line 196) | @Test method testCreateConfig_withUppercaseImageOutput (line 206) | @Test method testCreateConfig_defaultReadingOrder (line 216) | @Test method testCreateConfig_withReadingOrderOff (line 227) | @Test method testDefineOptions_containsPagesOption (line 239) | @Test method testCreateConfig_withPages (line 244) | @Test method testCreateConfig_withSinglePage (line 255) | @Test method testCreateConfig_withPageRange (line 266) | @Test method testCreateConfig_defaultPages (line 277) | @Test method testCreateConfig_withInvalidPages (line 288) | @Test method testCreateConfig_withReversePageRange (line 298) | @Test method testDefineOptions_containsImageDirOption (line 310) | @Test method testCreateConfig_withImageDir (line 315) | @Test method testCreateConfig_defaultImageDir (line 326) | @Test method testCreateConfig_withImageDirAndOutputDir (line 336) | @Test method testCreateConfig_withEmptyImageDir (line 349) | @Test method testCreateConfig_withWhitespaceImageDir (line 359) | @Test method testDefineOptions_containsHybridModeOption (line 371) | @Test method testDefineOptions_containsHybridOcrOption (line 376) | @Test method testCreateConfig_withHybridModeAuto (line 382) | @Test method testCreateConfig_withHybridModeFull (line 393) | @Test method testCreateConfig_withInvalidHybridMode (line 404) | @Test method testCreateConfig_withDeprecatedHybridOcr (line 414) | @Test method testCreateConfig_defaultHybridMode (line 425) | @Test method testCreateConfig_withDoclingBackend (line 435) | @Test method testCreateConfig_defaultHybridFallbackIsFalse (line 446) | @Test method testCreateConfig_withHybridFallbackExplicit (line 457) | @Test FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/api/Config.java class Config (line 30) | public class Config { method getFilterConfig (line 131) | public FilterConfig getFilterConfig() { method Config (line 138) | public Config() { method getPassword (line 146) | public String getPassword() { method setPassword (line 155) | public void setPassword(String password) { method isGenerateMarkdown (line 167) | public boolean isGenerateMarkdown() { method setGenerateMarkdown (line 176) | public void setGenerateMarkdown(boolean generateMarkdown) { method isGenerateHtml (line 185) | public boolean isGenerateHtml() { method setGenerateHtml (line 194) | public void setGenerateHtml(boolean generateHtml) { method isGeneratePDF (line 203) | public boolean isGeneratePDF() { method setGeneratePDF (line 212) | public void setGeneratePDF(boolean generatePDF) { method isKeepLineBreaks (line 221) | public boolean isKeepLineBreaks() { method setKeepLineBreaks (line 230) | public void setKeepLineBreaks(boolean keepLineBreaks) { method isGenerateJSON (line 239) | public boolean isGenerateJSON() { method setGenerateJSON (line 248) | public void setGenerateJSON(boolean generateJSON) { method isGenerateText (line 257) | public boolean isGenerateText() { method setGenerateText (line 266) | public void setGenerateText(boolean generateText) { method isUseHTMLInMarkdown (line 275) | public boolean isUseHTMLInMarkdown() { method setUseHTMLInMarkdown (line 285) | public void setUseHTMLInMarkdown(boolean useHTMLInMarkdown) { method isAddImageToMarkdown (line 294) | public boolean isAddImageToMarkdown() { method setAddImageToMarkdown (line 304) | public void setAddImageToMarkdown(boolean addImageToMarkdown) { method getOutputFolder (line 313) | public String getOutputFolder() { method setOutputFolder (line 323) | public void setOutputFolder(String outputFolder) { method getReplaceInvalidChars (line 332) | public String getReplaceInvalidChars() { method setReplaceInvalidChars (line 341) | public void setReplaceInvalidChars(String replaceInvalidChars) { method isUseStructTree (line 350) | public boolean isUseStructTree() { method setUseStructTree (line 359) | public void setUseStructTree(boolean useStructTree) { method isClusterTableMethod (line 368) | public boolean isClusterTableMethod() { method getTableMethod (line 377) | public String getTableMethod() { method setTableMethod (line 387) | public void setTableMethod(String tableMethod) { method getTableMethodOptions (line 402) | public static String getTableMethodOptions(CharSequence delimiter) { method isValidTableMethod (line 412) | public static boolean isValidTableMethod(String method) { method getReadingOrder (line 421) | public String getReadingOrder() { method setReadingOrder (line 431) | public void setReadingOrder(String readingOrder) { method getReadingOrderOptions (line 446) | public static String getReadingOrderOptions(CharSequence delimiter) { method isValidReadingOrder (line 456) | public static boolean isValidReadingOrder(String order) { method getMarkdownPageSeparator (line 465) | public String getMarkdownPageSeparator() { method setMarkdownPageSeparator (line 474) | public void setMarkdownPageSeparator(String markdownPageSeparator) { method getTextPageSeparator (line 483) | public String getTextPageSeparator() { method setTextPageSeparator (line 492) | public void setTextPageSeparator(String textPageSeparator) { method getHtmlPageSeparator (line 501) | public String getHtmlPageSeparator() { method setHtmlPageSeparator (line 510) | public void setHtmlPageSeparator(String htmlPageSeparator) { method isEmbedImages (line 519) | public boolean isEmbedImages() { method isImageOutputOff (line 528) | public boolean isImageOutputOff() { method getImageOutput (line 537) | public String getImageOutput() { method setImageOutput (line 547) | public void setImageOutput(String imageOutput) { method getImageOutputOptions (line 562) | public static String getImageOutputOptions(CharSequence delimiter) { method isValidImageOutput (line 572) | public static boolean isValidImageOutput(String mode) { method getImageFormat (line 581) | public String getImageFormat() { method setImageFormat (line 591) | public void setImageFormat(String imageFormat) { method getImageFormatOptions (line 606) | public static String getImageFormatOptions(CharSequence delimiter) { method isValidImageFormat (line 616) | public static boolean isValidImageFormat(String format) { method getImageDir (line 625) | public String getImageDir() { method setImageDir (line 635) | public void setImageDir(String imageDir) { method getPages (line 652) | public String getPages() { method setPages (line 662) | public void setPages(String pages) { method getPageNumbers (line 676) | public List getPageNumbers() { method parsePageRanges (line 690) | private static List parsePageRanges(String pages) { method parseRange (line 710) | private static void parseRange(String range, String fullInput, List getFilterRules() { method addFilterRule (line 193) | public void addFilterRule(String pattern, String replacement) { method removeFilterRule (line 202) | public void removeFilterRule(String pattern) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/api/OpenDataLoaderPDF.java class OpenDataLoaderPDF (line 27) | public final class OpenDataLoaderPDF { method OpenDataLoaderPDF (line 29) | private OpenDataLoaderPDF() { method processFile (line 39) | public static void processFile(String inputPdfName, Config config) thr... method shutdown (line 49) | public static void shutdown() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/containers/StaticLayoutContainers.java class StaticLayoutContainers (line 31) | public class StaticLayoutContainers { method clearContainers (line 45) | public static void clearContainers() { method getCurrentContentId (line 58) | public static long getCurrentContentId() { method incrementContentId (line 62) | public static long incrementContentId() { method setCurrentContentId (line 68) | public static void setCurrentContentId(long currentContentId) { method getImagesDirectory (line 72) | public static String getImagesDirectory() { method getImagesDirectoryName (line 76) | public static String getImagesDirectoryName() { method setImagesDirectory (line 81) | public static void setImagesDirectory(String imagesDirectory) { method getContrastRatioConsumer (line 85) | public static ContrastRatioConsumer getContrastRatioConsumer(String so... method closeContrastRatioConsumer (line 97) | public static void closeContrastRatioConsumer() { method getHeadings (line 108) | public static List getHeadings() { method setHeadings (line 112) | public static void setHeadings(List headings) { method isUseStructTree (line 116) | public static Boolean isUseStructTree() { method setIsUseStructTree (line 120) | public static void setIsUseStructTree(Boolean isUseStructTree) { method incrementImageIndex (line 124) | public static int incrementImageIndex() { method resetImageIndex (line 130) | public static void resetImageIndex() { method isEmbedImages (line 134) | public static boolean isEmbedImages() { method setEmbedImages (line 138) | public static void setEmbedImages(boolean embedImages) { method getImageFormat (line 142) | public static String getImageFormat() { method setImageFormat (line 147) | public static void setImageFormat(String format) { method setReplacementCharRatio (line 151) | public static void setReplacementCharRatio(int pageNumber, double rati... method getReplacementCharRatio (line 155) | public static double getReplacementCharRatio(int pageNumber) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/entities/SemanticFormula.java class SemanticFormula (line 29) | public class SemanticFormula extends BaseObject { method SemanticFormula (line 39) | public SemanticFormula(BoundingBox boundingBox, String latex) { method getLatex (line 49) | public String getLatex() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/entities/SemanticPicture.java class SemanticPicture (line 30) | public class SemanticPicture extends BaseObject { method SemanticPicture (line 41) | public SemanticPicture(BoundingBox boundingBox, int index) { method SemanticPicture (line 52) | public SemanticPicture(BoundingBox boundingBox, int index, String desc... method getPictureIndex (line 63) | public int getPictureIndex() { method getDescription (line 72) | public String getDescription() { method hasDescription (line 81) | public boolean hasDescription() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/html/HtmlGenerator.java class HtmlGenerator (line 52) | public class HtmlGenerator implements Closeable { method HtmlGenerator (line 85) | public HtmlGenerator(File inputPdf, Config config) throws IOException { method writeToHtml (line 102) | public void writeToHtml(List> contents) { method writePageSeparator (line 129) | protected void writePageSeparator(int pageNumber) throws IOException { method write (line 144) | protected void write(IObject object) throws IOException { method writeHeaderOrFooter (line 181) | protected void writeHeaderOrFooter(SemanticHeaderOrFooter headerOrFoot... method writeFormula (line 193) | protected void writeFormula(SemanticFormula formula) throws IOException { method writeImage (line 207) | protected void writeImage(ImageChunk image) { method writePicture (line 240) | protected void writePicture(SemanticPicture picture) { method writeList (line 291) | protected void writeList(PDFList list) throws IOException { method writeSemanticTextNode (line 317) | protected void writeSemanticTextNode(SemanticTextNode textNode) throws... method writeTable (line 330) | protected void writeTable(TableBorder table) throws IOException { method writeParagraph (line 373) | protected void writeParagraph(SemanticParagraph paragraph) throws IOEx... method writeHeading (line 397) | protected void writeHeading(SemanticHeading heading) throws IOException { method writeCellTag (line 405) | private void writeCellTag(TableBorderCell cell, boolean isHeader) thro... method enterTable (line 424) | protected void enterTable() { method leaveTable (line 431) | protected void leaveTable() { method isInsideTable (line 442) | protected boolean isInsideTable() { method getCorrectString (line 452) | protected String getCorrectString(String value) { method escapeHtmlAttribute (line 466) | protected String escapeHtmlAttribute(String value) { method close (line 479) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/html/HtmlGeneratorFactory.java class HtmlGeneratorFactory (line 26) | public class HtmlGeneratorFactory { method getHtmlGenerator (line 36) | public static HtmlGenerator getHtmlGenerator(File inputPdf, Config con... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/html/HtmlSyntax.java class HtmlSyntax (line 21) | public class HtmlSyntax { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/DoclingFastServerClient.java class DoclingFastServerClient (line 53) | public class DoclingFastServerClient implements HybridClient { method DoclingFastServerClient (line 75) | public DoclingFastServerClient(HybridConfig config) { method DoclingFastServerClient (line 92) | DoclingFastServerClient(String baseUrl, OkHttpClient httpClient, Objec... method checkAvailability (line 98) | @Override method convert (line 129) | @Override method convertAsync (line 139) | @Override method getBaseUrl (line 155) | public String getBaseUrl() { method buildConvertRequest (line 162) | private Request buildConvertRequest(HybridRequest request) { method parseResponse (line 184) | private HybridResponse parseResponse(Response response) throws IOExcep... method extractPageContents (line 240) | private Map extractPageContents(JsonNode jsonConten... method extractFailedPages (line 272) | private List extractFailedPages(JsonNode root) { method shutdown (line 294) | public void shutdown() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/DoclingSchemaTransformer.java class DoclingSchemaTransformer (line 71) | public class DoclingSchemaTransformer implements HybridSchemaTransformer { method getBackendType (line 94) | @Override method transform (line 99) | @Override method transformPage (line 151) | @Override method determinePageCount (line 176) | private int determinePageCount(JsonNode json, Map pag... method scanContentForPageCount (line 211) | private int scanContentForPageCount(JsonNode json) { method getPageNumberFromProv (line 234) | private int getPageNumberFromProv(JsonNode node) { method transformText (line 249) | private void transformText(JsonNode textNode, List> resu... method createHeading (line 300) | private SemanticHeading createHeading(String text, BoundingBox bbox, J... method createParagraph (line 326) | private SemanticParagraph createParagraph(String text, BoundingBox bbo... method createFormula (line 347) | private SemanticFormula createFormula(String latex, BoundingBox bbox) { method transformPicture (line 356) | private void transformPicture(JsonNode pictureNode, List... method extractPictureDescription (line 394) | private String extractPictureDescription(JsonNode pictureNode) { method transformTable (line 410) | private void transformTable(JsonNode tableNode, List> re... method extractBoundingBox (line 528) | private BoundingBox extractBoundingBox(JsonNode bboxNode, int pageInde... method getTextValue (line 565) | private String getTextValue(JsonNode node, String fieldName) { method sortByReadingOrder (line 578) | private void sortByReadingOrder(List contents) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HancomClient.java class HancomClient (line 50) | public class HancomClient implements HybridClient { method HancomClient (line 80) | public HancomClient(HybridConfig config) { method HancomClient (line 98) | HancomClient(String baseUrl, OkHttpClient httpClient, ObjectMapper obj... method checkAvailability (line 104) | @Override method convert (line 128) | @Override method convertAsync (line 149) | @Override method getBaseUrl (line 165) | public String getBaseUrl() { method uploadFile (line 176) | private String uploadFile(byte[] pdfBytes) throws IOException { method getVisualInfo (line 222) | private JsonNode getVisualInfo(String fileId) throws IOException { method deleteFile (line 257) | private void deleteFile(String fileId) { method normalizeUrl (line 280) | private static String normalizeUrl(String url) { method shutdown (line 294) | public void shutdown() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HancomSchemaTransformer.java class HancomSchemaTransformer (line 71) | public class HancomSchemaTransformer implements HybridSchemaTransformer { method getBackendType (line 90) | @Override method transform (line 95) | @Override method transformPage (line 131) | @Override method determinePageCount (line 161) | private int determinePageCount(JsonNode json, Map pag... method scanElementsForPageCount (line 180) | private int scanElementsForPageCount(JsonNode json) { method transformElement (line 199) | private void transformElement(JsonNode element, List> re... method createParagraph (line 285) | private SemanticParagraph createParagraph(String text, BoundingBox bbo... method createHeading (line 302) | private SemanticHeading createHeading(String text, BoundingBox bbox) { method createFormula (line 320) | private SemanticFormula createFormula(String latex, BoundingBox bbox) { method createPicture (line 329) | private SemanticPicture createPicture(BoundingBox bbox) { method transformTable (line 354) | private TableBorder transformTable(JsonNode element, BoundingBox table... method extractBoundingBox (line 482) | private BoundingBox extractBoundingBox(JsonNode bboxNode, int pageInde... method getTextValue (line 505) | private String getTextValue(JsonNode node, String fieldName) { method sortByReadingOrder (line 518) | private void sortByReadingOrder(List contents) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HybridClient.java type HybridClient (line 39) | public interface HybridClient { type OutputFormat (line 44) | enum OutputFormat { method OutputFormat (line 54) | OutputFormat(String apiValue) { method getApiValue (line 59) | public String getApiValue() { class HybridRequest (line 70) | final class HybridRequest { method HybridRequest (line 82) | public HybridRequest(byte[] pdfBytes, Set pageNumbers, method allPages (line 97) | public static HybridRequest allPages(byte[] pdfBytes) { method allPages (line 108) | public static HybridRequest allPages(byte[] pdfBytes, Set p... method forPages (line 131) | public static HybridRequest forPages(byte[] pdfBytes, Set p... method getPdfBytes (line 136) | public byte[] getPdfBytes() { method getPageNumbers (line 140) | public Set getPageNumbers() { method getOutputFormats (line 149) | public Set getOutputFormats() { method wantsJson (line 158) | public boolean wantsJson() { method wantsMarkdown (line 167) | public boolean wantsMarkdown() { method wantsHtml (line 176) | public boolean wantsHtml() { class HybridResponse (line 184) | final class HybridResponse { method HybridResponse (line 200) | public HybridResponse(String markdown, String html, JsonNode json, method HybridResponse (line 219) | public HybridResponse(String markdown, String html, JsonNode json, M... method HybridResponse (line 230) | public HybridResponse(String markdown, JsonNode json, Map getPageContents() { method getFailedPages (line 268) | public List getFailedPages() { method hasFailedPages (line 277) | public boolean hasFailedPages() { method equals (line 281) | @Override method hashCode (line 293) | @Override method checkAvailability (line 307) | void checkAvailability() throws IOException; method convert (line 316) | HybridResponse convert(HybridRequest request) throws IOException; method convertAsync (line 327) | CompletableFuture convertAsync(HybridRequest request); FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HybridClientFactory.java class HybridClientFactory (line 43) | public class HybridClientFactory { method HybridClientFactory (line 60) | private HybridClientFactory() { method getOrCreate (line 76) | public static HybridClient getOrCreate(String hybrid, HybridConfig con... method createClient (line 89) | private static HybridClient createClient(String hybrid, HybridConfig c... method create (line 113) | @Deprecated method create (line 126) | @Deprecated method shutdown (line 137) | public static void shutdown() { method isSupported (line 154) | public static boolean isSupported(String hybrid) { method getSupportedBackends (line 168) | public static String getSupportedBackends() { method getAllKnownBackends (line 177) | public static String getAllKnownBackends() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HybridConfig.java class HybridConfig (line 24) | public class HybridConfig { method HybridConfig (line 55) | public HybridConfig() { method getUrl (line 63) | public String getUrl() { method setUrl (line 72) | public void setUrl(String url) { method getTimeoutMs (line 81) | public int getTimeoutMs() { method setTimeoutMs (line 91) | public void setTimeoutMs(int timeoutMs) { method isFallbackToJava (line 103) | public boolean isFallbackToJava() { method setFallbackToJava (line 112) | public void setFallbackToJava(boolean fallbackToJava) { method getMaxConcurrentRequests (line 121) | public int getMaxConcurrentRequests() { method setMaxConcurrentRequests (line 131) | public void setMaxConcurrentRequests(int maxConcurrentRequests) { method getDefaultUrl (line 144) | public static String getDefaultUrl(String hybrid) { method getEffectiveUrl (line 167) | public String getEffectiveUrl(String hybrid) { method getMode (line 179) | public String getMode() { method setMode (line 188) | public void setMode(String mode) { method isFullMode (line 197) | public boolean isFullMode() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/HybridSchemaTransformer.java type HybridSchemaTransformer (line 35) | public interface HybridSchemaTransformer { method transform (line 49) | List> transform(HybridResponse response, Map transformPage(int pageNumber, JsonNode pageContent, doub... method getBackendType (line 69) | String getBackendType(); FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/TriageLogger.java class TriageLogger (line 67) | public class TriageLogger { method TriageLogger (line 79) | public TriageLogger() { method logToFile (line 93) | public void logToFile( method logToWriter (line 120) | public void logToWriter( method createTriageJson (line 138) | public ObjectNode createTriageJson( method createSignalsNode (line 197) | private ObjectNode createSignalsNode(TriageSignals signals) { method toJsonString (line 217) | public String toJsonString( FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/hybrid/TriageProcessor.java class TriageProcessor (line 49) | public class TriageProcessor { type TriageDecision (line 121) | public enum TriageDecision { class TriageResult (line 131) | public static final class TriageResult { method TriageResult (line 145) | public TriageResult(int pageNumber, TriageDecision decision, double ... method java (line 160) | public static TriageResult java(int pageNumber, double confidence, T... method backend (line 172) | public static TriageResult backend(int pageNumber, double confidence... method getPageNumber (line 181) | public int getPageNumber() { method getDecision (line 190) | public TriageDecision getDecision() { method getConfidence (line 199) | public double getConfidence() { method getSignals (line 208) | public TriageSignals getSignals() { method equals (line 212) | @Override method hashCode (line 223) | @Override method toString (line 228) | @Override class TriageSignals (line 242) | public static final class TriageSignals { method TriageSignals (line 279) | public TriageSignals(int lineChunkCount, int textChunkCount, double ... method TriageSignals (line 290) | public TriageSignals(int lineChunkCount, int textChunkCount, double ... method empty (line 323) | public static TriageSignals empty() { method getLineChunkCount (line 334) | public int getLineChunkCount() { method getTextChunkCount (line 343) | public int getTextChunkCount() { method getLineToTextRatio (line 352) | public double getLineToTextRatio() { method getAlignedLineGroups (line 361) | public int getAlignedLineGroups() { method hasTableBorder (line 370) | public boolean hasTableBorder() { method hasSuspiciousPattern (line 379) | public boolean hasSuspiciousPattern() { method hasVectorTableSignal (line 388) | public boolean hasVectorTableSignal() { method hasTextTablePattern (line 398) | public boolean hasTextTablePattern() { method getHorizontalLineCount (line 405) | public int getHorizontalLineCount() { method getVerticalLineCount (line 409) | public int getVerticalLineCount() { method getLineArtCount (line 413) | public int getLineArtCount() { method hasGridLines (line 417) | public boolean hasGridLines() { method hasTableBorderLines (line 421) | public boolean hasTableBorderLines() { method hasRowSeparatorPattern (line 425) | public boolean hasRowSeparatorPattern() { method hasAlignedShortLines (line 429) | public boolean hasAlignedShortLines() { method getTablePatternCount (line 433) | public int getTablePatternCount() { method getMaxConsecutiveStreak (line 437) | public int getMaxConsecutiveStreak() { method getPatternDensity (line 441) | public double getPatternDensity() { method hasConsecutivePatterns (line 445) | public boolean hasConsecutivePatterns() { method getLargeImageRatio (line 454) | public double getLargeImageRatio() { method hasLargeImage (line 464) | public boolean hasLargeImage() { method getLargeImageAspectRatio (line 474) | public double getLargeImageAspectRatio() { method equals (line 478) | @Override method hashCode (line 504) | @Override method toString (line 515) | @Override class TriageThresholds (line 545) | public static class TriageThresholds { method TriageThresholds (line 553) | public TriageThresholds() { method getLineRatioThreshold (line 561) | public double getLineRatioThreshold() { method setLineRatioThreshold (line 570) | public void setLineRatioThreshold(double lineRatioThreshold) { method getAlignedLineGroupsThreshold (line 579) | public int getAlignedLineGroupsThreshold() { method setAlignedLineGroupsThreshold (line 588) | public void setAlignedLineGroupsThreshold(int alignedLineGroupsThres... method getGridGapMultiplier (line 597) | public double getGridGapMultiplier() { method setGridGapMultiplier (line 606) | public void setGridGapMultiplier(double gridGapMultiplier) { method TriageProcessor (line 611) | private TriageProcessor() { method classifyPage (line 633) | public static TriageResult classifyPage( method classifyPage (line 648) | public static TriageResult classifyPage( method extractSignals (line 717) | static TriageSignals extractSignals( class SignalAccumulator (line 810) | private static class SignalAccumulator { method processLineChunk (line 828) | void processLineChunk(LineChunk lineChunk) { method processLineArtChunk (line 850) | void processLineArtChunk() { method processImageChunk (line 854) | void processImageChunk(ImageChunk imageChunk) { method processTextChunk (line 866) | void processTextChunk(TextChunk textChunk) { method areSuspiciousTextChunks (line 894) | private boolean areSuspiciousTextChunks(TextChunk previous, TextChun... method hasAlignedShortHorizontalLines (line 917) | boolean hasAlignedShortHorizontalLines() { method checkTableBorderPresence (line 953) | private static boolean checkTableBorderPresence(int pageNumber) { method checkSuspiciousPatterns (line 971) | private static boolean checkSuspiciousPatterns(List textChu... method areOnSameBaseline (line 1004) | private static boolean areOnSameBaseline(TextChunk chunk1, TextChunk c... method countAlignedLineGroups (line 1018) | private static int countAlignedLineGroups(List textChunks, ... method triageAllPages (line 1086) | public static Map triageAllPages( method triageAllPages (line 1099) | public static Map triageAllPages( method triageAllPages (line 1122) | public static Map triageAllPages( FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/JsonName.java class JsonName (line 18) | public class JsonName { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/JsonWriter.java class JsonWriter (line 41) | public class JsonWriter { method getJsonGenerator (line 43) | private static JsonGenerator getJsonGenerator(String fileName) throws ... method writeToJson (line 50) | public static void writeToJson(File inputPDF, String outputFolder, Lis... method writeDocumentInfo (line 73) | private static void writeDocumentInfo(JsonGenerator generator, String ... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/ObjectMapperHolder.java class ObjectMapperHolder (line 35) | public class ObjectMapperHolder { method getObjectMapper (line 97) | public static ObjectMapper getObjectMapper() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/CaptionSerializer.java class CaptionSerializer (line 29) | public class CaptionSerializer extends StdSerializer { method CaptionSerializer (line 36) | public CaptionSerializer(Class t) { method serialize (line 40) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/DoubleSerializer.java class DoubleSerializer (line 30) | public class DoubleSerializer extends StdSerializer { method DoubleSerializer (line 37) | public DoubleSerializer(Class t) { method serialize (line 43) | @Override method round (line 49) | private static double round(double value, int decimalPlaces) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/FormulaSerializer.java class FormulaSerializer (line 40) | public class FormulaSerializer extends StdSerializer { method FormulaSerializer (line 42) | public FormulaSerializer(Class t) { method serialize (line 46) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/HeaderFooterSerializer.java class HeaderFooterSerializer (line 32) | public class HeaderFooterSerializer extends StdSerializer t) { method serialize (line 43) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/HeadingSerializer.java class HeadingSerializer (line 30) | public class HeadingSerializer extends StdSerializer { method HeadingSerializer (line 37) | public HeadingSerializer(Class t) { method serialize (line 41) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/ImageSerializer.java class ImageSerializer (line 31) | public class ImageSerializer extends StdSerializer { method ImageSerializer (line 33) | public ImageSerializer(Class t) { method serialize (line 37) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/LineChunkSerializer.java class LineChunkSerializer (line 26) | public class LineChunkSerializer extends StdSerializer { method LineChunkSerializer (line 28) | public LineChunkSerializer(Class t) { method serialize (line 32) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/ListItemSerializer.java class ListItemSerializer (line 29) | public class ListItemSerializer extends StdSerializer { method ListItemSerializer (line 31) | public ListItemSerializer(Class t) { method serialize (line 35) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/ListSerializer.java class ListSerializer (line 27) | public class ListSerializer extends StdSerializer { method ListSerializer (line 29) | public ListSerializer(Class t) { method serialize (line 33) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/ParagraphSerializer.java class ParagraphSerializer (line 26) | public class ParagraphSerializer extends StdSerializer { method ParagraphSerializer (line 28) | public ParagraphSerializer(Class t) { method serialize (line 32) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/PictureSerializer.java class PictureSerializer (line 36) | public class PictureSerializer extends StdSerializer { method PictureSerializer (line 38) | public PictureSerializer(Class t) { method serialize (line 42) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/SemanticTextNodeSerializer.java class SemanticTextNodeSerializer (line 26) | public class SemanticTextNodeSerializer extends StdSerializer t) { method serialize (line 32) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/SerializerUtil.java class SerializerUtil (line 27) | public class SerializerUtil { method writeEssentialInfo (line 28) | public static void writeEssentialInfo(JsonGenerator jsonGenerator, IOb... method writeTextInfo (line 46) | public static void writeTextInfo(JsonGenerator jsonGenerator, Semantic... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/TableCellSerializer.java class TableCellSerializer (line 28) | public class TableCellSerializer extends StdSerializer { method TableCellSerializer (line 30) | public TableCellSerializer(Class t) { method serialize (line 34) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/TableRowSerializer.java class TableRowSerializer (line 27) | public class TableRowSerializer extends StdSerializer { method TableRowSerializer (line 29) | public TableRowSerializer(Class t) { method serialize (line 33) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/TableSerializer.java class TableSerializer (line 29) | public class TableSerializer extends StdSerializer { method TableSerializer (line 31) | public TableSerializer(Class t) { method serialize (line 35) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/TextChunkSerializer.java class TextChunkSerializer (line 26) | public class TextChunkSerializer extends StdSerializer { method TextChunkSerializer (line 28) | public TextChunkSerializer(Class t) { method serialize (line 32) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/serializers/TextLineSerializer.java class TextLineSerializer (line 26) | public class TextLineSerializer extends StdSerializer { method TextLineSerializer (line 28) | public TextLineSerializer(Class t) { method serialize (line 32) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/markdown/MarkdownGenerator.java class MarkdownGenerator (line 46) | public class MarkdownGenerator implements Closeable { method MarkdownGenerator (line 58) | MarkdownGenerator(File inputPdf, Config config) throws IOException { method writeToMarkdown (line 69) | public void writeToMarkdown(List> contents) { method writePageSeparator (line 88) | protected void writePageSeparator(int pageNumber) throws IOException { method isSupportedContent (line 97) | protected boolean isSupportedContent(IObject content) { method writeContentsSeparator (line 109) | protected void writeContentsSeparator() throws IOException { method write (line 114) | protected void write(IObject object) throws IOException { method writeImage (line 136) | protected void writeImage(ImageChunk image) { method writePicture (line 167) | protected void writePicture(SemanticPicture picture) { method writeFormula (line 208) | protected void writeFormula(SemanticFormula formula) throws IOException { method writeHeaderOrFooter (line 216) | protected void writeHeaderOrFooter(SemanticHeaderOrFooter headerOrFoot... method writeList (line 225) | protected void writeList(PDFList list) throws IOException { method writeSemanticTextNode (line 242) | protected void writeSemanticTextNode(SemanticTextNode textNode) throws... method writeTable (line 258) | protected void writeTable(TableBorder table) throws IOException { method writeContents (line 287) | protected void writeContents(List contents, boolean isTable) ... method writeParagraph (line 306) | protected void writeParagraph(SemanticParagraph textNode) throws IOExc... method writeHeading (line 310) | protected void writeHeading(SemanticHeading heading) throws IOException { method enterTable (line 322) | protected void enterTable() { method leaveTable (line 326) | protected void leaveTable() { method isInsideTable (line 332) | protected boolean isInsideTable() { method getLineBreak (line 336) | protected String getLineBreak() { method writeLineBreak (line 344) | protected void writeLineBreak() throws IOException { method writeSpace (line 348) | protected void writeSpace() throws IOException { method getCorrectMarkdownString (line 352) | protected String getCorrectMarkdownString(String value) { method close (line 359) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/markdown/MarkdownGeneratorFactory.java class MarkdownGeneratorFactory (line 23) | public class MarkdownGeneratorFactory { method getMarkdownGenerator (line 24) | public static MarkdownGenerator getMarkdownGenerator(File inputPdf, FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/markdown/MarkdownHTMLGenerator.java class MarkdownHTMLGenerator (line 28) | public class MarkdownHTMLGenerator extends MarkdownGenerator { method MarkdownHTMLGenerator (line 30) | protected MarkdownHTMLGenerator(File inputPdf, Config config) throws I... method writeTable (line 34) | @Override method writeCellTagBegin (line 68) | private void writeCellTagBegin(TableBorderCell cell, boolean isHeader)... method writeCellTagEnd (line 86) | private void writeCellTagEnd(boolean isHeader) throws IOException { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/markdown/MarkdownSyntax.java class MarkdownSyntax (line 18) | public class MarkdownSyntax { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/pdf/PDFLayer.java type PDFLayer (line 18) | public enum PDFLayer { method PDFLayer (line 29) | PDFLayer(String value) { method getValue (line 33) | public String getValue() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/pdf/PDFWriter.java class PDFWriter (line 54) | public class PDFWriter { method updatePDF (line 62) | public void updatePDF(File inputPDF, String password, String outputFol... method drawContent (line 90) | private void drawContent(IObject content, PDFLayer layer) throws IOExc... method drawContent (line 94) | private void drawContent(IObject content, PDFLayer layer, Map an... method draw (line 150) | public Map draw(BoundingBox boundingBox, float[... method getFloat (line 189) | private static float getFloat(double value) { method getContents (line 200) | public static String getContents(IObject content) { method getColor (line 245) | public static float[] getColor(IObject content) { method getColor (line 265) | public static float[] getColor(SemanticType semanticType) { method createOptContentsForAnnotations (line 290) | private void createOptContentsForAnnotations(PDDocument document) { method getOptionalContent (line 308) | public PDOptionalContentGroup getOptionalContent(PDFLayer layer) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/AbstractTableProcessor.java class AbstractTableProcessor (line 33) | public abstract class AbstractTableProcessor { method processTables (line 44) | public void processTables(List> contents) { method processTables (line 55) | public void processTables(List> contents, List ... method getTables (line 69) | protected abstract List> getTables(List>... method getPagesWithPossibleTables (line 98) | public static List getPagesWithPossibleTables(List contents) { method isImageSubtle (line 100) | private static boolean isImageSubtle(ImageChunk imageChunk) { method isTextNotContainedInImage (line 118) | public static boolean isTextNotContainedInImage(SemanticFigure image, ... method acceptImageCaption (line 129) | private static void acceptImageCaption(List contents, Semanti... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/ClusterTableProcessor.java class ClusterTableProcessor (line 34) | public class ClusterTableProcessor extends AbstractTableProcessor { method getTables (line 36) | @Override method processClusterDetectionTables (line 51) | public static List processClusterDetectionTables(List getFilteredContents(String inputPdfName, L... method processBackgrounds (line 95) | public static void processBackgrounds(int pageNumber, List co... method filterConsecutiveSpaces (line 114) | private static void filterConsecutiveSpaces(List pageContents) { method isBackground (line 122) | private static boolean isBackground(IObject content, BoundingBox pageB... method filterOutOfPageContents (line 129) | private static void filterOutOfPageContents(int pageNumber, List splitTextChunksByWhiteSpacesInPageContent... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/DocumentProcessor.java class DocumentProcessor (line 65) | public class DocumentProcessor { method processFile (line 75) | public static void processFile(String inputPdfName, Config config) thr... method getValidPageNumbers (line 101) | private static Set getValidPageNumbers(Config config) { method processDocument (line 136) | private static List> processDocument(String inputPdfName... method shouldProcessPage (line 192) | private static boolean shouldProcessPage(int pageNumber, Set ... method generateOutputs (line 196) | private static void generateOutputs(String inputPdfName, List contents) { method setIndexesForDocumentContents (line 305) | public static void setIndexesForDocumentContents(List> c... method setIndexesForContentsList (line 316) | public static void setIndexesForContentsList(List contents) { method removeNullObjectsFromList (line 328) | public static List removeNullObjectsFromList(List co... method calculateDocumentInfo (line 338) | private static void calculateDocumentInfo() { method getInfo (line 349) | private static GFCosInfo getInfo(COSTrailer trailer) { method getContentsValueForTextNode (line 360) | public static String getContentsValueForTextNode(SemanticTextNode text... method getPageBoundingBox (line 373) | public static BoundingBox getPageBoundingBox(int pageNumber) { method sortPageContents (line 391) | public static List sortPageContents(List contents) { method sortContents (line 437) | public static void sortContents(List> contents, Config c... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/HeaderFooterProcessor.java class HeaderFooterProcessor (line 43) | public class HeaderFooterProcessor { method processHeadersAndFooters (line 51) | public static void processHeadersAndFooters(List> conten... method processHeadersOrFootersContents (line 72) | private static void processHeadersOrFootersContents(List updatePageContents(List pageCont... method getHeaderOrFooterContentsIndexes (line 106) | private static Set getHeaderOrFooterContentsIndexes(SemanticH... method getHeadersOrFooters (line 117) | private static List getHeadersOrFooters(List processHeaderOrFooterContent(List getNumberOfHeaderOrFooterContentsForEachP... method getIndexesOfHeaderOrFootersContents (line 183) | private static Set getIndexesOfHeaderOrFootersContents(List filterHeaderOrFooterContents(List getHeadersOrFootersIntervals(List getHeadersOfFooterIntervals(List getEqualsItems(List... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/HeadingProcessor.java class HeadingProcessor (line 43) | public class HeadingProcessor { method processHeadings (line 53) | public static void processHeadings(List contents, boolean isT... method disassemblePDFList (line 96) | private static List disassemblePDFList(PDFList list) { method convertListItemToSemanticTextNode (line 107) | private static SemanticTextNode convertListItemToSemanticTextNode(Text... method getTextNodesFromContents (line 115) | private static List getTextNodesFromContents(List textNodes, I... method isNotHeadings (line 150) | private static boolean isNotHeadings(PDFList list) { method setHeadings (line 170) | private static void setHeadings(List contents) { method detectHeadingsLevels (line 192) | public static void detectHeadingsLevels() { method findClosestLevel (line 221) | private static int findClosestLevel(SemanticHeading heading, SortedMap... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/HiddenTextProcessor.java class HiddenTextProcessor (line 30) | public class HiddenTextProcessor { method findHiddenText (line 42) | public static List findHiddenText(String pdfName, List> processDocument( method processDocument (line 100) | public static List> processDocument( method createEmptyContents (line 209) | private static List> createEmptyContents(int totalPages) { method filterAllPages (line 220) | private static Map> filterAllPages( method filterByDecision (line 249) | private static Set filterByDecision( method processJavaPath (line 262) | private static Map> processJavaPath( method applyJavaPagePostProcessing (line 325) | private static void applyJavaPagePostProcessing(List> co... method processBackendPath (line 350) | private static Map> processBackendPath( method getClient (line 424) | private static HybridClient getClient(Config config) { method createTransformer (line 431) | private static HybridSchemaTransformer createTransformer(Config config) { method getPageHeights (line 450) | private static Map getPageHeights(Set pageNu... method mergeResults (line 466) | private static void mergeResults( method postProcess (line 494) | private static void postProcess( method shouldProcessPage (line 514) | private static boolean shouldProcessPage(int pageNumber, Set ... method determineOutputFormats (line 531) | private static Set determineOutputFormats(Config config) { method logTriageSummary (line 538) | private static void logTriageSummary(Map triage... method logTriageToFile (line 564) | private static void logTriageToFile( FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/LevelProcessor.java class LevelProcessor (line 35) | public class LevelProcessor { method detectLevels (line 41) | public static void detectLevels(List> contents) { method setLevels (line 45) | private static void setLevels(List> contents, Stack levelInfos, ... method setLevelForTable (line 136) | private static void setLevelForTable(TableBorder tableBorder) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/ListProcessor.java class ListProcessor (line 44) | public class ListProcessor { method processLists (line 53) | public static void processLists(List> contents, boolean ... method processListItemContent (line 96) | private static List processListItemContent(List cont... method processTextNodeListItemContent (line 107) | private static void processTextNodeListItemContent(List conte... method getTextLabelListIntervals (line 111) | private static List getTextLabelListIntervals(List listInterva... method createListItemTextInfo (line 191) | private static ListItemTextInfo createListItemTextInfo(int i, TextLine... method calculateList (line 203) | private static PDFList calculateList(TextListInterval interval, int st... method addContentToListItem (line 241) | private static void addContentToListItem(int nextIndex, ListItemInfo c... method addContentToLastPageListItem (line 281) | private static void addContentToLastPageListItem(int nextIndex, ListIt... method isListItemLine (line 311) | private static boolean isListItemLine(ListItem listItem, TextLine curr... method getMaxXGap (line 342) | private static double getMaxXGap(double fontSize) { method processListsFromTextNodes (line 346) | public static List processListsFromTextNodes(List co... method calculateTextChildrenInfo (line 373) | private static List calculateTextChildrenInfo(List> contents) { method addMiddleContentToList (line 463) | private static void addMiddleContentToList(PDFList previousList, PDFLi... method addFirstLBodyToList (line 476) | private static void addFirstLBodyToList(PDFList currentList, SemanticT... method isNeighborLists (line 484) | public static boolean isNeighborLists(PDFList previousList, PDFList cu... method isMiddleContentPartOfList (line 500) | private static boolean isMiddleContentPartOfList(PDFList previousList,... method getTextChildrenInfosForNeighborLists (line 519) | private static List getTextChildrenInfosForNeighborL... method createListItemTextInfoFromListItem (line 532) | private static ListItemTextInfo createListItemTextInfoFromListItem(int... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/ParagraphProcessor.java class ParagraphProcessor (line 32) | public class ParagraphProcessor { method processParagraphs (line 36) | public static List processParagraphs(List contents) { method getContentsWithDetectedParagraphs (line 56) | private static List getContentsWithDetectedParagraphs(List detectParagraphsWithJustifyAlignments(L... method detectParagraphsWithCenterAlignments (line 97) | private static List detectParagraphsWithCenterAlignments(Li... method areLinesOfParagraphsWithCenterAlignments (line 117) | private static boolean areLinesOfParagraphsWithCenterAlignments(TextBl... method detectFirstAndLastLinesOfParagraphsWithJustifyAlignments (line 132) | private static List detectFirstAndLastLinesOfParagraphsWith... method detectParagraphsWithLeftAlignments (line 159) | private static List detectParagraphsWithLeftAlignments(List... method areLinesOfParagraphsWithRightAlignments (line 180) | private static boolean areLinesOfParagraphsWithRightAlignments(TextBlo... method areLinesOfParagraphsWithLeftAlignments (line 201) | private static boolean areLinesOfParagraphsWithLeftAlignments(TextBloc... method detectFirstLinesOfParagraphWithLeftAlignments (line 245) | private static List detectFirstLinesOfParagraphWithLeftAlig... method isFirstLineOfParagraphWithLeftAlignment (line 266) | private static boolean isFirstLineOfParagraphWithLeftAlignment(TextBlo... method detectTwoLinesParagraphs (line 292) | private static List detectTwoLinesParagraphs(List detectParagraphsWithRightAlignments(Lis... method detectBulletedParagraphsWithLeftAlignments (line 384) | private static List detectBulletedParagraphsWithLeftAlignme... method processOtherLines (line 405) | private static List processOtherLines(List textB... method isOneParagraph (line 424) | private static boolean isOneParagraph(TextBlock previousBlock, TextBlo... method isFirstLineOfBlock (line 450) | private static boolean isFirstLineOfBlock(TextBlock previousBlock, Tex... method isLastLineOfBlock (line 473) | private static boolean isLastLineOfBlock(TextBlock previousBlock, Text... method createParagraphFromTextBlock (line 496) | public static SemanticParagraph createParagraphFromTextBlock(TextBlock... method getDifferentLinesProbability (line 506) | private static double getDifferentLinesProbability(TextBlock previousB... method areCloseStyle (line 526) | private static boolean areCloseStyle(TextBlock previousBlock, TextBloc... method areTextBlocksHaveSameTextSize (line 532) | private static boolean areTextBlocksHaveSameTextSize(TextBlock firstBl... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/SpecialTableProcessor.java class SpecialTableProcessor (line 28) | public class SpecialTableProcessor { method detectSpecialTables (line 32) | public static List detectSpecialTables(List contents) { method detectSpecialKoreanTables (line 37) | private static void detectSpecialKoreanTables(List contents) { method detectSpecialKoreanTable (line 64) | private static TableBorder detectSpecialKoreanTable(List lin... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/StrikethroughProcessor.java class StrikethroughProcessor (line 41) | public class StrikethroughProcessor { method processStrikethroughs (line 61) | public static List processStrikethroughs(List pageCo... method isTableBorderLine (line 112) | static boolean isTableBorderLine(LineChunk line) { method isStrikethroughLine (line 124) | static boolean isStrikethroughLine(LineChunk line, TextChunk textChunk) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/TableBorderProcessor.java class TableBorderProcessor (line 37) | public class TableBorderProcessor { method processTableBorders (line 54) | public static List processTableBorders(List contents... method addContentToTableBorder (line 120) | private static TableBorder addContentToTableBorder(IObject content) { method processTableBorder (line 161) | public static void processTableBorder(TableBorder tableBorder, int pag... method normalizeAndProcessTableBorder (line 165) | static TableBorder normalizeAndProcessTableBorder(List rawPag... method processTableBorderContents (line 171) | private static void processTableBorderContents(TableBorder tableBorder... method processTableCellContent (line 183) | private static List processTableCellContent(List con... method checkNeighborTables (line 201) | public static void checkNeighborTables(List> contents) { method checkNeighborTables (line 221) | private static void checkNeighborTables(TableBorder previousTable, Tab... method getTextChunkPartForRange (line 239) | static TextChunk getTextChunkPartForRange(TextChunk textChunk, double ... method getTextChunkPartForTableCell (line 255) | private static TextChunk getTextChunkPartForTableCell(TextChunk textCh... method getTextChunkPartBeforeTable (line 259) | public static TextChunk getTextChunkPartBeforeTable(TextChunk textChun... method getTextChunkPartAfterTable (line 271) | public static TextChunk getTextChunkPartAfterTable(TextChunk textChunk... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/TableStructureNormalizer.java class TableStructureNormalizer (line 32) | class TableStructureNormalizer { method normalize (line 50) | static TableBorder normalize(List rawPageContents, TableBorde... method collectColumnSnapshots (line 81) | private static List collectColumnSnapshots(List columnSnapsh... method collectRowBands (line 155) | private static List collectRowBands(TableBorder tableBorder, ... method findMatchingRowBand (line 187) | private static RowBand findMatchingRowBand(List rowBands, Tex... method findBestRowBand (line 199) | private static RowBand findBestRowBand(List rowBands, IObject... method rebuildTable (line 225) | private static TableBorder rebuildTable(TableBorder originalTable, Lis... method isReplacementQualityBetter (line 255) | private static boolean isReplacementQualityBetter(TableBorder original... method countNonEmptyRows (line 279) | private static int countNonEmptyRows(TableBorder tableBorder) { method countNonEmptyColumns (line 298) | private static int countNonEmptyColumns(TableBorder tableBorder) { method hasMeaningfulContent (line 317) | private static boolean hasMeaningfulContent(List contents) { method hasMonotonicRowOrder (line 337) | private static boolean hasMonotonicRowOrder(TableBorder tableBorder) { method collectTableLineStats (line 355) | private static TableLineStats collectTableLineStats(TableBorder tableB... method countMeaningfulTextLines (line 373) | private static int countMeaningfulTextLines(List contents) { class ColumnSnapshot (line 392) | private static final class ColumnSnapshot { method addContent (line 398) | private void addContent(IObject content) { method finalizeSnapshot (line 402) | private void finalizeSnapshot() { class TableLineStats (line 422) | private static final class TableLineStats { method TableLineStats (line 427) | private TableLineStats(int oversizedCellCount, int maxMeaningfulText... class RowBand (line 433) | private static final class RowBand { method RowBand (line 442) | private RowBand(int columnCount) { method addLine (line 449) | private void addLine(TextLine textLine) { method addContent (line 453) | private void addContent(int columnNumber, IObject content) { method updateBounds (line 458) | private void updateBounds(double contentTopY, double contentBottomY,... method hasVerticalOverlap (line 466) | private boolean hasVerticalOverlap(double contentTopY, double conten... method isEmpty (line 470) | private boolean isEmpty() { method sortContents (line 479) | private void sortContents() { method getContents (line 485) | private List getContents(int columnNumber) { method createRowBoundingBox (line 489) | private BoundingBox createRowBoundingBox(TableBorder tableBorder) { method createCellBoundingBox (line 494) | private BoundingBox createCellBoundingBox(TableBorder tableBorder, i... method getCenterY (line 499) | private double getCenterY() { method getAverageHeight (line 503) | private double getAverageHeight() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/TaggedDocumentProcessor.java class TaggedDocumentProcessor (line 23) | public class TaggedDocumentProcessor { method processDocument (line 29) | public static List> processDocument(String inputPdfName,... method collectArtifacts (line 64) | private static List> collectArtifacts(int totalPages) { method shouldProcessPage (line 91) | private static boolean shouldProcessPage(int pageNumber) { method processStructElem (line 95) | private static void processStructElem(INode node) { method addObjectToContent (line 141) | private static void addObjectToContent(IObject object) { method processParagraph (line 152) | private static void processParagraph(INode paragraph) { method createParagraph (line 156) | private static SemanticParagraph createParagraph(INode paragraph) { method processHeading (line 171) | private static void processHeading(INode node) { method processNumberedHeading (line 177) | private static void processNumberedHeading(INode node) { method processList (line 185) | private static void processList(INode node) { method processListItem (line 203) | private static ListItem processListItem(INode node) { method processTable (line 218) | private static void processTable(INode tableNode) { method processTableRows (line 269) | private static List processTableRows(INode table) { method processTableRowsChildren (line 293) | private static void processTableRowsChildren(INode tableRow) { method addTableRow (line 302) | private static void addTableRow(int numberOfColumns, List> table) { method processTableCell (line 316) | private static void processTableCell(TableBorderCell cell, INode elem) { method processChildContents (line 325) | private static void processChildContents(INode elem, List con... method createRowsForTable (line 333) | private static TableBorderRow[] createRowsForTable(List getContents(INode node) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/TextLineProcessor.java class TextLineProcessor (line 33) | public class TextLineProcessor { method processTextLines (line 39) | public static List processTextLines(List contents) { method getTextLineWithSpaces (line 80) | private static TextLine getTextLineWithSpaces(TextLine textLine, doubl... method linkTextLinesWithConnectedLineArtBullet (line 103) | private static void linkTextLinesWithConnectedLineArtBullet(List contents, ... method measureReplacementCharRatio (line 55) | public static double measureReplacementCharRatio(List content... method filterTinyText (line 76) | public static void filterTinyText(List contents) { method trimTextChunksWhiteSpaces (line 88) | public static void trimTextChunksWhiteSpaces(List contents) { method mergeCloseTextChunks (line 97) | public static void mergeCloseTextChunks(List contents) { method removeSameTextChunks (line 114) | public static void removeSameTextChunks(List contents) { method areSameTextChunks (line 130) | public static boolean areSameTextChunks(TextChunk firstTextChunk, Text... method removeTextDecorationImages (line 137) | public static void removeTextDecorationImages(List contents) { method isTextChunkDecorationImage (line 150) | public static boolean isTextChunkDecorationImage(ImageChunk imageChunk... method areNeighborsTextChunks (line 157) | private static boolean areNeighborsTextChunks(TextChunk firstTextChunk... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/readingorder/XYCutPlusPlusSorter.java class XYCutPlusPlusSorter (line 45) | public class XYCutPlusPlusSorter { method XYCutPlusPlusSorter (line 70) | private XYCutPlusPlusSorter() { method sort (line 82) | public static List sort(List objects) { method sort (line 94) | public static List sort(List objects, double beta, d... method identifyCrossLayoutElements (line 146) | static List identifyCrossLayoutElements(List objects... method hasMinimumOverlaps (line 196) | static boolean hasMinimumOverlaps(IObject element, List objec... method calculateHorizontalOverlapRatio (line 233) | static double calculateHorizontalOverlapRatio(BoundingBox box1, Boundi... method computeDensityRatio (line 260) | static double computeDensityRatio(List objects) { method calculateBoundingRegion (line 286) | static BoundingBox calculateBoundingRegion(List objects) { method calculateTotalArea (line 303) | static double calculateTotalArea(List objects) { method recursiveSegment (line 331) | static List recursiveSegment(List objects, boolean p... class CutInfo (line 378) | private static class CutInfo { method CutInfo (line 382) | CutInfo(double position, double gap) { method flatMapRecursive (line 391) | private static List flatMapRecursive(List> grou... method findBestVerticalCutWithProjection (line 406) | private static CutInfo findBestVerticalCutWithProjection(List... method findVerticalCutByEdges (line 450) | private static CutInfo findVerticalCutByEdges(List objects) { method findBestHorizontalCutWithProjection (line 484) | private static CutInfo findBestHorizontalCutWithProjection(List> splitByHorizontalCut(List objects,... method splitByVerticalCut (line 556) | static List> splitByVerticalCut(List objects, d... method mergeCrossLayoutElements (line 590) | static List mergeCrossLayoutElements(List sortedMain... method sortByYThenX (line 644) | static List sortByYThenX(List objects) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/text/TextGenerator.java class TextGenerator (line 45) | public class TextGenerator implements Closeable { method TextGenerator (line 56) | public TextGenerator(File inputPdf, Config config) throws IOException { method writeToText (line 64) | public void writeToText(List> contents) { method writePageSeparator (line 80) | private void writePageSeparator(int pageIndex) throws IOException { method writeContents (line 89) | private void writeContents(List contents, int indentLevel) th... method write (line 98) | private void write(IObject object, int indentLevel) throws IOException { method writeHeaderOrFooter (line 116) | private void writeHeaderOrFooter(SemanticHeaderOrFooter headerOrFooter... method writeList (line 120) | private void writeList(PDFList list, int indentLevel) throws IOExcepti... method writeTable (line 135) | private void writeTable(TableBorder table, int indentLevel) throws IOE... method collectPlainText (line 151) | private String collectPlainText(List contents) { method extractPlainText (line 166) | private String extractPlainText(IObject content) { method writeMultiline (line 197) | private void writeMultiline(String value, int indentLevel) throws IOEx... method indent (line 214) | private String indent(int level) { method sanitize (line 221) | private String sanitize(String value) { method compactWhitespace (line 225) | private String compactWhitespace(String value) { method close (line 233) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/Base64ImageUtils.java class Base64ImageUtils (line 28) | public final class Base64ImageUtils { method Base64ImageUtils (line 37) | private Base64ImageUtils() { method toDataUri (line 49) | public static String toDataUri(File imageFile, String format) { method getMimeType (line 73) | public static String getMimeType(String format) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/BulletedParagraphUtils.java class BulletedParagraphUtils (line 29) | public class BulletedParagraphUtils { method getLabel (line 46) | public static String getLabel(SemanticTextNode semanticTextNode) { method isBulletedParagraph (line 56) | public static boolean isBulletedParagraph(SemanticTextNode textNode) { method isBulletedLine (line 66) | public static boolean isBulletedLine(TextLine textLine) { method isLabeledLine (line 79) | public static boolean isLabeledLine(TextLine textLine) { method isBulletedLineArtParagraph (line 105) | public static boolean isBulletedLineArtParagraph(SemanticTextNode text... method getLabelRegex (line 115) | public static String getLabelRegex(SemanticTextNode textNode) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/ContentSanitizer.java class ContentSanitizer (line 24) | public class ContentSanitizer { method ContentSanitizer (line 29) | public ContentSanitizer(List rules) { method ContentSanitizer (line 34) | public ContentSanitizer(List rules, boolean contentS... method sanitizeContents (line 39) | public void sanitizeContents(List> contents) { method processObject (line 51) | private void processObject(IObject obj) { method processSemanticHeaderOrFooter (line 65) | private void processSemanticHeaderOrFooter(SemanticHeaderOrFooter head... method processPDFList (line 71) | private void processPDFList(PDFList pdfList) { method processTableBorder (line 82) | private void processTableBorder(TableBorder tableBorder) { method processSemanticTextNode (line 96) | private void processSemanticTextNode(SemanticTextNode node) { method processTextLine (line 106) | private void processTextLine(TextLine textLine) { method applyReplacementsToChunks (line 126) | protected List applyReplacementsToChunks(List or... method doReplacementsOverlap (line 197) | private static boolean doReplacementsOverlap(ReplacementInfo a, Replac... method removeOverlappingReplacements (line 201) | private static void removeOverlappingReplacements(List originalChunk... method findEndChunkIndex (line 233) | private int findEndChunkIndex(int currentChunkIndex, List c... method isNotEmptyChunk (line 249) | private boolean isNotEmptyChunk(TextChunk chunk) { method findAllReplacements (line 253) | protected List findAllReplacements(String originalTex... method updateBBoxForReplacement (line 265) | private void updateBBoxForReplacement(TextChunk replacementChunk, class ReplacementInfo (line 285) | protected static class ReplacementInfo { method ReplacementInfo (line 290) | ReplacementInfo(int originalStart, int originalEnd, String replaceme... class ChunkInfo (line 297) | private static class ChunkInfo { method ChunkInfo (line 302) | ChunkInfo(int start, int length) { method getChunkInfos (line 309) | private List getChunkInfos(List textChunks) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/ImagesUtils.java class ImagesUtils (line 41) | public class ImagesUtils { method getContrastRatioConsumer (line 45) | public ContrastRatioConsumer getContrastRatioConsumer() { method createImagesDirectory (line 49) | public void createImagesDirectory(String path) { method write (line 56) | public void write(List> contents, String pdfFilePath, St... method writeFromContents (line 64) | private void writeFromContents(IObject content, String pdfFilePath, St... method writeImage (line 94) | protected void writeImage(ImageChunk chunk, String pdfFilePath, String... method writePicture (line 106) | protected void writePicture(SemanticPicture picture, String pdfFilePat... method createImageFile (line 117) | private void createImageFile(BoundingBox imageBox, String fileName, St... method isImageFileExists (line 130) | public static boolean isImageFileExists(String fileName) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/ModeWeightStatistics.java class ModeWeightStatistics (line 9) | public class ModeWeightStatistics { method ModeWeightStatistics (line 19) | public ModeWeightStatistics(double scoreMin, double scoreMax, double m... method addScore (line 26) | public void addScore(double score) { method getBoost (line 30) | public double getBoost(double score) { method sortByFrequency (line 44) | public void sortByFrequency() { method getMode (line 49) | public double getMode() { method initHigherScores (line 59) | private void initHigherScores() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/SanitizationRule.java class SanitizationRule (line 5) | public class SanitizationRule { method SanitizationRule (line 9) | public SanitizationRule(Pattern pattern, String replacement) { method getPattern (line 14) | public Pattern getPattern() { method getReplacement (line 18) | public String getReplacement() { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/TextNodeStatistics.java class TextNodeStatistics (line 5) | public class TextNodeStatistics { method TextNodeStatistics (line 10) | public TextNodeStatistics() { method TextNodeStatistics (line 14) | public TextNodeStatistics(TextNodeStatisticsConfig config) { method addTextNode (line 30) | public void addTextNode(SemanticTextNode textNode) { method fontSizeRarityBoost (line 38) | public double fontSizeRarityBoost(SemanticTextNode textNode) { method fontWeightRarityBoost (line 43) | public double fontWeightRarityBoost(SemanticTextNode textNode) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/TextNodeStatisticsConfig.java class TextNodeStatisticsConfig (line 8) | public class TextNodeStatisticsConfig { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/TextNodeUtils.java class TextNodeUtils (line 23) | public class TextNodeUtils { method getTextColorOrDefault (line 32) | public static double[] getTextColorOrDefault(SemanticTextNode textNode) { method getTextColorOrNull (line 47) | public static double[] getTextColorOrNull(SemanticTextNode textNode) { FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/levels/LevelInfo.java class LevelInfo (line 24) | public class LevelInfo { method LevelInfo (line 31) | public LevelInfo(double left, double right) { method areSameLevelsInfos (line 36) | public static boolean areSameLevelsInfos(LevelInfo levelInfo1, LevelIn... method checkBoundingBoxes (line 72) | public static boolean checkBoundingBoxes(LevelInfo levelInfo1, LevelIn... method isTable (line 84) | public boolean isTable() { method isList (line 88) | public boolean isList() { method isLineArtBulletParagraph (line 92) | public boolean isLineArtBulletParagraph() { method isTextBulletParagraph (line 96) | public boolean isTextBulletParagraph() { method getMaxXGap (line 100) | public double getMaxXGap() { method getMaxXGap (line 104) | public static double getMaxXGap(LevelInfo levelInfo1, LevelInfo levelI... FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/levels/LineArtBulletParagraphLevelInfo.java class LineArtBulletParagraphLevelInfo (line 21) | public class LineArtBulletParagraphLevelInfo extends LevelInfo { method LineArtBulletParagraphLevelInfo (line 25) | public LineArtBulletParagraphLevelInfo(SemanticTextNode textNode) { method isLineArtBulletParagraph (line 31) | @Override method getBullet (line 36) | public LineArtChunk getBullet() { method getMaxXGap (line 40) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/levels/ListLevelInfo.java class ListLevelInfo (line 20) | public class ListLevelInfo extends LevelInfo { method ListLevelInfo (line 25) | public ListLevelInfo(PDFList pdfList) { method isList (line 33) | @Override method getCommonPrefix (line 38) | public String getCommonPrefix() { method getNumberingStyle (line 42) | public String getNumberingStyle() { method getMaxXGap (line 46) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/levels/TableLevelInfo.java class TableLevelInfo (line 20) | public class TableLevelInfo extends LevelInfo { method TableLevelInfo (line 21) | public TableLevelInfo(TableBorder table) { method isTable (line 25) | @Override FILE: java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/utils/levels/TextBulletParagraphLevelInfo.java class TextBulletParagraphLevelInfo (line 21) | public class TextBulletParagraphLevelInfo extends LevelInfo { method TextBulletParagraphLevelInfo (line 26) | public TextBulletParagraphLevelInfo(SemanticTextNode semanticTextNode) { method isTextBulletParagraph (line 33) | @Override method getLabel (line 38) | public String getLabel() { method getLabelRegex (line 42) | public String getLabelRegex() { method getMaxXGap (line 46) | @Override FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/EmbedImagesIntegrationTest.java class EmbedImagesIntegrationTest (line 36) | class EmbedImagesIntegrationTest { method setUp (line 46) | @BeforeEach method tearDown (line 55) | @AfterEach method testEmbedImagesInJsonOutput (line 60) | @Test method testEmbedImagesInHtmlOutput (line 95) | @Test method testEmbedImagesInMarkdownOutput (line 128) | @Test method testNoEmbedImagesUsesFilePaths (line 161) | @Test method testEmbedImagesWithJpegFormat (line 190) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/ImageDirIntegrationTest.java class ImageDirIntegrationTest (line 35) | class ImageDirIntegrationTest { method tearDown (line 43) | @AfterEach method testCustomImageDir_imagesWrittenToCustomPath (line 48) | @Test method testDefaultImageDir_imagesWrittenToDefaultPath (line 76) | @Test method testCustomImageDir_jsonReferencesCorrectPath (line 100) | @Test method testCustomImageDir_markdownReferencesCorrectPath (line 130) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/IntegrationTest.java class IntegrationTest (line 35) | public class IntegrationTest { method integrationTestParams (line 37) | static Stream integrationTestParams() { method test (line 42) | @ParameterizedTest(name = "{index}: ({0}) => {0}") method checkJsonNodes (line 63) | private static void checkJsonNodes(JsonNode node1, JsonNode node2) { method checkArrayFields (line 71) | private static void checkArrayFields(JsonNode node1, JsonNode node2, S... FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/Issue336IntegrationTest.java class Issue336IntegrationTest (line 37) | class Issue336IntegrationTest { method setUp (line 47) | @BeforeEach method testSpreadsheetExportedTableKeepsFinancialRowsSeparatedAcrossStandardOutputs (line 53) | @Test method testSpreadsheetExportedTableKeepsFinancialRowsSeparatedInMarkdownHtmlOutput (line 84) | @Test method testSpreadsheetExportedTableKeepsFinancialRowsSeparatedInMarkdownImageMode (line 98) | @Test method assertJsonContainsExpectedRow (line 112) | private static void assertJsonContainsExpectedRow(Path jsonOutput) thr... method assertMarkdownTableContainsExpectedRow (line 120) | private static void assertMarkdownTableContainsExpectedRow(Path markdo... method assertHtmlTableContainsExpectedRow (line 130) | private static void assertHtmlTableContainsExpectedRow(Path htmlOutput... method assertTextContainsExpectedRow (line 147) | private static void assertTextContainsExpectedRow(Path textOutput) thr... method expectedFinancialRow (line 155) | private static List expectedFinancialRow() { method containsExpectedValues (line 165) | private static boolean containsExpectedValues(String value, List> extractTableRows(JsonNode root) { method collectTables (line 200) | private static void collectTables(JsonNode node, List tables) { method collectContent (line 219) | private static String collectContent(JsonNode node) { method appendContent (line 225) | private static void appendContent(JsonNode node, StringBuilder builder) { method normalizeText (line 260) | private static String normalizeText(String value) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/PageSeparatorIntegrationTest.java class PageSeparatorIntegrationTest (line 36) | class PageSeparatorIntegrationTest { method setUp (line 46) | @BeforeEach method testMarkdownPageSeparatorSimple (line 54) | @Test method testMarkdownPageSeparatorWithPageNumber (line 71) | @Test method testMarkdownPageSeparatorEmpty (line 88) | @Test method testTextPageSeparatorSimple (line 107) | @Test method testTextPageSeparatorWithPageNumber (line 124) | @Test method testTextPageSeparatorEmpty (line 141) | @Test method testHtmlPageSeparatorSimple (line 160) | @Test method testHtmlPageSeparatorWithPageNumber (line 177) | @Test method testHtmlPageSeparatorEmpty (line 194) | @Test method testConfigPageSeparatorDefaults (line 214) | @Test method testConfigPageSeparatorSetters (line 223) | @Test method testConfigPageNumberConstant (line 237) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/PagesOptionIntegrationTest.java class PagesOptionIntegrationTest (line 42) | class PagesOptionIntegrationTest { method setUp (line 57) | @BeforeEach method testPagesOptionSinglePage (line 64) | @Test method testPagesOptionMultiplePages (line 82) | @Test method testPagesOptionPageRange (line 104) | @Test method testPagesOptionMixedRangeAndSingle (line 125) | @Test method testPagesOptionAllPages (line 147) | @Test method testPagesOptionMarkdown (line 166) | @Test method testPagesOptionExceedsDocumentPages (line 187) | @Test method testPagesOptionAllPagesExceedDocument (line 208) | @Test method testPagesOptionAllPagesExceedDocumentInHybridMode (line 227) | @Test method testPagesOptionTaggedPdfSinglePage (line 255) | @Test method testPagesOptionTaggedPdfMultiplePages (line 277) | @Test method testPagesOptionTaggedPdfAllPages (line 300) | @Test method parseJson (line 322) | private JsonNode parseJson(Path jsonPath) throws IOException { method getPageNumbersFromKids (line 331) | private Set getPageNumbersFromKids(JsonNode root) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/api/ConfigTest.java class ConfigTest (line 26) | class ConfigTest { method testDefaultValues (line 28) | @Test method testSetImageOutputAffectsIsEmbedImages (line 40) | @Test method testSetImageFormat (line 57) | @Test method testIsValidImageFormat_withValidFormats (line 68) | @ParameterizedTest method testIsValidImageFormat_withInvalidFormats (line 74) | @ParameterizedTest method testIsValidImageFormat_withNull (line 80) | @Test method testGetImageFormatOptions (line 85) | @Test method testImageFormatConstants (line 94) | @Test method testSetImageFormatNormalizesToLowercase (line 100) | @Test method testSetImageFormatWithNullDefaultsToPng (line 111) | @Test method testSetImageFormatThrowsExceptionForInvalidFormat (line 119) | @ParameterizedTest method testSetImageOutput (line 132) | @Test method testIsValidImageOutput_withValidModes (line 145) | @ParameterizedTest method testIsValidImageOutput_withInvalidModes (line 151) | @ParameterizedTest method testGetImageOutputOptions (line 157) | @Test method testImageOutputConstants (line 166) | @Test method testSetImageOutputNormalizesToLowercase (line 173) | @Test method testSetImageOutputWithNullDefaultsToExternal (line 184) | @Test method testSetImageOutputThrowsExceptionForInvalidMode (line 192) | @ParameterizedTest method testExistingConfigFields (line 206) | @Test method testDefaultPages (line 230) | @Test method testSetPages_singlePage (line 237) | @Test method testSetPages_commaSeparated (line 245) | @Test method testSetPages_range (line 252) | @Test method testSetPages_mixed (line 259) | @Test method testSetPages_complexMixed (line 266) | @Test method testSetPages_withSpaces (line 273) | @Test method testSetPages_invalidFormat (line 280) | @ParameterizedTest method testSetPages_nullAndEmpty (line 293) | @Test method testSetPages_reverseRangeThrows (line 308) | @Test method testSetPages_zeroPageThrows (line 318) | @Test method testSetPages_negativePageThrows (line 328) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/api/FilterConfigTest.java class FilterConfigTest (line 8) | class FilterConfigTest { method defaultsKeepInvisibleContentFiltersEnabledButSensitiveDataDisabled (line 10) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/containers/StaticLayoutContainersTest.java class StaticLayoutContainersTest (line 23) | class StaticLayoutContainersTest { method setUp (line 25) | @BeforeEach method testClearContainers_resetsEmbedImages (line 30) | @Test method testClearContainers_resetsImageFormat (line 40) | @Test method testSetAndGetEmbedImages (line 50) | @Test method testSetAndGetImageFormat (line 61) | @Test method testGetImageFormat_withNullValue_returnsDefaultPng (line 72) | @Test method testIsEmbedImages_withNullValue_returnsFalse (line 79) | @Test method testSetImagesDirectory (line 86) | @Test method testIncrementImageIndex (line 94) | @Test method testResetImageIndex (line 103) | @Test method testCurrentContentId (line 113) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/DoclingFastServerClientTest.java class DoclingFastServerClientTest (line 38) | class DoclingFastServerClientTest { method setUp (line 43) | @BeforeEach method tearDown (line 55) | @AfterEach method testSuccessResponseHasNoFailedPages (line 61) | @Test method testPartialSuccessResponseWithFailedPages (line 82) | @Test method testPartialSuccessMultipleFailedPages (line 103) | @Test method testFailureResponseThrowsIOException (line 124) | @Test method testLegacyResponseWithoutFailedPagesField (line 141) | @Test method testMalformedFailedPagesValues (line 161) | @Test method testCheckAvailabilitySucceeds (line 184) | @Test method testCheckAvailabilityFailsWhenServerUnavailable (line 192) | @Test method testCheckAvailabilityFailsOnUnhealthyServer (line 201) | @Test method testPartialSuccessAllPagesFailed (line 210) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/DoclingSchemaTransformerTest.java class DoclingSchemaTransformerTest (line 41) | public class DoclingSchemaTransformerTest { method setUp (line 46) | @BeforeEach method testGetBackendType (line 53) | @Test method testTransformNullJson (line 58) | @Test method testTransformEmptyJson (line 69) | @Test method testTransformSimpleParagraph (line 82) | @Test method testTransformSectionHeader (line 106) | @Test method testFilterPageHeaderFooter (line 130) | @Test method testTransformCaption (line 164) | @Test method testTransformFootnote (line 185) | @Test method testTransformSimpleTable (line 206) | @Test method testTransformTableWithSpans (line 250) | @Test method testTransformMultiplePages (line 299) | @Test method testCoordinateTransformBottomLeft (line 337) | @Test method testCoordinateTransformTopLeft (line 373) | @Test method testReadingOrderSort (line 409) | @Test method testMixedContent (line 449) | @Test method testTransformPage (line 494) | @Test method testTextMissingProv (line 510) | @Test method testTableMissingData (line 530) | @Test method testTransformFormula (line 550) | @Test method testTransformFormulaWithComplexLatex (line 574) | @Test method testMixedContentWithFormula (line 598) | @Test method testTransformPictureWithDescription (line 639) | @Test method testTransformPictureWithoutDescription (line 669) | @Test method testTransformMultiplePicturesWithDescriptions (line 693) | @Test method createDoclingDocument (line 739) | private ObjectNode createDoclingDocument() { method addProvenance (line 746) | private void addProvenance(ObjectNode node, int pageNo, double l, doub... method addTableCell (line 758) | private void addTableCell(ArrayNode tableCells, int row, int col, int ... FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/HancomClientTest.java class HancomClientTest (line 39) | public class HancomClientTest { method setUp (line 47) | @BeforeEach method tearDown (line 60) | @AfterEach method testDefaultUrlConfiguration (line 66) | @Test method testConvertFullWorkflow (line 74) | @Test method testConvertWithCleanupOnProcessingError (line 118) | @Test method testConvertWithSpecificPages (line 144) | @Test method testUploadFailure (line 171) | @Test method testDeleteFailureIsIgnored (line 188) | @Test method testConvertAsync (line 213) | @Test method createVisualInfoResponse (line 234) | private String createVisualInfoResponse() { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/HancomSchemaTransformerTest.java class HancomSchemaTransformerTest (line 43) | public class HancomSchemaTransformerTest { method setUp (line 48) | @BeforeEach method testGetBackendType (line 55) | @Test method testTransformNullJson (line 60) | @Test method testTransformEmptyJson (line 71) | @Test method testTransformSimpleParagraph (line 87) | @Test method testTransformHeading (line 108) | @Test method testFilterPageHeaderFooter (line 129) | @Test method testTransformFormula (line 154) | @Test method testTransformFigure (line 175) | @Test method testTransformSimpleTable (line 193) | @Test method testTransformTableWithSpans (line 222) | @Test method testTransformMultiplePages (line 252) | @Test method testBoundingBoxTransformation (line 287) | @Test method testReadingOrderSort (line 314) | @Test method testMixedContent (line 343) | @Test method testTransformListItem (line 374) | @Test method testElementMissingBbox (line 393) | @Test method testTransformPage (line 419) | @Test method testTransformWithHtmlContent (line 432) | @Test method createVisualInfoDto (line 469) | private ObjectNode createVisualInfoDto() { method addElement (line 488) | private void addElement(ArrayNode elements, String type, String label,... method addTableElement (line 509) | private ObjectNode addTableElement(ArrayNode elements, int pageIndex, method addTableContentStructure (line 536) | private ArrayNode addTableContentStructure(ObjectNode tableElement) { method addTableCell (line 544) | private void addTableCell(ArrayNode cells, String text, int row, int col, FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/HealthCheckTest.java class HealthCheckTest (line 37) | class HealthCheckTest { method setUp (line 41) | @BeforeEach method tearDown (line 46) | @AfterEach method testDoclingHealthCheckSucceeds (line 53) | @Test method testDoclingHealthCheckFailsWhenServerDown (line 71) | @Test method testDoclingHealthCheckFailsOnServerError (line 96) | @Test method testHancomHealthCheckSucceeds (line 116) | @Test method testHancomHealthCheckFailsWhenServerDown (line 132) | @Test method testHealthCheckTimesOutQuickly (line 152) | @Test method stripTrailingSlash (line 174) | private static String stripTrailingSlash(String url) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/HybridClientFactoryTest.java class HybridClientFactoryTest (line 27) | class HybridClientFactoryTest { method testCreateDoclingFastClient (line 29) | @Test method testCreateDoclingFastClientCaseInsensitive (line 41) | @Test method testCreateHancomClient (line 54) | @Test method testCreateHancomClientCaseInsensitive (line 66) | @Test method testCreateAzureClientThrowsUnsupported (line 79) | @Test method testCreateGoogleClientThrowsUnsupported (line 91) | @Test method testCreateUnknownBackendThrows (line 103) | @ParameterizedTest method testCreateNullBackendThrows (line 117) | @Test method testCreateEmptyBackendThrows (line 125) | @Test method testIsSupportedDoclingFast (line 133) | @Test method testIsSupportedHancom (line 140) | @Test method testIsSupportedUnsupportedBackends (line 147) | @Test method testIsSupportedNullAndEmpty (line 155) | @Test method testGetSupportedBackends (line 161) | @Test method testGetAllKnownBackends (line 170) | @Test method testBackendConstants (line 180) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/TriageLoggerTest.java class TriageLoggerTest (line 39) | public class TriageLoggerTest { method setUp (line 44) | @BeforeEach method testCreateTriageJsonWithEmptyResults (line 50) | @Test method testCreateTriageJsonWithResults (line 64) | @Test method testToJsonString (line 119) | @Test method testLogToWriter (line 133) | @Test method testLogToFile (line 150) | @Test method testPageOrdering (line 179) | @Test method testDifferentHybridBackends (line 202) | @Test method testSummaryWithAllJavaPages (line 219) | @Test method testSummaryWithAllBackendPages (line 236) | @Test method testDefaultFilename (line 253) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/TriageProcessorIntegrationTest.java class TriageProcessorIntegrationTest (line 46) | public class TriageProcessorIntegrationTest { method checkBenchmarkDir (line 75) | @BeforeAll method testTriageAccuracyOnBenchmarkPDFs (line 94) | @Test method triageDocument (line 179) | private TriageDecision triageDocument(File pdfFile) throws IOException { method testSingleDocumentTriage (line 211) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/hybrid/TriageProcessorTest.java class TriageProcessorTest (line 46) | public class TriageProcessorTest { method setUp (line 48) | @BeforeEach method testEmptyContentReturnsJava (line 57) | @Test method testNullContentReturnsJava (line 69) | @Test method testSimpleTextReturnsJava (line 77) | @Test method testHighLineRatioReturnsBackend (line 92) | @Test method testTableBorderPresenceReturnsBackend (line 110) | @Test method testSuspiciousPatternDetectedButDisabled (line 136) | @Test method testAlignedLineGroupsDetectedButDisabled (line 152) | @Test method testTriageAllPagesWithMap (line 181) | @Test method testTriageAllPagesWithList (line 205) | @Test method testCustomThresholds (line 224) | @Test method testOutOfReadingOrderReturnsBackend (line 245) | @Test method testTriageSignalsEmpty (line 262) | @Test method testTriageResultFactoryMethods (line 274) | @Test method testThresholdsGettersAndSetters (line 289) | @Test method testExtractSignalsDirectly (line 311) | @Test method testClassifyPageHighReplacementRatioRoutesToBackend (line 325) | @Test method testClassifyPageLowReplacementRatioNoEffect (line 339) | @Test method testClassifyPageExactThresholdRoutesToBackend (line 352) | @Test method createTextChunk (line 368) | private TextChunk createTextChunk(double leftX, double bottomY, double... method createLineChunk (line 375) | private LineChunk createLineChunk(double x1, double y1, double x2, dou... method setupTableBorderRows (line 379) | private void setupTableBorderRows(TableBorder tableBorder) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/json/serializers/ImageSerializerTest.java class ImageSerializerTest (line 39) | class ImageSerializerTest { method setUp (line 47) | @BeforeEach method tearDown (line 64) | @AfterEach method createTestImageFile (line 69) | private void createTestImageFile(int index, String format) throws IOEx... method createImageChunk (line 81) | private ImageChunk createImageChunk(int index) { method testSerializeWithEmbedImagesTrueOutputsDataField (line 88) | @Test method testSerializeWithEmbedImagesFalseOutputsSourceField (line 101) | @Test method testSerializeWithJpegFormat (line 114) | @Test method testSerializeWithNonExistentImageNoSourceOrData (line 127) | @Test method testSerializeContainsTypeField (line 138) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/json/serializers/LineArtSerializerTest.java class LineArtSerializerTest (line 29) | class LineArtSerializerTest { method lineArtChunkIsNotSerializedAsImage (line 31) | @Test method tableCellSerializerSkipsLineArtChunkChildren (line 45) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/markdown/MarkdownGeneratorTest.java class MarkdownGeneratorTest (line 32) | public class MarkdownGeneratorTest { method testValidHeadingLevels (line 37) | @ParameterizedTest method testHeadingLevelsCappedAt6 (line 49) | @ParameterizedTest method testHeadingLevelsMinimumIs1 (line 61) | @ParameterizedTest method testMaxHeadingLevelIs6 (line 73) | @Test method testMinHeadingLevelIs1 (line 83) | @Test method generateHeadingPrefix (line 97) | private String generateHeadingPrefix(int headingLevel) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/markdown/MarkdownTableTest.java class MarkdownTableTest (line 52) | public class MarkdownTableTest { method initStaticContainers (line 57) | @BeforeAll method testKoreanSpecialTableMergedRow (line 73) | @Test method testColspanCellsAreNotDuplicated (line 125) | @Test method testSimpleTableWithoutMergedCells (line 174) | @Test method testRowspanCellsAreNotDuplicated (line 205) | @Test method addTextContent (line 244) | private void addTextContent(TableBorderCell cell, String text) { method generateMarkdownTable (line 253) | private String generateMarkdownTable(TableBorder table) throws IOExcep... method countOccurrences (line 268) | private long countOccurrences(String str, String sub) { FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/CaptionProcessorTest.java class CaptionProcessorTest (line 33) | public class CaptionProcessorTest { method testProcessCaptions (line 35) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/CidFontDetectionTest.java class CidFontDetectionTest (line 46) | public class CidFontDetectionTest { method checkFixture (line 53) | @BeforeAll method testCidPdfHighReplacementRatioDetected (line 62) | @Test method testCidPdfWarningLogEmitted (line 90) | @Test method testBoundaryBelowThreshold29percent (line 134) | @Test method testBoundaryAtThreshold30percent (line 150) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/ContentFilterProcessorTest.java class ContentFilterProcessorTest (line 27) | public class ContentFilterProcessorTest { method testShortTextWithAbnormallyWideBoundingBox (line 44) | @Test method testNormalTextWidthNotAbnormal (line 71) | @Test method testLongTextNotTargetedForCorrection (line 96) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/HeaderFooterProcessorTest.java class HeaderFooterProcessorTest (line 33) | public class HeaderFooterProcessorTest { method testProcessHeadersAndFooters (line 35) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/HeadingProcessorTest.java class HeadingProcessorTest (line 32) | public class HeadingProcessorTest { method testProcessHeadings (line 34) | @Test method testDetectHeadingsLevels (line 54) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/HybridDocumentProcessorTest.java class HybridDocumentProcessorTest (line 40) | public class HybridDocumentProcessorTest { method testHybridModeEnabled (line 42) | @Test method testHybridModeDisabled (line 51) | @Test method testHybridModeDefaultIsOff (line 60) | @Test method testHybridConfigDefaults (line 68) | @Test method testHybridConfigEffectiveUrl (line 78) | @Test method testTriageResultFilterByDecision (line 90) | @Test method testPageNumberConversion (line 125) | @Test method testShouldProcessPageWithNullFilter (line 144) | @Test method testShouldProcessPageWithFilter (line 152) | @Test method testInvalidHybridBackendThrows (line 167) | @Test method testHybridConfigTimeout (line 175) | @Test method testHybridConfigMaxConcurrentRequests (line 189) | @Test method testHybridConfigFallbackToggle (line 200) | @Test method shouldProcessPage (line 215) | private static boolean shouldProcessPage(int pageNumber, Set ... method testOutputFormatApiValue (line 221) | @Test method testHybridRequestDefaultOutputFormats (line 228) | @Test method testHybridRequestWithJsonOnly (line 244) | @Test method testHybridRequestWithMarkdownOnly (line 258) | @Test method testHybridRequestEmptyFormatsFallsBackToAll (line 272) | @Test method testHybridRequestNullFormatsFallsBackToAll (line 286) | @Test method testHybridRequestWithHtmlOnly (line 299) | @Test method testHybridConfigModeDefaults (line 317) | @Test method testHybridConfigModeFullMode (line 325) | @Test method testDoclingBackendEnabled (line 334) | @Test method testDoclingEffectiveUrl (line 343) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/LevelProcessorTest.java class LevelProcessorTest (line 37) | public class LevelProcessorTest { method testDetectLevelsForParagraphs (line 39) | @Test method testDetectLevelsForLists (line 62) | @Test method testDetectLevelsForTables (line 109) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/ListProcessorTest.java class ListProcessorTest (line 33) | public class ListProcessorTest { method testProcessLists (line 35) | @Test method testProcessListsFromTextNodes (line 51) | @Test method testCheckNeighborLists (line 70) | @Test method testProcessListsWithSingleCharacterLabels (line 104) | @Test method testProcessListsWithEdgeCaseLabels (line 127) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/ParagraphProcessorTest.java class ParagraphProcessorTest (line 30) | public class ParagraphProcessorTest { method testProcessParagraphs (line 32) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/SpecialTableProcessorTest.java class SpecialTableProcessorTest (line 32) | public class SpecialTableProcessorTest { method testDetectSpecialTables (line 34) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/StrikethroughProcessorTest.java class StrikethroughProcessorTest (line 30) | public class StrikethroughProcessorTest { method setUp (line 32) | @BeforeEach method testStrikethroughDetected (line 39) | @Test method testUnderlineNotDetectedAsStrikethrough (line 59) | @Test method testLineAboveTextNotDetected (line 78) | @Test method testPartialHorizontalOverlapNotDetected (line 97) | @Test method testNoLinesNoChange (line 116) | @Test method testVerticalLineIgnored (line 130) | @Test method testDoubleWrappingPrevented (line 149) | @Test method testWideLineSpanningMultipleChunksRejected (line 168) | @Test method testLineMuchWiderThanTextRejected (line 193) | @Test method testThickLineRejectedAsBackgroundFill (line 213) | @Test method testThinLineAcceptedAsStrikethrough (line 234) | @Test method testIsStrikethroughLineAtExactCenter (line 246) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/TableBorderProcessorTest.java class TableBorderProcessorTest (line 41) | public class TableBorderProcessorTest { method testProcessTableBorders (line 43) | @Test method testCheckNeighborTables (line 98) | @Test method testNormalSmallTableDoesNotTriggerStructuralNormalization (line 153) | @Test method testUndersegmentedFiveColumnTableIsRebuiltFromRawPageContents (line 179) | @Test method testNormalizationKeepsOriginalTableWhenRebuildLosesColumns (line 214) | @Test method testTextBlockTableIsNeverNormalized (line 234) | @Test method testProcessTableBordersDepthLimitNoStackOverflow (line 261) | @Test method testProcessTableBordersNormalNestedTableProcessedCorrectly (line 297) | @Test method createSimpleTable (line 329) | private TableBorder createSimpleTable(int pageNumber, double leftX, do... method createTable (line 334) | private TableBorder createTable(int pageNumber, double leftX, double b... method populateOriginalTableContents (line 362) | private void populateOriginalTableContents(TableBorder table) { method getSingleResultTable (line 373) | private TableBorder getSingleResultTable(List contents, int p... method createTextChunk (line 380) | private TextChunk createTextChunk(int pageNumber, double leftX, double... FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/TextLineProcessorTest.java class TextLineProcessorTest (line 29) | public class TextLineProcessorTest { method testProcessTextLines (line 31) | @Test method testProcessTextLinesSortsChunksByLeftX (line 57) | @Test method testProcessTextLinesAddsSpacesBetweenDistantChunks (line 90) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/TextProcessorTest.java class TextProcessorTest (line 28) | public class TextProcessorTest { method testReplaceUndefinedCharacters (line 30) | @Test method testReplaceUndefinedCharactersSkipsWhenDefault (line 45) | @Test method testReplaceUndefinedCharactersMultipleOccurrences (line 58) | @Test method testReplaceUndefinedCharactersWithRegexSpecialChars (line 69) | @Test method testReplaceUndefinedCharactersSkipsNonTextChunks (line 81) | @Test method testRemoveSameTextChunks (line 94) | @Test method testRemoveTextDecorationImages (line 106) | @Test method testMergeCloseTextChunksSeparatedByLargeGapNotMerged (line 122) | @Test method testMergeCloseTextChunksAdjacentMerged (line 156) | @Test method testMeasureReplacementCharRatioAllReplacement (line 189) | @Test method testMeasureReplacementCharRatioNoReplacement (line 199) | @Test method testMeasureReplacementCharRatioMixed (line 209) | @Test method testMeasureReplacementCharRatioEmptyContents (line 220) | @Test method testMeasureReplacementCharRatioNonTextChunksIgnored (line 228) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/processors/readingorder/XYCutPlusPlusSorterTest.java class XYCutPlusPlusSorterTest (line 36) | class XYCutPlusPlusSorterTest { method setUp (line 38) | @BeforeEach method sort_nullList_returnsNull (line 46) | @Test method sort_emptyList_returnsEmpty (line 52) | @Test method sort_singleObject_returnsSame (line 58) | @Test method sort_singleColumn_topToBottom (line 69) | @Test method identifyCrossLayoutElements_wideHeader_detected (line 88) | @Test method identifyCrossLayoutElements_narrowElements_notDetected (line 108) | @Test method identifyCrossLayoutElements_wideButNoOverlaps_notDetected (line 121) | @Test method hasMinimumOverlaps_sufficientOverlaps_returnsTrue (line 134) | @Test method hasMinimumOverlaps_insufficientOverlaps_returnsFalse (line 147) | @Test method computeDensityRatio_denseLayout_highRatio (line 161) | @Test method computeDensityRatio_sparseLayout_lowRatio (line 174) | @Test method computeDensityRatio_emptyList_defaultRatio (line 187) | @Test method splitByHorizontalCut_validCut_correctGroups (line 196) | @Test method splitByVerticalCut_validCut_correctGroups (line 211) | @Test method sort_twoColumns_leftColumnFirst (line 228) | @Test method sort_twoColumnsWithHeader_headerFirst (line 249) | @Test method sort_headerAndFooter_correctPositions (line 273) | @Test method sort_horizontalSections_largerYGap_horizontalCutFirst (line 294) | @Test method sort_withCustomParameters_respectsParameters (line 319) | @Test method calculateBoundingRegion_multipleObjects_correctBounds (line 336) | @Test method calculateTotalArea_multipleObjects_sumOfAreas (line 351) | @Test method mergeCrossLayoutElements_emptyCrossLayout_returnsSortedMain (line 364) | @Test method mergeCrossLayoutElements_crossLayoutAtTop_insertsFirst (line 377) | @Test method sort_academicPaperTwoColumn_correctReadingOrder (line 415) | @Test method sort_twoColumnsOverlappingY_leftColumnFirst (line 501) | @Test method sort_twoColumnsWithNarrowBridge_leftColumnFirst (line 546) | @Test method sort_1901_03003_moran_paper_correctReadingOrder (line 589) | @Test method findPosition (line 694) | private int findPosition(List objects, String text) { method createTextLineWithId (line 703) | private IObject createTextLineWithId(double leftX, double topY, double... method createTextLine (line 711) | private IObject createTextLine(double leftX, double topY, double right... method getText (line 720) | private String getText(IObject obj) { method sort_noStackOverflowWithComplexLayout_issue179 (line 757) | @Test method sort_wideAndNarrowObjects_noInfiniteRecursion (line 786) | @Test method sort_manySmallGaps_noInfiniteRecursion (line 825) | @Test method sort_horizontalGapWithCentersOnOneSide_noInfiniteRecursion (line 846) | @Test method sort_issue179_regressionTest (line 889) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/regression/ToUnicodeRegressionTest.java class ToUnicodeRegressionTest (line 35) | class ToUnicodeRegressionTest { method testIssue166ToUnicodeIntervalByteCarry (line 50) | @Test method testIssue166ToUnicodeIntervalByteCarryAtLowBoundary (line 70) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/Base64ImageUtilsTest.java class Base64ImageUtilsTest (line 31) | class Base64ImageUtilsTest { method testToDataUri_withPngFormat (line 36) | @Test method testToDataUri_withJpegFormat (line 53) | @Test method testToDataUri_withNonExistentFile (line 68) | @Test method testGetMimeType_withValidFormats (line 80) | @ParameterizedTest method testGetMimeType_withNullFormat (line 93) | @Test method testGetMimeType_withUnknownFormat (line 98) | @Test method testMaxEmbeddedImageSizeConstant (line 107) | @Test method testToDataUriWithImageAtSizeLimit (line 113) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/ContentSanitizerTest.java class ContentSanitizerTest (line 17) | class ContentSanitizerTest { method setUp (line 20) | @BeforeEach method createTextChunk (line 26) | TextChunk createTextChunk(String value, double left, double bottom, do... method assertChunksContainValues (line 33) | private void assertChunksContainValues(List chunks, String.... method testMultipleReplacementsInSingleChunk (line 43) | @Test method testReplaceCoveringMultipleFullChunks (line 56) | @Test method testReplaceCoveringPartsOfChunks (line 76) | @Test method testReplaceCoveringOneFullChunkInArray (line 95) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/ImageFormatSupportTest.java class ImageFormatSupportTest (line 38) | class ImageFormatSupportTest { method createTestImage (line 46) | private BufferedImage createTestImage() { method testPngFormatIsSupported (line 61) | @Test method testJpegFormatIsSupported (line 73) | @Test method testWebpFormatIsNotSupported (line 85) | @Test method testListAvailableWriterFormats (line 96) | @Test method testStandardFormatsAreSupported (line 107) | @ParameterizedTest method testUnsupportedFormatReturnsFalse (line 119) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/ImagesUtilsTest.java class ImagesUtilsTest (line 31) | class ImagesUtilsTest { method testCreateImagesDirectory (line 33) | @Test method testWriteImageInitializesContrastRatioConsumer (line 67) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/ModeWeightStatisticsTest.java class ModeWeightStatisticsTest (line 8) | class ModeWeightStatisticsTest { method getModeReturnsMostFrequentScoreWithinRange (line 10) | @Test method getModeReturnsNaNWhenNoScoresWithinRange (line 25) | @Test method getBoostGivesFractionalRankForScoresAboveMode (line 37) | @Test FILE: java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/utils/TextNodeStatisticsTest.java class TextNodeStatisticsTest (line 9) | class TextNodeStatisticsTest { method fontSizeRarityBoostUsesRelativeRankOfScoresAboveBodyMode (line 11) | @Test method fontWeightRarityBoostUsesDominantWeightWindow (line 37) | @Test class StubSemanticTextNode (line 63) | private static class StubSemanticTextNode extends SemanticTextNode { method StubSemanticTextNode (line 67) | StubSemanticTextNode(double fontSize, double fontWeight) { method getFontSize (line 72) | @Override method getFontWeight (line 77) | @Override FILE: java/opendataloader-pdf-core/src/test/resources/generate-cid-test-pdf.py function find_ttf_font (line 31) | def find_ttf_font(): function read_ttf_tables (line 46) | def read_ttf_tables(font_path): function build_pdf_with_real_font (line 53) | def build_pdf_with_real_font(output_path, font_path): function main (line 312) | def main(): FILE: node/opendataloader-pdf/src/cli-options.generated.ts function registerCliOptions (line 9) | function registerCliOptions(program: Command): void { FILE: node/opendataloader-pdf/src/cli.ts function createProgram (line 7) | function createProgram(): Command { function main (line 33) | async function main(): Promise { FILE: node/opendataloader-pdf/src/convert-options.generated.ts type ConvertOptions (line 7) | interface ConvertOptions { type CliOptions (line 63) | interface CliOptions { function buildConvertOptions (line 94) | function buildConvertOptions(cliOptions: CliOptions): ConvertOptions { function buildArgs (line 179) | function buildArgs(options: ConvertOptions): string[] { FILE: node/opendataloader-pdf/src/index.ts constant JAR_NAME (line 15) | const JAR_NAME = 'opendataloader-pdf-cli.jar'; type JarExecutionOptions (line 17) | interface JarExecutionOptions { function executeJar (line 21) | function executeJar(args: string[], executionOptions: JarExecutionOption... function convert (line 83) | function convert( type RunOptions (line 108) | interface RunOptions { function run (line 127) | function run(inputPath: string, options: RunOptions = {}): Promise None: FILE: python/opendataloader-pdf/src/opendataloader_pdf/convert_generated.py function convert (line 12) | def convert( FILE: python/opendataloader-pdf/src/opendataloader_pdf/hybrid_server.py function build_conversion_response (line 81) | def build_conversion_response( function sanitize_unicode (line 148) | def sanitize_unicode(data: Any) -> Any: function _get_loop_setting (line 174) | def _get_loop_setting() -> str: function _check_dependencies (line 185) | def _check_dependencies(): function create_converter (line 211) | def create_converter( function create_app (line 274) | def create_app( function main (line 448) | def main(): FILE: python/opendataloader-pdf/src/opendataloader_pdf/runner.py function run_jar (line 14) | def run_jar(args: List[str], quiet: bool = False) -> str: FILE: python/opendataloader-pdf/src/opendataloader_pdf/wrapper.py function run (line 16) | def run( function main (line 92) | def main(argv=None) -> int: FILE: python/opendataloader-pdf/tests/conftest.py function input_pdf (line 8) | def input_pdf(): function output_dir (line 13) | def output_dir(): FILE: python/opendataloader-pdf/tests/test_cli_options.py class TestCLIOptions (line 7) | class TestCLIOptions: method test_cli_options_is_list (line 10) | def test_cli_options_is_list(self): method test_cli_options_not_empty (line 14) | def test_cli_options_not_empty(self): method test_each_option_has_required_fields (line 18) | def test_each_option_has_required_fields(self): method test_option_types_are_valid (line 33) | def test_option_types_are_valid(self): method test_python_name_is_snake_case (line 39) | def test_python_name_is_snake_case(self): method test_known_options_exist (line 44) | def test_known_options_exist(self): method test_sanitize_option_exists (line 60) | def test_sanitize_option_exists(self): class TestAddOptionsToParser (line 69) | class TestAddOptionsToParser: method test_adds_all_options (line 72) | def test_adds_all_options(self): method test_boolean_options_default_to_false (line 87) | def test_boolean_options_default_to_false(self): method test_string_options_default_to_none (line 100) | def test_string_options_default_to_none(self): method test_short_options_work (line 113) | def test_short_options_work(self): method test_long_options_work (line 132) | def test_long_options_work(self): FILE: python/opendataloader-pdf/tests/test_convert_integration.py function test_convert_generates_output (line 6) | def test_convert_generates_output(input_pdf, output_dir): FILE: python/opendataloader-pdf/tests/test_hybrid_server.py function test_gpu_detected_logging (line 8) | def test_gpu_detected_logging(caplog): function test_no_gpu_logging (line 38) | def test_no_gpu_logging(caplog): function test_no_pytorch_logging (line 57) | def test_no_pytorch_logging(caplog): function test_get_loop_setting_returns_asyncio_on_windows (line 75) | def test_get_loop_setting_returns_asyncio_on_windows(): function test_get_loop_setting_returns_auto_on_non_windows (line 83) | def test_get_loop_setting_returns_auto_on_non_windows(): FILE: python/opendataloader-pdf/tests/test_hybrid_server_nonblocking.py function mock_docling (line 18) | def mock_docling(): function app_with_converter (line 58) | def app_with_converter(mock_docling): function test_convert_runs_in_thread_pool (line 71) | async def test_convert_runs_in_thread_pool(app_with_converter, mock_docl... function test_health_responds_during_conversion (line 104) | async def test_health_responds_during_conversion(app_with_converter): FILE: python/opendataloader-pdf/tests/test_hybrid_server_partial_success.py class TestBuildConversionResponse (line 13) | class TestBuildConversionResponse: method test_success_status (line 16) | def test_success_status(self): method test_partial_success_status (line 29) | def test_partial_success_status(self): method test_partial_success_multiple_failed_pages (line 42) | def test_partial_success_multiple_failed_pages(self): method test_partial_success_no_page_range_with_total_pages (line 57) | def test_partial_success_no_page_range_with_total_pages(self): method test_partial_success_no_page_range_fallback (line 71) | def test_partial_success_no_page_range_fallback(self): method test_success_no_errors_field (line 83) | def test_success_no_errors_field(self): method test_document_field_present (line 94) | def test_document_field_present(self): method test_partial_success_first_page_failed_with_page_range (line 106) | def test_partial_success_first_page_failed_with_page_range(self): method test_partial_success_last_page_failed_with_page_range (line 117) | def test_partial_success_last_page_failed_with_page_range(self): method test_partial_success_all_pages_failed (line 128) | def test_partial_success_all_pages_failed(self): method test_partial_success_all_pages_failed_with_total_pages (line 140) | def test_partial_success_all_pages_failed_with_total_pages(self): method test_failure_status_no_failed_pages_detection (line 153) | def test_failure_status_no_failed_pages_detection(self): method test_partial_success_missing_pages_key (line 165) | def test_partial_success_missing_pages_key(self): FILE: python/opendataloader-pdf/tests/test_hybrid_server_unicode.py class TestSanitizeUnicode (line 15) | class TestSanitizeUnicode: method test_lone_surrogate_replaced (line 18) | def test_lone_surrogate_replaced(self): method test_all_surrogate_range_replaced (line 25) | def test_all_surrogate_range_replaced(self): method test_null_character_replaced (line 31) | def test_null_character_replaced(self): method test_nested_dict_sanitized (line 38) | def test_nested_dict_sanitized(self): method test_list_sanitized (line 45) | def test_list_sanitized(self): method test_clean_data_unchanged (line 53) | def test_clean_data_unchanged(self): method test_non_string_values_preserved (line 59) | def test_non_string_values_preserved(self): method test_sanitized_output_json_serializable (line 65) | def test_sanitized_output_json_serializable(self): method test_mixed_valid_and_invalid_unicode (line 80) | def test_mixed_valid_and_invalid_unicode(self): FILE: scripts/experiments/docling_baseline_bench.py function convert_pdf (line 29) | def convert_pdf(pdf_path: Path) -> dict: function main (line 51) | def main(): FILE: scripts/experiments/docling_fastapi_bench.py function run_server (line 38) | def run_server(): function convert_pdf (line 111) | def convert_pdf(pdf_path: Path) -> dict: function wait_for_server (line 137) | def wait_for_server(max_retries=60, delay=1.0): function main (line 150) | def main(): FILE: scripts/experiments/docling_speed_report.py function load_results (line 19) | def load_results(filename: str) -> dict | None: function main (line 28) | def main(): FILE: scripts/experiments/docling_subprocess_bench.py function convert_pdf (line 123) | def convert_pdf(process: subprocess.Popen, pdf_path: Path) -> dict: function main (line 165) | def main(): FILE: scripts/generate-options.mjs constant ROOT_DIR (line 15) | const ROOT_DIR = join(__dirname, '..'); constant AUTO_GENERATED_HEADER (line 21) | const AUTO_GENERATED_HEADER = `// AUTO-GENERATED FROM options.json - DO ... constant AUTO_GENERATED_HEADER_PYTHON (line 25) | const AUTO_GENERATED_HEADER_PYTHON = `# AUTO-GENERATED FROM options.json... constant AUTO_GENERATED_HEADER_MDX (line 29) | const AUTO_GENERATED_HEADER_MDX = `{/* AUTO-GENERATED FROM options.json ... function toCamelCase (line 37) | function toCamelCase(str) { function toSnakeCase (line 44) | function toSnakeCase(str) { constant LIST_OPTIONS (line 51) | const LIST_OPTIONS = new Set(['format', 'content-safety-off']); function isListOption (line 56) | function isListOption(opt) { function escapeString (line 67) | function escapeString(str, quote = "'", { escapePercent = false } = {}) { function generateNodeCliOptions (line 83) | function generateNodeCliOptions() { function generateNodeConvertOptions (line 112) | function generateNodeConvertOptions() { function generatePythonCliOptions (line 225) | function generatePythonCliOptions() { function generatePythonConvert (line 282) | function generatePythonConvert() { function generatePythonConvertOptionsMdx (line 374) | function generatePythonConvertOptionsMdx() { function generateNodeConvertOptionsMdx (line 418) | function generateNodeConvertOptionsMdx() { function generateOptionsReferenceMdx (line 459) | function generateOptionsReferenceMdx() { FILE: scripts/generate-schema.mjs constant ROOT_DIR (line 14) | const ROOT_DIR = join(__dirname, '..'); constant AUTO_GENERATED_HEADER_MDX (line 20) | const AUTO_GENERATED_HEADER_MDX = `{/* AUTO-GENERATED FROM schema.json -... function formatType (line 28) | function formatType(prop) { function isRequired (line 65) | function isRequired(propName, requiredList) { function generateJsonSchemaMdx (line 72) | function generateJsonSchemaMdx() { FILE: scripts/utils.mjs function escapeMarkdown (line 10) | function escapeMarkdown(str) { function formatTable (line 28) | function formatTable(headers, rows) {