Repository: Goldziher/html-to-markdown Branch: main Commit: 64ef6808077f Files: 1171 Total size: 10.2 MB Directory structure: gitextract_5vwofxnz/ ├── .ai-rulez/ │ ├── config.toml │ ├── context/ │ │ └── crate-structure.md │ ├── domains/ │ │ ├── conversion-algorithms/ │ │ │ └── DOMAIN.md │ │ ├── html-parsing/ │ │ │ └── DOMAIN.md │ │ └── safety-sanitization/ │ │ └── DOMAIN.md │ └── rules/ │ └── alef-generated-bindings.md ├── .cargo/ │ └── config.toml ├── .clang-format ├── .editorconfig ├── .github/ │ ├── CODEOWNERS │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ ├── documentation.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── actions/ │ │ ├── build-typescript/ │ │ │ └── action.yml │ │ └── smoke-pie/ │ │ └── action.yml │ ├── dependabot.yaml │ └── workflows/ │ ├── ci.yaml │ ├── deploy-docs.yaml │ ├── publish.yaml │ ├── validate-issues.yml │ └── validate-pr.yml ├── .gitignore ├── .gitmodules ├── .golangci.yml ├── .mailmap ├── .markdownlint.yaml ├── .mvn/ │ └── wrapper/ │ ├── MavenWrapperDownloader.java │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── .php-cs-fixer.dist.php ├── .pre-commit-config.yaml ├── .ruby-version ├── .rumdl.toml ├── .sdkmanrc ├── .task/ │ ├── README.md │ ├── checksum/ │ │ ├── _lint-typescript-lint │ │ ├── _test-typescript-test │ │ └── typescript-typecheck │ ├── config/ │ │ ├── platforms.yml │ │ └── vars.yml │ ├── languages/ │ │ ├── python.yml │ │ └── rust.yml │ ├── tools/ │ │ ├── docs.yml │ │ ├── general.yml │ │ └── version-sync.yml │ └── workflows/ │ └── e2e.yml ├── .typos.toml ├── ATTRIBUTIONS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── Taskfile.yaml ├── _typos.toml ├── alef.toml ├── composer.json ├── crates/ │ ├── html-to-markdown/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── examples/ │ │ │ ├── basic.rs │ │ │ ├── table.rs │ │ │ ├── test_deser.rs │ │ │ ├── test_escape.rs │ │ │ ├── test_inline_formatting.rs │ │ │ ├── test_lists.rs │ │ │ ├── 
test_semantic_tags.rs │ │ │ ├── test_tables.rs │ │ │ ├── test_task_lists.rs │ │ │ └── test_whitespace.rs │ │ ├── src/ │ │ │ ├── convert_api.rs │ │ │ ├── converter/ │ │ │ │ ├── block/ │ │ │ │ │ ├── blockquote.rs │ │ │ │ │ ├── container.rs │ │ │ │ │ ├── div.rs │ │ │ │ │ ├── heading.rs │ │ │ │ │ ├── horizontal_rule.rs │ │ │ │ │ ├── line_break.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── paragraph.rs │ │ │ │ │ ├── preformatted.rs │ │ │ │ │ ├── table/ │ │ │ │ │ │ ├── builder.rs │ │ │ │ │ │ ├── caption.rs │ │ │ │ │ │ ├── cell.rs │ │ │ │ │ │ ├── cells.rs │ │ │ │ │ │ ├── layout.rs │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ ├── scanner.rs │ │ │ │ │ │ └── utils.rs │ │ │ │ │ └── unknown.rs │ │ │ │ ├── context.rs │ │ │ │ ├── dom_context.rs │ │ │ │ ├── form/ │ │ │ │ │ ├── elements.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── format/ │ │ │ │ │ ├── djot.rs │ │ │ │ │ ├── markdown.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── handlers/ │ │ │ │ │ ├── blockquote.rs │ │ │ │ │ ├── code_block.rs │ │ │ │ │ ├── graphic.rs │ │ │ │ │ ├── image.rs │ │ │ │ │ ├── link.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── inline/ │ │ │ │ │ ├── code.rs │ │ │ │ │ ├── emphasis.rs │ │ │ │ │ ├── link.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── ruby.rs │ │ │ │ │ └── semantic/ │ │ │ │ │ ├── marks.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── typography.rs │ │ │ │ ├── list/ │ │ │ │ │ ├── definition.rs │ │ │ │ │ ├── item.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── ordered.rs │ │ │ │ │ ├── unordered.rs │ │ │ │ │ └── utils.rs │ │ │ │ ├── main.rs │ │ │ │ ├── main_helpers.rs │ │ │ │ ├── media/ │ │ │ │ │ ├── embedded.rs │ │ │ │ │ ├── graphic.rs │ │ │ │ │ ├── image.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── svg.rs │ │ │ │ ├── metadata.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── plain_text.rs │ │ │ │ ├── preprocessing_helpers.rs │ │ │ │ ├── reference_collector.rs │ │ │ │ ├── semantic/ │ │ │ │ │ ├── attributes.rs │ │ │ │ │ ├── definition_list.rs │ │ │ │ │ ├── figure.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── sectioning.rs │ │ │ │ │ └── summary.rs │ │ │ │ ├── text/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── 
processing.rs │ │ │ │ ├── text_node.rs │ │ │ │ ├── utility/ │ │ │ │ │ ├── attributes.rs │ │ │ │ │ ├── caching.rs │ │ │ │ │ ├── content.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── preprocessing.rs │ │ │ │ │ ├── serialization.rs │ │ │ │ │ └── siblings.rs │ │ │ │ └── visitor_hooks.rs │ │ │ ├── error.rs │ │ │ ├── exports.rs │ │ │ ├── inline_images.rs │ │ │ ├── lib.rs │ │ │ ├── metadata/ │ │ │ │ ├── collector.rs │ │ │ │ ├── config.rs │ │ │ │ ├── extraction.rs │ │ │ │ ├── mod.rs │ │ │ │ └── types.rs │ │ │ ├── options/ │ │ │ │ ├── conversion.rs │ │ │ │ ├── inline_image.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── preprocessing.rs │ │ │ │ └── validation.rs │ │ │ ├── prelude.rs │ │ │ ├── rcdom.rs │ │ │ ├── text.rs │ │ │ ├── types/ │ │ │ │ ├── document.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── result.rs │ │ │ │ ├── structure_builder.rs │ │ │ │ ├── structure_collector.rs │ │ │ │ ├── tables.rs │ │ │ │ └── warnings.rs │ │ │ ├── validation.rs │ │ │ ├── visitor/ │ │ │ │ ├── default_impl.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── traits.rs │ │ │ │ └── types.rs │ │ │ ├── visitor_helpers/ │ │ │ │ └── helpers/ │ │ │ │ ├── callbacks/ │ │ │ │ │ └── mod.rs │ │ │ │ ├── content.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── state.rs │ │ │ │ └── traversal.rs │ │ │ ├── visitor_helpers.rs │ │ │ ├── wrapper/ │ │ │ │ ├── sync.rs │ │ │ │ └── utils.rs │ │ │ └── wrapper.rs │ │ └── tests/ │ │ ├── br_in_inline_test.rs │ │ ├── commonmark_compliance_test.rs │ │ ├── djot_output_test.rs │ │ ├── exclude_selectors_test.rs │ │ ├── integration_test.rs │ │ ├── issue_121_regressions.rs │ │ ├── issue_127_regressions.rs │ │ ├── issue_128_regressions.rs │ │ ├── issue_131_regressions.rs │ │ ├── issue_134_regressions.rs │ │ ├── issue_139_regressions.rs │ │ ├── issue_140_regressions.rs │ │ ├── issue_143_regressions.rs │ │ ├── issue_145_regressions.rs │ │ ├── issue_146_regressions.rs │ │ ├── issue_176_regressions.rs │ │ ├── issue_190_regressions.rs │ │ ├── issue_199_regressions.rs │ │ ├── issue_200_regressions.rs │ │ ├── issue_212_regressions.rs │ │ ├── 
issue_216_217_regressions.rs │ │ ├── json_ld_script_extraction.rs │ │ ├── lists_test.rs │ │ ├── plain_output_test.rs │ │ ├── preprocessing_tests.rs │ │ ├── reference_links_test.rs │ │ ├── sectioning_elements_test.rs │ │ ├── skip_images_test.rs │ │ ├── tables_test.rs │ │ ├── test_custom_elements.rs │ │ ├── test_issue_187.rs │ │ ├── test_issue_218.rs │ │ ├── test_issue_277.rs │ │ ├── test_max_depth.rs │ │ ├── test_nested_simple.rs │ │ ├── test_script_style_stripping.rs │ │ ├── test_spa_bisect.rs │ │ ├── visitor_code_integration_test.rs │ │ ├── visitor_integration_test.rs │ │ └── xml_tables_test.rs │ ├── html-to-markdown-cli/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ ├── args.rs │ │ │ ├── convert.rs │ │ │ ├── main.rs │ │ │ ├── output.rs │ │ │ ├── utils.rs │ │ │ └── validators.rs │ │ └── tests/ │ │ └── cli_test.rs │ ├── html-to-markdown-ffi/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── cbindgen.toml │ │ ├── cmake/ │ │ │ └── html-to-markdown-ffi-config.cmake │ │ ├── include/ │ │ │ └── html_to_markdown.h │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-node/ │ │ ├── Cargo.toml │ │ ├── index.d.ts │ │ ├── index.js │ │ ├── npm/ │ │ │ ├── darwin-arm64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── darwin-x64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm-gnueabihf/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── win32-arm64-msvc/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ └── win32-x64-msvc/ │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── package.json │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-php/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-py/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── 
html-to-markdown-rs-ffi/ │ │ └── README.md │ ├── html-to-markdown-rs-wasm/ │ │ └── README.md │ └── html-to-markdown-wasm/ │ ├── Cargo.toml │ ├── package.json │ ├── scripts/ │ │ ├── cleanup-gitignore.js │ │ └── patch-bundler-entry.js │ └── src/ │ └── lib.rs ├── deny.toml ├── docs/ │ ├── CNAME │ ├── api-reference.md │ ├── architecture.md │ ├── cli.md │ ├── configuration.md │ ├── contributing.md │ ├── css/ │ │ └── extra.css │ ├── demo/ │ │ ├── html_to_markdown_wasm.js │ │ ├── html_to_markdown_wasm_bg.wasm │ │ ├── index.html │ │ ├── script.js │ │ └── style.css │ ├── errors.md │ ├── index.md │ ├── installation.md │ ├── language-guides.md │ ├── llms.txt │ ├── migration.md │ ├── overrides/ │ │ └── main.html │ ├── reference/ │ │ ├── api-c.md │ │ ├── api-csharp.md │ │ ├── api-elixir.md │ │ ├── api-go.md │ │ ├── api-java.md │ │ ├── api-php.md │ │ ├── api-python.md │ │ ├── api-r.md │ │ ├── api-ruby.md │ │ ├── api-rust.md │ │ ├── api-typescript.md │ │ ├── api-wasm.md │ │ ├── configuration.md │ │ ├── errors.md │ │ └── types.md │ ├── snippets/ │ │ ├── c/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── csharp/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── elixir/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── feedback.md │ │ ├── go/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── 
table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── java/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── php/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── python/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── r/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── ruby/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── rust/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── typescript/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ └── wasm/ │ │ ├── getting-started/ │ │ │ ├── basic_usage.md │ │ │ └── with_options.md │ │ ├── metadata/ │ │ │ └── 
basic_extraction.md │ │ ├── table-extraction/ │ │ │ └── basic_extraction.md │ │ └── visitor/ │ │ └── basic_visitor.md │ ├── tables.md │ ├── usage.md │ └── visitor.md ├── e2e/ │ ├── c/ │ │ ├── Makefile │ │ ├── download_ffi.sh │ │ ├── main.c │ │ ├── test_conversion.c │ │ ├── test_edge_cases.c │ │ ├── test_metadata.c │ │ ├── test_options.c │ │ ├── test_real_world.c │ │ ├── test_result.c │ │ ├── test_runner.h │ │ ├── test_smoke.c │ │ └── test_structure.c │ ├── csharp/ │ │ ├── HtmlToMarkdown.E2eTests.csproj │ │ └── tests/ │ │ ├── ConversionTests.cs │ │ ├── EdgeCasesTests.cs │ │ ├── MetadataTests.cs │ │ ├── OptionsTests.cs │ │ ├── RealWorldTests.cs │ │ ├── ResultTests.cs │ │ ├── SmokeTests.cs │ │ ├── StructureTests.cs │ │ └── VisitorTests.cs │ ├── dart/ │ │ └── pubspec.yaml │ ├── elixir/ │ │ ├── mix.exs │ │ └── test/ │ │ ├── conversion_test.exs │ │ ├── edge_cases_test.exs │ │ ├── metadata_test.exs │ │ ├── options_test.exs │ │ ├── real_world_test.exs │ │ ├── result_test.exs │ │ ├── smoke_test.exs │ │ ├── structure_test.exs │ │ ├── test_helper.exs │ │ └── visitor_test.exs │ ├── gleam/ │ │ └── gleam.toml │ ├── go/ │ │ ├── conversion_test.go │ │ ├── edge_cases_test.go │ │ ├── go.mod │ │ ├── go.sum │ │ ├── metadata_test.go │ │ ├── options_test.go │ │ ├── real_world_test.go │ │ ├── result_test.go │ │ ├── smoke_test.go │ │ ├── structure_test.go │ │ └── visitor_test.go │ ├── java/ │ │ ├── pom.xml │ │ └── src/ │ │ └── test/ │ │ └── java/ │ │ └── dev/ │ │ └── kreuzberg/ │ │ └── htmltomarkdown/ │ │ └── e2e/ │ │ ├── ConversionTest.java │ │ ├── EdgeCasesTest.java │ │ ├── MetadataTest.java │ │ ├── OptionsTest.java │ │ ├── RealWorldTest.java │ │ ├── ResultTest.java │ │ ├── SmokeTest.java │ │ ├── StructureTest.java │ │ └── VisitorTest.java │ ├── kotlin/ │ │ └── build.gradle.kts │ ├── node/ │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ ├── edge_cases.test.ts │ │ │ ├── metadata.test.ts │ │ │ ├── options.test.ts │ │ │ ├── real_world.test.ts │ │ │ ├── result.test.ts 
│ │ │ ├── smoke.test.ts │ │ │ ├── structure.test.ts │ │ │ └── visitor.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ ├── php/ │ │ ├── bootstrap.php │ │ ├── composer.json │ │ ├── phpunit.xml │ │ └── tests/ │ │ ├── ConversionTest.php │ │ ├── EdgeCasesTest.php │ │ ├── MetadataTest.php │ │ ├── OptionsTest.php │ │ ├── RealWorldTest.php │ │ ├── ResultTest.php │ │ ├── SmokeTest.php │ │ ├── StructureTest.php │ │ └── VisitorTest.php │ ├── python/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── pyproject.toml │ │ └── tests/ │ │ ├── __init__.py │ │ ├── test_conversion.py │ │ ├── test_edge_cases.py │ │ ├── test_metadata.py │ │ ├── test_options.py │ │ ├── test_real_world.py │ │ ├── test_result.py │ │ ├── test_smoke.py │ │ ├── test_structure.py │ │ └── test_visitor.py │ ├── r/ │ │ ├── DESCRIPTION │ │ ├── run_tests.R │ │ └── tests/ │ │ ├── test_conversion.R │ │ ├── test_edge_cases.R │ │ ├── test_metadata.R │ │ ├── test_options.R │ │ ├── test_real_world.R │ │ ├── test_result.R │ │ ├── test_smoke.R │ │ ├── test_structure.R │ │ └── test_visitor.R │ ├── ruby/ │ │ ├── .rubocop.yaml │ │ ├── Gemfile │ │ └── spec/ │ │ ├── conversion_spec.rb │ │ ├── edge_cases_spec.rb │ │ ├── metadata_spec.rb │ │ ├── options_spec.rb │ │ ├── real_world_spec.rb │ │ ├── result_spec.rb │ │ ├── smoke_spec.rb │ │ ├── structure_spec.rb │ │ └── visitor_spec.rb │ ├── rust/ │ │ ├── Cargo.toml │ │ └── tests/ │ │ ├── conversion_test.rs │ │ ├── edge_cases_test.rs │ │ ├── metadata_test.rs │ │ ├── options_test.rs │ │ ├── real_world_test.rs │ │ ├── result_test.rs │ │ ├── smoke_test.rs │ │ ├── structure_test.rs │ │ └── visitor_test.rs │ ├── swift/ │ │ └── Package.swift │ ├── wasm/ │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ ├── edge_cases.test.ts │ │ │ ├── metadata.test.ts │ │ │ ├── options.test.ts │ │ │ ├── real_world.test.ts │ │ │ ├── result.test.ts │ │ │ ├── smoke.test.ts │ │ │ ├── structure.test.ts │ │ │ └── visitor.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ └── zig/ 
│ ├── build.zig │ └── build.zig.zon ├── fixtures/ │ ├── conversion/ │ │ ├── blockquotes.json │ │ ├── code.json │ │ ├── emphasis.json │ │ ├── forms.json │ │ ├── headings.json │ │ ├── images.json │ │ ├── line_breaks.json │ │ ├── links.json │ │ ├── lists.json │ │ ├── paragraphs.json │ │ ├── semantic.json │ │ └── tables.json │ ├── edge-cases/ │ │ ├── empty.json │ │ ├── encoding.json │ │ ├── malformed.json │ │ ├── visitor_errors.json │ │ └── xss.json │ ├── metadata/ │ │ ├── basic.json │ │ ├── document_properties.json │ │ ├── links_and_images.json │ │ ├── open_graph.json │ │ └── structured_data.json │ ├── options/ │ │ ├── br_in_tables.json │ │ ├── code_block_style.json │ │ ├── code_options.json │ │ ├── escape_ascii.json │ │ ├── escaping.json │ │ ├── exclude_selectors.json │ │ ├── heading_style.json │ │ ├── highlight_style.json │ │ ├── inline_and_newlines.json │ │ ├── list_options.json │ │ ├── max_depth.json │ │ ├── newline_style.json │ │ ├── output_format.json │ │ ├── preprocessing.json │ │ ├── remaining_options.json │ │ ├── strong_em_symbol.json │ │ ├── sub_sup_symbols.json │ │ ├── tag_control.json │ │ ├── whitespace_mode.json │ │ └── wrapping.json │ ├── real-world/ │ │ └── articles.json │ ├── result/ │ │ ├── tables.json │ │ └── warnings.json │ ├── smoke/ │ │ └── basic.json │ ├── structure/ │ │ ├── basic.json │ │ └── nesting.json │ └── visitor/ │ ├── advanced_elements.json │ ├── basic.json │ ├── elements.json │ ├── formatting.json │ ├── forms_and_semantics.json │ ├── headings.json │ ├── images.json │ ├── links.json │ └── media.json ├── just ├── package.json ├── packages/ │ ├── csharp/ │ │ ├── .editorconfig │ │ ├── Directory.Build.props │ │ ├── HtmlToMarkdown/ │ │ │ ├── AnnotationKind.cs │ │ │ ├── CodeBlockStyle.cs │ │ │ ├── ConfigErrorException.cs │ │ │ ├── ConversionErrorException.cs │ │ │ ├── ConversionOptions.cs │ │ │ ├── ConversionOptionsBuilder.cs │ │ │ ├── ConversionOptionsUpdate.cs │ │ │ ├── ConversionResult.cs │ │ │ ├── DocumentMetadata.cs │ │ │ ├── 
DocumentNode.cs │ │ │ ├── DocumentStructure.cs │ │ │ ├── GridCell.cs │ │ │ ├── HeaderMetadata.cs │ │ │ ├── HeadingStyle.cs │ │ │ ├── HighlightStyle.cs │ │ │ ├── HtmlMetadata.cs │ │ │ ├── HtmlToMarkdown.csproj │ │ │ ├── HtmlToMarkdownRs.cs │ │ │ ├── HtmlToMarkdownRsException.cs │ │ │ ├── IVisitor.cs │ │ │ ├── ImageMetadata.cs │ │ │ ├── ImageType.cs │ │ │ ├── InvalidInputException.cs │ │ │ ├── IoErrorException.cs │ │ │ ├── LinkMetadata.cs │ │ │ ├── LinkStyle.cs │ │ │ ├── LinkType.cs │ │ │ ├── ListIndentType.cs │ │ │ ├── NativeMethods.cs │ │ │ ├── NewlineStyle.cs │ │ │ ├── NodeContent.cs │ │ │ ├── NodeContext.cs │ │ │ ├── NodeType.cs │ │ │ ├── OtherException.cs │ │ │ ├── OutputFormat.cs │ │ │ ├── PanicException.cs │ │ │ ├── ParseErrorException.cs │ │ │ ├── PreprocessingOptions.cs │ │ │ ├── PreprocessingOptionsUpdate.cs │ │ │ ├── PreprocessingPreset.cs │ │ │ ├── ProcessingWarning.cs │ │ │ ├── SanitizationErrorException.cs │ │ │ ├── StructuredData.cs │ │ │ ├── StructuredDataType.cs │ │ │ ├── TableData.cs │ │ │ ├── TableGrid.cs │ │ │ ├── TextAnnotation.cs │ │ │ ├── TextDirection.cs │ │ │ ├── TraitBridges.cs │ │ │ ├── VisitResult.cs │ │ │ ├── VisitorCallbacks.cs │ │ │ ├── VisitorHandle.cs │ │ │ ├── WarningKind.cs │ │ │ └── WhitespaceMode.cs │ │ ├── HtmlToMarkdown.Tests/ │ │ │ └── HtmlToMarkdown.Tests.csproj │ │ ├── HtmlToMarkdown.csproj │ │ └── README.md │ ├── elixir/ │ │ ├── .credo.exs │ │ ├── .formatter.exs │ │ ├── .gitignore │ │ ├── README.md │ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs │ │ ├── config/ │ │ │ └── config.exs │ │ ├── lib/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── annotation_kind.ex │ │ │ │ ├── code_block_style.ex │ │ │ │ ├── conversion_options.ex │ │ │ │ ├── conversion_options_update.ex │ │ │ │ ├── conversion_result.ex │ │ │ │ ├── document_metadata.ex │ │ │ │ ├── document_node.ex │ │ │ │ ├── document_structure.ex │ │ │ │ ├── grid_cell.ex │ │ │ │ ├── header_metadata.ex │ │ │ │ ├── heading_style.ex │ │ │ │ ├── highlight_style.ex │ │ │ │ ├── 
html_metadata.ex │ │ │ │ ├── html_visitor_bridge.ex │ │ │ │ ├── image_metadata.ex │ │ │ │ ├── image_type.ex │ │ │ │ ├── link_metadata.ex │ │ │ │ ├── link_style.ex │ │ │ │ ├── link_type.ex │ │ │ │ ├── list_indent_type.ex │ │ │ │ ├── native.ex │ │ │ │ ├── newline_style.ex │ │ │ │ ├── node_content.ex │ │ │ │ ├── node_context.ex │ │ │ │ ├── node_type.ex │ │ │ │ ├── output_format.ex │ │ │ │ ├── preprocessing_options.ex │ │ │ │ ├── preprocessing_options_update.ex │ │ │ │ ├── preprocessing_preset.ex │ │ │ │ ├── processing_warning.ex │ │ │ │ ├── structured_data.ex │ │ │ │ ├── structured_data_type.ex │ │ │ │ ├── table_data.ex │ │ │ │ ├── table_grid.ex │ │ │ │ ├── text_annotation.ex │ │ │ │ ├── text_direction.ex │ │ │ │ ├── visit_result.ex │ │ │ │ ├── warning_kind.ex │ │ │ │ └── whitespace_mode.ex │ │ │ └── html_to_markdown.ex │ │ ├── mix.exs │ │ ├── native/ │ │ │ └── html_to_markdown_nif/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ └── lib.rs │ │ └── test/ │ │ └── test_helper.exs │ ├── go/ │ │ ├── .golangci.yml │ │ ├── README.md │ │ ├── binding.go │ │ ├── go.mod │ │ └── v3/ │ │ └── README.md │ ├── java/ │ │ ├── README.md │ │ ├── checkstyle-suppressions.xml │ │ ├── checkstyle.properties │ │ ├── checkstyle.xml │ │ ├── eclipse-formatter.xml │ │ ├── pmd-ruleset.xml │ │ ├── pom.xml │ │ ├── pom.xml.versionsBackup │ │ ├── src/ │ │ │ └── main/ │ │ │ ├── java/ │ │ │ │ └── dev/ │ │ │ │ └── kreuzberg/ │ │ │ │ └── htmltomarkdown/ │ │ │ │ ├── AnnotationKind.java │ │ │ │ ├── CodeBlockStyle.java │ │ │ │ ├── ConfigErrorException.java │ │ │ │ ├── ConversionErrorException.java │ │ │ │ ├── ConversionOptions.java │ │ │ │ ├── ConversionOptionsBuilder.java │ │ │ │ ├── ConversionOptionsUpdate.java │ │ │ │ ├── ConversionOptionsUpdateBuilder.java │ │ │ │ ├── ConversionResult.java │ │ │ │ ├── ConversionResultBuilder.java │ │ │ │ ├── DocumentMetadata.java │ │ │ │ ├── DocumentMetadataBuilder.java │ │ │ │ ├── DocumentNode.java │ │ │ │ ├── DocumentStructure.java │ │ │ │ ├── GridCell.java │ │ │ │ ├── 
HeaderMetadata.java │ │ │ │ ├── HeadingStyle.java │ │ │ │ ├── HighlightStyle.java │ │ │ │ ├── HtmlMetadata.java │ │ │ │ ├── HtmlMetadataBuilder.java │ │ │ │ ├── HtmlToMarkdown.java │ │ │ │ ├── HtmlToMarkdownRs.java │ │ │ │ ├── HtmlToMarkdownRsException.java │ │ │ │ ├── HtmlVisitorBridge.java │ │ │ │ ├── IHtmlVisitor.java │ │ │ │ ├── ImageMetadata.java │ │ │ │ ├── ImageType.java │ │ │ │ ├── InvalidInputException.java │ │ │ │ ├── IoErrorException.java │ │ │ │ ├── LinkMetadata.java │ │ │ │ ├── LinkStyle.java │ │ │ │ ├── LinkType.java │ │ │ │ ├── ListIndentType.java │ │ │ │ ├── NativeLib.java │ │ │ │ ├── NewlineStyle.java │ │ │ │ ├── NodeContent.java │ │ │ │ ├── NodeContext.java │ │ │ │ ├── NodeType.java │ │ │ │ ├── OtherException.java │ │ │ │ ├── OutputFormat.java │ │ │ │ ├── PanicException.java │ │ │ │ ├── ParseErrorException.java │ │ │ │ ├── PreprocessingOptions.java │ │ │ │ ├── PreprocessingOptionsBuilder.java │ │ │ │ ├── PreprocessingOptionsUpdate.java │ │ │ │ ├── PreprocessingOptionsUpdateBuilder.java │ │ │ │ ├── PreprocessingPreset.java │ │ │ │ ├── ProcessingWarning.java │ │ │ │ ├── SanitizationErrorException.java │ │ │ │ ├── StructuredData.java │ │ │ │ ├── StructuredDataType.java │ │ │ │ ├── TableData.java │ │ │ │ ├── TableGrid.java │ │ │ │ ├── TableGridBuilder.java │ │ │ │ ├── TestVisitor.java │ │ │ │ ├── TestVisitorAdapter.java │ │ │ │ ├── TextAnnotation.java │ │ │ │ ├── TextDirection.java │ │ │ │ ├── VisitContext.java │ │ │ │ ├── VisitResult.java │ │ │ │ ├── Visitor.java │ │ │ │ ├── VisitorBridge.java │ │ │ │ ├── VisitorHandle.java │ │ │ │ ├── WarningKind.java │ │ │ │ ├── WhitespaceMode.java │ │ │ │ └── package-info.java │ │ │ └── resources/ │ │ │ └── .gitkeep │ │ └── versions-rules.xml │ ├── node/ │ │ ├── .oxfmtrc.json │ │ ├── .oxlintrc.json │ │ ├── biome.json │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── src/ │ │ │ └── index.d.ts │ │ └── tsconfig.json │ ├── php/ │ │ ├── .gitignore │ │ ├── .php-cs-fixer.dist.php │ │ ├── README.md │ │ ├── composer.json │ │ 
├── php-cs-fixer.php │ │ ├── phpstan-baseline.neon │ │ ├── phpstan-test.neon │ │ ├── phpstan.neon │ │ ├── phpunit.xml │ │ ├── src/ │ │ │ ├── HtmlToMarkdown.php │ │ │ └── functions.php │ │ ├── stubs/ │ │ │ └── html_to_markdown_extension.php │ │ └── tests/ │ │ └── .gitkeep │ ├── python/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── html_to_markdown/ │ │ │ ├── __init__.py │ │ │ ├── _html_to_markdown.pyi │ │ │ ├── api.py │ │ │ ├── exceptions.py │ │ │ ├── options.py │ │ │ └── py.typed │ │ ├── pyproject.toml │ │ └── tests/ │ │ └── commonmark_spec.json │ ├── r/ │ │ ├── .Rbuildignore │ │ ├── .gitignore │ │ ├── .lintr │ │ ├── DESCRIPTION │ │ ├── LICENSE │ │ ├── NAMESPACE │ │ ├── R/ │ │ │ ├── extendr-wrappers.R │ │ │ ├── htmltomarkdown-package.R │ │ │ ├── htmltomarkdown.R │ │ │ ├── options.R │ │ │ └── version.R │ │ ├── README.md │ │ ├── cleanup │ │ ├── cleanup.win │ │ ├── configure │ │ ├── configure.win │ │ ├── inst/ │ │ │ └── AUTHORS │ │ ├── man/ │ │ │ ├── conversion_options.Rd │ │ │ ├── convert.Rd │ │ │ ├── htmltomarkdown-package.Rd │ │ │ └── version.Rd │ │ ├── src/ │ │ │ ├── Makevars.in │ │ │ ├── Makevars.win.in │ │ │ ├── entrypoint.c │ │ │ └── rust/ │ │ │ ├── Cargo.toml │ │ │ ├── src/ │ │ │ │ ├── lib.rs │ │ │ │ ├── options.rs │ │ │ │ └── types.rs │ │ │ └── vendor-config.toml │ │ ├── tests/ │ │ │ └── testthat.R │ │ └── tools/ │ │ ├── config.R │ │ └── msrv.R │ ├── ruby/ │ │ ├── .gitignore │ │ ├── .rubocop.yml │ │ ├── Gemfile │ │ ├── README.md │ │ ├── Rakefile │ │ ├── Steepfile │ │ ├── exe/ │ │ │ └── html-to-markdown │ │ ├── ext/ │ │ │ └── html_to_markdown_rb/ │ │ │ ├── Cargo.toml │ │ │ ├── Makefile │ │ │ ├── extconf.rb │ │ │ ├── native/ │ │ │ │ └── Cargo.toml │ │ │ └── src/ │ │ │ ├── html-to-markdown/ │ │ │ │ └── version.rb │ │ │ ├── html-to-markdown.rb │ │ │ └── lib.rs │ │ ├── html_to_markdown.gemspec │ │ ├── lib/ │ │ │ ├── html_to_markdown/ │ │ │ │ └── version.rb │ │ │ └── html_to_markdown.rb │ │ ├── sig/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── cli.rbs │ │ │ │ └── 
cli_proxy.rbs │ │ │ ├── open3.rbs │ │ │ └── types.rbs │ │ └── spec/ │ │ ├── html_to_markdown_spec.rb │ │ └── spec_helper.rb │ ├── typescript/ │ │ ├── .npmignore │ │ ├── README.md │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── src/ │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ └── wasm/ │ └── src/ │ ├── helpers.ts │ └── index.ts ├── pnpm-workspace.yaml ├── pyproject.toml ├── readme_templates/ │ ├── language_package.md │ └── partials/ │ ├── _api_reference.md │ ├── _badges.md │ ├── _djot_output.md │ ├── _footer.md │ ├── _installation.md │ ├── _metadata_extraction.md │ ├── _plain_text_output.md │ ├── _quick_start.md │ └── _visitor_pattern.md ├── rust-toolchain.toml ├── rustfmt.toml ├── scripts/ │ ├── build-demo.sh │ ├── ci/ │ │ ├── elixir/ │ │ │ ├── install-deps.sh │ │ │ ├── install-hex-rebar.sh │ │ │ ├── run-credo.sh │ │ │ └── run-tests.sh │ │ ├── go/ │ │ │ ├── detect-go-modules.sh │ │ │ ├── install-golangci-lint.sh │ │ │ └── run-golangci-lint.sh │ │ ├── node/ │ │ │ ├── test-napi-cargo.sh │ │ │ ├── test-napi.sh │ │ │ └── test-typescript.sh │ │ ├── php/ │ │ │ ├── run-php-tests.sh │ │ │ ├── run-phpstan.sh │ │ │ └── set-php-config.sh │ │ ├── python/ │ │ │ ├── build-cli.sh │ │ │ └── run-pytest.sh │ │ ├── r/ │ │ │ ├── install-deps.sh │ │ │ ├── run-lintr.sh │ │ │ ├── run-tests.sh │ │ │ └── vendor-core-crate.py │ │ ├── ruby/ │ │ │ ├── run-rbs-validate.sh │ │ │ ├── run-rspec-unix.sh │ │ │ ├── run-rspec-windows.ps1 │ │ │ ├── run-rubocop.sh │ │ │ ├── run-steep.sh │ │ │ └── vendor-core-crate.py │ │ ├── rust/ │ │ │ ├── check-fmt.sh │ │ │ ├── install-cargo-llvm-cov.sh │ │ │ ├── run-clippy.sh │ │ │ ├── run-llvm-cov.sh │ │ │ └── run-tests.sh │ │ ├── smoke/ │ │ │ ├── capture-php-config.sh │ │ │ └── install-pnpm-deps.sh │ │ ├── validate/ │ │ │ ├── install-elixir-deps.sh │ │ │ ├── install-ruby-deps.sh │ │ │ ├── run-prek.sh │ │ │ └── run-rust-checks.sh │ │ └── wasm/ │ │ ├── run-wasmtime-tests.sh │ │ ├── test-wasm-bundle.sh │ │ └── test-wasm-rust.sh │ ├── common/ │ │ 
├── enable-corepack.sh │ │ ├── ensure-wasm-target.sh │ │ ├── install-maven-latest.sh │ │ └── install-wasm-pack.sh │ ├── generate_visitor_callbacks.py │ ├── preferred-ruby.sh │ ├── preferred-rustc.sh │ ├── prepare_ruby_gem.rb │ ├── prepare_wheel.py │ ├── publish/ │ │ ├── cli/ │ │ │ ├── build-cli.sh │ │ │ ├── configure-cross-linker.sh │ │ │ ├── install-build-deps-linux.sh │ │ │ ├── install-cross.sh │ │ │ ├── package-cli-artifact.ps1 │ │ │ └── package-cli-artifact.sh │ │ ├── common/ │ │ │ ├── add-rust-target.sh │ │ │ └── ensure-target-commit.sh │ │ ├── crates/ │ │ │ ├── package-crates.sh │ │ │ ├── publish-cli.sh │ │ │ ├── publish-rs.sh │ │ │ ├── verify-cargo-version.sh │ │ │ └── wait-for-indexing.sh │ │ ├── csharp/ │ │ │ ├── pack.sh │ │ │ ├── restore.sh │ │ │ └── stage-ffi.sh │ │ ├── elixir/ │ │ │ ├── build-hex-package.sh │ │ │ ├── install-deps.sh │ │ │ ├── install-hex-rebar.sh │ │ │ ├── run-tests.sh │ │ │ ├── stage-rust-core.sh │ │ │ └── vendor-dependencies.sh │ │ ├── ensure-github-release-exists.sh │ │ ├── generate_elixir_checksums.sh │ │ ├── go/ │ │ │ └── create-module-tag.sh │ │ ├── java/ │ │ │ └── copy-native-libs.sh │ │ ├── maven/ │ │ │ ├── patch-legacy-gpg-args.sh │ │ │ └── prefer-gpg2.sh │ │ ├── node/ │ │ │ ├── build-native-module.ps1 │ │ │ ├── build-native-module.sh │ │ │ ├── clean-npm-dir.ps1 │ │ │ ├── clean-npm-dir.sh │ │ │ ├── create-npm-package-structure.sh │ │ │ ├── generate-typescript-defs.sh │ │ │ ├── install-node-deps.sh │ │ │ ├── pack-platform-packages.sh │ │ │ ├── package-artifacts.ps1 │ │ │ ├── package-artifacts.sh │ │ │ ├── prepare-artifact-directory.sh │ │ │ └── prepublish-main-package.sh │ │ ├── python/ │ │ │ ├── build-cli-for-sdist.sh │ │ │ ├── build-sdist.sh │ │ │ ├── install-build-deps.sh │ │ │ └── prepare-sdist-with-cli.sh │ │ ├── r/ │ │ │ ├── already-published-summary.sh │ │ │ ├── build-cran-package.sh │ │ │ ├── run-tests.sh │ │ │ ├── stage-rust-core.sh │ │ │ └── vendor-dependencies.sh │ │ ├── ruby/ │ │ │ ├── already-published-summary.sh │ 
│ │ ├── build-gem-unix.sh │ │ │ ├── build-gem-windows.ps1 │ │ │ ├── build-native-gem.rb │ │ │ ├── configure-bindgen-windows.sh │ │ │ ├── install-deps-unix.sh │ │ │ ├── install-deps-windows.ps1 │ │ │ ├── install-msys2-toolchain.ps1 │ │ │ ├── install-rust-gnu.ps1 │ │ │ └── remove-cached-cli.sh │ │ ├── typescript/ │ │ │ └── build-package.sh │ │ ├── upload-c-ffi-artifacts.sh │ │ ├── upload-cli-artifacts.sh │ │ ├── upload-elixir-package.sh │ │ ├── upload-go-ffi-artifacts.sh │ │ ├── upload-homebrew-bottles.sh │ │ ├── upload-php-pie.sh │ │ ├── validate-and-compute-metadata.sh │ │ └── wasm/ │ │ ├── build-bundles.sh │ │ ├── extract-artifacts.sh │ │ ├── install-deps.sh │ │ └── package-artifacts.sh │ ├── readme_config.yaml │ ├── readme_templates/ │ │ ├── language_package.md.jinja │ │ └── partials/ │ │ ├── _api_reference.md.jinja │ │ ├── _badges.md.jinja │ │ ├── _djot_output.md.jinja │ │ ├── _footer.md.jinja │ │ ├── _installation.md.jinja │ │ ├── _metadata_extraction.md.jinja │ │ ├── _plain_text_output.md.jinja │ │ ├── _quick_start.md.jinja │ │ └── _visitor_pattern.md.jinja │ └── update_dotnet_packages.py ├── skills/ │ └── html-to-markdown/ │ ├── SKILL.md │ └── references/ │ ├── cli-reference.md │ ├── configuration.md │ ├── other-bindings.md │ ├── python-api.md │ ├── rust-api.md │ └── typescript-api.md ├── test_apps/ │ ├── README.md │ ├── bun/ │ │ ├── README.md │ │ ├── package.json │ │ └── smoke.test.ts │ ├── c/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── download_ffi.sh │ │ ├── htm_test │ │ ├── main.c │ │ ├── run_tests │ │ ├── test_conversion.c │ │ ├── test_runner.h │ │ └── test_smoke.c │ ├── csharp/ │ │ ├── E2eTests.csproj │ │ ├── KreuzbergDev.HtmlToMarkdown.E2eTests.csproj │ │ ├── README.md │ │ └── tests/ │ │ ├── ConversionTests.cs │ │ └── SmokeTests.cs │ ├── elixir/ │ │ ├── README.md │ │ ├── deps/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── .formatter.exs │ │ │ │ ├── .hex │ │ │ │ ├── README.md │ │ │ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs │ │ │ │ ├── 
hex_metadata.config │ │ │ │ └── mix.exs │ │ │ ├── jason/ │ │ │ │ ├── .hex │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── hex_metadata.config │ │ │ │ └── mix.exs │ │ │ ├── rustler/ │ │ │ │ ├── .hex │ │ │ │ ├── README.md │ │ │ │ ├── hex_metadata.config │ │ │ │ ├── mix.exs │ │ │ │ └── priv/ │ │ │ │ └── templates/ │ │ │ │ ├── basic/ │ │ │ │ │ ├── Cargo.toml.eex │ │ │ │ │ ├── README.md │ │ │ │ │ └── src/ │ │ │ │ │ └── lib.rs │ │ │ │ └── root/ │ │ │ │ └── Cargo.toml.eex │ │ │ ├── rustler_precompiled/ │ │ │ │ ├── .hex │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── PRECOMPILATION_GUIDE.md │ │ │ │ ├── README.md │ │ │ │ ├── TROUBLESHOOTING.md │ │ │ │ ├── hex_metadata.config │ │ │ │ └── mix.exs │ │ │ └── toml/ │ │ │ ├── .hex │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── hex_metadata.config │ │ │ └── mix.exs │ │ ├── mix.exs │ │ └── test/ │ │ ├── conversion_test.exs │ │ ├── smoke_test.exs │ │ └── test_helper.exs │ ├── fixtures/ │ │ ├── README.md │ │ ├── basic-html.json │ │ ├── complex-html.json │ │ ├── edge-cases.json │ │ ├── metadata-extraction.json │ │ └── real-world.json │ ├── go/ │ │ ├── README.md │ │ ├── conversion_test.go │ │ ├── go.mod │ │ ├── go.sum │ │ ├── run_tests.sh │ │ └── smoke_test.go │ ├── java/ │ │ ├── .mvn/ │ │ │ └── wrapper/ │ │ │ └── maven-wrapper.properties │ │ ├── README.md │ │ ├── mvnw │ │ ├── mvnw.cmd │ │ ├── pom.xml │ │ └── src/ │ │ └── test/ │ │ └── java/ │ │ └── dev/ │ │ └── kreuzberg/ │ │ ├── e2e/ │ │ │ ├── ConversionTest.java │ │ │ └── SmokeTest.java │ │ └── htmltomarkdown/ │ │ └── e2e/ │ │ ├── ConversionTest.java │ │ └── SmokeTest.java │ ├── node/ │ │ ├── .nvmrc │ │ ├── README.md │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ └── smoke.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ ├── php/ │ │ ├── README.md │ │ ├── bootstrap.php │ │ ├── composer.json │ │ ├── phpstan.neon │ │ ├── phpunit.xml │ │ └── tests/ │ │ ├── ConversionTest.php │ │ └── SmokeTest.php │ ├── php-ext/ │ │ ├── README.md │ │ 
├── main.php │ │ └── run_tests.sh │ ├── python/ │ │ ├── .python-version │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── pyproject.toml │ │ └── tests/ │ │ ├── __init__.py │ │ ├── test_conversion.py │ │ └── test_smoke.py │ ├── r/ │ │ ├── DESCRIPTION │ │ ├── run_tests.R │ │ └── tests/ │ │ ├── test_conversion.R │ │ └── test_smoke.R │ ├── ruby/ │ │ ├── .bundle/ │ │ │ └── config │ │ ├── .rubocop.yaml │ │ ├── .ruby-version │ │ ├── Gemfile │ │ ├── README.md │ │ └── spec/ │ │ ├── conversion_spec.rb │ │ └── smoke_spec.rb │ ├── rust/ │ │ ├── Cargo.toml │ │ └── tests/ │ │ ├── conversion_test.rs │ │ └── smoke_test.rs │ └── wasm/ │ ├── .nvmrc │ ├── README.md │ ├── globalSetup.ts │ ├── package.json │ ├── tests/ │ │ ├── conversion.test.ts │ │ └── smoke.test.ts │ ├── tsconfig.json │ └── vitest.config.ts ├── test_documents/ │ └── html/ │ ├── issues/ │ │ ├── gh-121-hacker-news.html │ │ ├── gh-121-hacker-news.md │ │ ├── gh-121-minimal-failing.html │ │ ├── gh-121-spa-app.html │ │ ├── gh-121-spa-app.md │ │ ├── gh-127-issue.html │ │ ├── gh-134-pre-code.html │ │ ├── gh-134-pre-code.md │ │ ├── gh-140-table-cell-pipe-with-escape-misc.md │ │ ├── gh-140-table-cell-pipe.html │ │ ├── gh-140-table-cell-pipe.md │ │ ├── gh-143-links-wordwrap.html │ │ ├── gh-143-links-wordwrap.md │ │ ├── gh-190/ │ │ │ ├── firsteigen.html │ │ │ ├── flex2021.html │ │ │ ├── flex2025.html │ │ │ ├── insight.html │ │ │ ├── kimbrain.html │ │ │ ├── maxkim.html │ │ │ ├── mitrade.html │ │ │ ├── ozonekorea.html │ │ │ ├── plusblog.html │ │ │ ├── rbloggers.html │ │ │ ├── sjsu.html │ │ │ └── vipaarontours.html │ │ ├── test-nested-simple.html │ │ ├── test-nested-simple.md │ │ └── test-with-custom-elements.html │ ├── visitor/ │ │ ├── baseline.html │ │ ├── callbacks.html │ │ ├── complex.html │ │ └── custom.html │ └── wikipedia/ │ ├── large_rust.html │ ├── lists_timeline.html │ ├── medium_python.html │ ├── small_html.html │ └── tables_countries.html ├── tsconfig.base.json └── zensical.toml 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .ai-rulez/config.toml ================================================ # AI-Rulez Configuration (migrated to V4 TOML format) # Documentation: https://github.com/Goldziher/ai-rulez version = '4.0' name = 'html-to-markdown' description = 'High-performance HTML to Markdown converter with Rust core and polyglot bindings (Python, TypeScript, Ruby, PHP, Go, Java, C#, Elixir, R, WebAssembly, C FFI).' gitignore = true presets = ['claude', 'copilot', 'cursor', 'antigravity', 'codex'] builtins = ['rust', 'python', 'typescript', 'go', 'java', 'ruby', 'php', 'csharp', 'elixir', 'r', 'default-commands'] [header] style = 'minimal' [[includes]] name = 'kreuzberg-core' source = 'https://github.com/kreuzberg-dev/ai-rulez.git' path = 'modules/core' merge_strategy = 'local-override' [[includes]] name = 'kreuzberg-languages' source = 'https://github.com/kreuzberg-dev/ai-rulez.git' path = 'modules/languages' merge_strategy = 'local-override' [[includes]] name = 'kreuzberg-cicd' source = 'https://github.com/kreuzberg-dev/ai-rulez.git' path = 'modules/cicd' merge_strategy = 'local-override' [[includes]] name = 'kreuzberg-infrastructure' source = 'https://github.com/kreuzberg-dev/ai-rulez.git' path = 'modules/infrastructure' merge_strategy = 'local-override' [[includes]] name = 'kreuzberg-e2e-generator' source = 'https://github.com/kreuzberg-dev/ai-rulez.git' path = 'modules/e2e-generator' merge_strategy = 'local-override' [[installed_skills]] name = 'alef' source = 'https://github.com/kreuzberg-dev/alef.git' [[mcp_servers]] name = 'playwright' description = 'Playwright browser automation for E2E testing and docs verification' command = 'npx' args = ['-y', '@playwright/mcp@latest'] [defaults] effort = 'medium' ================================================ FILE: .ai-rulez/context/crate-structure.md 
================================================ --- priority: high --- # Crate & Package Structure ## Workspace crates (`crates/`) - `html-to-markdown` — core library, primary Rust API, `unsafe_code = "forbid"` at workspace level - `html-to-markdown-cli` — CLI binary (clap) - `html-to-markdown-ffi` — C FFI bridge, cbindgen headers, **only crate that overrides unsafe_code lint** - `html-to-markdown-py` — PyO3 Python binding - `html-to-markdown-node` — NAPI-RS Node/TypeScript binding - `html-to-markdown-php` — ext-php-rs PHP binding - `html-to-markdown-wasm` — wasm-bindgen WebAssembly binding ## Out-of-workspace packages (`packages/`) - `csharp/`, `elixir/`, `go/`, `java/`, `r/`, `ruby/` — language-native packages wrapping the FFI crate - `php/`, `python/`, `typescript/`, `wasm/` — distribution packages ## Primary API - `convert(&str, Option) -> Result` - `ConversionResult`: `content`, `warnings`, optionally `metadata` and `inline_images` (feature-gated) - Feature flags: `inline-images`, `metadata`, `visitor` (custom traversal), `serde` - Dual parser: html5ever (spec-compliance) and astral-tl (performance), selectable via `ConversionOptions` ================================================ FILE: .ai-rulez/domains/conversion-algorithms/DOMAIN.md ================================================ # Conversion Algorithms Domain ## Purpose Core HTML-to-Markdown transformation logic. Converts parsed DOM trees into well-formatted Markdown output for 60+ HTML element types. 
## Key Areas - **Block elements**: headings, paragraphs, blockquotes, lists, tables, code blocks, horizontal rules, semantic HTML5 elements - **Inline elements**: bold, italic, strikethrough, inline code, links, images, abbreviations - **Tables**: GFM pipe tables with alignment, colspan/rowspan handling, complex table fallbacks - **Lists**: ordered, unordered, nested, task lists, definition lists, tight vs loose detection - **Forms & media**: input fields, textareas, selects, audio, video, iframes, embeds - **Special elements**: line breaks, comments, SVG text extraction, ruby annotations ## Architecture Visitor pattern in `visitor.rs` dispatches to per-element converter functions. Conversion behavior is controlled by `ConversionOptions` (heading style, list indent, code block style, newline style, table format). ## Dependencies - Upstream: HTML Parsing domain (DOM tree), Safety-Sanitization domain (attribute validation) - Downstream: Output formatting, metadata extraction ================================================ FILE: .ai-rulez/domains/html-parsing/DOMAIN.md ================================================ # HTML Parsing Domain ## Purpose Foundation of the conversion pipeline: HTML parser selection, DOM tree construction, and tree traversal infrastructure. ## Key Areas - **Parser backends**: html5ever (HTML5 spec compliance, malformed HTML recovery) and tl/astral-tl (lightweight, fast) - **DOM traversal**: depth-first tree walking via visitor pattern, parent/child/sibling navigation - **Node types**: element nodes (60+ tags), text nodes, comment nodes, document/fragment nodes - **Text extraction**: text content from subtrees, configurable whitespace handling (preserve, minimal, collapse) - **Attribute access**: by name, iteration, class checking, case-insensitive per HTML spec - **Safety constraints**: depth limits, size limits, binary data rejection, encoding detection ## Architecture Parser infrastructure in `converter.rs` and `wrapper.rs`. 
DOM traversal via `DomWalker` trait in `visitor.rs`. Element classification into Block, Inline, Void, FormControl, Semantic categories. Configuration through `ConversionOptions` (parser type, encoding, whitespace mode, max depth, max size). ## Dependencies - Upstream: html5ever, astral-tl, encoding_rs - Downstream: Conversion Algorithms domain, Safety-Sanitization domain ================================================ FILE: .ai-rulez/domains/safety-sanitization/DOMAIN.md ================================================ # Safety & Sanitization Domain ## Purpose Protects the conversion pipeline from malicious or malformed input. Ensures converted Markdown output cannot be exploited for XSS, code injection, or data exfiltration. ## Key Areas - **Input validation**: binary data detection (magic numbers, null byte ratios, control char ratios), encoding detection, size/depth limits - **XSS prevention**: dangerous element removal (script, style, iframe, object, embed), event handler stripping, javascript:/data:/vbscript: URL blocking - **URL sanitization**: scheme whitelist (http, https, mailto, ftp), protocol normalization, URL-encoded payload detection, case-insensitive scheme matching - **Attribute filtering**: event handler removal, safe attribute whitelist (id, class, title, alt, href, src), style sanitization - **SVG handling**: script/style removal within SVG, event handler stripping, xlink:href validation, text extraction fallback - **Runtime safety**: stack overflow prevention (max nesting depth), memory bounds enforcement, ReDoS prevention ## Architecture Multi-layer defense: validate_input() -> sanitize -> parse -> convert with URL/attribute sanitization at each element. Configuration via `SafetyConfig` (max document size, max nesting depth, allowed tags/attributes/schemes, strip options). 
## Dependencies - Upstream: url, encoding_rs - Downstream: HTML Parsing domain (operates on validated input), Conversion Algorithms domain (safe elements only) ================================================ FILE: .ai-rulez/rules/alef-generated-bindings.md ================================================ --- priority: critical --- - Files in `packages/*/` and binding crates are generated or managed by Alef — check `alef.toml` before editing - `alef.toml` defines: output paths, module names, rename mappings, e2e call overrides, README templates - Run `alef generate` after changing `alef.toml` — commit both source and generated files - Never hand-edit generated files; modify `alef.toml` or the Rust source instead - Fixtures under `fixtures/` feed `tools/e2e-generator/` — never add tests to `e2e/` directly ================================================ FILE: .cargo/config.toml ================================================ [build] incremental = true [target.wasm32-unknown-unknown] rustflags = ["-C", "target-feature=+bulk-memory", "--cfg", "getrandom_backend=\"wasm_js\""] [net] git-fetch-with-cli = true [registries.crates-io] protocol = "sparse" [target.'cfg(target_os = "macos")'] rustflags = ["-C", "link-arg=-Wl,-undefined,dynamic_lookup"] [target.x86_64-pc-windows-msvc] linker = "rust-lld" [target.i686-pc-windows-msvc] linker = "rust-lld" [target.x86_64-unknown-linux-musl] linker = "musl-gcc" [target.aarch64-unknown-linux-gnu] linker = "aarch64-linux-gnu-gcc" [env] RUBY = { value = "scripts/preferred-ruby.sh", relative = true } ================================================ FILE: .clang-format ================================================ --- BasedOnStyle: LLVM IndentWidth: 4 ColumnLimit: 100 BreakBeforeBraces: Attach AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false SortIncludes: true ================================================ FILE: .editorconfig ================================================ # EditorConfig is 
awesome: https://EditorConfig.org # top-most EditorConfig file root = true # All files [*] charset = utf-8 insert_final_newline = true trim_trailing_whitespace = true end_of_line = lf # Code files [*.{cs,go,rs,py,js,ts,tsx,jsx,php,rb}] indent_style = space # C# files [*.cs] indent_size = 4 # Organize usings dotnet_sort_system_directives_first = true dotnet_separate_import_directive_groups = false # this. and Me. preferences dotnet_style_qualification_for_field = false:warning dotnet_style_qualification_for_property = false:warning dotnet_style_qualification_for_method = false:warning dotnet_style_qualification_for_event = false:warning # Language keywords vs BCL types preferences dotnet_style_predefined_type_for_locals_parameters_members = true:warning dotnet_style_predefined_type_for_member_access = true:warning # Parentheses preferences dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:suggestion dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:suggestion dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:suggestion dotnet_style_parentheses_in_other_operators = never_if_unnecessary:suggestion # Modifier preferences dotnet_style_require_accessibility_modifiers = always:warning dotnet_style_readonly_field = true:warning csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion # Expression-level preferences dotnet_style_object_initializer = true:suggestion dotnet_style_collection_initializer = true:suggestion dotnet_style_explicit_tuple_names = true:warning dotnet_style_prefer_inferred_tuple_names = true:suggestion dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion dotnet_style_prefer_auto_properties = true:suggestion dotnet_style_prefer_conditional_expression_over_assignment = true:silent dotnet_style_prefer_conditional_expression_over_return = true:silent 
dotnet_style_prefer_compound_assignment = true:suggestion dotnet_style_prefer_simplified_interpolation = true:suggestion dotnet_style_prefer_simplified_boolean_expressions = true:suggestion # Null-checking preferences dotnet_style_coalesce_expression = true:warning dotnet_style_null_propagation = true:warning dotnet_style_prefer_is_null_check_over_reference_equality_method = true:warning # C# Code Style Rules # var preferences csharp_style_var_for_built_in_types = true:suggestion csharp_style_var_when_type_is_apparent = true:suggestion csharp_style_var_elsewhere = true:suggestion # Expression-bodied members csharp_style_expression_bodied_methods = when_on_single_line:suggestion csharp_style_expression_bodied_constructors = false:silent csharp_style_expression_bodied_operators = when_on_single_line:suggestion csharp_style_expression_bodied_properties = when_on_single_line:suggestion csharp_style_expression_bodied_indexers = when_on_single_line:suggestion csharp_style_expression_bodied_accessors = when_on_single_line:suggestion csharp_style_expression_bodied_lambdas = when_on_single_line:suggestion csharp_style_expression_bodied_local_functions = when_on_single_line:suggestion # Pattern matching preferences csharp_style_pattern_matching_over_is_with_cast_check = true:warning csharp_style_pattern_matching_over_as_with_null_check = true:warning csharp_style_prefer_switch_expression = true:suggestion csharp_style_prefer_pattern_matching = true:suggestion csharp_style_prefer_not_pattern = true:suggestion # Null-checking preferences csharp_style_throw_expression = true:suggestion csharp_style_conditional_delegate_call = true:warning # Code block preferences csharp_prefer_braces = true:warning csharp_prefer_simple_using_statement = true:suggestion # Expression preferences csharp_prefer_simple_default_expression = true:suggestion csharp_style_pattern_local_over_anonymous_function = true:suggestion csharp_style_inlined_variable_declaration = true:suggestion 
csharp_style_deconstructed_variable_declaration = true:suggestion csharp_style_prefer_index_operator = true:suggestion csharp_style_prefer_range_operator = true:suggestion csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion # C# Formatting Rules # New line preferences csharp_new_line_before_open_brace = all csharp_new_line_before_else = true csharp_new_line_before_catch = true csharp_new_line_before_finally = true csharp_new_line_before_members_in_object_initializers = true csharp_new_line_before_members_in_anonymous_types = true csharp_new_line_between_query_expression_clauses = true # Indentation preferences csharp_indent_case_contents = true csharp_indent_switch_labels = true csharp_indent_labels = no_change csharp_indent_block_contents = true csharp_indent_braces = false csharp_indent_case_contents_when_block = false # Space preferences csharp_space_after_cast = false csharp_space_after_keywords_in_control_flow_statements = true csharp_space_between_parentheses = false csharp_space_before_colon_in_inheritance_clause = true csharp_space_after_colon_in_inheritance_clause = true csharp_space_around_binary_operators = before_and_after csharp_space_between_method_declaration_parameter_list_parentheses = false csharp_space_between_method_declaration_empty_parameter_list_parentheses = false csharp_space_between_method_declaration_name_and_open_parenthesis = false csharp_space_between_method_call_parameter_list_parentheses = false csharp_space_between_method_call_empty_parameter_list_parentheses = false csharp_space_between_method_call_name_and_opening_parenthesis = false csharp_space_after_comma = true csharp_space_after_dot = false csharp_space_after_semicolon_in_for_statement = true csharp_space_before_semicolon_in_for_statement = false csharp_space_around_declaration_statements = false csharp_space_before_open_square_brackets = false csharp_space_between_empty_square_brackets = false csharp_space_between_square_brackets = false # Wrap 
preferences csharp_preserve_single_line_statements = false csharp_preserve_single_line_blocks = true # Using directive preferences csharp_using_directive_placement = outside_namespace:warning # Go files [*.go] indent_style = tab indent_size = 4 # Rust files [*.rs] indent_size = 4 # Python files [*.py] indent_size = 4 # JavaScript/TypeScript files [*.{js,ts,tsx,jsx}] indent_size = 2 # Ruby files [*.rb] indent_size = 2 # PHP files [*.php] indent_size = 4 # YAML files [*.{yml,yaml}] indent_size = 2 # Markdown files [*.md] trim_trailing_whitespace = false ================================================ FILE: .github/CODEOWNERS ================================================ # Default owner — everything * @Goldziher # Zensical config and documentation /zensical.toml @Goldziher @pratik-mahalle @v-tan /docs/ @Goldziher @pratik-mahalle @v-tan *.md @Goldziher @pratik-mahalle @v-tan # Rust crates /crates/ @Goldziher @kh3rld ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug Report description: Report a bug or unexpected behavior title: "bug: " labels: ["bug"] projects: ["kreuzberg-dev/1"] body: - type: textarea id: description attributes: label: Description description: What happened? What did you expect to happen? validations: required: true - type: textarea id: steps-to-reproduce attributes: label: Steps to reproduce description: Minimal steps to reproduce the issue. validations: required: true - type: textarea id: reproduction-files attributes: label: Relevant files and configuration description: >- Any configuration files, input files, or code snippets needed to reproduce the issue. 
render: text ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: true ================================================ FILE: .github/ISSUE_TEMPLATE/documentation.yml ================================================ name: Documentation Issue description: Report missing, unclear, or incorrect documentation title: "docs: " labels: ["documentation"] projects: ["kreuzberg-dev/1"] body: - type: textarea id: what attributes: label: What description: What documentation is missing, unclear, or incorrect? validations: required: true - type: textarea id: why attributes: label: Why description: Why does this need to change? validations: required: true ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: Feature Request description: Suggest a new feature or improvement title: "feat: " labels: ["enhancement"] projects: ["kreuzberg-dev/1"] body: - type: textarea id: what attributes: label: What is the proposed feature? validations: required: true - type: textarea id: why attributes: label: Why would this be a good addition? 
validations: required: true ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Related ## Description ## Checklist - [ ] CI passing - [ ] Tests added where applicable ================================================ FILE: .github/actions/build-typescript/action.yml ================================================ name: Build TypeScript package description: Builds TypeScript package (requires Node bindings to be built first) runs: using: composite steps: - name: Build TypeScript package shell: bash working-directory: packages/typescript run: pnpm run build ================================================ FILE: .github/actions/smoke-pie/action.yml ================================================ name: Smoke test PIE install description: Tests PHP extension installation via PIE inputs: pie-artifacts-dir: description: Directory containing PIE source artifacts required: true runs: using: composite steps: - name: Smoke PIE install shell: bash env: COMPOSER_ALLOW_SUPERUSER: 1 run: | set -euo pipefail # Download PIE curl -fsSL https://github.com/php/pie/releases/latest/download/pie.phar -o /tmp/pie.phar # Find the PIE source archive pie_archive=$(find "${{ inputs.pie-artifacts-dir }}" -name "php_html_to_markdown-*.tgz" | head -n 1) if [ -z "$pie_archive" ]; then echo "PIE source archive not found" >&2 exit 1 fi # Extract to temp dir and install via PIE tmp=$(mktemp -d) tar -xzf "$pie_archive" -C "$tmp" # Add as local repository and build php /tmp/pie.phar repository:add path "$tmp" CARGO_BIN=$(command -v cargo) php /tmp/pie.phar build kreuzberg-dev/html-to-markdown:*@dev --working-dir "$tmp" --with-cargo-bin="$CARGO_BIN" # Find the built extension ext_so=$(find "$tmp" -name "*.so" -path "*/html_to_markdown.so" | head -n 1) if [ -z "$ext_so" ]; then echo "Extension .so file not found after PIE build" >&2 exit 1 fi # Test the extension (placeholder for smoke test) # Note: PHP smoke 
example directory was removed # Consider implementing integration tests via packages/php/tests echo "✓ PIE install smoke test passed" ================================================ FILE: .github/dependabot.yaml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" ignore: # Pin artifact actions to v4 until GitHub Actions runners support v6/v7 # v6 and v7 require Actions Runner 2.327.1+ (released Dec 12, 2025) - dependency-name: "actions/upload-artifact" update-types: ["version-update:semver-major"] - dependency-name: "actions/download-artifact" update-types: ["version-update:semver-major"] - package-ecosystem: "cargo" # Explicitly list root only — packages/ruby/ext has a standalone workspace # with path deps to vendored crates that only exist at build time directories: - "/" schedule: interval: "weekly" ignore: - dependency-name: "html-to-markdown-rs" - package-ecosystem: "pip" directories: - "/" - "/packages/python" schedule: interval: "weekly" - package-ecosystem: "npm" directories: - "/" - "/crates/html-to-markdown-node" - "/crates/html-to-markdown-wasm" - "/packages/typescript" schedule: interval: "weekly" - package-ecosystem: "bundler" directory: "/packages/ruby" schedule: interval: "weekly" - package-ecosystem: "composer" directories: - "/" - "/packages/php" schedule: interval: "weekly" - package-ecosystem: "gomod" directory: "/packages/go/v3" schedule: interval: "weekly" - package-ecosystem: "maven" directory: "/packages/java" schedule: interval: "weekly" - package-ecosystem: "nuget" directory: "/packages/csharp" schedule: interval: "weekly" - package-ecosystem: "mix" directory: "/packages/elixir" schedule: interval: "weekly" ================================================ FILE: .github/workflows/ci.yaml ================================================ name: CI on: push: branches: [main] paths: - "crates/**" - "packages/**" - "e2e/**" - "tools/**" - 
"scripts/**" - "fixtures/**" - ".github/**" - ".cargo/config.toml" - ".pre-commit-config.yaml" - ".golangci.yml" - "alef.toml" - "pyproject.toml" - "uv.lock" - "uv.toml" - "pnpm-lock.yaml" - "pnpm-workspace.yaml" - "package.json" - "Cargo.toml" - "Cargo.lock" - "Taskfile.yaml" - ".task/**" - "rustfmt.toml" - "rust-toolchain.toml" - "Gemfile" - "Gemfile.lock" - "composer.json" - "composer.lock" - "go.mod" - "go.sum" pull_request: branches: [main] paths: - "crates/**" - "packages/**" - "e2e/**" - "tools/**" - "scripts/**" - "fixtures/**" - ".github/**" - ".cargo/config.toml" - ".pre-commit-config.yaml" - ".golangci.yml" - "alef.toml" - "pyproject.toml" - "uv.lock" - "uv.toml" - "pnpm-lock.yaml" - "pnpm-workspace.yaml" - "package.json" - "Cargo.toml" - "Cargo.lock" - "Taskfile.yaml" - ".task/**" - "rustfmt.toml" - "rust-toolchain.toml" - "Gemfile" - "Gemfile.lock" - "composer.json" - "composer.lock" - "go.mod" - "go.sum" workflow_dispatch: {} concurrency: group: ci-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true env: CARGO_TERM_COLOR: always CARGO_INCREMENTAL: 0 RUST_BACKTRACE: short BUILD_PROFILE: "ci" GO_VERSION: "1.26.0" GO_TOOLCHAIN: "go1.26.0" GOLANGCI_LINT_VERSION: "latest" permissions: contents: read # --- Stage 1: Validate --- jobs: validate: name: "Validate" runs-on: ubuntu-24.04-arm timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview - name: Setup Python uses: kreuzberg-dev/actions/setup-python-env@v1 # v1 with: python-version: "3.13" - name: Setup Node Workspace uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1 - name: Setup Go uses: actions/setup-go@v6 # v6 with: go-version: ${{ env.GO_VERSION }} cache-dependency-path: packages/go/go.sum - name: Install golangci-lint uses: golangci/golangci-lint-action@v9 
with: install-only: true - name: Setup Java uses: actions/setup-java@v5 # v5 with: distribution: temurin java-version: "25" - name: Setup Elixir uses: kreuzberg-dev/actions/setup-elixir@v1 # v1 - name: Setup Ruby uses: ruby/setup-ruby@v1 # v1 with: ruby-version: "3.4" bundler-cache: false - name: Setup PHP uses: kreuzberg-dev/actions/setup-php@v1 # v1 - name: Setup R uses: kreuzberg-dev/actions/setup-r@v1 # v1 with: install-deps-script: scripts/ci/r/install-deps.sh - name: Install C/C++ tools run: | sudo apt-get update -qq sudo apt-get install -y --no-install-recommends cppcheck clang-format - name: Install Alef uses: kreuzberg-dev/actions/install-alef@v1 - name: Install All Binding Dependencies run: alef setup shell: bash - name: Run Lint Checks run: task lint:check shell: bash - name: Check Code Formatting run: task format:check shell: bash - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 - name: Run Pre-commit Hooks uses: j178/prek-action@v2 # v2 with: extra-args: --all-files - name: Install Python README Dependencies run: pip install pyyaml jinja2 shell: bash - name: Validate READMEs run: task docs:generate-readme:check shell: bash validate-rust: name: "Validate: Rust" runs-on: ubuntu-24.04-arm timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Check Rust Formatting run: task rust:lint:check shell: bash - name: Run Clippy run: task rust:lint:check shell: bash - name: Check feature flags (html-to-markdown) shell: bash env: RUSTFLAGS: "-D warnings" run: | cargo check -p html-to-markdown-rs --no-default-features cargo check -p html-to-markdown-rs --no-default-features --features visitor cargo check -p html-to-markdown-rs --no-default-features --features metadata cargo check -p html-to-markdown-rs --no-default-features --features 
inline-images changes: name: "Detect Changes" runs-on: ubuntu-24.04-arm outputs: core: ${{ steps.filter.outputs.core }} rust: ${{ steps.filter.outputs.rust }} ffi: ${{ steps.filter.outputs.ffi }} python: ${{ steps.filter.outputs.python }} node: ${{ steps.filter.outputs.node }} ruby: ${{ steps.filter.outputs.ruby }} php: ${{ steps.filter.outputs.php }} go: ${{ steps.filter.outputs.go }} java: ${{ steps.filter.outputs.java }} elixir: ${{ steps.filter.outputs.elixir }} r: ${{ steps.filter.outputs.r }} wasm: ${{ steps.filter.outputs.wasm }} steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Detect changes uses: dorny/paths-filter@v4 # v3 id: filter with: filters: | core: - 'crates/html-to-markdown/**' - 'Cargo.toml' - 'Cargo.lock' - 'rust-toolchain.toml' - '.cargo/config.toml' - 'fixtures/**' - 'tools/e2e-generator/**' rust: - 'crates/html-to-markdown/**' - 'crates/html-to-markdown-cli/**' - 'e2e/rust/**' - 'Cargo.toml' - 'Cargo.lock' - 'rustfmt.toml' ffi: - 'crates/html-to-markdown-ffi/**' - 'crates/html-to-markdown/**' - 'Cargo.toml' - 'Cargo.lock' python: - 'crates/html-to-markdown-py/**' - 'packages/python/**' - 'e2e/python/**' - 'pyproject.toml' - 'uv.lock' - 'uv.toml' - 'fixtures/**' node: - 'crates/html-to-markdown-node/**' - 'packages/typescript/**' - 'e2e/node/**' - 'package.json' - 'pnpm-lock.yaml' - 'pnpm-workspace.yaml' - 'fixtures/**' ruby: - 'packages/ruby/**' - 'e2e/ruby/**' - 'Gemfile' - 'Gemfile.lock' - 'fixtures/**' php: - 'crates/html-to-markdown-php/**' - 'packages/php/**' - 'packages/php-ext/**' - 'e2e/php/**' - 'composer.json' - 'composer.lock' - 'fixtures/**' go: - 'packages/go/**' - 'e2e/go/**' - 'crates/html-to-markdown-ffi/**' - 'go.mod' - 'go.sum' - 'fixtures/**' java: - 'packages/java/**' - 'e2e/java/**' - 'crates/html-to-markdown-ffi/**' - 'fixtures/**' elixir: - 'packages/elixir/**' - 'e2e/elixir/**' - 'crates/html-to-markdown-ffi/**' - 'fixtures/**' r: - 'packages/r/**' - 'e2e/r/**' - 'fixtures/**' wasm: - 
'crates/html-to-markdown-wasm/**' - 'crates/html-to-markdown-wasm-wasi/**' - 'packages/wasm/**' - 'e2e/wasm/**' - 'fixtures/**' # --- Stage 2: Core Builds --- build-ffi: needs: [validate, validate-rust, changes] name: "Build: FFI (${{ matrix.runner }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.ffi == 'true' runs-on: ${{ matrix.runner }} timeout-minutes: 60 strategy: fail-fast: false matrix: runner: - ubuntu-latest - ubuntu-24.04-arm - macos-latest - windows-latest steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Free disk space if: startsWith(matrix.runner, 'ubuntu') uses: kreuzberg-dev/actions/free-disk-space-linux@v1 # v1 with: show-initial: "false" show-final: "true" - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: ffi-${{ matrix.runner }} use-sccache: false - name: Build html-to-markdown-ffi (release, Unix) if: matrix.runner != 'windows-latest' run: cargo build --release -p html-to-markdown-ffi shell: bash env: CARGO_TERM_COLOR: always CARGO_INCREMENTAL: "0" RUST_BACKTRACE: short - name: Build html-to-markdown-ffi (debug, Windows) if: matrix.runner == 'windows-latest' run: cargo build -p html-to-markdown-ffi shell: bash env: CARGO_TERM_COLOR: always - name: Verify header exists shell: bash run: | HEADER="crates/html-to-markdown-ffi/include/html_to_markdown.h" test -f "$HEADER" echo "Header verified: $HEADER" - name: Upload FFI artifacts if: matrix.runner != 'windows-latest' uses: actions/upload-artifact@v7 # v7 with: name: ffi-${{ matrix.runner }} path: | target/release/libhtml_to_markdown_ffi.* target/release/html_to_markdown_ffi.* crates/html-to-markdown-ffi/include/html_to_markdown.h retention-days: 7 if-no-files-found: warn - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 rust-tests: needs: [validate, validate-rust, changes] name: "Test: 
Rust (${{ matrix.os }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.rust == 'true' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Set up Python uses: actions/setup-python@v6 # v6 with: python-version: "3.13" - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview cache-key-prefix: rust-tests-${{ matrix.os }} - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Run Rust Tests env: RUST_BACKTRACE: full run: task rust:test:ci shell: bash - name: Run E2E Tests run: task rust:e2e:test shell: bash - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 rust-coverage: needs: [validate, validate-rust, changes] name: "Coverage: Rust" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.rust == 'true' runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Generate Rust Coverage run: task rust:coverage shell: bash - name: Upload Coverage Artifacts if: always() uses: actions/upload-artifact@v7 # v7 with: name: coverage-report-${{ github.sha }} path: rust-coverage.lcov retention-days: 7 # --- Stage 3: Language Builds --- build-python: needs: [rust-tests, changes] name: "Build: Python (${{ matrix.os }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.python == 'true' runs-on: ${{ matrix.os }} timeout-minutes: 45 strategy: fail-fast: 
false matrix: os: [ubuntu-latest, windows-latest, macos-latest] python: ["3.10", "3.12", "3.14"] steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install uv uses: astral-sh/setup-uv@v7 # v7 with: enable-cache: true - name: Set up Python uses: actions/setup-python@v6 # v6 with: python-version: ${{ matrix.python }} - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview cache-key-prefix: python-${{ matrix.os }}-${{ matrix.python }} - name: Install Python Dependencies uses: nick-fields/retry@v4 # v4 with: timeout_minutes: 5 max_attempts: 3 retry_wait_seconds: 30 command: | if [[ "${{ runner.os }}" == "Windows" ]] && [[ -d ".venv" ]]; then echo "Removing existing .venv directory on Windows" rm -rf .venv fi uv sync --all-extras --no-install-workspace shell: bash - name: Build Python Bindings run: | uv pip install maturin cd packages/python && uv run maturin develop --release shell: bash - name: Build CLI binary run: cargo build --release -p html-to-markdown-cli shell: bash - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 build-node: needs: [rust-tests, changes] name: "Build: Node (${{ matrix.os }}, ${{ matrix.runtime }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.node == 'true' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] runtime: [node, bun] exclude: - os: windows-latest runtime: bun steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: node-${{ matrix.os }}-${{ matrix.runtime }} - name: Setup Node.js 
workspace if: matrix.runtime == 'node' uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1 - name: Setup Bun if: matrix.runtime == 'bun' uses: oven-sh/setup-bun@v2 # v2 with: bun-version: latest - name: Build NAPI-RS Bindings (Node.js) if: matrix.runtime == 'node' uses: kreuzberg-dev/actions/build-node-napi@v1 # v1 with: crate-dir: crates/html-to-markdown-node - name: Install workspace dependencies (Bun) if: matrix.runtime == 'bun' run: bun install shell: bash - name: Build NAPI-RS Bindings (Bun) if: matrix.runtime == 'bun' working-directory: crates/html-to-markdown-node run: bun run build shell: bash - name: Build TypeScript package (Node.js) if: matrix.runtime == 'node' uses: ./.github/actions/build-typescript - name: Build TypeScript package (Bun) if: matrix.runtime == 'bun' working-directory: packages/typescript run: bun x tsc --project tsconfig.json shell: bash - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 build-ruby: needs: [rust-tests, changes] name: "Build: Ruby (${{ matrix.os }}, ruby-${{ matrix.ruby }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.ruby == 'true' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] ruby: ["3.2", "3.3"] steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: ruby-${{ matrix.os }}-${{ matrix.ruby }} - name: Setup Ruby (Unix) if: runner.os != 'Windows' uses: ruby/setup-ruby@v1 # v1 with: ruby-version: ${{ matrix.ruby }} bundler: "4.0.3" bundler-cache: false working-directory: packages/ruby - name: Setup Ruby (Windows) if: runner.os == 'Windows' uses: ruby/setup-ruby@v1 # v1 with: ruby-version: ${{ matrix.ruby }} bundler: "4.0.3" bundler-cache: false working-directory: packages/ruby 
windows-toolchain: UCRT64 - name: Set up Python uses: actions/setup-python@v6 # v6 with: python-version: "3.12" - name: Vendor core crate run: python3 scripts/ci/ruby/vendor-core-crate.py shell: bash - name: Build CLI binary uses: kreuzberg-dev/actions/build-rust-cli@v1 # v1 with: package-name: html-to-markdown-cli binary-name: html-to-markdown - name: Build Ruby extension uses: kreuzberg-dev/actions/build-ruby-gem@v1 # v1 - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 build-php: needs: [rust-tests, changes] name: "Build: PHP" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.php == 'true' runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Setup PHP uses: shivammathur/setup-php@2.37.0 # 2 with: php-version: "8.4" tools: composer:2.9.1 coverage: none - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: php - name: Capture php-config path run: scripts/ci/php/set-php-config.sh shell: bash - name: Install root Composer dependencies uses: ramsey/composer-install@4.0.0 # 4 with: dependency-versions: locked env: COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}' - name: Install PHP package Composer dependencies uses: ramsey/composer-install@4.0.0 # 4 with: dependency-versions: locked working-directory: packages/php env: COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}' - name: Build PHP extension id: build-php-extension uses: kreuzberg-dev/actions/build-php-extension@v1 # v1 with: crate-name: html-to-markdown-php lib-name: html_to_markdown_php - name: Upload PHP extension artifact uses: actions/upload-artifact@v7 # v7 with: name: php-extension-ubuntu path: ${{ steps.build-php-extension.outputs.extension-path }} retention-days: 7 - name: Cleanup Rust cache if: 
always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 build-java: needs: [rust-tests, changes] name: "Build: Java (${{ matrix.os }})" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.java == 'true' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] java: ["25"] steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: java-${{ matrix.os }} - name: Test Java Panama FFI bindings uses: kreuzberg-dev/actions/test-java-ffi@v1 # v1 with: ffi-crate-name: html-to-markdown-ffi ffi-lib-name: html_to_markdown_ffi java-version: ${{ matrix.java }} - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 build-wasm: needs: [rust-tests, changes] name: "Build: WASM" if: | github.event_name == 'workflow_dispatch' || needs.changes.outputs.core == 'true' || needs.changes.outputs.wasm == 'true' runs-on: ubuntu-latest timeout-minutes: 120 steps: - name: Checkout uses: actions/checkout@v6 # v6 id: checkout - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: target: wasm32-unknown-unknown use-sccache: false cache-key-prefix: wasm - name: Ensure wasm target installed run: scripts/common/ensure-wasm-target.sh shell: bash - name: Install wasm-pack run: scripts/common/install-wasm-pack.sh shell: bash - name: Setup Node workspace uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1 - name: Build WASM (all targets) uses: kreuzberg-dev/actions/build-wasm-package@v1 # v1 with: crate-dir: crates/html-to-markdown-wasm - name: Cleanup Rust cache if: always() && steps.checkout.outcome == 'success' uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1 # --- Stage 4: Language Tests --- test-python: needs: 
[build-python] name: "Test: Python (${{ matrix.os }}, py-${{ matrix.python }})" if: always() && !cancelled() && needs.build-python.result != 'skipped' runs-on: ${{ matrix.os }} timeout-minutes: 45 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] python: ["3.10", "3.12", "3.14"] steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install uv uses: astral-sh/setup-uv@v7 # v7 with: enable-cache: true - name: Set up Python uses: actions/setup-python@v6 # v6 with: python-version: ${{ matrix.python }} - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: components: rustfmt, clippy, llvm-tools-preview - name: Install Python Dependencies uses: nick-fields/retry@v4 # v4 with: timeout_minutes: 5 max_attempts: 3 retry_wait_seconds: 30 command: | if [[ "${{ runner.os }}" == "Windows" ]] && [[ -d ".venv" ]]; then echo "Removing existing .venv directory on Windows" rm -rf .venv fi uv sync --all-extras --no-install-workspace shell: bash - name: Build Python Bindings run: | uv pip install maturin cd packages/python && uv run maturin develop --release shell: bash - name: Build CLI binary run: cargo build --release -p html-to-markdown-cli shell: bash - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests run: alef test --e2e --lang python shell: bash test-node: needs: [build-node] name: "Test: Node (${{ matrix.os }}, ${{ matrix.runtime }})" if: always() && !cancelled() && needs.build-node.result != 'skipped' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] runtime: [node, bun] exclude: - os: windows-latest runtime: bun steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Setup Node.js workspace if: matrix.runtime == 'node' uses: 
kreuzberg-dev/actions/setup-node-workspace@v1 # v1 - name: Setup Bun if: matrix.runtime == 'bun' uses: oven-sh/setup-bun@v2 # v2 with: bun-version: latest - name: Build NAPI-RS Bindings (Node.js) if: matrix.runtime == 'node' uses: kreuzberg-dev/actions/build-node-napi@v1 # v1 with: crate-dir: crates/html-to-markdown-node - name: Install workspace dependencies (Bun) if: matrix.runtime == 'bun' run: bun install shell: bash - name: Build NAPI-RS Bindings (Bun) if: matrix.runtime == 'bun' working-directory: crates/html-to-markdown-node run: bun run build shell: bash - name: Run Rust Tests (Node.js only) if: matrix.runtime == 'node' run: task rust:test shell: bash - name: Build TypeScript package (Node.js) if: matrix.runtime == 'node' uses: ./.github/actions/build-typescript - name: Build TypeScript package (Bun) if: matrix.runtime == 'bun' working-directory: packages/typescript run: bun x tsc --project tsconfig.json shell: bash - name: Install alef (Node.js only) if: matrix.runtime == 'node' uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests (Node.js only) if: matrix.runtime == 'node' run: alef test --e2e --lang node shell: bash test-ruby: needs: [build-ruby] name: "Test: Ruby (${{ matrix.os }}, ruby-${{ matrix.ruby }})" if: always() && !cancelled() && needs.build-ruby.result != 'skipped' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] ruby: ["3.2", "3.3"] steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Setup Ruby (Unix) if: runner.os != 'Windows' uses: ruby/setup-ruby@v1 # v1 with: ruby-version: ${{ matrix.ruby }} bundler: "4.0.3" bundler-cache: false working-directory: packages/ruby - name: Setup Ruby (Windows) if: runner.os == 'Windows' uses: ruby/setup-ruby@v1 # v1 with: ruby-version: ${{ matrix.ruby }} bundler: "4.0.3" bundler-cache: false working-directory: packages/ruby windows-toolchain: UCRT64 - name: Set up Python uses: actions/setup-python@v6 # v6 with: python-version: "3.12" - name: 
Vendor core crate run: python3 scripts/ci/ruby/vendor-core-crate.py shell: bash - name: Build CLI binary uses: kreuzberg-dev/actions/build-rust-cli@v1 # v1 with: package-name: html-to-markdown-cli binary-name: html-to-markdown - name: Build Ruby extension uses: kreuzberg-dev/actions/build-ruby-gem@v1 # v1 - name: Run Rubocop (Ubuntu/ruby-3.3 only) if: runner.os != 'Windows' && matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3' run: ./scripts/ci/ruby/run-rubocop.sh shell: bash - name: Validate RBS signatures (Ubuntu/ruby-3.3 only) if: runner.os != 'Windows' && matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3' run: ./scripts/ci/ruby/run-rbs-validate.sh shell: bash - name: Run Steep type checking (Ubuntu/ruby-3.3 only) if: runner.os != 'Windows' && matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3' working-directory: packages/ruby run: ../../scripts/ci/ruby/run-steep.sh shell: bash - name: Run Ruby specs (Unix) if: runner.os != 'Windows' working-directory: packages/ruby run: ../../scripts/ci/ruby/run-rspec-unix.sh shell: bash - name: Run Ruby specs (Windows) if: runner.os == 'Windows' working-directory: packages/ruby shell: pwsh run: ../../scripts/ci/ruby/run-rspec-windows.ps1 - name: Install Task if: runner.os != 'Windows' uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef (Unix only) if: runner.os != 'Windows' uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests (Unix only) if: runner.os != 'Windows' run: alef test --e2e --lang ruby shell: bash test-php: needs: [build-php] name: "Test: PHP" if: always() && !cancelled() && needs.build-php.result != 'skipped' runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup PHP uses: shivammathur/setup-php@2.37.0 # 2 with: php-version: "8.4" tools: composer:2.9.1 coverage: none - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Capture php-config path run: scripts/ci/php/set-php-config.sh shell: bash - name: Install root Composer dependencies uses: ramsey/composer-install@4.0.0 # 4 with: dependency-versions: 
locked env: COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}' - name: Install PHP package Composer dependencies uses: ramsey/composer-install@4.0.0 # 4 with: dependency-versions: locked working-directory: packages/php env: COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}' - name: Build PHP extension id: build-php-extension uses: kreuzberg-dev/actions/build-php-extension@v1 # v1 with: crate-name: html-to-markdown-php lib-name: html_to_markdown_php - name: Run PHP static analysis run: scripts/ci/php/run-phpstan.sh shell: bash - name: Run PHP tests run: scripts/ci/php/run-php-tests.sh shell: bash env: EXTENSION_PATH: ${{ steps.build-php-extension.outputs.extension-path }} - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests run: alef test --e2e --lang php shell: bash env: EXTENSION_PATH: ${{ steps.build-php-extension.outputs.extension-path }} test-go: needs: [build-ffi, changes] name: "Test: Go" if: | always() && !cancelled() && needs.build-ffi.result != 'skipped' && (github.event_name == 'workflow_dispatch' || needs.changes.outputs.go == 'true' || needs.changes.outputs.ffi == 'true' || needs.changes.outputs.core == 'true') runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Setup Go uses: actions/setup-go@v6 # v6 with: go-version: ${{ env.GO_VERSION }} check-latest: true - name: Build FFI library run: cargo build --release -p html-to-markdown-ffi shell: bash - name: Detect Go modules id: set-modules shell: bash run: scripts/ci/go/detect-go-modules.sh - name: Install golangci-lint if: steps.set-modules.outputs.modules != '[]' env: GOTOOLCHAIN: ${{ env.GO_TOOLCHAIN }} run: scripts/ci/go/install-golangci-lint.sh shell: bash - name: Run golangci-lint (all modules) if: steps.set-modules.outputs.modules != '[]' shell: bash run: | for module in $(echo '${{ 
steps.set-modules.outputs.modules }}' | jq -r '.[]'); do echo "=== Linting $module ===" (cd "$module" && "${{ github.workspace }}/scripts/ci/go/run-golangci-lint.sh") done - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests run: alef test --e2e --lang go shell: bash test-java: needs: [build-java] name: "Test: Java (${{ matrix.os }})" if: always() && !cancelled() && needs.build-java.result != 'skipped' runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] java: ["25"] steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Test Java Panama FFI bindings uses: kreuzberg-dev/actions/test-java-ffi@v1 # v1 with: ffi-crate-name: html-to-markdown-ffi ffi-lib-name: html_to_markdown_ffi java-version: ${{ matrix.java }} - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef (Ubuntu only) if: matrix.os == 'ubuntu-latest' uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests (Ubuntu only) if: matrix.os == 'ubuntu-latest' run: alef test --e2e --lang java shell: bash test-elixir: needs: [build-ffi, changes] name: "Test: Elixir" if: | always() && !cancelled() && needs.build-ffi.result != 'skipped' && (github.event_name == 'workflow_dispatch' || needs.changes.outputs.elixir == 'true' || needs.changes.outputs.ffi == 'true' || needs.changes.outputs.core == 'true') runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Elixir uses: erlef/setup-beam@v1 # v1 with: elixir-version: "1.19" otp-version: "28.1" - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Install Hex/Rebar run: scripts/ci/elixir/install-hex-rebar.sh shell: bash - name: Install dependencies working-directory: packages/elixir run: ../../scripts/ci/elixir/install-deps.sh shell: bash - name: Run tests working-directory: packages/elixir run: 
../../scripts/ci/elixir/run-tests.sh shell: bash - name: Credo lint working-directory: packages/elixir run: ../../scripts/ci/elixir/run-credo.sh shell: bash - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests run: alef test --e2e --lang elixir shell: bash test-r: needs: [rust-tests, changes] name: "Test: R" if: | always() && !cancelled() && needs.rust-tests.result != 'skipped' && (github.event_name == 'workflow_dispatch' || needs.changes.outputs.r == 'true' || needs.changes.outputs.core == 'true') runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup R uses: kreuzberg-dev/actions/setup-r@v1 # v1 with: install-deps-script: scripts/ci/r/install-deps.sh - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Run tests working-directory: packages/r run: ../../scripts/ci/r/run-tests.sh shell: bash - name: Run lintr working-directory: packages/r run: ../../scripts/ci/r/run-lintr.sh shell: bash - name: Install Task uses: kreuzberg-dev/actions/install-task@v1 # v1 - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 # v1 - name: Run E2E tests run: alef test --e2e --lang r shell: bash test-c-ffi: needs: [build-ffi, changes] name: "Test: C FFI (${{ matrix.runner }})" if: | always() && !cancelled() && needs.build-ffi.result != 'skipped' && (github.event_name == 'workflow_dispatch' || needs.changes.outputs.ffi == 'true' || needs.changes.outputs.core == 'true') runs-on: ${{ matrix.runner }} timeout-minutes: 60 strategy: fail-fast: false matrix: runner: - ubuntu-latest - ubuntu-24.04-arm - macos-latest steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: c-ffi-${{ matrix.runner }} use-sccache: false - name: Build html-to-markdown-ffi shell: bash run: cargo build --release -p html-to-markdown-ffi - name: Run C e2e tests shell: bash env: LD_LIBRARY_PATH: ${{ 
github.workspace }}/target/release DYLD_LIBRARY_PATH: ${{ github.workspace }}/target/release run: cd e2e/c && make test - name: Verify header exists shell: bash run: | HEADER="crates/html-to-markdown-ffi/include/html_to_markdown.h" test -f "$HEADER" echo "Header verified: $HEADER" - name: Verify pkg-config output shell: bash run: | PC_DIR="$(pwd)/target/release/build" PC_FILE=$(find "$PC_DIR" -name 'html-to-markdown.pc' -path '*/html-to-markdown-ffi-*/out/*' 2>/dev/null | head -1) if [ -z "$PC_FILE" ]; then echo "Warning: html-to-markdown.pc not found in build output" find "$PC_DIR" -name '*.pc' 2>/dev/null || echo "No .pc files found" else echo "Found pkg-config file: $PC_FILE" cat "$PC_FILE" fi test-c-ffi-windows: needs: [build-ffi, changes] name: "Test: C FFI (windows-latest)" if: | always() && !cancelled() && needs.build-ffi.result != 'skipped' && (github.event_name == 'workflow_dispatch' || needs.changes.outputs.ffi == 'true' || needs.changes.outputs.core == 'true') runs-on: windows-latest timeout-minutes: 60 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: cache-key-prefix: c-ffi-windows use-sccache: false - name: Build html-to-markdown-ffi shell: bash run: cargo build -p html-to-markdown-ffi - name: Verify header generated shell: bash run: | test -f crates/html-to-markdown-ffi/include/html_to_markdown.h echo "Header verified on Windows." 
test-wasm: needs: [build-wasm] name: "Test: WASM" if: always() && !cancelled() && needs.build-wasm.result != 'skipped' runs-on: ubuntu-latest timeout-minutes: 120 steps: - name: Checkout uses: actions/checkout@v6 # v6 - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 with: target: wasm32-unknown-unknown use-sccache: false - name: Ensure wasm target installed run: scripts/common/ensure-wasm-target.sh shell: bash - name: Install wasm-pack run: scripts/common/install-wasm-pack.sh shell: bash - name: Setup Node workspace uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1 - name: Build WASM (all targets) uses: kreuzberg-dev/actions/build-wasm-package@v1 # v1 with: crate-dir: crates/html-to-markdown-wasm - name: Test WASM bundle working-directory: crates/html-to-markdown-wasm run: ../../scripts/ci/wasm/test-wasm-bundle.sh shell: bash - name: Run Rust WASM tests working-directory: crates/html-to-markdown-wasm run: ../../scripts/ci/wasm/test-wasm-rust.sh shell: bash ================================================ FILE: .github/workflows/deploy-docs.yaml ================================================ name: Deploy Documentation on: push: branches: [main] paths: - 'docs/**' - 'zensical.toml' - 'pyproject.toml' - '.github/workflows/deploy-docs.yaml' workflow_dispatch: permissions: contents: read pages: write id-token: write concurrency: group: "pages" cancel-in-progress: false jobs: build: runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v6 # v6 with: fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v6 # v6 with: python-version: '3.13' - name: Install uv uses: astral-sh/setup-uv@v7 # v7 with: enable-cache: true - name: Install dependencies and build docs run: | uv sync --group doc --no-editable --no-install-workspace --no-install-project uv run --no-sync zensical build --clean - name: Upload Pages artifact uses: actions/upload-pages-artifact@v5 # v5 with: path: site deploy: needs: build permissions: pages: 
write id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest steps: - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v5 # v5 ================================================ FILE: .github/workflows/publish.yaml ================================================ name: Publish Release on: workflow_dispatch: inputs: tag: description: "Release tag to build (e.g., v2.6.0)" required: true type: string dry_run: description: "Prepare artifacts without publishing" required: false type: boolean default: false ref: description: "Git ref (branch, tag, or commit) to build; defaults to the tag" required: false type: string force_republish_java: description: "Force republish Java artifacts even if the version exists" required: false type: boolean default: false force_republish_wasm: description: "Force republish WASM package even if the version exists" required: false type: boolean default: false republish: description: "Delete and re-create the tag on current HEAD before publishing (retag + full republish)" required: false type: boolean default: false release: types: [published] repository_dispatch: types: [publish-release] permissions: contents: write concurrency: group: ${{ github.workflow }}-${{ (github.event_name == 'workflow_dispatch' && (github.event.inputs.ref || github.event.inputs.tag)) || github.ref || github.run_id }} cancel-in-progress: false jobs: prepare: name: Prepare metadata runs-on: ubuntu-latest outputs: tag: ${{ steps.meta.outputs.tag }} version: ${{ steps.meta.outputs.version }} ref: ${{ steps.meta.outputs.ref }} dry_run: ${{ steps.meta.outputs.dry_run }} checkout_ref: ${{ steps.meta.outputs.checkout_ref }} target_sha: ${{ steps.meta.outputs.target_sha }} matrix_ref: ${{ steps.meta.outputs.matrix_ref }} is_tag: ${{ steps.meta.outputs.is_tag }} force_republish_java: ${{ steps.republish.outputs.force_republish_java }} force_republish_wasm: ${{ 
steps.republish.outputs.force_republish_wasm }} steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ (inputs.republish == true && (inputs.ref || github.event.repository.default_branch)) || inputs.ref || inputs.tag || github.ref }} fetch-depth: 0 - name: Retag for republish if: ${{ inputs.republish == true || github.event.client_payload.republish == true }} env: TAG: ${{ inputs.tag || github.event.client_payload.tag }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | if [[ -z "${TAG}" ]]; then echo "::error::republish requires a tag input" exit 1 fi sha="$(git rev-parse HEAD)" echo "::notice::Republish requested — deleting and re-creating tag ${TAG} on ${sha:0:8}" # Delete via API (avoids workflows permission issue with git push) gh api "repos/${GITHUB_REPOSITORY}/git/refs/tags/${TAG}" -X DELETE 2>/dev/null || true # Create via API gh api "repos/${GITHUB_REPOSITORY}/git/refs" \ -f "ref=refs/tags/${TAG}" \ -f "sha=${sha}" --silent # Update local state git tag -d "${TAG}" 2>/dev/null || true git tag "${TAG}" "${sha}" - name: Validate tag and compute version id: meta env: GITHUB_EVENT_NAME: ${{ github.event_name }} GITHUB_REF_NAME: ${{ github.ref_name }} INPUT_TAG: ${{ inputs.tag }} INPUT_DRY_RUN: ${{ inputs.dry_run }} INPUT_REF: ${{ inputs.republish == true && format('refs/tags/{0}', inputs.tag) || inputs.ref }} EVENT_RELEASE_TAG: ${{ github.event.release.tag_name }} EVENT_DISPATCH_TAG: ${{ github.event.client_payload.tag }} EVENT_DISPATCH_DRY_RUN: ${{ github.event.client_payload.dry_run }} EVENT_DISPATCH_REF: ${{ github.event.client_payload.ref }} run: scripts/publish/validate-and-compute-metadata.sh - name: Resolve republish flags id: republish env: INPUT_FORCE_REPUBLISH_JAVA: ${{ inputs.force_republish_java }} INPUT_FORCE_REPUBLISH_WASM: ${{ inputs.force_republish_wasm }} EVENT_DISPATCH_FORCE_REPUBLISH_JAVA: ${{ github.event.client_payload.force_republish_java }} EVENT_DISPATCH_FORCE_REPUBLISH_WASM: ${{ github.event.client_payload.force_republish_wasm 
}} run: | force_java="${INPUT_FORCE_REPUBLISH_JAVA:-${EVENT_DISPATCH_FORCE_REPUBLISH_JAVA:-false}}" force_wasm="${INPUT_FORCE_REPUBLISH_WASM:-${EVENT_DISPATCH_FORCE_REPUBLISH_WASM:-false}}" echo "force_republish_java=${force_java}" >>"$GITHUB_OUTPUT" echo "force_republish_wasm=${force_wasm}" >>"$GITHUB_OUTPUT" - name: Install Task uses: go-task/setup-task@v2 # v2 with: version: 3.46.4 - name: Upload release metadata uses: actions/upload-artifact@v7 # v7 with: name: release-metadata path: release-metadata.json retention-days: 14 check-pypi: name: Check PyPI for existing version needs: prepare if: ${{ needs.prepare.outputs.is_tag == 'true' }} runs-on: ubuntu-latest outputs: exists: ${{ steps.check.outputs.exists }} steps: - name: Check PyPI version id: check uses: kreuzberg-dev/actions/check-registry@v1 # v1 with: registry: pypi package: html-to-markdown version: ${{ needs.prepare.outputs.version }} check-npm: name: Check npm for existing versions needs: prepare if: ${{ needs.prepare.outputs.is_tag == 'true' }} runs-on: ubuntu-latest outputs: node_exists: ${{ steps.check.outputs.exists }} wasm_exists: ${{ steps.check.outputs.wasm_exists }} ts_exists: ${{ steps.check.outputs.ts_exists }} steps: - name: Check npm packages id: check uses: kreuzberg-dev/actions/check-registry@v1 # v1 with: registry: npm package: "@kreuzberg/html-to-markdown" version: ${{ needs.prepare.outputs.version }} extra-packages: | wasm_exists=@kreuzberg/html-to-markdown-wasm ts_exists=@kreuzberg/html-to-markdown check-rubygems: name: Check RubyGems for existing version needs: prepare if: ${{ needs.prepare.outputs.is_tag == 'true' }} runs-on: ubuntu-latest outputs: exists: ${{ steps.check.outputs.exists }} steps: - name: Check RubyGems version id: check uses: kreuzberg-dev/actions/check-registry@v1 # v1 with: registry: rubygems package: html-to-markdown version: ${{ needs.prepare.outputs.version }} check-hex: name: Check Hex.pm for existing version needs: prepare if: ${{ 
needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    outputs:
      exists: ${{ steps.check.outputs.exists }}
    steps:
      - name: Check Hex version
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: hex
          package: html_to_markdown
          version: ${{ needs.prepare.outputs.version }}

  # Registry pre-checks: each job below runs only when `prepare` reports a tag
  # build and exposes an `exists` output that the matching publish job reads.
  check-maven:
    name: Check Maven Central for existing version
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    outputs:
      exists: ${{ steps.check.outputs.exists }}
    steps:
      - name: Check Maven version
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: maven
          package: "dev.kreuzberg:html-to-markdown"
          version: ${{ needs.prepare.outputs.version }}

  check-nuget:
    name: Check NuGet for existing version
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    outputs:
      exists: ${{ steps.check.outputs.exists }}
    steps:
      - name: Check NuGet package
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: nuget
          package: KreuzbergDev.HtmlToMarkdown
          version: ${{ needs.prepare.outputs.version }}

  check-packagist:
    name: Check Packagist for existing version
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    outputs:
      exists: ${{ steps.check.outputs.exists }}
    steps:
      - name: Check Packagist version
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: packagist
          package: kreuzberg-dev/html-to-markdown
          version: ${{ needs.prepare.outputs.version }}

  # Two crates are published (library and CLI), so this job reports one flag
  # per crate plus a combined `all_exist` flag.
  check-cratesio:
    name: Check crates.io for existing versions
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    outputs:
      rs_exists: ${{ steps.check.outputs.exists }}
      cli_exists: ${{ steps.check.outputs.cli_exists }}
      all_exist: ${{ steps.derive.outputs.all_exist }}
    steps:
      - name: Query crates.io
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: cratesio
          package: html-to-markdown-rs
          version: ${{ needs.prepare.outputs.version }}
          extra-packages: |
            cli_exists=html-to-markdown-cli
      # Step outputs are handed to the script through `env` instead of being
      # spliced into the script text, so their content can never be parsed as
      # shell syntax. `shell: bash` is explicit because the script uses `[[ ]]`.
      - name: Derive all_exist
        id: derive
        shell: bash
        env:
          RS_EXISTS: ${{ steps.check.outputs.exists }}
          CLI_EXISTS: ${{ steps.check.outputs.cli_exists }}
        run: |
          if [[ "$RS_EXISTS" == "true" && "$CLI_EXISTS" == "true" ]]; then
            echo "all_exist=true" >> "$GITHUB_OUTPUT"
          else
            echo "all_exist=false" >> "$GITHUB_OUTPUT"
          fi

  check-homebrew:
    name: Check if Homebrew formula already published
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      exists: ${{ steps.check.outputs.exists }}
    steps:
      - name: Check Homebrew tap for formula
        id: check
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: homebrew
          package: html-to-markdown
          version: ${{ needs.prepare.outputs.version }}
          tap-repo: kreuzberg-dev/homebrew-tap

  # Builds wheels on every OS in the matrix and uploads them as
  # `python-wheels-<os>` artifacts (consumed by publish-pypi).
  python-wheels:
    name: Build Python wheels (${{ matrix.os }})
    needs: prepare
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macos-latest]
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Build wheels
        uses: kreuzberg-dev/actions/build-python-wheels@v1 # v1
        with:
          python-version: "3.13"
          package-dir: packages/python
          # Folded scalars: each command chain below is passed as one line.
          cibw-before-build-linux: >
            yum install -y openssl-devel &&
            (test -x /usr/bin/aarch64-linux-gnu-gcc || ln -sf "$(command -v gcc)" /usr/local/bin/aarch64-linux-gnu-gcc 2>/dev/null || true) &&
            pip install maturin uv &&
            source ~/.cargo/env &&
            python scripts/prepare_wheel.py
          cibw-before-build-macos: >
            pip install maturin uv &&
            source ~/.cargo/env &&
            python scripts/prepare_wheel.py
          cibw-before-build-windows: >
            pip install maturin uv &&
            set PATH=%USERPROFILE%\.cargo\bin;%PATH% &&
            python scripts\prepare_wheel.py
          # The upload is done by the explicit step below instead.
          upload-artifact: "false"
      - name: Upload wheels
        uses: actions/upload-artifact@v7 # v7
        with:
          name: python-wheels-${{ matrix.os }}
          path: wheelhouse/*.whl
          retention-days: 14

  # Builds the source distribution and uploads it as the `python-sdist` artifact.
  python-sdist:
    name: Build Python sdist
    needs: prepare
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Set up Python
        uses: actions/setup-python@v6 # v6
        with:
          python-version: "3.13"
      - name: Install build dependencies
        run: scripts/publish/python/install-build-deps.sh
        shell: bash
      - name: Build CLI binary for sdist
        run: scripts/publish/python/build-cli-for-sdist.sh
        shell: bash
      - name: Prepare sdist with CLI
        run: scripts/publish/python/prepare-sdist-with-cli.sh
        shell: bash
      - name: Build sdist
        run: scripts/publish/python/build-sdist.sh
        shell: bash
      - name: Upload sdist
        uses: actions/upload-artifact@v7 # v7
        with:
          name: python-sdist
          path: packages/python/dist/*.tar.gz
          retention-days: 14

  # One PIE archive per (PHP version x platform) combination; tag builds only.
  php-package:
    name: Build PHP PIE binary (php${{ matrix.php }} ${{ matrix.platform.label }})
    needs: prepare
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ${{ matrix.platform.os }}
    timeout-minutes: 60
    permissions:
      contents: read
    strategy:
      fail-fast: false
      matrix:
        php: ["8.2", "8.3", "8.4", "8.5"]
        platform:
          - os: ubuntu-latest
            label: linux-x86_64
            target: x86_64-unknown-linux-gnu
          - os: ubuntu-24.04-arm
            label: linux-arm64
            target: aarch64-unknown-linux-gnu
          - os: macos-latest
            label: macos-arm64
            target: aarch64-apple-darwin
          - os: windows-latest
            label: windows-x86_64
            target: x86_64-pc-windows-msvc
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
# php-package job, continued: remainder of the "Ensure target commit" step.
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup PHP
        uses: kreuzberg-dev/actions/setup-php@v1 # v1
        with:
          php-version: ${{ matrix.php }}
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
        with:
          # Cache key is unique per platform and PHP version.
          cache-key-prefix: publish-php-${{ matrix.platform.label }}-php${{ matrix.php }}
          toolchain: stable
      - name: Install alef
        uses: kreuzberg-dev/actions/install-alef@v1 # v1
      - name: Build PHP extension
        uses: kreuzberg-dev/actions/build-php-extension@v1 # v1
        with:
          crate-name: html-to-markdown-php
          lib-name: html_to_markdown_php
          php-version: ${{ matrix.php }}
          php-ts: nts
      # Maps the PHP version to a compiler label (8.2/8.3 -> vs16, everything
      # else -> vs17). The step is skipped off Windows, so its output is empty
      # there when the packaging step reads it.
      - name: Determine Windows compiler
        if: runner.os == 'Windows'
        id: wincompiler
        shell: pwsh
        run: |
          $compiler = switch ('${{ matrix.php }}') {
            '8.2' { 'vs16' }
            '8.3' { 'vs16' }
            '8.4' { 'vs17' }
            '8.5' { 'vs17' }
            default { 'vs17' }
          }
          "compiler=$compiler" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append
      - name: Package PIE archive
        uses: kreuzberg-dev/actions/package-php-pie@v1 # v1
        with:
          php-version: ${{ matrix.php }}
          php-ts: nts
          target: ${{ matrix.platform.target }}
          windows-compiler: ${{ steps.wincompiler.outputs.compiler }}
          version: ${{ needs.prepare.outputs.version }}
          output-dir: dist/php-package
      - name: Upload PHP PIE package artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: php-package-${{ matrix.platform.label }}-php${{ matrix.php }}
          # Both archive formats and their checksum files are listed.
          path: |
            dist/php-package/php_*.tgz
            dist/php-package/php_*.tgz.sha256
            dist/php-package/php_*.zip
            dist/php-package/php_*.zip.sha256
          retention-days: 14

  # Generates the TypeScript definitions once, on Linux, and uploads them as
  # the `node-typescript-defs` artifact.
  node-typescript-defs:
    name: Generate Node TypeScript definitions
    needs: prepare
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Setup Node
        uses: actions/setup-node@v6 # v6
        with:
          node-version: 24
          check-latest: true
      - name: Enable corepack
        run: scripts/common/enable-corepack.sh
        shell: bash
      - name: Install Node dependencies
        run: scripts/publish/node/install-node-deps.sh
        shell: bash
      - name: Generate TypeScript definitions
        run: scripts/publish/node/generate-typescript-defs.sh
        shell: bash
      - name: Upload TypeScript definitions
        uses: actions/upload-artifact@v7 # v7
        with:
          name: node-typescript-defs
          path: typescript-defs/
          retention-days: 14

  # Builds the native Node module for each target in the matrix.
  # Matrix fields, as used by the steps below:
  #   rust_target    - when non-empty, the "Add Rust target" step runs
  #   use_cross      - when true, the "Install cross" step runs
  #   use_napi_cross - passed to the build script as USE_NAPI_CROSS
  node-bindings:
    name: Build Node bindings (${{ matrix.target }})
    needs: prepare
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: macos-latest
            target: aarch64-apple-darwin
            rust_target: ""
            use_cross: false
            use_napi_cross: false
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            rust_target: ""
            use_cross: false
            use_napi_cross: false
          - os: ubuntu-latest
            target: x86_64-unknown-linux-musl
            rust_target: x86_64-unknown-linux-musl
            use_cross: true
            use_napi_cross: false
          - os: ubuntu-latest
            target: aarch64-unknown-linux-gnu
            rust_target: aarch64-unknown-linux-gnu
            use_cross: false
            use_napi_cross: true
          - os: ubuntu-latest
            target: aarch64-unknown-linux-musl
            rust_target: aarch64-unknown-linux-musl
            use_cross: true
            use_napi_cross: false
          - os: ubuntu-latest
            target: armv7-unknown-linux-gnueabihf
            rust_target: armv7-unknown-linux-gnueabihf
            use_cross: false
            use_napi_cross: true
          - os: windows-latest
            target: x86_64-pc-windows-msvc
            rust_target: ""
            use_cross: false
            use_napi_cross: false
          - os: windows-latest
            target: aarch64-pc-windows-msvc
            rust_target: aarch64-pc-windows-msvc
            use_cross: false
            use_napi_cross: false
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      # NOTE(review): most other build jobs use kreuzberg-dev/actions/setup-rust;
      # confirm the different toolchain action here is intentional.
      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
      - name: Add Rust target
        if: ${{ matrix.rust_target != '' }}
        env:
          RUST_TARGET: ${{ matrix.rust_target }}
        run: scripts/publish/common/add-rust-target.sh
        shell: bash
      - name: Install cross
        if: ${{ matrix.use_cross }}
        run: scripts/publish/cli/install-cross.sh
        shell: bash
      - name: Setup Node
        uses: actions/setup-node@v6 # v6
        with:
          node-version: 24
          check-latest: true
      - name: Enable corepack
        run: scripts/common/enable-corepack.sh
        shell: bash
      - name: Install Node dependencies
        run: scripts/publish/node/install-node-deps.sh
        shell: bash
      # The clean/build/package steps come in pairs: a bash script for
      # non-Windows runners and a PowerShell script for Windows runners.
      - name: Clean npm directory
        if: runner.os != 'Windows'
        run: scripts/publish/node/clean-npm-dir.sh
        shell: bash
      - name: Clean npm directory (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: scripts/publish/node/clean-npm-dir.ps1
      - name: Create npm package structure
        run: scripts/publish/node/create-npm-package-structure.sh
        shell: bash
      - name: Build native module
        if: runner.os != 'Windows'
        env:
          TARGET: ${{ matrix.target }}
          USE_CROSS: ${{ matrix.use_cross }}
          USE_NAPI_CROSS: ${{ matrix.use_napi_cross }}
        shell: bash
        run: scripts/publish/node/build-native-module.sh
      - name: Build native module (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        env:
          TARGET: ${{ matrix.target }}
          USE_CROSS: ${{ matrix.use_cross }}
          USE_NAPI_CROSS: ${{ matrix.use_napi_cross }}
        run: scripts/publish/node/build-native-module.ps1
      - name: Package artifacts
        if: runner.os != 'Windows'
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/node/package-artifacts.sh
        shell: bash
      - name: Package artifacts (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/node/package-artifacts.ps1
      - name: Upload Node artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: node-bindings-${{ matrix.target }}
          path: node-bindings-${{ matrix.target }}.tar.gz
          retention-days: 14

  # Builds and packages the WASM bundles; uploaded as the `wasm-bundles` artifact.
  wasm-bindings:
    name: Build WASM bindings
    needs: prepare
# wasm-bindings job, continued.
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Add wasm32 target
        run: scripts/common/ensure-wasm-target.sh
        shell: bash
      - name: Install wasm-pack
        run: scripts/common/install-wasm-pack.sh
        shell: bash
      - name: Setup Node
        uses: actions/setup-node@v6 # v6
        with:
          node-version: 24
          check-latest: true
      - name: Enable corepack
        run: scripts/common/enable-corepack.sh
        shell: bash
      - name: Install dependencies
        run: scripts/publish/wasm/install-deps.sh
        shell: bash
      - name: Build WASM bundles
        run: scripts/publish/wasm/build-bundles.sh
        shell: bash
      - name: Package WASM artifacts
        run: scripts/publish/wasm/package-artifacts.sh
        shell: bash
      - name: Upload WASM artifacts
        uses: actions/upload-artifact@v7 # v7
        with:
          name: wasm-bundles
          path: wasm-artifacts/*
          retention-days: 14

  # Builds the CLI for each target and uploads `cli-<target>` artifacts
  # (.tar.gz on non-Windows runners, .zip on Windows).
  cli-binaries:
    name: Build CLI binaries (${{ matrix.target }})
    needs: prepare
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # Every entry currently sets use_cross to false, so the
        # "Install cross" step below is skipped for all targets.
        include:
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            use_cross: false
          - os: ubuntu-latest
            target: x86_64-unknown-linux-musl
            use_cross: false
          - os: ubuntu-latest
            target: aarch64-unknown-linux-gnu
            use_cross: false
          - os: macos-latest
            target: aarch64-apple-darwin
            use_cross: false
          - os: windows-latest
            target: x86_64-pc-windows-msvc
            use_cross: false
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Add compilation target
        env:
          RUST_TARGET: ${{ matrix.target }}
        run: scripts/publish/common/add-rust-target.sh
        shell: bash
      - name: Install build dependencies
        if: runner.os == 'Linux'
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/cli/install-build-deps-linux.sh
        shell: bash
      # Only the aarch64 GNU target gets an explicit cross-linker setup.
      - name: Configure cross linker
        if: ${{ matrix.target == 'aarch64-unknown-linux-gnu' }}
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/cli/configure-cross-linker.sh
        shell: bash
      - name: Install cross
        if: ${{ matrix.use_cross }}
        run: scripts/publish/cli/install-cross.sh
        shell: bash
      - name: Build CLI
        shell: bash
        env:
          TARGET: ${{ matrix.target }}
          USE_CROSS: ${{ matrix.use_cross }}
        run: scripts/publish/cli/build-cli.sh
      - name: Package CLI artifact
        if: runner.os != 'Windows'
        shell: bash
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/cli/package-cli-artifact.sh
      - name: Package CLI artifact (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        env:
          TARGET: ${{ matrix.target }}
        run: scripts/publish/cli/package-cli-artifact.ps1
      - name: Upload CLI artifact
        if: runner.os != 'Windows'
        uses: actions/upload-artifact@v7 # v7
        with:
          name: cli-${{ matrix.target }}
          path: cli-${{ matrix.target }}.tar.gz
          retention-days: 14
      - name: Upload CLI artifact (Windows)
        if: runner.os == 'Windows'
        uses: actions/upload-artifact@v7 # v7
        with:
          name: cli-${{ matrix.target }}
          path: cli-${{ matrix.target }}.zip
          retention-days: 14

  # Builds gems on each OS in the matrix. Windows runners take the
  # MSYS2 / Rust-GNU / bindgen setup steps; the other runners skip them.
  ruby-gem:
    name: Build Ruby gem (${{ matrix.label }})
    needs: prepare
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            label: linux
          - os: ubuntu-24.04-arm
            label: linux-aarch64
          - os: macos-latest
            label: macos-arm64
          - os: windows-latest
            label: windows-x64
    runs-on: ${{ matrix.os }}
    env:
      RB_SYS_CARGO_PROFILE: release
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Remove cached CLI binaries
        shell: bash
        run: scripts/publish/ruby/remove-cached-cli.sh
      - name: Install MSYS2 toolchain
        if: runner.os == 'Windows'
        shell: pwsh
        run: scripts/publish/ruby/install-msys2-toolchain.ps1
      - name: Install Rust (GNU on Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: scripts/publish/ruby/install-rust-gnu.ps1
      - name: Configure bindgen sysroot (Windows)
        if: runner.os == 'Windows'
        shell: bash
        run: scripts/publish/ruby/configure-bindgen-windows.sh
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1 # v1
        with:
          ruby-version: "3.3"
          bundler: "4.0.3"
          bundler-cache: false
      - name: Install Ruby dependencies (Unix)
        if: runner.os != 'Windows'
        run: scripts/publish/ruby/install-deps-unix.sh
        shell: bash
      - name: Install Ruby dependencies (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: scripts/publish/ruby/install-deps-windows.ps1
      - name: Build gem artifacts (Unix)
        if: runner.os != 'Windows'
        shell: bash
        run: scripts/publish/ruby/build-gem-unix.sh
      - name: Build gem artifacts (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: scripts/publish/ruby/build-gem-windows.ps1
      # Only the canonical `linux` builder ships the source gem; other matrix
      # entries would emit byte-different source gems (line endings, ext-rb
      # contents, vendor layout) that overwrite each other under merge-multiple
      # and produce an invalid .gem at publish time. Their native platform
      # gems (.gem with platform suffix) are still uploaded.
# ruby-gem job, continued: drop the source gem everywhere except the `linux` builder.
      - name: Drop source gem on non-canonical builders
        if: ${{ matrix.label != 'linux' }}
        shell: bash
        run: |
          # nullglob: with no gems present the loop body simply never runs.
          shopt -s nullglob
          for f in packages/ruby/pkg/*.gem; do
            base="$(basename "$f")"
            case "$base" in
              # Platform-suffixed (native) gems are kept.
              *-x86_64-linux.gem|*-aarch64-linux.gem|*-arm64-darwin.gem|*-x86_64-darwin.gem|*-x64-mingw32.gem|*-x64-mingw-ucrt.gem) ;;
              # Anything else is treated as the source gem and removed.
              *) echo "Removing non-canonical source gem $base"; rm -f "$f" ;;
            esac
          done
      - name: Upload gem artifacts
        uses: actions/upload-artifact@v7 # v7
        with:
          name: rubygems-${{ matrix.label }}
          path: packages/ruby/pkg/*.gem
          retention-days: 14

  # Builds the Elixir NIF per target (tag builds only) and packages the same
  # library under two NIF version names (2.16 and 2.17).
  elixir-natives:
    name: Build Elixir native libs (${{ matrix.settings.label }})
    needs: [prepare]
    if: ${{ needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ${{ matrix.settings.os }}
    timeout-minutes: 180
    strategy:
      fail-fast: false
      matrix:
        settings:
          - os: ubuntu-24.04-arm
            label: linux-aarch64
            target: aarch64-unknown-linux-gnu
          - os: ubuntu-latest
            label: linux-x86_64
            target: x86_64-unknown-linux-gnu
          - os: macos-latest
            label: macos-arm64
            target: aarch64-apple-darwin
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          target: ${{ matrix.settings.target }}
      - name: Build Elixir NIF
        env:
          CARGO_BUILD_TARGET: ${{ matrix.settings.target }}
        run: cargo build --release --target ${{ matrix.settings.target }} --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml
      # One step packages both NIF versions. The previous two copy-pasted steps
      # differed only in NIF_VERSION, and the second relied on the first having
      # created dist/elixir. Values are passed through `env` rather than being
      # spliced into the script text. Artifact names are unchanged.
      - name: Package NIF (NIF 2.16 and 2.17)
        shell: bash
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
          TARGET: ${{ matrix.settings.target }}
        run: |
          NIF_DIR="packages/elixir/native/html_to_markdown_nif"
          # RUNNER_OS is the environment-variable form of the runner.os context.
          if [[ "$RUNNER_OS" == "macOS" ]]; then
            LIB_NAME="libhtml_to_markdown_nif.dylib"
          else
            LIB_NAME="libhtml_to_markdown_nif.so"
          fi
          # Prefer the target-specific output directory; fall back to the
          # plain release directory when the library is not there.
          LIB_PATH="${NIF_DIR}/target/${TARGET}/release/${LIB_NAME}"
          if [[ ! -f "$LIB_PATH" ]]; then
            LIB_PATH="${NIF_DIR}/target/release/${LIB_NAME}"
          fi
          mkdir -p dist/elixir
          for NIF_VERSION in 2.16 2.17; do
            # The packaged file always carries the .so extension, on macOS too.
            ARTIFACT="libhtml_to_markdown_nif-v${VERSION}-nif-${NIF_VERSION}-${TARGET}.so"
            cp "$LIB_PATH" "${ARTIFACT}"
            tar -czf "dist/elixir/${ARTIFACT}.tar.gz" "${ARTIFACT}"
          done
      - name: Upload artifacts
        uses: actions/upload-artifact@v7
        with:
          name: elixir-${{ matrix.settings.label }}
          path: dist/elixir/*.tar.gz
          if-no-files-found: error
          retention-days: 1

  # Attaches the NIF tarballs to the GitHub release. `always()` plus the
  # explicit result check means it runs only when elixir-natives succeeded.
  upload-elixir-release:
    name: Upload Elixir NIF binaries to GitHub Release
    needs: [prepare, elixir-natives]
    if: ${{ always() && needs.prepare.outputs.is_tag == 'true' && needs.elixir-natives.result == 'success' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      # The `elixir-*` pattern also matches the `elixir-hex-package` artifact
      # if it has been uploaded by then; the loop below globs *.tar.gz only,
      # so that package's .tar file is not attached here.
      - name: Download Elixir NIF artifacts
        uses: actions/download-artifact@v8
        with:
          pattern: elixir-*
          path: dist/elixir
          merge-multiple: true
      - name: Upload to GitHub Release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Passed via env so the tag name is never parsed as script text.
          TAG: ${{ needs.prepare.outputs.tag }}
        run: |
          for file in dist/elixir/*.tar.gz; do
            echo "Uploading $(basename "$file")..."
gh release upload "$TAG" "$file" --clobber
          done

  # Runs the Elixir tests on Linux and macOS; only the Linux entry
  # (build_hex: true) builds and uploads the Hex package.
  elixir-package:
    name: Build Elixir Hex package (${{ matrix.label }})
    needs: prepare
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            label: linux
            build_hex: true
          - os: macos-latest
            label: macos
            build_hex: false
    runs-on: ${{ matrix.os }}
    env:
      MIX_ENV: dev
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Elixir
        uses: erlef/setup-beam@v1 # v1
        with:
          elixir-version: "1.19"
          otp-version: "28.1"
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Install Hex and Rebar
        run: scripts/publish/elixir/install-hex-rebar.sh
        shell: bash
      - name: Install dependencies
        run: scripts/publish/elixir/install-deps.sh
        shell: bash
      - name: Run Elixir tests
        run: scripts/publish/elixir/run-tests.sh
        shell: bash
      - name: Build Hex package
        if: ${{ matrix.build_hex }}
        run: scripts/publish/elixir/build-hex-package.sh
        shell: bash
      - name: Upload Hex artifact
        if: ${{ matrix.build_hex }}
        uses: actions/upload-artifact@v7 # v7
        with:
          name: elixir-hex-package
          path: packages/elixir/html_to_markdown-*.tar
          retention-days: 14

  # Packs the NuGet package from the native libraries built by csharp-ffi.
  # Skipped for dry runs, non-tag builds, and versions already on NuGet.
  csharp-package:
    name: Build C# NuGet package
    needs: [prepare, check-nuget, csharp-ffi]
    if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup .NET
        uses: actions/setup-dotnet@v5 # v5
        with:
          dotnet-version: "8.0.x"
      # Merges every csharp-ffi-<rid> artifact into one dist/csharp-ffi tree.
      - name: Download C# native FFI libraries
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: csharp-ffi-*
          path: dist/csharp-ffi
          merge-multiple: true
      - name: Install dependencies
        run: scripts/publish/csharp/restore.sh packages/csharp/HtmlToMarkdown.csproj
        shell: bash
      - name: Pack NuGet package
        run: scripts/publish/csharp/pack.sh
        shell: bash
      - name: Upload NuGet artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: csharp-nuget
          path: artifacts/csharp/*.nupkg
          retention-days: 14

  # Builds the FFI library per runtime identifier (rid) and stages it under
  # dist/csharp-ffi/<rid>/native for csharp-package.
  csharp-ffi:
    name: Build C# native FFI libraries
    needs: [prepare, check-nuget]
    if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            rid: linux-x64
            target: x86_64-unknown-linux-gnu
          - os: ubuntu-24.04-arm
            rid: linux-arm64
            target: aarch64-unknown-linux-gnu
          - os: windows-latest
            rid: win-x64
            target: x86_64-pc-windows-msvc
          - os: macos-latest
            rid: osx-arm64
            target: aarch64-apple-darwin
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Install alef
        uses: kreuzberg-dev/actions/install-alef@v1
      - name: Build and stage FFI library
        shell: bash
        run: |
          alef publish build --lang ffi --target ${{ matrix.target }}
          mkdir -p dist/csharp-ffi/${{ matrix.rid }}/native
          # Copies any shared library whose name contains html_to_markdown_ffi.
          # `find` exits successfully even when nothing matches; the upload
          # step below is what turns an empty staging directory into a failure.
          find target/release -maxdepth 1 -type f \( -name '*.so' -o -name '*.dylib' -o -name '*.dll' \) -name '*html_to_markdown_ffi*' -exec cp {} dist/csharp-ffi/${{ matrix.rid }}/native/ \;
      - name: Upload FFI artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: csharp-ffi-${{ matrix.rid }}
          path: dist/csharp-ffi
          # Fail instead of warning when no library was staged; otherwise
          # csharp-package could pack a NuGet package without native binaries.
          if-no-files-found: error
          retention-days: 14

  # Builds the FFI library and packages it for Go, one artifact per platform.
  go-ffi:
    name: Build Go native FFI libraries (${{ matrix.platform }})
    needs: prepare
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            platform: linux-x64
            target: x86_64-unknown-linux-gnu
          - os: ubuntu-24.04-arm
            platform: linux-arm64
            target: aarch64-unknown-linux-gnu
          - os: windows-latest
            platform: windows-x64
            target: x86_64-pc-windows-msvc
          - os: macos-latest
            platform: darwin-arm64
            target: aarch64-apple-darwin
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Install alef
        uses: kreuzberg-dev/actions/install-alef@v1
      - name: Build and package Go FFI library
        shell: bash
        run: |
          alef publish build --lang ffi --target ${{ matrix.target }}
          alef publish package --lang go --target ${{ matrix.target }} -o dist/go-ffi
      - name: Upload Go FFI artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: go-ffi-${{ matrix.platform }}
          path: dist/go-ffi
          retention-days: 14

  # Same matrix as go-ffi, but packages the C distribution (`--lang ffi`).
  c-ffi-libraries:
    name: Build C FFI distribution packages (${{ matrix.platform }})
    needs: prepare
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            platform: linux-x64
            target: x86_64-unknown-linux-gnu
          - os: ubuntu-24.04-arm
            platform: linux-arm64
            target: aarch64-unknown-linux-gnu
          - os: windows-latest
            platform: windows-x64
            target: x86_64-pc-windows-msvc
          - os: macos-latest
            platform: darwin-arm64
            target: aarch64-apple-darwin
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Install alef
        uses: kreuzberg-dev/actions/install-alef@v1
      - name: Build and package C FFI distribution
        shell: bash
        run: |
          alef publish build --lang ffi --target ${{ matrix.target }}
          alef publish package --lang ffi --target ${{ matrix.target }} -o dist/c-ffi
      - name: Upload C FFI artifact
        uses: actions/upload-artifact@v7 # v7
        with:
          name: c-ffi-${{ matrix.platform }}
          path: dist/c-ffi
          retention-days: 14

  # Packages the crates into .crate files; uploaded as `cargo-crates`.
  # The former "Add Windows Rust target" step was removed: this job always
  # runs on ubuntu-latest, so its `runner.os == 'Windows'` guard never matched.
  cargo-packages:
    name: Package Rust crates
    needs: prepare
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: scripts/publish/common/ensure-target-commit.sh
        shell: bash
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Package crates
        env:
          RELEASE_VERSION: ${{ needs.prepare.outputs.version }}
        run: scripts/publish/crates/package-crates.sh
        shell: bash
      - name: Upload crate packages
        uses: actions/upload-artifact@v7 # v7
        with:
          name: cargo-crates
          path: crate-artifacts/*.crate
          retention-days: 14

  # Attaches build outputs to the GitHub release. `always()` with no
  # per-job result checks means it runs on tag, non-dry-run builds even
  # when some of the listed build jobs failed or were skipped.
  upload-release-artifacts:
    name: Upload Release Artifacts
    needs:
      [
        prepare,
        python-wheels,
        python-sdist,
        php-package,
        node-typescript-defs,
        node-bindings,
        wasm-bindings,
        cli-binaries,
        ruby-gem,
        go-ffi,
        c-ffi-libraries,
        cargo-packages,
      ]
    if: ${{ always() && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.tag }}
# upload-release-artifacts job, continued: rest of the checkout step's inputs.
          fetch-depth: 0
      # PHP artifacts are merged into one directory; CLI, Go and C artifacts
      # keep one sub-directory per artifact (merge-multiple: false).
      - name: Download PHP package artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: php-package-*
          path: dist/php-package
          merge-multiple: true
      - name: Download CLI artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: cli-*
          path: dist/cli
          merge-multiple: false
      - name: Download Go FFI artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: go-ffi-*
          path: dist/go-ffi
          merge-multiple: false
      - name: Download C FFI artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: c-ffi-*
          path: dist/c-ffi
          merge-multiple: false
      - name: Upload PHP PIE packages
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ needs.prepare.outputs.tag }}
        run: scripts/publish/upload-php-pie.sh
        shell: bash
      - name: Upload CLI binaries
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ needs.prepare.outputs.tag }}
        run: scripts/publish/upload-cli-artifacts.sh
        shell: bash
      - name: Upload Go FFI artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ needs.prepare.outputs.tag }}
        run: scripts/publish/upload-go-ffi-artifacts.sh
        shell: bash
      - name: Upload C FFI artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ needs.prepare.outputs.tag }}
        run: scripts/publish/upload-c-ffi-artifacts.sh
        shell: bash
      - name: Create Go module tag
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: scripts/publish/go/create-module-tag.sh "v${VERSION}"
        shell: bash

  # Publishes the library crate, waits for indexing, then publishes the CLI
  # crate. Each crate is skipped when the re-check finds it already published.
  publish-crates:
    name: Publish crates.io packages
    needs: [prepare, cargo-packages, check-cratesio]
    if: ${{ always() && needs.cargo-packages.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-cratesio.outputs.all_exist != 'true' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
      - name: Verify Cargo.toml version matches tag
        env:
          TAG_VERSION: ${{ needs.prepare.outputs.version }}
        run: scripts/publish/crates/verify-cargo-version.sh
        shell: bash
      # The registry is queried again just before publishing.
      - name: Re-check crates.io before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: cratesio
          package: html-to-markdown-rs
          version: ${{ needs.prepare.outputs.version }}
          extra-packages: |
            cli_exists=html-to-markdown-cli
      - name: Publish html-to-markdown-rs
        if: ${{ steps.recheck.outputs.exists != 'true' }}
        env:
          CARGO_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: scripts/publish/crates/publish-rs.sh
        shell: bash
      - name: Wait for indexing
        if: ${{ steps.recheck.outputs.exists != 'true' }}
        run: scripts/publish/crates/wait-for-indexing.sh
        shell: bash
      - name: Publish html-to-markdown-cli
        if: ${{ steps.recheck.outputs.cli_exists != 'true' }}
        env:
          CARGO_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: scripts/publish/crates/publish-cli.sh
        shell: bash

  # Publishes wheels and the sdist from a single `dist` directory.
  # `id-token: write` is granted to this job.
  publish-pypi:
    name: Publish Python packages to PyPI
    needs: [prepare, python-wheels, python-sdist, check-pypi]
    if: ${{ always() && needs.python-wheels.result == 'success' && needs.python-sdist.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-pypi.outputs.exists != 'true' }}
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Download wheel artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: python-wheels-*
          path: dist
          merge-multiple: true
      - name: Download sdist artifact
        uses: actions/download-artifact@v8 # v8
        with:
          name: python-sdist
          path: dist
      - name: List packages to publish
        run: |
          echo "Packages in dist:"
          ls -lh dist/ 2>/dev/null || echo "No packages found"
        shell: bash
      - name: Re-check PyPI before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: pypi
          package: html-to-markdown
          version: ${{ needs.prepare.outputs.version }}
      - name: Publish to PyPI
        if: ${{ steps.recheck.outputs.exists != 'true' }}
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          skip-existing: true

  # NOTE(review): the job-level `if` lets this job run for dry runs, but every
  # publishing step below requires dry_run != 'true', so the `dry-run` input
  # passed to the publish action is never 'true'. Confirm which was intended.
  publish-rubygems:
    name: Publish Ruby gems
    needs: [prepare, ruby-gem, check-rubygems]
    if: ${{ always() && needs.ruby-gem.result == 'success' && needs.prepare.outputs.is_tag == 'true' && (needs.prepare.outputs.dry_run == 'true' || needs.check-rubygems.outputs.exists != 'true') }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Download Ruby gem artifacts
        uses: actions/download-artifact@v8 # v8
        with:
          pattern: rubygems-*
          path: dist
          merge-multiple: true
      - name: Re-check RubyGems before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: rubygems
          package: html-to-markdown
          version: ${{ needs.prepare.outputs.version }}
      - name: Setup Ruby
        if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
        uses: ruby/setup-ruby@v1 # v1
        with:
          ruby-version: "3.3"
          bundler-cache: false
      - name: Update RubyGems
        if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
        run: gem update --system
        shell: bash
      - name: Configure trusted publishing credentials
        if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
        uses: rubygems/configure-rubygems-credentials@v2.0.0 # v2.0.0
      - name: Publish gems
        if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
        uses: kreuzberg-dev/actions/publish-rubygems@v1 # v1
        with:
          gems-dir: dist
          dry-run: ${{ needs.prepare.outputs.dry_run }}
      - name: RubyGems already published summary
        if: ${{ steps.recheck.outputs.exists == 'true' }}
        run: echo "Gem html-to-markdown@${{ needs.prepare.outputs.version }} already published on RubyGems — skipped." >> "$GITHUB_STEP_SUMMARY"
        shell: bash

  # Publishes the Hex package. Because of `always()`, listing a job in `needs`
  # does not by itself require it to succeed, so upload-elixir-release is
  # checked explicitly: the checksum step below is named as reading the GitHub
  # release, which presumably needs the NIF tarballs that job attaches there.
  publish-hex:
    name: Publish Hex package
    needs: [prepare, elixir-package, check-hex, upload-elixir-release]
    if: ${{ always() && needs.elixir-package.result == 'success' && needs.upload-elixir-release.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-hex.outputs.exists != 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Download Hex artifact
        uses: actions/download-artifact@v8 # v8
        with:
          name: elixir-hex-package
          path: dist/elixir
      - name: Upload Elixir package to GitHub Release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ needs.prepare.outputs.tag }}
        run: scripts/publish/upload-elixir-package.sh
        shell: bash
      - name: Setup Elixir
        uses: erlef/setup-beam@v1 # v1
        with:
          elixir-version: "1.19"
          otp-version: "28.1"
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      - name: Install Hex/Rebar
        run: scripts/publish/elixir/install-hex-rebar.sh
        shell: bash
      # The version is passed through `env` and `shell: bash` is explicit,
      # matching the other run steps in this workflow.
      - name: Generate NIF checksums from GitHub release
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: scripts/publish/generate_elixir_checksums.sh "${VERSION}"
        shell: bash
      - name: Install dependencies
        run: scripts/publish/elixir/install-deps.sh
        shell: bash
      # NOTE(review): this directory is html_to_markdown_elixir, while the
      # elixir-natives job builds from html_to_markdown_nif. Confirm both exist.
      - name: Stage Rust core and generate lockfile
        shell: bash
        run: |
          scripts/publish/elixir/stage-rust-core.sh
          pushd packages/elixir/native/html_to_markdown_elixir >/dev/null
          cargo generate-lockfile
          popd >/dev/null
      - name: Re-check Hex.pm before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: hex
          package: html_to_markdown
          version: ${{ needs.prepare.outputs.version }}
      - name: Publish to Hex.pm
        if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
        uses: kreuzberg-dev/actions/publish-hex@v1 # v1
        with:
          package-dir: packages/elixir
          dry-run: ${{ needs.prepare.outputs.dry_run }}
        env:
          HEX_API_KEY: ${{ secrets.HEX_API_KEY }}
      - name: Hex.pm already published summary
        if: ${{ steps.recheck.outputs.exists == 'true' }}
        run: echo "Package html_to_markdown@${{ needs.prepare.outputs.version }} already published on Hex.pm — skipped." >> "$GITHUB_STEP_SUMMARY"
        shell: bash

  # Publishes the .nupkg produced by csharp-package.
  publish-nuget:
    name: Publish NuGet package
    needs: [prepare, csharp-package, check-nuget]
    if: ${{ always() && needs.csharp-package.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Download NuGet artifact
        uses: actions/download-artifact@v8 # v8
        with:
          name: csharp-nuget
          path: dist
      - name: Re-check NuGet before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: nuget
          package: KreuzbergDev.HtmlToMarkdown
          version: ${{ needs.prepare.outputs.version }}
      - name: Publish to NuGet
        if: ${{ steps.recheck.outputs.exists != 'true' }}
        uses: kreuzberg-dev/actions/publish-nuget@v1 # v1
        with:
          packages-dir: dist
          dry-run: ${{ needs.prepare.outputs.dry_run }}
        env:
          NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }}

  publish-packagist:
    name: Publish to Packagist
    runs-on: ubuntu-latest
    needs: [prepare, check-packagist]
    if: |
      always() &&
      needs.prepare.result == 'success' &&
      needs.prepare.outputs.is_tag == 'true' &&
      needs.prepare.outputs.dry_run != 'true' &&
      needs.check-packagist.outputs.exists != 'true'
    steps:
      - name: Checkout
        uses: actions/checkout@v6 # v6
        with:
          ref: ${{ needs.prepare.outputs.ref }}
      - name: Re-check Packagist before publish
        id: recheck
        uses: kreuzberg-dev/actions/check-registry@v1 # v1
        with:
          registry: packagist
          package: kreuzberg-dev/html-to-markdown
          version: ${{ needs.prepare.outputs.version }}
      - name: Trigger Packagist Update
        if: ${{ steps.recheck.outputs.exists != 'true' }}
        uses: kreuzberg-dev/actions/publish-packagist@v1 # v1
        with:
          packagist-username:
kreuzberg-dev package-name: kreuzberg/html-to-markdown version: ${{ needs.prepare.outputs.version }} repository-url: https://github.com/kreuzberg-dev/html-to-markdown dry-run: ${{ needs.prepare.outputs.dry_run }} env: PACKAGIST_API_TOKEN: ${{ secrets.PACKAGIST_API_TOKEN }} java-ffi: name: Build Java native FFI libraries needs: [prepare, check-maven] if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-maven.outputs.exists != 'true' || needs.prepare.outputs.force_republish_java == 'true') }} strategy: fail-fast: false matrix: include: - os: ubuntu-latest platform: linux-x86_64 target: x86_64-unknown-linux-gnu - os: ubuntu-24.04-arm platform: linux-aarch64 target: aarch64-unknown-linux-gnu - os: windows-latest platform: windows-x86_64 target: x86_64-pc-windows-msvc - os: macos-latest platform: osx-aarch64 target: aarch64-apple-darwin runs-on: ${{ matrix.os }} timeout-minutes: 60 permissions: contents: read steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.checkout_ref }} fetch-depth: 0 - name: Ensure target commit if: ${{ needs.prepare.outputs.target_sha != '' }} env: TARGET_SHA: ${{ needs.prepare.outputs.target_sha }} run: scripts/publish/common/ensure-target-commit.sh shell: bash - name: Setup Rust uses: kreuzberg-dev/actions/setup-rust@v1 # v1 - name: Install alef uses: kreuzberg-dev/actions/install-alef@v1 - name: Build and stage FFI library shell: bash run: | alef publish build --lang ffi --target ${{ matrix.target }} mkdir -p dist/java-ffi/${{ matrix.platform }}/native find target/release -maxdepth 1 -type f \( -name '*.so' -o -name '*.dylib' -o -name '*.dll' \) -name '*html_to_markdown_ffi*' -exec cp {} dist/java-ffi/${{ matrix.platform }}/native/ \; - name: Upload FFI artifact uses: actions/upload-artifact@v7 # v7 with: name: java-ffi-${{ matrix.platform }} path: dist/java-ffi retention-days: 14 publish-maven: name: Publish Maven package needs: [prepare, 
check-maven, java-ffi] if: | always() && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-maven.outputs.exists != 'true' || needs.prepare.outputs.force_republish_java == 'true') && needs.java-ffi.result == 'success' runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.ref }} - name: Check Maven Central for existing release id: maven_check uses: kreuzberg-dev/actions/check-registry@v1 with: registry: maven package: "dev.kreuzberg:html-to-markdown" version: ${{ needs.prepare.outputs.version }} - name: Download Java FFI artifacts if: ${{ steps.maven_check.outputs.exists != 'true' }} uses: actions/download-artifact@v8 with: pattern: java-ffi-* path: java-ffi-artifacts merge-multiple: true - name: Setup Rust if: ${{ steps.maven_check.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/setup-rust@v1 - name: Setup Java if: ${{ steps.maven_check.outputs.exists != 'true' }} env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }} MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} uses: actions/setup-java@v5 with: distribution: temurin java-version: '25' cache: maven server-id: ossrh server-username: MAVEN_USERNAME server-password: MAVEN_PASSWORD gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} gpg-passphrase: MAVEN_GPG_PASSPHRASE - name: Setup Maven if: ${{ steps.maven_check.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/setup-maven@v1 - name: Prefer gpg2 binary if: ${{ steps.maven_check.outputs.exists != 'true' }} run: scripts/publish/maven/prefer-gpg2.sh shell: bash - name: Copy native libraries into resources if: ${{ steps.maven_check.outputs.exists != 'true' }} shell: bash run: scripts/publish/java/copy-native-libs.sh java-ffi-artifacts - name: Release Maven package if: ${{ steps.maven_check.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/publish-maven@v1 with: pom-file: packages/java/pom.xml 
maven-profile: publish extra-args: -DskipTests dry-run: ${{ needs.prepare.outputs.dry_run }} env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }} MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - name: Maven already published summary if: ${{ steps.maven_check.outputs.exists == 'true' }} run: echo "Maven package version ${{ needs.prepare.outputs.version }} already published; skipping." >> "$GITHUB_STEP_SUMMARY" publish-node: name: Publish Node packages needs: [prepare, node-bindings, node-typescript-defs, check-npm] if: ${{ always() && needs.node-bindings.result == 'success' && needs.node-typescript-defs.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-npm.outputs.node_exists != 'true' }} runs-on: ubuntu-latest permissions: id-token: write contents: read steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.ref }} - name: Download Node artifacts uses: actions/download-artifact@v8 # v8 with: pattern: node-bindings-* path: node-artifacts merge-multiple: true - name: Download TypeScript definitions uses: actions/download-artifact@v8 # v8 with: name: node-typescript-defs path: typescript-defs - name: Setup Node uses: actions/setup-node@v6 # v6 with: node-version: 24 registry-url: https://registry.npmjs.org/ - name: Update NPM run: npm install -g npm@latest shell: bash - name: Enable corepack run: scripts/common/enable-corepack.sh shell: bash - name: Prepare artifact directory run: scripts/publish/node/prepare-artifact-directory.sh shell: bash - name: Install workspace dependencies if: ${{ needs.prepare.outputs.dry_run != 'true' }} run: scripts/publish/node/install-node-deps.sh shell: bash - name: Pack platform packages run: scripts/publish/node/pack-platform-packages.sh shell: bash - name: Re-check npm before publish id: recheck uses: kreuzberg-dev/actions/check-registry@v1 # v1 with: registry: npm package: 
"@kreuzberg/html-to-markdown" version: ${{ needs.prepare.outputs.version }} extra-packages: | ts_exists=@kreuzberg/html-to-markdown - name: Publish native binary packages if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/publish-npm@v1 # v1 with: packages-dir: crates/html-to-markdown-node/npm dry-run: ${{ needs.prepare.outputs.dry_run }} env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Wait for npm indexing (x64) if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/wait-for-package@v1 # v1 with: registry: npm package: "@kreuzberg/html-to-markdown-linux-x64-gnu" version: ${{ needs.prepare.outputs.version }} max-attempts: "25" - name: Wait for npm indexing (arm64) if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/wait-for-package@v1 # v1 with: registry: npm package: "@kreuzberg/html-to-markdown-linux-arm64-gnu" version: ${{ needs.prepare.outputs.version }} max-attempts: "25" - name: Prepare main Node package metadata if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }} run: scripts/publish/node/prepublish-main-package.sh crates/html-to-markdown-node shell: bash - name: Publish main Node package if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/publish-npm@v1 # v1 with: package-dir: crates/html-to-markdown-node dry-run: ${{ needs.prepare.outputs.dry_run }} env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Wait for main Node package indexing if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' && steps.recheck.outputs.ts_exists != 'true' }} uses: kreuzberg-dev/actions/wait-for-package@v1 # v1 with: registry: npm package: "@kreuzberg/html-to-markdown" version: ${{ needs.prepare.outputs.version }} 
max-attempts: "25" - name: Install TypeScript wrapper dependencies from npm if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }} working-directory: packages/typescript run: pnpm install --no-frozen-lockfile shell: bash - name: Build TypeScript wrapper package if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }} run: scripts/publish/typescript/build-package.sh shell: bash - name: Publish TypeScript wrapper package if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }} uses: kreuzberg-dev/actions/publish-npm@v1 # v1 with: package-dir: packages/typescript dry-run: ${{ needs.prepare.outputs.dry_run }} env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} publish-wasm: name: Publish WASM package needs: [prepare, wasm-bindings, check-npm] if: ${{ always() && needs.wasm-bindings.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-npm.outputs.wasm_exists != 'true' || needs.prepare.outputs.force_republish_wasm == 'true') }} runs-on: ubuntu-latest permissions: id-token: write contents: read steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.ref }} - name: Download WASM artifacts uses: actions/download-artifact@v8 # v8 with: name: wasm-bundles path: wasm-artifacts - name: Extract WASM artifacts run: scripts/publish/wasm/extract-artifacts.sh shell: bash - name: Remove .gitignore files from dist directories run: | rm -f crates/html-to-markdown-wasm/dist/.gitignore rm -f crates/html-to-markdown-wasm/dist-node/.gitignore rm -f crates/html-to-markdown-wasm/dist-web/.gitignore shell: bash - name: Setup Node uses: actions/setup-node@v6 # v6 with: node-version: 24 registry-url: https://registry.npmjs.org/ - name: Update NPM run: npm install -g npm@latest shell: bash - name: Re-check npm before publish id: recheck uses: kreuzberg-dev/actions/check-registry@v1 
# v1 with: registry: npm package: "@kreuzberg/html-to-markdown-wasm" version: ${{ needs.prepare.outputs.version }} - name: Publish WASM package if: ${{ steps.recheck.outputs.exists != 'true' }} uses: kreuzberg-dev/actions/publish-npm@v1 # v1 with: package-dir: crates/html-to-markdown-wasm dry-run: ${{ needs.prepare.outputs.dry_run }} env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} homebrew-bottles: name: Build Homebrew bottles (${{ matrix.bottle_tag }}) needs: [prepare, check-homebrew] if: | needs.prepare.outputs.is_tag == 'true' && (needs.check-homebrew.outputs.exists != 'true') runs-on: ${{ matrix.runner }} timeout-minutes: 180 permissions: contents: write strategy: fail-fast: false matrix: include: - runner: macos-latest bottle_tag: arm64_sequoia - runner: macos-15-intel bottle_tag: sequoia - runner: ubuntu-latest bottle_tag: x86_64_linux - runner: ubuntu-24.04-arm bottle_tag: arm64_linux steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.checkout_ref }} fetch-depth: 0 - name: Ensure target commit if: ${{ needs.prepare.outputs.target_sha != '' }} run: git checkout --progress --force ${{ needs.prepare.outputs.target_sha }} - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Setup Homebrew run: | brew tap homebrew/core brew update - name: Extract version id: version env: TAG: ${{ needs.prepare.outputs.tag }} run: | VERSION="${TAG#v}" echo "version=${VERSION}" >> "$GITHUB_OUTPUT" - name: Build CLI for bottle run: | cargo build --release \ -p html-to-markdown-cli - name: Create Homebrew bottle id: bottle env: VERSION: ${{ steps.version.outputs.version }} TAG: ${{ needs.prepare.outputs.tag }} run: | # Homebrew bottles require {formula}/{version}/ prefix in the tarball bottle_root="/tmp/html-to-markdown-bottle" bottle_dir="${bottle_root}/html-to-markdown/${VERSION}" mkdir -p "${bottle_dir}/bin" # Copy the built binary cp target/release/html-to-markdown "${bottle_dir}/bin/" # Create bottle tarball with 
correct prefix cd "${bottle_root}" bottle_filename="html-to-markdown-${VERSION}.${{ matrix.bottle_tag }}.bottle.tar.gz" tar -czf "${bottle_filename}" html-to-markdown/ # Calculate SHA256 sha256=$(shasum -a 256 "${bottle_filename}" | cut -d' ' -f1) echo "sha256=${sha256}" >> "$GITHUB_OUTPUT" echo "filename=${bottle_filename}" >> "$GITHUB_OUTPUT" # Copy to workspace for artifact upload cp "${bottle_filename}" "${{ github.workspace }}/" echo "Bottle created: ${bottle_filename}" echo "SHA256: ${sha256}" - name: Verify bottle file in workspace run: | cd "${{ github.workspace }}" ls -lh html-to-markdown-*.bottle.tar.gz echo "Files in workspace:" ls -la shell: bash - name: Upload bottle artifact uses: actions/upload-artifact@v7 # v7 with: name: homebrew-bottle-${{ matrix.bottle_tag }} path: html-to-markdown-${{ steps.version.outputs.version }}.${{ matrix.bottle_tag }}.bottle.tar.gz retention-days: 14 if-no-files-found: error upload-homebrew-bottles: name: Upload Homebrew bottles to GitHub Release needs: [prepare, check-homebrew, homebrew-bottles] if: | always() && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-homebrew.outputs.exists != 'true') && needs.homebrew-bottles.result == 'success' runs-on: ubuntu-latest permissions: contents: write steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.ref }} - name: Ensure GitHub release exists env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: scripts/publish/ensure-github-release-exists.sh "${{ needs.prepare.outputs.tag }}" - name: Download bottle artifacts uses: actions/download-artifact@v8 # v8 with: pattern: homebrew-bottle-* path: dist/homebrew merge-multiple: true - name: Verify downloaded artifacts run: | echo "Contents of dist/homebrew:" ls -laR dist/homebrew || echo "dist/homebrew not found" echo "All dist contents:" ls -laR dist || echo "dist not found" shell: bash - name: Upload bottles (idempotent) env: GH_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} run: scripts/publish/upload-homebrew-bottles.sh "${{ needs.prepare.outputs.tag }}" dist/homebrew publish-homebrew: name: Update Homebrew formula needs: [prepare, check-homebrew, upload-homebrew-bottles] if: | always() && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-homebrew.outputs.exists != 'true') && needs.upload-homebrew-bottles.result == 'success' runs-on: ubuntu-latest permissions: contents: write steps: - name: Checkout uses: actions/checkout@v6 # v6 with: ref: ${{ needs.prepare.outputs.checkout_ref }} - name: Download bottle artifacts uses: actions/download-artifact@v8 # v8 with: pattern: homebrew-bottle-* path: dist/homebrew merge-multiple: true - name: Setup Git credentials env: GH_TOKEN: ${{ secrets.HOMEBREW_TOKEN }} run: | git config --global credential.helper store echo "https://x-access-token:${GH_TOKEN}@github.com" > ~/.git-credentials git config --global user.name "html-to-markdown-bot" git config --global user.email "bot@kreuzberg.dev" - name: Update Homebrew formula with bottles uses: kreuzberg-dev/actions/publish-homebrew@v1 # v1 with: bottles-dir: dist/homebrew formula-name: html-to-markdown tap-repo: kreuzberg-dev/homebrew-tap tag: ${{ needs.prepare.outputs.tag }} version: ${{ needs.prepare.outputs.version }} github-repo: kreuzberg-dev/html-to-markdown dry-run: ${{ needs.prepare.outputs.dry_run }} ================================================ FILE: .github/workflows/validate-issues.yml ================================================ name: Validate Issues on: issues: types: [opened, edited] jobs: validate: uses: kreuzberg-dev/actions/.github/workflows/reusable-validate-issues.yml@v1 secrets: inherit ================================================ FILE: .github/workflows/validate-pr.yml ================================================ name: Validate PR on: pull_request: types: [opened, edited, synchronize] jobs: validate: uses: 
kreuzberg-dev/actions/.github/workflows/reusable-validate-pr.yml@v1 secrets: inherit ================================================ FILE: .gitignore ================================================ # Python __pycache__/ *.py[cod] *$py.class *.so .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg .pytest_cache/ .mypy_cache/ .ruff_cache/ htmlcov/ .coverage .coverage.* coverage.lcov # Rust target/ Cargo.lock rust-coverage.lcov *.node examples/**/.cargo/config.toml # Node.js / TypeScript node_modules/ **/node_modules/ dist/ dist-node/ dist-web/ *.tsbuildinfo .pnpm-debug.log packages/html-to-markdown-ts/bin/ # Ruby gem build outputs !packages/ruby/lib/ !packages/ruby/lib/**/*.rb packages/ruby/lib/bin/ packages/ruby/lib/*.bundle packages/ruby/tmp/ packages/ruby/vendor/html-to-markdown-rs/ packages/ruby/vendor/Cargo.toml packages/php-ext/workspace/ !packages/elixir/lib/ !packages/elixir/lib/**/*.ex erl_crash.dump # R package build artifacts !packages/r/R/ !packages/r/R/**/*.R packages/r/src/*.o packages/r/src/*.so packages/r/src/*.dll packages/r/src/*.dylib packages/r/*.tar.gz # Elixir test application dependencies and builds tests/test_apps/elixir/deps/ tests/test_apps/elixir/_build/ # Example dependency directories vendor/ **/vendor/ **/vendor/bundle/ .wrangler/ # IDEs & AI tool configs .vscode/ .idea/ *.swp *.swo *~ .cursorrules .windsurfrules # MkDocs site/ # OS .DS_Store Thumbs.db # Benchmarks .benchmarks/ benchmark-harness-results-*/ tools/runtime-bench/results/ tools/benchmark-harness/results/ tools/benchmark-harness/artifacts/ tools/benchmark-harness/artifacts-*/ tools/benchmark-harness/results-consolidated/ tools/benchmark-harness/results-local-*/ artifacts/ # Cache files *.cache .cache/ packages/php/.php-cs-fixer.cache # Temporary files .tmp/ [Tt][Oo][Dd][Oo]* # Environment & virtualenvs .env .env.local .venv/ **/.venv/ # C# / .NET bin/ obj/ *.dll *.exe *.pdb # Allow benchmark 
harness entrypoint under packages/go/v2/bin !packages/go/v2/bin/ !packages/go/v2/bin/benchmark.go # C FFI test binaries and build artifacts crates/html-to-markdown-ffi/tests/c/test_* !crates/html-to-markdown-ffi/tests/c/test_*.c crates/html-to-markdown-ffi/tests/c/*.o crates/html-to-markdown-ffi/tests/c/*.dSYM/ # Additional generated artifacts .remote-cache/ .alef/ .gemini/ GEMINI.md *.pyd vendor/bundle/ *.h.bak *.test *.class *.nupkg pkg/ .gems/ # BEGIN ai-rulez (DO NOT EDIT - managed by ai-rulez) .agents/ .claude/ .codex/ .cursor/ .github/agents/ .github/commands/ .github/copilot-instructions.md .github/skills/ .mcp.json AGENTS.md CLAUDE.md # END ai-rulez ================================================ FILE: .gitmodules ================================================ [submodule "homebrew-tap"] path = homebrew-tap url = https://github.com/Goldziher/homebrew-tap.git ================================================ FILE: .golangci.yml ================================================ version: "2" run: timeout: 5m issues-exit-code: 1 tests: true concurrency: 4 modules-download-mode: readonly allow-serial-runners: false allow-parallel-runners: true linters: default: none enable: - errcheck - govet - ineffassign - staticcheck - unused - revive - gocyclo - goconst - gocritic - gosec - misspell - nakedret settings: errcheck: check-type-assertions: true check-blank: true exclude-functions: - (net/http.ResponseWriter).Write - (io.Closer).Close - fmt.Fprintf - fmt.Printf - fmt.Println - os.Setenv - os.Unsetenv goconst: min-len: 3 min-occurrences: 3 gocyclo: min-complexity: 25 gosec: excludes: - G101 # ~keep hardcoded credentials check (too many false positives) govet: enable-all: true disable: - shadow misspell: locale: US nakedret: max-func-lines: 30 revive: confidence: 0.8 severity: warning enable-all-rules: false rules: - name: blank-imports - name: context-keys-type - name: time-naming - name: var-declaration - name: unexported-return - name: errorf - name: 
context-as-argument - name: dot-imports - name: error-return - name: error-strings - name: error-naming - name: if-return - name: increment-decrement - name: var-naming - name: range - name: receiver-naming - name: indent-error-flow - name: exported disabled: true - name: package-comments disabled: true exclusions: generated: lax rules: - linters: - goconst path: _test\.go - linters: - gocyclo path: _test\.go - linters: - gosec path: _test\.go - linters: - revive path: _test\.go text: "context-as-argument" - linters: - goconst - revive - errcheck - govet path: _test\.go text: "unusedwrite:" - linters: - govet text: "fieldalignment:" - linters: - errcheck path: _test\.go paths: - vendor - build - deployments - third_party$ - builtin$ - examples$ issues: max-issues-per-linter: 0 max-same-issues: 0 uniq-by-line: true new: false exclude: - "Error return value of `\\(\\*github\\.com/goccy/go-json\\.Encoder\\)\\.Encode` is not checked" - "Error return value of `w\\.Write` is not checked" - "Error return value of `resp\\.Body\\.Close` is not checked" - "Error return value of `res\\.Body\\.Close` is not checked" - "Error return value of `r\\.Body\\.Read` is not checked" - "Error return value of `os\\.Setenv` is not checked" - "Error return value of `os\\.Unsetenv` is not checked" - 'shadow: declaration of "err" shadows declaration' - "unusedwrite: unused write to field" - "Error return value of `c\\.provider\\.Delete` is not checked" - "Error return value of `provider\\.Close` is not checked" - "Error return value of `natsClient\\.Close` is not checked" - "Error return value of `cacheProvider\\.Close` is not checked" - "Error return value of `processor\\.Close` is not checked" - "Error return value of `sub\\.Unsubscribe` is not checked" - "Error return value of `json\\.Marshal` is not checked" - "Error return value of `strconv\\." 
- "Error return value of `fmt\\.Sscanf` is not checked" - "Error return value is not checked" formatters: exclusions: generated: lax paths: - third_party$ - builtin$ - examples$ ================================================ FILE: .mailmap ================================================ Na'aman Hirschfeld Na'aman Hischfeld Na'aman Hirschfeld Test User ================================================ FILE: .markdownlint.yaml ================================================ default: true MD007: indent: 4 MD033: false MD041: false MD013: false MD014: false MD024: siblings_only: true MD046: false ================================================ FILE: .mvn/wrapper/MavenWrapperDownloader.java ================================================ /* * Copyright 2007-present the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.net.*; import java.io.*; import java.nio.channels.*; import java.util.Properties; public class MavenWrapperDownloader { private static final String WRAPPER_VERSION = "3.3.4"; /** * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. */ private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/" + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; /** * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to * use instead of the default one. 
*/ private static final String MAVEN_WRAPPER_PROPERTIES_PATH = ".mvn/wrapper/maven-wrapper.properties"; /** * Path where the maven-wrapper.jar will be saved to. */ private static final String MAVEN_WRAPPER_JAR_PATH = ".mvn/wrapper/maven-wrapper.jar"; /** * Name of the property which should be used to override the default download url for the wrapper. */ private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; public static void main(String args[]) { System.out.println("- Downloader started"); File baseDirectory = new File(args[0]); System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); String url = DEFAULT_DOWNLOAD_URL; if(mavenWrapperPropertyFile.exists()) { FileInputStream mavenWrapperPropertyFileInputStream = null; try { mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); Properties mavenWrapperProperties = new Properties(); mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); } catch (IOException e) { System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); } finally { try { if(mavenWrapperPropertyFileInputStream != null) { mavenWrapperPropertyFileInputStream.close(); } } catch (IOException e) { } } } System.out.println("- Downloading from: " + url); File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); if(!outputFile.getParentFile().exists()) { if(!outputFile.getParentFile().mkdirs()) { System.out.println( "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); } } System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); try { downloadFileFromURL(url, outputFile); System.out.println("Done"); System.exit(0); } catch (Throwable e) { System.out.println("- Error downloading"); e.printStackTrace(); System.exit(1); } } 
/**
 * Downloads the resource at {@code urlString} and writes it to {@code destination}.
 *
 * <p>When both the {@code MVNW_USERNAME} and {@code MVNW_PASSWORD} environment variables are
 * set, they are installed as the default {@code Authenticator} before the URL stream is opened.
 *
 * @param urlString   URL to download from
 * @param destination file the downloaded bytes are written to
 * @throws Exception if the URL cannot be parsed or opened, or the transfer fails
 */
private static void downloadFileFromURL(String urlString, File destination) throws Exception {
    String username = System.getenv("MVNW_USERNAME");
    String passwordValue = System.getenv("MVNW_PASSWORD");
    if (username != null && passwordValue != null) {
        char[] password = passwordValue.toCharArray();
        Authenticator.setDefault(new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(username, password);
            }
        });
    }
    URL website = new URL(urlString);
    // Resources are closed in reverse declaration order (file first, then the network
    // channel), on both the success and the failure path.
    try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
            FileOutputStream fos = new FileOutputStream(destination)) {
        fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
    }
}
}
================================================ FILE: .mvn/wrapper/maven-wrapper.properties ================================================ distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.3.4/maven-wrapper-3.3.4.jar maven.mainClass=org.apache.maven.cli.MavenCli ================================================ FILE: .php-cs-fixer.dist.php ================================================ setRiskyAllowed(false) ->setRules([ '@auto' => true ]) // 💡 by default, Fixer looks for `*.php` files excluding `./vendor/` - here, you can groom this config ->setFinder( (new Finder()) // 💡 root folder to check ->in(__DIR__) // 💡 additional files, eg bin entry file // ->append([__DIR__.'/bin-entry-file']) // 💡 folders to exclude, if any // ->exclude([/* ... */]) // 💡 path patterns to exclude, if any // ->notPath([/* ... 
*/]) // 💡 extra configs // ->ignoreDotFiles(false) // true by default in v3, false in v4 or future mode // ->ignoreVCS(true) // true by default ) ; ================================================ FILE: .pre-commit-config.yaml ================================================ default_install_hook_types: - pre-commit - commit-msg exclude: ^docs/snippets/|vendor/|node_modules/|target/|dist/|artifacts/|scripts/ci/|\.cache/|rust-vendor/|\.venv/ repos: # AI-Rulez: auto-generate AI assistant configuration files - repo: https://github.com/Goldziher/ai-rulez rev: v4.1.5 hooks: - id: ai-rulez-generate # Commit message linting - repo: https://github.com/Goldziher/gitfluff rev: v0.8.0 hooks: - id: gitfluff-lint args: ["--write"] stages: [commit-msg] # General file checks - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: trailing-whitespace exclude: \.github/copilot-instructions\.md - id: end-of-file-fixer exclude: \.github/copilot-instructions\.md - id: check-merge-conflict - id: check-added-large-files exclude: uv.lock - id: detect-private-key - id: check-json exclude: tsconfig\.base\.json - id: check-yaml args: ["--allow-multiple-documents", "--unsafe"] - id: check-toml - id: check-case-conflict # TOML formatting - repo: https://github.com/tox-dev/pyproject-fmt rev: "v2.21.1" hooks: - id: pyproject-fmt - repo: https://github.com/DevinR528/cargo-sort rev: "v2.1.4" hooks: - id: cargo-sort args: [-w] # Python: ruff (linting + formatting) + mypy (type checking) - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.15.12 hooks: - id: ruff args: [--fix] - id: ruff-format # Rust: formatting and linting (core workspace only — bindings handled by alef) - repo: https://github.com/AndrejOrsula/pre-commit-cargo rev: 0.5.0 hooks: - id: cargo-fmt args: ["--all"] - id: cargo-clippy args: [ "--fix", "--allow-dirty", "--allow-staged", "--workspace", "--exclude=html-to-markdown-php", "--exclude=html-to-markdown-py", "--exclude=html-to-markdown-node", 
"--exclude=html-to-markdown-e2e-rust", "--all-features", "--all-targets", "--", "-D", "warnings", ] - repo: https://github.com/bnjbvr/cargo-machete rev: v0.9.2 hooks: - id: cargo-machete args: [crates/] exclude: ^(e2e/|test_apps/) - repo: https://github.com/EmbarkStudios/cargo-deny rev: 0.19.4 hooks: - id: cargo-deny args: ["check"] # Node/TS/WASM: oxlint - repo: https://github.com/oxc-project/mirrors-oxlint rev: v1.62.0 hooks: - id: oxlint args: ["--fix"] exclude: ^(docs/demo/|e2e/) # cppcheck — kept upstream until shared repo ships it - repo: https://github.com/pocc/pre-commit-hooks rev: v1.3.5 hooks: - id: cppcheck args: [ "--std=c11", "--enable=warning,style,performance", "--suppress=missingIncludeSystem", "--suppress=unusedStructMember", "--suppress=normalCheckLevelMaxBranches", ] files: ^crates/html-to-markdown-ffi/tests/c/ # Markdown linting - repo: https://github.com/rvben/rumdl-pre-commit rev: "v0.1.86" hooks: - id: rumdl-fmt exclude: 'test_documents/|\.ai-rulez/|\.remote-cache/|e2e/|fixtures/|test_apps/|\.github/copilot-instructions\.md|CLAUDE\.md|\.claude/|\.agents/|\.codex/' # Shared kreuzberg-dev polyglot hooks (shell, C/C++, Java checkstyle, Go, Python, Ruby, C#, PHP, Elixir) - repo: https://github.com/kreuzberg-dev/pre-commit-hooks rev: v0.1.0 hooks: - id: shfmt args: ["-w", "-i", "2"] - id: shellcheck - id: clang-format args: ["--style=file"] files: ^crates/html-to-markdown-ffi/tests/c/ - id: clang-tidy files: ^crates/html-to-markdown-ffi/tests/c/ - id: checkstyle args: ["-c", "packages/java/checkstyle.xml", "-p", "packages/java/checkstyle.properties"] exclude: ^(\.mvn/wrapper/|e2e/|test_apps/|packages/java/src/) - id: mypy exclude: "e2e/|tests/|scripts/" - id: go-fmt exclude: ^(e2e/|test_apps/) - id: golangci-lint exclude: ^(e2e/|test_apps/) env: KREUZBERG_GO_MOD_DIRS: "packages/go" - id: govulncheck exclude: ^(e2e/|test_apps/) - id: rubocop files: ^packages/ruby/.*\.rb$ exclude: ^packages/ruby/ext/ - id: rubocop-lint files: 
^packages/ruby/.*\.(rb|rbs)$ exclude: ^packages/ruby/ext/ - id: steep files: ^packages/ruby/.*\.(rb|rbs)$ exclude: ^packages/ruby/ext/ - id: dotnet-format files: ^packages/csharp/.*\.cs$ - id: dotnet-format-check files: ^packages/csharp/.*\.cs$ - id: php-cs-fixer files: ^packages/php/.*\.php$ - id: phpstan files: ^packages/php/(src|tests|stubs)/.*\.php$ args: ["analyse", "--no-progress", "--configuration", "packages/php/phpstan.neon"] - id: mix-format files: ^packages/elixir/ - id: mix-credo files: ^packages/elixir/ - id: java-verify files: ^packages/java/ # Alef: verify bindings and sync versions - repo: https://github.com/kreuzberg-dev/alef rev: v0.13.6 hooks: - id: alef-verify - id: alef-sync-versions # GitHub Actions: linting - repo: https://github.com/rhysd/actionlint rev: v1.7.12 hooks: - id: actionlint # Java cpd — kept upstream (not yet in shared repo) - repo: https://github.com/gherynos/pre-commit-java rev: v0.6.37 hooks: - id: cpd exclude: ^(\.mvn/wrapper/|e2e/|test_apps/|packages/java/src/) # Spelling (last — runs after all formatters and generators) - repo: https://github.com/crate-ci/typos rev: v1.46.0 hooks: - id: typos args: [--force-exclude] ================================================ FILE: .ruby-version ================================================ 3.4.8 ================================================ FILE: .rumdl.toml ================================================ # rumdl — Rust-based markdown linter # https://github.com/rvben/rumdl respect-gitignore = true exclude = ["node_modules", "target", "dist", "vendor"] # MD013: Disable line-length enforcement (tables and code blocks can be long) # MD041: Don't require first line to be an H1 # MD046: Disable code block style — MkDocs tabs/admonitions indent fenced # blocks, which rumdl misidentifies as indented code blocks # MD051: Disable cross-file link fragment checking (incompatible with MkDocs # HTML processing — MkDocs strips tags from heading IDs) # MD013: Line length (tables/code can be 
long) # MD033: Inline HTML (MkDocs uses HTML extensively) # MD036: Emphasis as heading (intentional style in docs/READMEs) # MD041: First line H1 not required # MD046: Code block style (MkDocs tabs indent fenced blocks) # MD051: Link fragment checking (incompatible with MkDocs anchor generation) # MD076: Blank lines between list items (intentional formatting in READMEs) disable = [ "MD012", "MD013", "MD024", "MD033", "MD036", "MD041", "MD046", "MD051", "MD076", ] # MD024: Allow duplicate heading names if they are not siblings [MD024] siblings_only = true ================================================ FILE: .sdkmanrc ================================================ java=25.0.2-tem maven=3.9.9 ================================================ FILE: .task/README.md ================================================ # .task/ Directory - Modular Task Organization This directory contains the modular Task configuration for the html-to-markdown project, following the **Kreuzberg pattern** for maintainable, scalable build automation. ## Purpose The `.task/` directory structure reduces the root `Taskfile.yaml` from 838 lines to ~250 lines (about a 70% reduction) by organizing tasks into logical modules. 
This approach: - **Improves Maintainability**: Each language/workflow lives in its own file - **Enables Reusability**: Common patterns defined once, reused everywhere - **Simplifies Testing**: Test individual modules independently - **Supports Cross-Platform**: Platform-specific logic isolated in config/ - **Scales Gracefully**: Adding new languages doesn't bloat the root Taskfile ## Directory Structure ```text .task/ ├── config/ │ ├── vars.yml # Global variables, version detection, paths │ └── platforms.yml # OS/arch detection, library extensions, target triples │ ├── languages/ # Language-specific task modules (11 total) │ ├── rust.yml # Rust core library tasks │ ├── python.yml # PyO3 Python bindings │ ├── node.yml # NAPI-RS Node.js bindings │ ├── typescript.yml # TypeScript wrapper package │ ├── wasm.yml # WebAssembly bindings │ ├── ruby.yml # Magnus Ruby bindings │ ├── php.yml # ext-php-rs PHP extension │ ├── go.yml # Go FFI wrapper │ ├── java.yml # Java JNI bindings │ ├── csharp.yml # C# P/Invoke wrapper │ └── elixir.yml # Elixir NIF bindings │ ├── workflows/ # Aggregated workflow tasks (internal) │ ├── build.yml # Build all languages with profile support │ ├── test.yml # Test all languages (parallel/sequential) │ └── lint.yml # Lint all languages with auto-fix │ └── tools/ # Utility and automation tasks ├── version-sync.yml # Version synchronization across manifests ├── general.yml # TOML formatting, shell linting └── pre-commit.yml # Prek pre-commit hook management (future) ``` ## Configuration Files ### `config/vars.yml` **Purpose**: Global variables shared across all task modules. 
**Key Variables**: ```yaml VERSION: # Extracted from Cargo.toml BUILD_PROFILE: # dev/release/ci (default: release) OS: # darwin/linux/windows ARCH: # x86_64/arm64/armv7 NUM_CPUS: # Detected CPU count for parallel builds ROOT: # Project root directory CRATES_DIR: # crates/ directory PACKAGES_DIR: # packages/ directory TARGET_DIR: # target/ directory (Rust build outputs) ``` **Example Usage**: ```yaml # In any language module: dir: "{{.PACKAGES_DIR}}/python" cmds: - cargo build --profile {{.BUILD_PROFILE}} ``` ### `config/platforms.yml` **Purpose**: Platform-specific detection and configuration. **Key Variables**: ```yaml EXE_EXT: # .exe on Windows, empty on Unix LIB_EXT: # dylib/so/dll based on OS LIB_PREFIX: # lib on Unix, empty on Windows RUST_TARGET: # Target triple (x86_64-apple-darwin, etc.) RUBY_FULL_PATH: # Full path to Ruby binary (handles Homebrew ARM64) IS_WINDOWS: # Boolean: true on Windows IS_MACOS: # Boolean: true on macOS IS_LINUX: # Boolean: true on Linux ``` **Example Usage**: ```yaml # Cross-platform library path configuration: env: LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}' PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}' ``` ## Language Modules Each language module follows a **consistent pattern**: ### Standard Tasks (All Languages) ```yaml install: # Install dependencies/toolchain build: # Build with profile support (uses BUILD_PROFILE) build:dev: # Debug build (fast, unoptimized) build:release: # Release build (optimized) build:ci: # CI build (release + debug symbols) test: # Run tests test:ci: # Run tests with coverage (CI mode) coverage: # Generate coverage reports (lcov format) lint: # Lint + auto-fix (format + linters) lint:check: # Check-only (no modifications, for CI) format: # Format code format:check: # Check formatting without changes update: # Update dependencies clean: # Remove build artifacts ``` ### Example: `languages/python.yml` ```yaml version: "3" internal: true includes: 
platforms: ../config/platforms.yml vars: BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}" PYTHON_WORK_DIR: "{{.PACKAGES_DIR}}/python" tasks: install: desc: "Install Python dependencies with uv" dir: "{{.PYTHON_WORK_DIR}}" cmds: - uv sync - uv pip install -e . build: desc: "Build Python bindings with maturin ({{.BUILD_PROFILE}} profile)" dir: "{{.PYTHON_WORK_DIR}}" cmds: - maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}} test: desc: "Run Python tests with pytest" dir: "{{.PYTHON_WORK_DIR}}" cmds: - pytest -v tests/ # ... additional tasks ``` ### Cross-Platform Patterns **DO Use** (Cross-Platform Compatible): ```yaml # Python for file operations: - cmd: | python -c " import shutil, glob for d in ['build', 'dist']: shutil.rmtree(d, ignore_errors=True) " # Conditional environment variables: env: LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}' PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}' # Task's built-in ignore_error: - cmd: some-command-that-might-fail ignore_error: true ``` **DON'T Use** (Platform-Specific): ```yaml # ❌ Unix-only commands: - rm -rf build/ dist/ - find . -name "*.pyc" -delete # ❌ Hardcoded paths: - /opt/homebrew/bin/ruby - C:\Program Files\Tool\bin # ❌ Bash-specific syntax: - cmd: test -d .venv && source .venv/bin/activate ``` ## Workflow Modules Workflow modules aggregate language tasks into unified operations. These are **internal** (not exposed to users directly). ### `workflows/build.yml` ```yaml version: "3" internal: true tasks: all: desc: "Build all language bindings" cmds: - task: rust:build - task: python:build - task: node:build # ... (11 languages) all:dev: desc: "Build all in debug mode" cmds: - task: rust:build:dev - task: python:build:dev # ... core: desc: "Build Rust core only" cmds: - task: rust:build bindings: desc: "Build all bindings (skip core)" cmds: - task: python:build - task: node:build # ... 
(exclude rust) ``` ### `workflows/test.yml` ```yaml version: "3" internal: true tasks: all: desc: "Run all tests" cmds: - task: rust:test - task: python:test # ... (sequential) all:parallel: desc: "Run tests in parallel" deps: - rust:test - python:test # ... (parallel execution) all:ci: desc: "Run CI tests with coverage" cmds: - task: rust:test:ci - task: python:test:ci # ... ``` ## Tools Modules ### `tools/version-sync.yml` **Purpose**: Synchronize version across all package manifests. ```yaml version: "3" tasks: sync: desc: "Sync version from Cargo.toml to all manifests" cmds: - python {{.ROOT}}/scripts/sync_versions.py ``` **Updates**: - Cargo workspace members (crates/*/Cargo.toml) - Python (packages/python/pyproject.toml) - Node.js (packages/typescript/package.json) - Ruby (packages/ruby/lib/html_to_markdown/version.rb) - PHP (packages/php/composer.json) - Go (packages/go/v3/version.go) - Java (packages/java/pom.xml) - C# (packages/csharp/HtmlToMarkdown.csproj) - Elixir (packages/elixir/mix.exs) - **test_apps manifests** (tests/test_apps/*/pyproject.toml, package.json, etc.) ### `tools/general.yml` **Purpose**: General-purpose linting and validation tasks. 
```yaml version: "3" tasks: toml:format: desc: "Format TOML files" cmds: - taplo format **/*.toml toml:format:check: desc: "Check TOML formatting" cmds: - taplo format --check **/*.toml ``` ## How to Add a New Language Let's add **Swift** as an example: ### Step 1: Create Language Module **File**: `.task/languages/swift.yml` ```yaml version: "3" internal: true includes: platforms: ../config/platforms.yml vars: BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}" SWIFT_WORK_DIR: "{{.PACKAGES_DIR}}/swift" tasks: install: desc: "Install Swift dependencies" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift package resolve build: desc: "Build Swift package ({{.BUILD_PROFILE}} profile)" dir: "{{.SWIFT_WORK_DIR}}" cmds: - cmd: swift build{{if eq .BUILD_PROFILE "release"}} -c release{{else}} -c debug{{end}} ignore_error: false build:dev: desc: "Build Swift package in debug mode" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift build -c debug build:release: desc: "Build Swift package in release mode" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift build -c release test: desc: "Run Swift tests" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift test test:ci: desc: "Run Swift tests with coverage (CI mode)" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift test --enable-code-coverage lint: desc: "Lint Swift code with auto-fix" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swiftlint --fix - swiftformat . lint:check: desc: "Lint Swift code without auto-fix" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swiftlint - swiftformat --lint . format: desc: "Format Swift code" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swiftformat . format:check: desc: "Check Swift formatting" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swiftformat --lint . update: desc: "Update Swift dependencies" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift package update clean: desc: "Clean Swift build artifacts" dir: "{{.SWIFT_WORK_DIR}}" cmds: - swift package clean ``` ### Step 2: Include in Root Taskfile **File**: `Taskfile.yml` ```yaml includes: # ... 
existing includes swift: taskfile: .task/languages/swift.yml ``` ### Step 3: Add to Workflow Aggregators **File**: `.task/workflows/build.yml` ```yaml tasks: all: cmds: - task: rust:build # ... existing languages - task: swift:build # ADD THIS ``` **File**: `.task/workflows/test.yml` ```yaml tasks: all: cmds: - task: rust:test # ... existing languages - task: swift:test # ADD THIS ``` **File**: `.task/workflows/lint.yml` ```yaml tasks: all: cmds: - task: rust:lint # ... existing languages - task: swift:lint # ADD THIS ``` ### Step 4: Update Root Taskfile Aggregates **File**: `Taskfile.yml` ```yaml tasks: setup: cmds: - task: rust:install # ... existing installs - task: swift:install # ADD THIS ``` Now users can run: ```bash task swift:build task swift:test task swift:lint ``` ## Internal vs Public Tasks ### Internal Tasks Defined with `internal: true` at the file level: ```yaml version: "3" internal: true # This file's tasks are not listed in `task --list` ``` **Characteristics**: - Not visible in `task --list` - Only callable from other tasks - Used for: config files, workflow aggregators **Examples**: - `.task/config/vars.yml` (internal) - `.task/workflows/build.yml` (internal) - `.task/workflows/test.yml` (internal) ### Public Tasks Included without `internal: true` or via root Taskfile: ```yaml includes: rust: taskfile: .task/languages/rust.yml # No internal flag = public ``` **Characteristics**: - Visible in `task --list` - Directly callable by users - Used for: language modules, tool modules **Examples**: - `rust:build` (public) - `python:test` (public) - `version:sync` (public) ## Best Practices ### 1. Always Use Template Variables ```yaml # ✅ Good: dir: "{{.PACKAGES_DIR}}/python" cmds: - cargo build --profile {{.BUILD_PROFILE}} # ❌ Bad: dir: "packages/python" cmds: - cargo build --release ``` ### 2. 
Support All Build Profiles ```yaml # ✅ Good: Profile-aware command - cmd: maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}} # ❌ Bad: Hardcoded profile - cmd: maturin develop --release ``` ### 3. Use Cross-Platform Commands ```yaml # ✅ Good: Python for file operations - cmd: | python -c " import shutil shutil.rmtree('build', ignore_errors=True) " # ❌ Bad: Unix-only command - cmd: rm -rf build/ ``` ### 4. Include Platform Config ```yaml # ✅ Good: Include platforms for cross-platform logic includes: platforms: ../config/platforms.yml env: LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}' # ❌ Bad: Hardcoded Unix assumption env: LD_LIBRARY_PATH: "{{.TARGET_DIR}}/release" ``` ### 5. Consistent Task Naming ```yaml # ✅ Good: Consistent naming with colons install: build: build:dev: build:release: build:ci: test: test:ci: lint: lint:check: format: format:check: # ❌ Bad: Inconsistent naming install_deps: make_build: run_tests: check-format: ``` ### 6. Document Descriptions ```yaml # ✅ Good: Clear, actionable description install: desc: "Install Python dependencies with uv" # ❌ Bad: Vague or missing description install: desc: "Install stuff" ``` ### 7. 
Error Handling ```yaml # ✅ Good: Explicit error handling - cmd: pytest -v tests/ ignore_error: false # Fail on errors - cmd: rm -rf .cache/ ignore_error: true # OK to fail (file may not exist) # ❌ Bad: Implicit behavior - cmd: pytest -v tests/ ``` ## Troubleshooting ### Task Not Found **Error**: `Task "foo:bar" not found` **Solution**: Ensure the include is in root `Taskfile.yml`: ```yaml includes: foo: taskfile: .task/languages/foo.yml ``` ### Variable Not Defined **Error**: `template: :1:2: executing "" at <.SOME_VAR>: map has no entry for key "SOME_VAR"` **Solution**: Define variable in `.task/config/vars.yml` or include platforms: ```yaml includes: platforms: ../config/platforms.yml ``` ### Cross-Platform Failures **Error**: Task works on macOS but fails on Windows **Solution**: Use conditional environment variables and cross-platform commands: ```yaml env: PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}' cmds: - cmd: | python -c "import shutil; shutil.rmtree('build', ignore_errors=True)" ``` ### Circular Dependencies **Error**: `task: import cycle not allowed` **Solution**: Avoid including files that include each other. Use internal workflow aggregators instead. 
## References - **Task Documentation**: [taskfile.dev](https://taskfile.dev/) - **Kreuzberg Pattern**: ../kreuzberg/ (sibling project) - **Root Taskfile**: ../Taskfile.yaml - **Platform Config**: config/platforms.yml - **Global Variables**: config/vars.yml --- **Last Updated**: 2025-12-28 **Maintainers**: html-to-markdown contributors ================================================ FILE: .task/checksum/_lint-typescript-lint ================================================ 5185d264d62b8f691570c5e0c226b22 ================================================ FILE: .task/checksum/_test-typescript-test ================================================ b93fe0d03a54250e90b23f1a50fb35ec ================================================ FILE: .task/checksum/typescript-typecheck ================================================ 99aa06d3014798d86001c324468d497f ================================================ FILE: .task/config/platforms.yml ================================================ version: "3" internal: true includes: vars: ./vars.yml vars: # Executable extension - empty for Unix, .exe for Windows EXE_EXT: sh: | if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo ".exe" else echo "" fi # Library extension - platform specific shared library suffix LIB_EXT: sh: | if [[ "$OSTYPE" == "darwin"* ]]; then echo "dylib" elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo "dll" else echo "so" fi # Library prefix - lib for Unix-like systems, empty for Windows LIB_PREFIX: sh: | if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo "" else echo "lib" fi # Platform string for Rust targets RUST_TARGET: sh: | ARCH=$(uname -m) OS_TYPE="$OSTYPE" case "$ARCH" in x86_64|x64) ARCH_STR="x86_64" ;; aarch64|arm64) ARCH_STR="aarch64" ;; armv7l|armv7) ARCH_STR="armv7" ;; *) ARCH_STR="$ARCH" ;; esac if [[ "$OS_TYPE" == "darwin"* ]]; then echo "${ARCH_STR}-apple-darwin" elif [[ "$OS_TYPE" == 
"linux-gnu"* ]] || [[ "$OS_TYPE" == "linux"* ]]; then echo "${ARCH_STR}-unknown-linux-gnu" elif [[ "$OS_TYPE" == "msys" ]] || [[ "$OS_TYPE" == "cygwin" ]] || [[ "$OS_TYPE" == "win32" ]]; then echo "${ARCH_STR}-pc-windows-msvc" else echo "${ARCH_STR}-unknown-unknown" fi # Boolean platform checks (imported from vars.yml) IS_WINDOWS: "{{.IS_WINDOWS}}" IS_MACOS: "{{.IS_MACOS}}" IS_LINUX: "{{.IS_LINUX}}" # Ruby path detection - handles Homebrew ARM64 and standard installations RUBY_FULL_PATH: sh: | if command -v ruby >/dev/null 2>&1; then command -v ruby elif [[ "$OSTYPE" == "darwin"* ]] && [[ -f "/opt/homebrew/opt/ruby/bin/ruby" ]]; then echo "/opt/homebrew/opt/ruby/bin/ruby" else echo "ruby" fi # Convenient binary paths for platform-specific tools CARGO_BIN: sh: command -v cargo 2>/dev/null || echo "cargo" RUSTC_BIN: sh: command -v rustc 2>/dev/null || echo "rustc" # Shell script extension for platform-specific scripts SHELL_EXT: sh: | if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo ".ps1" else echo ".sh" fi ================================================ FILE: .task/config/vars.yml ================================================ version: "3" internal: true vars: # Version extraction from Cargo.toml (workspace.package.version) VERSION: sh: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/' # Build profile (dev/release/ci) - default to release BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}" # Toolchain versions GOLANGCI_LINT_VERSION: "latest" GO_TOOLCHAIN: "go1.26.0" BUNDLER_VERSION: "4.0.0" RUBY_BIN: sh: | if command -v ruby >/dev/null 2>&1; then dirname "$(command -v ruby)" elif [[ "$OSTYPE" == "darwin"* ]] && [[ -d "/opt/homebrew/opt/ruby/bin" ]]; then echo "/opt/homebrew/opt/ruby/bin" else echo "ruby" fi # Logging RUST_LOG: "info" # Root project directories (absolute paths) ROOT: "{{.ROOT_DIR}}" CRATES_DIR: "{{.ROOT_DIR}}/crates" PACKAGES_DIR: "{{.ROOT_DIR}}/packages" SCRIPTS_DIR: 
"{{.ROOT_DIR}}/scripts" TOOLS_DIR: "{{.ROOT_DIR}}/tools" TARGET_DIR: "{{.ROOT_DIR}}/target" EXAMPLES_DIR: "{{.ROOT_DIR}}/examples" # OS Detection - determine operating system OS: sh: | case "$(uname -s 2>/dev/null || echo 'unknown')" in Darwin*) echo "darwin" ;; Linux*) echo "linux" ;; MINGW*|MSYS*|CYGWIN*) echo "windows" ;; *) if [[ "$OSTYPE" == "darwin"* ]]; then echo "darwin" elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "linux"* ]]; then echo "linux" elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo "windows" else echo "unknown" fi ;; esac # OS Boolean helpers IS_WINDOWS: sh: | if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo "true" else echo "false" fi IS_MACOS: sh: | if [[ "$OSTYPE" == "darwin"* ]]; then echo "true" else echo "false" fi IS_LINUX: sh: | if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "linux"* ]]; then echo "true" else echo "false" fi # Architecture detection - determine CPU architecture ARCH: sh: | ARCH=$(uname -m) case "$ARCH" in x86_64|x64) echo "x86_64" ;; aarch64|arm64) echo "arm64" ;; armv7l|armv7) echo "armv7" ;; armv6l|armv6) echo "armv6" ;; i686|i386) echo "i386" ;; *) echo "$ARCH" ;; esac # Number of CPUs available NUM_CPUS: sh: | if command -v nproc >/dev/null 2>&1; then nproc elif [[ "$OSTYPE" == "darwin"* ]]; then sysctl -n hw.ncpu elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then echo "${NUMBER_OF_PROCESSORS:-4}" else echo "4" fi # GNU Make parallel flag for optimal builds MAKE_JOBS: "{{.NUM_CPUS}}" ================================================ FILE: .task/languages/python.yml ================================================ version: "3" internal: true vars: BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}" PYTHON_PKG: "packages/python" tasks: install: desc: "Install Python dependencies with uv" cmds: - cd {{.PYTHON_PKG}} && uv sync - cd {{.PYTHON_PKG}} 
&& uv pip install -e . build: desc: "Build Python bindings with maturin ({{.BUILD_PROFILE}} profile)" cmds: - cd {{.PYTHON_PKG}} && maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}} build:dev: desc: "Build Python bindings in debug mode" cmds: - cd {{.PYTHON_PKG}} && maturin develop build:release: desc: "Build Python bindings in release mode" cmds: - cd {{.PYTHON_PKG}} && maturin develop --release build:ci: desc: "Build Python bindings for CI (release with debug info)" cmds: - cd {{.PYTHON_PKG}} && maturin develop --release wheel: desc: "Build Python wheel distribution" cmds: - cd {{.PYTHON_PKG}} && maturin build --release coverage: desc: "Generate Python code coverage report (lcov format)" cmds: - cd {{.PYTHON_PKG}} && uv run pytest -v --cov=. --cov-report=lcov:coverage.lcov tests/ clean: desc: "Clean Python build artifacts" cmds: - cmd: | cd {{.PYTHON_PKG}} && python -c " import shutil, glob dirs = ['__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache', 'dist', 'build', '.maturin'] for d in dirs: shutil.rmtree(d, ignore_errors=True) for f in glob.glob('*.egg-info'): shutil.rmtree(f, ignore_errors=True) " ignore_error: true ================================================ FILE: .task/languages/rust.yml ================================================ version: "3" internal: true includes: platforms: ../config/platforms.yml vars: RUST_LOG: "{{.RUST_LOG | default \"info\"}}" BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}" RUST_BACKTRACE: "{{.RUST_BACKTRACE | default \"1\"}}" CARGO_TERM_COLOR: "always" tasks: install: desc: "Install Rust toolchain and components (rustup, cargo)" silent: false cmds: - rustup update stable - rustup component add rustfmt clippy - rustup component add llvm-tools-preview - cargo install cargo-llvm-cov --locked - cargo install cargo-upgrades --locked - cargo --version - rustc --version build: desc: "Build all Rust crates with {{.BUILD_PROFILE}} profile" silent: false cmds: - cmd: | cargo build --workspace 
--profile {{.BUILD_PROFILE}} -j {{.NUM_CPUS}} ignore_error: false build:dev: desc: "Build all Rust crates in debug mode" silent: false cmds: - cmd: | cargo build --workspace -j {{.NUM_CPUS}} ignore_error: false build:release: desc: "Build all Rust crates in release mode" silent: false cmds: - cmd: | cargo build --workspace --release -j {{.NUM_CPUS}} ignore_error: false build:ci: desc: "Build for CI with debug info enabled (no strip)" silent: false cmds: - cmd: | CARGO_PROFILE_RELEASE_DEBUG=2 CARGO_PROFILE_RELEASE_STRIP=none cargo build --workspace --exclude html-to-markdown-php --release -j {{.NUM_CPUS}} ignore_error: false test: desc: "Run Rust test suite" silent: false cmds: - cmd: | RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo test --release --no-default-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php -j {{.NUM_CPUS}} ignore_error: false test:ci: desc: "Run tests with coverage for CI (generates lcov)" silent: false cmds: - cmd: | {{if eq OS "windows"}} RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo llvm-cov --features metadata,visitor,inline-images --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}} {{else}} RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}} {{end}} ignore_error: false - cmd: | {{if eq OS "windows"}} cargo llvm-cov --features metadata,visitor,inline-images --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --summary-only {{else}} cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --summary-only {{end}} ignore_error: false coverage: desc: "Generate code 
coverage report (lcov format)" silent: false cmds: - cmd: | RUST_LOG={{.RUST_LOG}} cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --exclude html-to-markdown-wasm-wasmtime-tests --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}} ignore_error: false lint: desc: "Lint Rust code WITH auto-fix (cargo fmt + cargo clippy --fix)" silent: false cmds: - cmd: cargo fmt --all ignore_error: false - cmd: | cargo clippy --workspace --fix --allow-dirty --allow-staged -j {{.NUM_CPUS}} ignore_error: false lint:check: desc: "Lint Rust code WITHOUT auto-fix (check-only)" silent: false cmds: - cmd: cargo fmt --all --check ignore_error: false - cmd: | cargo clippy -j {{.NUM_CPUS}} --workspace -- -D warnings ignore_error: false format: desc: "Format Rust code (with modifications)" silent: false cmds: - cmd: cargo fmt --all ignore_error: false format:check: desc: "Check Rust formatting without modifications" silent: false cmds: - cmd: cargo fmt --all --check ignore_error: false update: desc: "Update Rust dependencies within major versions (cargo update)" silent: false cmds: - cmd: cargo update ignore_error: false - cmd: cargo update --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml ignore_error: false - cmd: cargo update --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml ignore_error: false - cmd: cargo update --manifest-path packages/r/src/rust/Cargo.toml ignore_error: false upgrade: desc: "Upgrade Rust dependencies to latest including breaking changes (cargo upgrade --incompatible + cargo update)" silent: false cmds: - cmd: cargo upgrade --incompatible ignore_error: false - cmd: cargo update ignore_error: false - cmd: cargo upgrade --incompatible --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml ignore_error: false - cmd: cargo update --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml ignore_error: false - cmd: cargo upgrade 
--incompatible --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml ignore_error: false - cmd: cargo update --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml ignore_error: false - cmd: cargo upgrade --incompatible --manifest-path packages/r/src/rust/Cargo.toml ignore_error: false - cmd: cargo update --manifest-path packages/r/src/rust/Cargo.toml ignore_error: false clean: desc: "Clean Rust build artifacts" silent: false cmds: - cmd: cargo clean ignore_error: false doc: desc: "Generate and open Rust documentation" silent: false cmds: - cmd: | cargo doc --workspace --all-features --no-deps --open ignore_error: false e2e:generate: desc: "Generate E2E tests from fixtures using alef" silent: false cmds: - cmd: alef e2e generate --lang rust ignore_error: false e2e:test: desc: "Run Rust E2E tests in e2e/rust directory" silent: false cmds: - cmd: cargo test --manifest-path e2e/rust/Cargo.toml ignore_error: false ================================================ FILE: .task/tools/docs.yml ================================================ version: '3' tasks: generate-readme: desc: Generate package READMEs using alef cmds: - alef readme generate-readme:check: desc: Validate READMEs match generated output (CI mode) cmds: - alef readme - git diff --exit-code -- packages/*/README.md crates/*/README.md generate-docs: desc: Generate API reference documentation using alef cmds: - alef docs generate-docs:check: desc: Validate API docs match generated output (CI mode) cmds: - alef docs - git diff --exit-code -- docs/reference/ ================================================ FILE: .task/tools/general.yml ================================================ version: "3" internal: true includes: platforms: ../config/platforms.yml vars: SCRIPTS_DIR: "{{.TASKFILE_DIR}}/../../scripts" tasks: pre-commit:install: desc: "Install prek pre-commit hooks for commit and commit-msg" silent: false cmds: - cmd: prek install ignore_error: false - cmd: prek install 
--hook-type commit-msg ignore_error: false pre-commit:run: desc: "Run prek pre-commit hooks on all files" silent: false cmds: - cmd: prek run --all-files ignore_error: false pre-commit:uninstall: desc: "Uninstall prek hooks" silent: false cmds: - cmd: prek uninstall ignore_error: true - cmd: prek uninstall --hook-type commit-msg ignore_error: true validate:config: desc: "Validate YAML task configuration files" silent: false cmds: - cmd: | for file in {{.TASKFILE_DIR}}/**/*.yml; do echo "Validating $file..." if ! command -v yamllint &> /dev/null; then echo "yamllint not found, skipping validation" break fi yamllint "$file" || exit 1 done ignore_error: false validate:all: desc: "Validate all project configurations" silent: false cmds: - task: validate:config ================================================ FILE: .task/tools/version-sync.yml ================================================ version: "3" internal: true includes: platforms: ../config/platforms.yml vars: # Use installed alef binary. 
Install via: cargo binstall alef-cli # For local dev with sibling repo: cargo run --manifest-path ../alef/Cargo.toml -- ALEF: "alef" tasks: sync: desc: "Synchronize version across all package manifests and regenerate everything" cmds: - "{{.ALEF}} sync-versions" - "{{.ALEF}} readme" - "{{.ALEF}} docs" - "{{.ALEF}} generate --clean" - "{{.ALEF}} stubs" - "{{.ALEF}} e2e generate" check: desc: "Check if versions are synchronized (dry-run)" cmds: - cmd: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/' silent: false bump:major: desc: "Bump major version (X.0.0) and sync" cmds: - "{{.ALEF}} sync-versions --bump major" - task: sync bump:minor: desc: "Bump minor version (0.X.0) and sync" cmds: - "{{.ALEF}} sync-versions --bump minor" - task: sync bump:patch: desc: "Bump patch version (0.0.X) and sync" cmds: - "{{.ALEF}} sync-versions --bump patch" - task: sync show: desc: "Show current version from Cargo.toml" cmds: - cmd: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/' silent: false ================================================ FILE: .task/workflows/e2e.yml ================================================ version: "3" tasks: generate:all: desc: Generate all E2E tests from fixtures across all supported languages cmds: - alef e2e generate test:all: desc: Run all E2E tests across all supported languages cmds: - alef test --e2e lint:all: desc: Lint all generated E2E test code cmds: - alef lint verify:all: desc: Full E2E pipeline - generate, lint, and test all suites cmds: - alef e2e generate - alef lint - alef test --e2e generate:rust: desc: Generate Rust E2E tests from fixtures cmds: - alef e2e generate --lang rust test:rust: desc: Run Rust E2E tests cmds: - task: rust:e2e:test quick: desc: Run quick E2E tests (Rust only) cmds: - task: rust:e2e:test ================================================ FILE: .typos.toml ================================================ [files] extend-exclude = ["target/", ".alef/", "*.lock", "*.min.js"] 
[default.extend-words] # Add project-specific words here # crate_name = "crate_name" ================================================ FILE: ATTRIBUTIONS.md ================================================ # Attributions This project includes vendored code from third-party libraries. This file provides the required attribution and license information. ## markup5ever_rcdom - **Version vendored**: 0.36.0+unofficial - **Original authors**: The html5ever Project Developers - **Repository**: <https://github.com/servo/html5ever> - **Vendored into**: `crates/html-to-markdown/src/rcdom.rs` - **License**: MIT OR Apache-2.0 ### MIT License ```text Copyright (c) 2014 The html5ever Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` ### Apache License, Version 2.0 ```text Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS ``` ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to html-to-markdown will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Fixed - **Visitor element start/end sequence for hyphenated tags (#331)** — the `repair_with_html5ever` fallback (triggered when input contains custom-element / hyphenated tag names) re-parsed the document under HTML5 semantics, which discard XML-style self-closing on unknown elements. As a result, a self-closing custom element such as `<my-element/>` was treated as an open tag and subsequent siblings were nested inside it, breaking the visitor's pre-order/post-order start/end pairing. The repair path now pre-expands XML-style self-closing tags on non-void elements to explicit open+close pairs before the HTML5 parse, so visitor events remain correctly paired for hyphenated/namespaced custom tags. - **`default-features = false` build broken (#332)** — bare `#[serde(...)]` and `#[derive(Serialize, Deserialize)]` on core types in `src/types/{document,tables,result,warnings}.rs` and `src/options/conversion.rs` are now feature-gated behind `#[cfg_attr(feature = "serde", ...)]`. CI now runs a `cargo check --no-default-features` matrix to prevent regressions. - **Ruby `TypeError` on `convert()` with options** (#334) — `HtmlToMarkdown.convert(html, options)` raised `TypeError` on every call that supplied options (including `3.4.0.pre.rc.15`). The Ruby wrapper was passing a `ConversionOptions` object to the FFI, but the generated Rust function expects `Option<String>` (JSON). The wrapper now serialises the options hash to JSON before crossing the FFI boundary, matching what `serde_json::from_str` expects on the Rust side.
## [3.3.3] - 2026-04-23 ### Fixed - **Python enum KeyError** (#324) — `ConversionOptions()` with default enums no longer crashes; PyO3 enum fields are passed directly instead of broken `str()` + map lookup. - **Ruby Magnus binding** — fixed 65 compilation errors: `funcall` API, visitor bridge args, `Vec` conversion, optional flattening, sanitized field serde round-trip. - **Elixir `.formatter.exs`** — 120-char line length, generated code now passes `mix format --check-formatted`. - **Unused deps** — removed `serde_json` from Node and WASM binding crates. - **Checkstyle** — excluded `test_apps/` from pre-commit checkstyle hook. ## [3.3.2] - 2026-04-23 ### Fixed - **Elixir visitor bridge** — implemented async thread-based visitor protocol using `rustler::thread::spawn` + `OwnedEnv::send_and_clear` + `mpsc` channels, replacing the impossible synchronous `env.call()` approach. - **Elixir NIF rustler 0.37** — replaced removed `SavedTerm`, `is_nil()`, `Pid::spawn_monitor`, `.encode()` APIs with 0.37-compatible equivalents. - **Elixir type conversions** — fixed double-optional wrapping (`map(Some)`) and ambiguous `From` impl in generated `_from` methods. - **Java checkstyle** — added `maven-checkstyle-plugin` to pom.xml pointing to project `checkstyle.xml` (120-char limit), so `mvn checkstyle:check` uses our config instead of default Sun checks. - **Ruby Rakefile** — explicit `Bundler::GemHelper.install_tasks name:` for Bundler 4 compatibility. ## [3.3.1] - 2026-04-23 ### Fixed - **Java checkstyle** — switched to 120-char line limit, added Spotless auto-formatting with Eclipse JDT formatter, added `final` params and javadoc to all generated code. - **Elixir `list` type collision** — `NodeContent::List` variant no longer redefines Elixir's built-in `list/0` type (now emits `list_variant`). - **Elixir NIF missing `serde`** — added `serde` with derive feature as direct dependency to the NIF crate. 
- **C# `VisitResult.Continue`** — default visitor methods now use `new VisitResult.Continue()` instead of non-invocable `VisitResult.Continue()`. - **Node `convert` export** — restored the missing `#[napi] pub fn convert` function dropped during binding regeneration. - **Ruby CI** — updated Bundler from 2.7.2 to 4.0.3 to match `Gemfile.lock`. ## [3.3.0] - 2026-04-23 ### Added - **`exclude_selectors` option** — CSS selector-based element exclusion. Unlike `strip_tags` (which removes the wrapper but keeps children), excluded elements and all descendants are dropped entirely. Supports any CSS selector: `.class`, `#id`, `[attribute]`, compound selectors. Works in both markdown and plain text output modes. - **CLI flags** — `--preserve-tags`, `--skip-images`, `--max-depth` for full ConversionOptions parity. - **Visitor pattern for all bindings** (#314, #313) — restored visitor support across Python, TypeScript, Ruby, PHP, Go, Java, C#, Elixir, R, WASM, and C FFI. - **R visitor support** — added visitor callbacks for the R binding. - **E2E test fixtures** — 78 new fixtures for 100% ConversionOptions field coverage (35/35 fields). Added fixtures for `exclude_selectors`, `ConversionResult.tables`, and `ConversionResult.warnings`. - **Ruby RBS type stubs** — auto-generated via alef from the Rust IR, including `VERSION` constant. Gemspec now includes `sig/**/*`. - **Alef pre-commit hook** — `alef-verify` hook added to `.pre-commit-config.yaml` to check generated code freshness. CI installs alef v0.5.3 binary. ### Fixed - **`

` inside `
` not exported** (#321) — top-level `
` elements were unconditionally dropped during preprocessing; now only `
` with navigation hints (e.g. `class="site-header"`, `role="navigation"`) is removed. - **`PreprocessingPreset` not wired into preprocessing logic** — the `preset` field on `PreprocessingOptions` was defined but never checked. Now Minimal/Standard/Aggressive presets have distinct behavior. - **`remove_forms` flag was dead code** — `
` elements are now dropped when `remove_forms: true` and preset is Standard or Aggressive. - **Aggressive preset** — now drops navigation-hinted elements of any tag type, `

'); echo $result['content']; // # Hello ``` Requires PHP 8.2+. The package ships precompiled extensions for common PHP versions. If no prebuilt extension matches, Composer will compile from source via `cargo`. === "Java" **Maven** — add to `pom.xml`: ```xml <dependency> <groupId>dev.kreuzberg</groupId> <artifactId>html-to-markdown</artifactId> <version>3.1.0</version> </dependency> ``` **Gradle** — add to `build.gradle`: ```groovy implementation 'dev.kreuzberg:html-to-markdown:3.1.0' ``` Or Kotlin DSL (`build.gradle.kts`): ```kotlin implementation("dev.kreuzberg:html-to-markdown:3.1.0") ``` **Verify:** ```java import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown; public class Main { public static void main(String[] args) { var result = HtmlToMarkdown.convert("

<h1>Hello</h1>

"); System.out.println(result.content()); // # Hello } } ``` Requires Java 22+. The JAR extracts the native `libhtml_to_markdown` shared library at startup. No separate install step is needed — the library is bundled for Linux x86_64, macOS arm64/x86_64, and Windows x64. === "C#" ```bash dotnet add package KreuzbergDev.HtmlToMarkdown ``` Or via the NuGet Package Manager in Visual Studio — search for `KreuzbergDev.HtmlToMarkdown`. **Verify:** ```csharp using HtmlToMarkdown; var result = HtmlToMarkdownConverter.Convert("

<h1>Hello</h1>

"); Console.WriteLine(result.Content); // # Hello ``` Targets .NET Standard 2.0 and above (.NET 6+, .NET Framework 4.6.1+). The package bundles native binaries for Linux, macOS, and Windows via `NativeLibrary` P/Invoke. === "Elixir" Add to `mix.exs`: ```elixir def deps do [ {:html_to_markdown, "~> 3.1"} ] end ``` Then fetch: ```bash mix deps.get ``` **Verify:** ```elixir {:ok, result} = HtmlToMarkdown.convert("

<h1>Hello</h1>

") IO.puts(result.content) # # Hello ``` Requires Elixir 1.19+. Uses Rustler NIFs — precompiled NIF binaries are fetched automatically via `RustlerPrecompiled`. Set `RUSTLER_PRECOMPILED_GLOBAL_CACHE_PATH` to control cache location. === "R" ```r install.packages("htmltomarkdown") ``` **Verify:** ```r library(htmltomarkdown) result <- htmltomarkdown::convert("

<h1>Hello</h1>

") cat(result$content) # # Hello ``` Requires R 4.0+. Available on CRAN for Linux (x86_64), macOS (arm64, x86_64), and Windows. === "C" Download a prebuilt release archive for your platform from the [GitHub Releases page](https://github.com/kreuzberg-dev/html-to-markdown/releases). Each archive contains: - `libhtml_to_markdown.so` / `.dylib` / `.dll` — shared library - `libhtml_to_markdown.a` — static library - `html_to_markdown.h` — C header **Build from source** (requires Rust toolchain): ```bash git clone https://github.com/kreuzberg-dev/html-to-markdown.git cd html-to-markdown cargo build --release -p html-to-markdown-ffi # output: target/release/libhtml_to_markdown.{so,dylib,dll} ``` **Link and verify:** ```c #include "html_to_markdown.h" #include <stdio.h> int main(void) { HtmlToMarkdownResult r = html_to_markdown_convert("

<h1>Hello</h1>

", NULL); if (r.error == NULL) { printf("%s\n", r.content); // # Hello } html_to_markdown_free_result(r); return 0; } ``` Compile with: ```bash gcc main.c -lhtml_to_markdown -L./target/release -o main ``` Always call `html_to_markdown_free_result` after every call. Memory returned across the FFI boundary is owned by the Rust allocator and must not be freed with `free()`. === "WASM" ```bash npm install @kreuzberg/html-to-markdown-wasm ``` **Verify:** ```javascript import init, { convert } from '@kreuzberg/html-to-markdown-wasm'; await init(); // load and instantiate the .wasm file — call once const result = convert('

<h1>Hello</h1>

'); console.log(result.content); // # Hello ``` `init()` must complete before any `convert()` call. After that, `convert` is synchronous. The WASM build omits the `inline-images` feature (no filesystem access in the browser sandbox). Works in browsers, Cloudflare Workers, Deno, and Bun. --- ## CLI Install via Cargo: ```bash cargo install html-to-markdown-cli ``` Or via Homebrew: ```bash brew install kreuzberg-dev/tap/html-to-markdown ``` **Verify:** ```bash echo "

<h1>Hello</h1>

" | html-to-markdown # # Hello ``` See [CLI reference](cli.md) for all flags and options. --8<-- "snippets/feedback.md" ================================================ FILE: docs/language-guides.md ================================================ # Language Binding Guides Every binding wraps the same Rust core. The option names and return shapes are identical across languages; only the syntax differs. This page covers per-language install notes and naming conventions. ## Rust **Package:** `html-to-markdown-rs` on [crates.io](https://crates.io/crates/html-to-markdown-rs) ```toml html-to-markdown-rs = "3.1" ``` Option structs follow Rust naming conventions (`snake_case`). Use the builder API via `ConversionOptions::builder()` or construct `ConversionOptions` directly. See [Installation: Feature Flags](installation.md#feature-flags) for the six Cargo features. ## Python **Package:** `html-to-markdown` on PyPI **Requires:** Python ≥ 3.10 ```bash pip install html-to-markdown ``` ```python from html_to_markdown import convert, ConversionOptions result = convert("

<h1>Title</h1>

", ConversionOptions(heading_style="atx")) print(result.content) ``` Option keys match the Rust field names (`snake_case`). `ConversionOptions` accepts keyword arguments. `ConversionResult` is a class with attributes — access fields as `result.content`, `result.metadata`, `result.tables`, `result.images`, `result.document`, `result.warnings`. ## TypeScript **Package:** `@kreuzberg/html-to-markdown` on npm **Requires:** Node.js ≥ 18 ```bash npm install @kreuzberg/html-to-markdown ``` ```typescript import { convert } from '@kreuzberg/html-to-markdown'; const result = convert('

<h1>Title</h1>

', { headingStyle: 'atx' }); console.log(result.content); ``` Option keys are `camelCase` (`headingStyle`, `linkStyle`, `outputFormat`). The package ships both ESM and CJS outputs. TypeScript types are bundled. ## Go **Module:** `github.com/kreuzberg-dev/html-to-markdown/packages/go/v3` **Requires:** Go ≥ 1.26 ```bash go get github.com/kreuzberg-dev/html-to-markdown/packages/go/v3 ``` ```go import htmltomarkdown "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3" result, err := htmltomarkdown.Convert("

<h1>Title</h1>

", nil) if err != nil { log.Fatal(err) } fmt.Println(result.Content) ``` Options use Go struct field names (`PascalCase`). `Convert` returns `(*ConversionResult, error)`. Errors are standard Go errors. Use `errors.Is`/`errors.As` to inspect them. ## Ruby **Gem:** `html-to-markdown` on RubyGems **Requires:** Ruby ≥ 3.2 ```bash gem install html-to-markdown ``` ```ruby require 'html_to_markdown' result = HtmlToMarkdown.convert('

<h1>Title</h1>

', heading_style: :atx) puts result[:content] ``` Options are keyword arguments with `snake_case` symbols. `result` is a hash. Errors raise `HtmlToMarkdown::ConversionError`. ## PHP **Package:** `kreuzberg/html-to-markdown` on Packagist ```bash composer require kreuzberg/html-to-markdown ``` ```php $converter = new \Kreuzberg\HtmlToMarkdown\Converter(); $result = $converter->convert('

<h1>Title</h1>

', ['headingStyle' => 'atx']); echo $result->content; ``` Options are a plain associative array with `camelCase` keys. `$result` is a value object. Errors throw `\Kreuzberg\HtmlToMarkdown\ConversionException`. ## Java **Maven:** `dev.kreuzberg:html-to-markdown` ```xml <dependency> <groupId>dev.kreuzberg</groupId> <artifactId>html-to-markdown</artifactId> <version>3.1.0</version> </dependency> ``` ```java import dev.kreuzberg.HtmlToMarkdown; import dev.kreuzberg.ConversionOptions; ConversionOptions options = ConversionOptions.builder() .headingStyle("atx") .build(); ConversionResult result = HtmlToMarkdown.convert("

<h1>Title</h1>

", options); System.out.println(result.getContent()); ``` Uses a builder for options. Errors throw `dev.kreuzberg.ConversionException` (checked). The library ships with native binaries for Linux x86_64, macOS arm64/x86_64, and Windows x86_64. ## C# **NuGet:** `KreuzbergDev.HtmlToMarkdown` ```bash dotnet add package KreuzbergDev.HtmlToMarkdown ``` ```csharp using KreuzbergDev.HtmlToMarkdown; var options = new ConversionOptions { HeadingStyle = "atx" }; var result = HtmlToMarkdownConverter.Convert("

<h1>Title</h1>

", options); Console.WriteLine(result.Content); ``` Option properties are `PascalCase`. Errors throw `ConversionException`. The package targets `netstandard2.0` and above. ## Elixir **Hex:** `html_to_markdown` **Requires:** Elixir ~> 1.19 ```elixir {:html_to_markdown, "~> 3.1"} ``` ```elixir case HtmlToMarkdown.convert("

<h1>Title</h1>

", heading_style: :atx) do {:ok, result} -> IO.puts(result.content) {:error, reason} -> IO.warn("failed: #{reason}") end ``` `convert/2` returns `{:ok, result}` or `{:error, reason}`. Options are a keyword list. The struct fields match the Rust names (`snake_case`). ## R **CRAN:** `htmltomarkdown` ```r install.packages("htmltomarkdown") ``` ```r library(htmltomarkdown) result <- htmltomarkdown::convert("

<h1>Title</h1>

", heading_style = "atx") cat(result$content) ``` Options are named function arguments. The returned list matches the `ConversionResult` shape with `snake_case` names. Errors stop execution with a message; wrap in `tryCatch` if you need to handle them. ## C **Link against:** `libhtml_to_markdown` **Header:** `html_to_markdown.h` Download a pre-built release archive for your platform from the [GitHub releases page](https://github.com/kreuzberg-dev/html-to-markdown/releases), or build from source with `cargo build --release -p html-to-markdown-ffi`. ```c #include "html_to_markdown.h" HtmlToMarkdownResult result = html_to_markdown_convert("

<h1>Title</h1>

", NULL); if (result.error == NULL) { printf("%s\n", result.content); } html_to_markdown_free_result(result); ``` Always call `html_to_markdown_free_result` to release memory owned by the Rust allocator. The C API is a thin synchronous FFI layer. No async mode, no thread-local state. ## WASM **npm:** `@kreuzberg/html-to-markdown-wasm` ```bash npm install @kreuzberg/html-to-markdown-wasm ``` ```javascript import init, { convert } from '@kreuzberg/html-to-markdown-wasm'; await init(); const result = convert('

<h1>Title</h1>

', { headingStyle: 'atx' }); console.log(result.content); ``` `init()` loads and instantiates the `.wasm` file. Call it once before any conversion. After that, `convert` is synchronous. Options use `camelCase` and have the same shape as the TypeScript binding. The WASM build omits the `inline-images` feature (no file-system access in the browser sandbox). --8<-- "snippets/feedback.md" ================================================ FILE: docs/llms.txt ================================================ # html-to-markdown > High-performance HTML to Markdown conversion library with 12 native language bindings. ## Links - GitHub: https://github.com/kreuzberg-dev/html-to-markdown - Organization: https://github.com/kreuzberg-dev - Kreuzberg (document extraction): https://github.com/kreuzberg-dev/kreuzberg - Kreuzberg Docs: https://docs.kreuzberg.dev - PyPI: https://pypi.org/project/html-to-markdown/ - npm: https://www.npmjs.com/package/@kreuzberg/html-to-markdown - crates.io: https://crates.io/crates/html-to-markdown-rs - Docs: https://docs.html-to-markdown.kreuzberg.dev ## Capabilities - Convert HTML to Markdown, Djot, or plain text - Extract structured document tree (DocumentStructure with headings, paragraphs, lists, tables, etc.) - Extract HTML metadata (title, description, Open Graph, Twitter Card, JSON-LD, links, images) - Extract inline images from data URIs and SVGs - Table extraction with cell-level data (colspan, rowspan, headers) - Custom visitor pattern for content filtering and transformation - 12 native language bindings: Rust, Python, TypeScript, Go, Ruby, PHP, Java, C#, Elixir, R, C (FFI), WebAssembly - CLI tool for command-line conversion - Processing speed: 150-280 MB/s ## API (v3) Single entry point: `convert(html, options?) 
-> ConversionResult` ConversionResult contains: - content: Optional — the converted text (markdown/djot/plain); null/None when output_format is "none" - document: Optional — structured document tree; only populated when include_document_structure is true - metadata: HtmlMetadata — extracted HTML metadata (title, description, Open Graph, Twitter Card, JSON-LD, links, images) - tables: Vec — extracted tables with grid data (headers, rows, colspan/rowspan) - images: Vec — extracted inline images (data URIs, embedded SVGs); only populated when extract_images is true - warnings: Vec — non-fatal processing warnings ## Usage Examples ### Rust ```rust use html_to_markdown_rs::convert; let result = convert("

<h1>Hello</h1><p>World</p>

", None)?; let markdown = result.content.unwrap_or_default(); // "# Hello\n\nWorld" ``` ### Python ```python from html_to_markdown import convert result = convert("

<h1>Hello</h1><p>World</p>

") markdown = result.content # "# Hello\n\nWorld" ``` ### TypeScript ```typescript import { convert } from '@kreuzberg/html-to-markdown'; const result = convert('

<h1>Hello</h1><p>World</p>

'); const markdown = result.content; // "# Hello\n\nWorld" ``` ### Go ```go import "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown" result, err := htmltomarkdown.Convert("

<h1>Hello</h1><p>World</p>

") if result.Content != nil { fmt.Println(*result.Content) } ``` ### Ruby ```ruby require 'html_to_markdown' result = HtmlToMarkdown.convert("

<h1>Hello</h1><p>World</p>

") markdown = result[:content] ``` ### PHP ```php use function HtmlToMarkdown\convert; $result = convert('

<h1>Hello</h1><p>World</p>

'); $markdown = $result['content']; ``` ### Java ```java import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown; ConversionResult result = HtmlToMarkdown.convert("

<h1>Hello</h1><p>World</p>

"); System.out.println(result.content()); ``` ### C# ```csharp using HtmlToMarkdown; var result = HtmlToMarkdownConverter.Convert("

<h1>Hello</h1><p>World</p>

"); Console.WriteLine(result.Content); ``` ### Elixir ```elixir {:ok, result} = HtmlToMarkdown.convert("

<h1>Hello</h1><p>World</p>

") IO.puts(result.content) ``` ### R ```r library(htmltomarkdown) result <- convert("

<h1>Hello</h1><p>World</p>

") cat(result$content) ``` ### C (FFI) ```c #include "html_to_markdown.h" char *json = html_to_markdown_convert("

<h1>Hello</h1>

", NULL); // json: {"content":"# Hello","metadata":null,"tables":null} html_to_markdown_free_string(json); ``` ### WASM ```javascript import init, { convert } from '@kreuzberg/html-to-markdown-wasm'; await init(); const result = convert('

<h1>Hello</h1><p>World</p>

'); console.log(result.content); ``` ## Configuration Options All options are passed to `convert()` as the second argument via a `ConversionOptions` struct/object/dict. - output_format: "markdown" | "djot" | "plain" | "none" (default: "markdown") - heading_style: "atx" | "underlined" | "atx_closed" (default: "atx") - list_indent_type: "spaces" | "tab" (default: "spaces") - list_indent_width: int 1-8 (default: 2) - bullets: string cycling through list marker chars (default: "-") - strong_em_symbol: "*" | "_" (default: "*") - newline_style: "backslash" | "spaces" (default: "backslash") - code_block_style: "indented" | "backticks" | "tildes" (default: "indented") - code_language: string default language for fenced code blocks (default: "") - autolinks: bool — use `<url>` when link text equals href (default: false) - default_title: bool — use href as title when none present (default: false) - keep_inline_images_in: list of element names to keep images as markdown (default: []) - br_in_tables: bool — preserve `<br>` in table cells (default: false) - highlight_style: "double-equal" | "html" | "bold" | "none" (default: "double-equal") - escape_asterisks: bool (default: false) - escape_underscores: bool (default: false) - escape_misc: bool (default: false) - escape_ascii: bool (default: false) - sub_symbol: string wrapper for `<sub>` text (default: "") - sup_symbol: string wrapper for `<sup>` text (default: "") - whitespace_mode: "normalized" | "strict" (default: "normalized") - strip_newlines: bool (default: false) - wrap: bool — enable line wrapping (default: false) - wrap_width: int 20-500 (default: 80) - convert_as_inline: bool (default: false) - strip_tags: list of tag names to strip (default: []) - preserve_tags: list of tag names to emit verbatim as HTML (default: []) - link_style: "inline" | "reference" — inline emits [text](url); reference emits [text][1] with definitions at end (default: "inline") - skip_images: bool — drop images entirely (default: false) - max_image_size: int bytes — skip inline images larger than this (default: 5242880) - capture_svg: bool — include inline SVGs in result.images when extract_images is true (default: false) - infer_dimensions: bool — decode image bytes to infer width/height (default: true) - encoding: string — CLI only; input file character encoding (default: "utf-8"); ignored by the core library - debug: bool — CLI only; prints diagnostic lines to stderr after each conversion (default: false) - extract_metadata: bool — populate result.metadata and result.tables (default: true) - extract_images: bool — extract img elements and populate result.images (default: false) - include_document_structure: bool — populate result.document (default: false) - preprocess: bool — clean HTML before conversion (default: false) - preset: "minimal" | "standard" | "aggressive" preprocessing aggressiveness (default: "standard") - keep_navigation: bool — preserve