Copy disabled (too large)
Download .txt
Showing preview only (10,945K chars total). Download the full file to get everything.
Repository: Goldziher/html-to-markdown
Branch: main
Commit: 64ef6808077f
Files: 1171
Total size: 10.2 MB
Directory structure:
gitextract_5vwofxnz/
├── .ai-rulez/
│ ├── config.toml
│ ├── context/
│ │ └── crate-structure.md
│ ├── domains/
│ │ ├── conversion-algorithms/
│ │ │ └── DOMAIN.md
│ │ ├── html-parsing/
│ │ │ └── DOMAIN.md
│ │ └── safety-sanitization/
│ │ └── DOMAIN.md
│ └── rules/
│ └── alef-generated-bindings.md
├── .cargo/
│ └── config.toml
├── .clang-format
├── .editorconfig
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── documentation.yml
│ │ └── feature_request.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── actions/
│ │ ├── build-typescript/
│ │ │ └── action.yml
│ │ └── smoke-pie/
│ │ └── action.yml
│ ├── dependabot.yaml
│ └── workflows/
│ ├── ci.yaml
│ ├── deploy-docs.yaml
│ ├── publish.yaml
│ ├── validate-issues.yml
│ └── validate-pr.yml
├── .gitignore
├── .gitmodules
├── .golangci.yml
├── .mailmap
├── .markdownlint.yaml
├── .mvn/
│ └── wrapper/
│ ├── MavenWrapperDownloader.java
│ ├── maven-wrapper.jar
│ └── maven-wrapper.properties
├── .php-cs-fixer.dist.php
├── .pre-commit-config.yaml
├── .ruby-version
├── .rumdl.toml
├── .sdkmanrc
├── .task/
│ ├── README.md
│ ├── checksum/
│ │ ├── _lint-typescript-lint
│ │ ├── _test-typescript-test
│ │ └── typescript-typecheck
│ ├── config/
│ │ ├── platforms.yml
│ │ └── vars.yml
│ ├── languages/
│ │ ├── python.yml
│ │ └── rust.yml
│ ├── tools/
│ │ ├── docs.yml
│ │ ├── general.yml
│ │ └── version-sync.yml
│ └── workflows/
│ └── e2e.yml
├── .typos.toml
├── ATTRIBUTIONS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE
├── README.md
├── Taskfile.yaml
├── _typos.toml
├── alef.toml
├── composer.json
├── crates/
│ ├── html-to-markdown/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── examples/
│ │ │ ├── basic.rs
│ │ │ ├── table.rs
│ │ │ ├── test_deser.rs
│ │ │ ├── test_escape.rs
│ │ │ ├── test_inline_formatting.rs
│ │ │ ├── test_lists.rs
│ │ │ ├── test_semantic_tags.rs
│ │ │ ├── test_tables.rs
│ │ │ ├── test_task_lists.rs
│ │ │ └── test_whitespace.rs
│ │ ├── src/
│ │ │ ├── convert_api.rs
│ │ │ ├── converter/
│ │ │ │ ├── block/
│ │ │ │ │ ├── blockquote.rs
│ │ │ │ │ ├── container.rs
│ │ │ │ │ ├── div.rs
│ │ │ │ │ ├── heading.rs
│ │ │ │ │ ├── horizontal_rule.rs
│ │ │ │ │ ├── line_break.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ ├── paragraph.rs
│ │ │ │ │ ├── preformatted.rs
│ │ │ │ │ ├── table/
│ │ │ │ │ │ ├── builder.rs
│ │ │ │ │ │ ├── caption.rs
│ │ │ │ │ │ ├── cell.rs
│ │ │ │ │ │ ├── cells.rs
│ │ │ │ │ │ ├── layout.rs
│ │ │ │ │ │ ├── mod.rs
│ │ │ │ │ │ ├── scanner.rs
│ │ │ │ │ │ └── utils.rs
│ │ │ │ │ └── unknown.rs
│ │ │ │ ├── context.rs
│ │ │ │ ├── dom_context.rs
│ │ │ │ ├── form/
│ │ │ │ │ ├── elements.rs
│ │ │ │ │ └── mod.rs
│ │ │ │ ├── format/
│ │ │ │ │ ├── djot.rs
│ │ │ │ │ ├── markdown.rs
│ │ │ │ │ └── mod.rs
│ │ │ │ ├── handlers/
│ │ │ │ │ ├── blockquote.rs
│ │ │ │ │ ├── code_block.rs
│ │ │ │ │ ├── graphic.rs
│ │ │ │ │ ├── image.rs
│ │ │ │ │ ├── link.rs
│ │ │ │ │ └── mod.rs
│ │ │ │ ├── inline/
│ │ │ │ │ ├── code.rs
│ │ │ │ │ ├── emphasis.rs
│ │ │ │ │ ├── link.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ ├── ruby.rs
│ │ │ │ │ └── semantic/
│ │ │ │ │ ├── marks.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ └── typography.rs
│ │ │ │ ├── list/
│ │ │ │ │ ├── definition.rs
│ │ │ │ │ ├── item.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ ├── ordered.rs
│ │ │ │ │ ├── unordered.rs
│ │ │ │ │ └── utils.rs
│ │ │ │ ├── main.rs
│ │ │ │ ├── main_helpers.rs
│ │ │ │ ├── media/
│ │ │ │ │ ├── embedded.rs
│ │ │ │ │ ├── graphic.rs
│ │ │ │ │ ├── image.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ └── svg.rs
│ │ │ │ ├── metadata.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── plain_text.rs
│ │ │ │ ├── preprocessing_helpers.rs
│ │ │ │ ├── reference_collector.rs
│ │ │ │ ├── semantic/
│ │ │ │ │ ├── attributes.rs
│ │ │ │ │ ├── definition_list.rs
│ │ │ │ │ ├── figure.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ ├── sectioning.rs
│ │ │ │ │ └── summary.rs
│ │ │ │ ├── text/
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ └── processing.rs
│ │ │ │ ├── text_node.rs
│ │ │ │ ├── utility/
│ │ │ │ │ ├── attributes.rs
│ │ │ │ │ ├── caching.rs
│ │ │ │ │ ├── content.rs
│ │ │ │ │ ├── mod.rs
│ │ │ │ │ ├── preprocessing.rs
│ │ │ │ │ ├── serialization.rs
│ │ │ │ │ └── siblings.rs
│ │ │ │ └── visitor_hooks.rs
│ │ │ ├── error.rs
│ │ │ ├── exports.rs
│ │ │ ├── inline_images.rs
│ │ │ ├── lib.rs
│ │ │ ├── metadata/
│ │ │ │ ├── collector.rs
│ │ │ │ ├── config.rs
│ │ │ │ ├── extraction.rs
│ │ │ │ ├── mod.rs
│ │ │ │ └── types.rs
│ │ │ ├── options/
│ │ │ │ ├── conversion.rs
│ │ │ │ ├── inline_image.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── preprocessing.rs
│ │ │ │ └── validation.rs
│ │ │ ├── prelude.rs
│ │ │ ├── rcdom.rs
│ │ │ ├── text.rs
│ │ │ ├── types/
│ │ │ │ ├── document.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── result.rs
│ │ │ │ ├── structure_builder.rs
│ │ │ │ ├── structure_collector.rs
│ │ │ │ ├── tables.rs
│ │ │ │ └── warnings.rs
│ │ │ ├── validation.rs
│ │ │ ├── visitor/
│ │ │ │ ├── default_impl.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── traits.rs
│ │ │ │ └── types.rs
│ │ │ ├── visitor_helpers/
│ │ │ │ └── helpers/
│ │ │ │ ├── callbacks/
│ │ │ │ │ └── mod.rs
│ │ │ │ ├── content.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── state.rs
│ │ │ │ └── traversal.rs
│ │ │ ├── visitor_helpers.rs
│ │ │ ├── wrapper/
│ │ │ │ ├── sync.rs
│ │ │ │ └── utils.rs
│ │ │ └── wrapper.rs
│ │ └── tests/
│ │ ├── br_in_inline_test.rs
│ │ ├── commonmark_compliance_test.rs
│ │ ├── djot_output_test.rs
│ │ ├── exclude_selectors_test.rs
│ │ ├── integration_test.rs
│ │ ├── issue_121_regressions.rs
│ │ ├── issue_127_regressions.rs
│ │ ├── issue_128_regressions.rs
│ │ ├── issue_131_regressions.rs
│ │ ├── issue_134_regressions.rs
│ │ ├── issue_139_regressions.rs
│ │ ├── issue_140_regressions.rs
│ │ ├── issue_143_regressions.rs
│ │ ├── issue_145_regressions.rs
│ │ ├── issue_146_regressions.rs
│ │ ├── issue_176_regressions.rs
│ │ ├── issue_190_regressions.rs
│ │ ├── issue_199_regressions.rs
│ │ ├── issue_200_regressions.rs
│ │ ├── issue_212_regressions.rs
│ │ ├── issue_216_217_regressions.rs
│ │ ├── json_ld_script_extraction.rs
│ │ ├── lists_test.rs
│ │ ├── plain_output_test.rs
│ │ ├── preprocessing_tests.rs
│ │ ├── reference_links_test.rs
│ │ ├── sectioning_elements_test.rs
│ │ ├── skip_images_test.rs
│ │ ├── tables_test.rs
│ │ ├── test_custom_elements.rs
│ │ ├── test_issue_187.rs
│ │ ├── test_issue_218.rs
│ │ ├── test_issue_277.rs
│ │ ├── test_max_depth.rs
│ │ ├── test_nested_simple.rs
│ │ ├── test_script_style_stripping.rs
│ │ ├── test_spa_bisect.rs
│ │ ├── visitor_code_integration_test.rs
│ │ ├── visitor_integration_test.rs
│ │ └── xml_tables_test.rs
│ ├── html-to-markdown-cli/
│ │ ├── Cargo.toml
│ │ ├── src/
│ │ │ ├── args.rs
│ │ │ ├── convert.rs
│ │ │ ├── main.rs
│ │ │ ├── output.rs
│ │ │ ├── utils.rs
│ │ │ └── validators.rs
│ │ └── tests/
│ │ └── cli_test.rs
│ ├── html-to-markdown-ffi/
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ ├── cbindgen.toml
│ │ ├── cmake/
│ │ │ └── html-to-markdown-ffi-config.cmake
│ │ ├── include/
│ │ │ └── html_to_markdown.h
│ │ └── src/
│ │ └── lib.rs
│ ├── html-to-markdown-node/
│ │ ├── Cargo.toml
│ │ ├── index.d.ts
│ │ ├── index.js
│ │ ├── npm/
│ │ │ ├── darwin-arm64/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── darwin-x64/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── linux-arm-gnueabihf/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── linux-arm64-gnu/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── linux-arm64-musl/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── linux-x64-gnu/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── linux-x64-musl/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ ├── win32-arm64-msvc/
│ │ │ │ ├── README.md
│ │ │ │ └── package.json
│ │ │ └── win32-x64-msvc/
│ │ │ ├── README.md
│ │ │ └── package.json
│ │ ├── package.json
│ │ └── src/
│ │ └── lib.rs
│ ├── html-to-markdown-php/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── lib.rs
│ ├── html-to-markdown-py/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── lib.rs
│ ├── html-to-markdown-rs-ffi/
│ │ └── README.md
│ ├── html-to-markdown-rs-wasm/
│ │ └── README.md
│ └── html-to-markdown-wasm/
│ ├── Cargo.toml
│ ├── package.json
│ ├── scripts/
│ │ ├── cleanup-gitignore.js
│ │ └── patch-bundler-entry.js
│ └── src/
│ └── lib.rs
├── deny.toml
├── docs/
│ ├── CNAME
│ ├── api-reference.md
│ ├── architecture.md
│ ├── cli.md
│ ├── configuration.md
│ ├── contributing.md
│ ├── css/
│ │ └── extra.css
│ ├── demo/
│ │ ├── html_to_markdown_wasm.js
│ │ ├── html_to_markdown_wasm_bg.wasm
│ │ ├── index.html
│ │ ├── script.js
│ │ └── style.css
│ ├── errors.md
│ ├── index.md
│ ├── installation.md
│ ├── language-guides.md
│ ├── llms.txt
│ ├── migration.md
│ ├── overrides/
│ │ └── main.html
│ ├── reference/
│ │ ├── api-c.md
│ │ ├── api-csharp.md
│ │ ├── api-elixir.md
│ │ ├── api-go.md
│ │ ├── api-java.md
│ │ ├── api-php.md
│ │ ├── api-python.md
│ │ ├── api-r.md
│ │ ├── api-ruby.md
│ │ ├── api-rust.md
│ │ ├── api-typescript.md
│ │ ├── api-wasm.md
│ │ ├── configuration.md
│ │ ├── errors.md
│ │ └── types.md
│ ├── snippets/
│ │ ├── c/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── csharp/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── elixir/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── feedback.md
│ │ ├── go/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── java/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── php/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── python/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── r/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── ruby/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── rust/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ ├── typescript/
│ │ │ ├── getting-started/
│ │ │ │ ├── basic_usage.md
│ │ │ │ └── with_options.md
│ │ │ ├── metadata/
│ │ │ │ └── basic_extraction.md
│ │ │ ├── table-extraction/
│ │ │ │ └── basic_extraction.md
│ │ │ └── visitor/
│ │ │ └── basic_visitor.md
│ │ └── wasm/
│ │ ├── getting-started/
│ │ │ ├── basic_usage.md
│ │ │ └── with_options.md
│ │ ├── metadata/
│ │ │ └── basic_extraction.md
│ │ ├── table-extraction/
│ │ │ └── basic_extraction.md
│ │ └── visitor/
│ │ └── basic_visitor.md
│ ├── tables.md
│ ├── usage.md
│ └── visitor.md
├── e2e/
│ ├── c/
│ │ ├── Makefile
│ │ ├── download_ffi.sh
│ │ ├── main.c
│ │ ├── test_conversion.c
│ │ ├── test_edge_cases.c
│ │ ├── test_metadata.c
│ │ ├── test_options.c
│ │ ├── test_real_world.c
│ │ ├── test_result.c
│ │ ├── test_runner.h
│ │ ├── test_smoke.c
│ │ └── test_structure.c
│ ├── csharp/
│ │ ├── HtmlToMarkdown.E2eTests.csproj
│ │ └── tests/
│ │ ├── ConversionTests.cs
│ │ ├── EdgeCasesTests.cs
│ │ ├── MetadataTests.cs
│ │ ├── OptionsTests.cs
│ │ ├── RealWorldTests.cs
│ │ ├── ResultTests.cs
│ │ ├── SmokeTests.cs
│ │ ├── StructureTests.cs
│ │ └── VisitorTests.cs
│ ├── dart/
│ │ └── pubspec.yaml
│ ├── elixir/
│ │ ├── mix.exs
│ │ └── test/
│ │ ├── conversion_test.exs
│ │ ├── edge_cases_test.exs
│ │ ├── metadata_test.exs
│ │ ├── options_test.exs
│ │ ├── real_world_test.exs
│ │ ├── result_test.exs
│ │ ├── smoke_test.exs
│ │ ├── structure_test.exs
│ │ ├── test_helper.exs
│ │ └── visitor_test.exs
│ ├── gleam/
│ │ └── gleam.toml
│ ├── go/
│ │ ├── conversion_test.go
│ │ ├── edge_cases_test.go
│ │ ├── go.mod
│ │ ├── go.sum
│ │ ├── metadata_test.go
│ │ ├── options_test.go
│ │ ├── real_world_test.go
│ │ ├── result_test.go
│ │ ├── smoke_test.go
│ │ ├── structure_test.go
│ │ └── visitor_test.go
│ ├── java/
│ │ ├── pom.xml
│ │ └── src/
│ │ └── test/
│ │ └── java/
│ │ └── dev/
│ │ └── kreuzberg/
│ │ └── htmltomarkdown/
│ │ └── e2e/
│ │ ├── ConversionTest.java
│ │ ├── EdgeCasesTest.java
│ │ ├── MetadataTest.java
│ │ ├── OptionsTest.java
│ │ ├── RealWorldTest.java
│ │ ├── ResultTest.java
│ │ ├── SmokeTest.java
│ │ ├── StructureTest.java
│ │ └── VisitorTest.java
│ ├── kotlin/
│ │ └── build.gradle.kts
│ ├── node/
│ │ ├── package.json
│ │ ├── tests/
│ │ │ ├── conversion.test.ts
│ │ │ ├── edge_cases.test.ts
│ │ │ ├── metadata.test.ts
│ │ │ ├── options.test.ts
│ │ │ ├── real_world.test.ts
│ │ │ ├── result.test.ts
│ │ │ ├── smoke.test.ts
│ │ │ ├── structure.test.ts
│ │ │ └── visitor.test.ts
│ │ ├── tsconfig.json
│ │ └── vitest.config.ts
│ ├── php/
│ │ ├── bootstrap.php
│ │ ├── composer.json
│ │ ├── phpunit.xml
│ │ └── tests/
│ │ ├── ConversionTest.php
│ │ ├── EdgeCasesTest.php
│ │ ├── MetadataTest.php
│ │ ├── OptionsTest.php
│ │ ├── RealWorldTest.php
│ │ ├── ResultTest.php
│ │ ├── SmokeTest.php
│ │ ├── StructureTest.php
│ │ └── VisitorTest.php
│ ├── python/
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── pyproject.toml
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_conversion.py
│ │ ├── test_edge_cases.py
│ │ ├── test_metadata.py
│ │ ├── test_options.py
│ │ ├── test_real_world.py
│ │ ├── test_result.py
│ │ ├── test_smoke.py
│ │ ├── test_structure.py
│ │ └── test_visitor.py
│ ├── r/
│ │ ├── DESCRIPTION
│ │ ├── run_tests.R
│ │ └── tests/
│ │ ├── test_conversion.R
│ │ ├── test_edge_cases.R
│ │ ├── test_metadata.R
│ │ ├── test_options.R
│ │ ├── test_real_world.R
│ │ ├── test_result.R
│ │ ├── test_smoke.R
│ │ ├── test_structure.R
│ │ └── test_visitor.R
│ ├── ruby/
│ │ ├── .rubocop.yaml
│ │ ├── Gemfile
│ │ └── spec/
│ │ ├── conversion_spec.rb
│ │ ├── edge_cases_spec.rb
│ │ ├── metadata_spec.rb
│ │ ├── options_spec.rb
│ │ ├── real_world_spec.rb
│ │ ├── result_spec.rb
│ │ ├── smoke_spec.rb
│ │ ├── structure_spec.rb
│ │ └── visitor_spec.rb
│ ├── rust/
│ │ ├── Cargo.toml
│ │ └── tests/
│ │ ├── conversion_test.rs
│ │ ├── edge_cases_test.rs
│ │ ├── metadata_test.rs
│ │ ├── options_test.rs
│ │ ├── real_world_test.rs
│ │ ├── result_test.rs
│ │ ├── smoke_test.rs
│ │ ├── structure_test.rs
│ │ └── visitor_test.rs
│ ├── swift/
│ │ └── Package.swift
│ ├── wasm/
│ │ ├── package.json
│ │ ├── tests/
│ │ │ ├── conversion.test.ts
│ │ │ ├── edge_cases.test.ts
│ │ │ ├── metadata.test.ts
│ │ │ ├── options.test.ts
│ │ │ ├── real_world.test.ts
│ │ │ ├── result.test.ts
│ │ │ ├── smoke.test.ts
│ │ │ ├── structure.test.ts
│ │ │ └── visitor.test.ts
│ │ ├── tsconfig.json
│ │ └── vitest.config.ts
│ └── zig/
│ ├── build.zig
│ └── build.zig.zon
├── fixtures/
│ ├── conversion/
│ │ ├── blockquotes.json
│ │ ├── code.json
│ │ ├── emphasis.json
│ │ ├── forms.json
│ │ ├── headings.json
│ │ ├── images.json
│ │ ├── line_breaks.json
│ │ ├── links.json
│ │ ├── lists.json
│ │ ├── paragraphs.json
│ │ ├── semantic.json
│ │ └── tables.json
│ ├── edge-cases/
│ │ ├── empty.json
│ │ ├── encoding.json
│ │ ├── malformed.json
│ │ ├── visitor_errors.json
│ │ └── xss.json
│ ├── metadata/
│ │ ├── basic.json
│ │ ├── document_properties.json
│ │ ├── links_and_images.json
│ │ ├── open_graph.json
│ │ └── structured_data.json
│ ├── options/
│ │ ├── br_in_tables.json
│ │ ├── code_block_style.json
│ │ ├── code_options.json
│ │ ├── escape_ascii.json
│ │ ├── escaping.json
│ │ ├── exclude_selectors.json
│ │ ├── heading_style.json
│ │ ├── highlight_style.json
│ │ ├── inline_and_newlines.json
│ │ ├── list_options.json
│ │ ├── max_depth.json
│ │ ├── newline_style.json
│ │ ├── output_format.json
│ │ ├── preprocessing.json
│ │ ├── remaining_options.json
│ │ ├── strong_em_symbol.json
│ │ ├── sub_sup_symbols.json
│ │ ├── tag_control.json
│ │ ├── whitespace_mode.json
│ │ └── wrapping.json
│ ├── real-world/
│ │ └── articles.json
│ ├── result/
│ │ ├── tables.json
│ │ └── warnings.json
│ ├── smoke/
│ │ └── basic.json
│ ├── structure/
│ │ ├── basic.json
│ │ └── nesting.json
│ └── visitor/
│ ├── advanced_elements.json
│ ├── basic.json
│ ├── elements.json
│ ├── formatting.json
│ ├── forms_and_semantics.json
│ ├── headings.json
│ ├── images.json
│ ├── links.json
│ └── media.json
├── just
├── package.json
├── packages/
│ ├── csharp/
│ │ ├── .editorconfig
│ │ ├── Directory.Build.props
│ │ ├── HtmlToMarkdown/
│ │ │ ├── AnnotationKind.cs
│ │ │ ├── CodeBlockStyle.cs
│ │ │ ├── ConfigErrorException.cs
│ │ │ ├── ConversionErrorException.cs
│ │ │ ├── ConversionOptions.cs
│ │ │ ├── ConversionOptionsBuilder.cs
│ │ │ ├── ConversionOptionsUpdate.cs
│ │ │ ├── ConversionResult.cs
│ │ │ ├── DocumentMetadata.cs
│ │ │ ├── DocumentNode.cs
│ │ │ ├── DocumentStructure.cs
│ │ │ ├── GridCell.cs
│ │ │ ├── HeaderMetadata.cs
│ │ │ ├── HeadingStyle.cs
│ │ │ ├── HighlightStyle.cs
│ │ │ ├── HtmlMetadata.cs
│ │ │ ├── HtmlToMarkdown.csproj
│ │ │ ├── HtmlToMarkdownRs.cs
│ │ │ ├── HtmlToMarkdownRsException.cs
│ │ │ ├── IVisitor.cs
│ │ │ ├── ImageMetadata.cs
│ │ │ ├── ImageType.cs
│ │ │ ├── InvalidInputException.cs
│ │ │ ├── IoErrorException.cs
│ │ │ ├── LinkMetadata.cs
│ │ │ ├── LinkStyle.cs
│ │ │ ├── LinkType.cs
│ │ │ ├── ListIndentType.cs
│ │ │ ├── NativeMethods.cs
│ │ │ ├── NewlineStyle.cs
│ │ │ ├── NodeContent.cs
│ │ │ ├── NodeContext.cs
│ │ │ ├── NodeType.cs
│ │ │ ├── OtherException.cs
│ │ │ ├── OutputFormat.cs
│ │ │ ├── PanicException.cs
│ │ │ ├── ParseErrorException.cs
│ │ │ ├── PreprocessingOptions.cs
│ │ │ ├── PreprocessingOptionsUpdate.cs
│ │ │ ├── PreprocessingPreset.cs
│ │ │ ├── ProcessingWarning.cs
│ │ │ ├── SanitizationErrorException.cs
│ │ │ ├── StructuredData.cs
│ │ │ ├── StructuredDataType.cs
│ │ │ ├── TableData.cs
│ │ │ ├── TableGrid.cs
│ │ │ ├── TextAnnotation.cs
│ │ │ ├── TextDirection.cs
│ │ │ ├── TraitBridges.cs
│ │ │ ├── VisitResult.cs
│ │ │ ├── VisitorCallbacks.cs
│ │ │ ├── VisitorHandle.cs
│ │ │ ├── WarningKind.cs
│ │ │ └── WhitespaceMode.cs
│ │ ├── HtmlToMarkdown.Tests/
│ │ │ └── HtmlToMarkdown.Tests.csproj
│ │ ├── HtmlToMarkdown.csproj
│ │ └── README.md
│ ├── elixir/
│ │ ├── .credo.exs
│ │ ├── .formatter.exs
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs
│ │ ├── config/
│ │ │ └── config.exs
│ │ ├── lib/
│ │ │ ├── html_to_markdown/
│ │ │ │ ├── annotation_kind.ex
│ │ │ │ ├── code_block_style.ex
│ │ │ │ ├── conversion_options.ex
│ │ │ │ ├── conversion_options_update.ex
│ │ │ │ ├── conversion_result.ex
│ │ │ │ ├── document_metadata.ex
│ │ │ │ ├── document_node.ex
│ │ │ │ ├── document_structure.ex
│ │ │ │ ├── grid_cell.ex
│ │ │ │ ├── header_metadata.ex
│ │ │ │ ├── heading_style.ex
│ │ │ │ ├── highlight_style.ex
│ │ │ │ ├── html_metadata.ex
│ │ │ │ ├── html_visitor_bridge.ex
│ │ │ │ ├── image_metadata.ex
│ │ │ │ ├── image_type.ex
│ │ │ │ ├── link_metadata.ex
│ │ │ │ ├── link_style.ex
│ │ │ │ ├── link_type.ex
│ │ │ │ ├── list_indent_type.ex
│ │ │ │ ├── native.ex
│ │ │ │ ├── newline_style.ex
│ │ │ │ ├── node_content.ex
│ │ │ │ ├── node_context.ex
│ │ │ │ ├── node_type.ex
│ │ │ │ ├── output_format.ex
│ │ │ │ ├── preprocessing_options.ex
│ │ │ │ ├── preprocessing_options_update.ex
│ │ │ │ ├── preprocessing_preset.ex
│ │ │ │ ├── processing_warning.ex
│ │ │ │ ├── structured_data.ex
│ │ │ │ ├── structured_data_type.ex
│ │ │ │ ├── table_data.ex
│ │ │ │ ├── table_grid.ex
│ │ │ │ ├── text_annotation.ex
│ │ │ │ ├── text_direction.ex
│ │ │ │ ├── visit_result.ex
│ │ │ │ ├── warning_kind.ex
│ │ │ │ └── whitespace_mode.ex
│ │ │ └── html_to_markdown.ex
│ │ ├── mix.exs
│ │ ├── native/
│ │ │ └── html_to_markdown_nif/
│ │ │ ├── Cargo.toml
│ │ │ └── src/
│ │ │ └── lib.rs
│ │ └── test/
│ │ └── test_helper.exs
│ ├── go/
│ │ ├── .golangci.yml
│ │ ├── README.md
│ │ ├── binding.go
│ │ ├── go.mod
│ │ └── v3/
│ │ └── README.md
│ ├── java/
│ │ ├── README.md
│ │ ├── checkstyle-suppressions.xml
│ │ ├── checkstyle.properties
│ │ ├── checkstyle.xml
│ │ ├── eclipse-formatter.xml
│ │ ├── pmd-ruleset.xml
│ │ ├── pom.xml
│ │ ├── pom.xml.versionsBackup
│ │ ├── src/
│ │ │ └── main/
│ │ │ ├── java/
│ │ │ │ └── dev/
│ │ │ │ └── kreuzberg/
│ │ │ │ └── htmltomarkdown/
│ │ │ │ ├── AnnotationKind.java
│ │ │ │ ├── CodeBlockStyle.java
│ │ │ │ ├── ConfigErrorException.java
│ │ │ │ ├── ConversionErrorException.java
│ │ │ │ ├── ConversionOptions.java
│ │ │ │ ├── ConversionOptionsBuilder.java
│ │ │ │ ├── ConversionOptionsUpdate.java
│ │ │ │ ├── ConversionOptionsUpdateBuilder.java
│ │ │ │ ├── ConversionResult.java
│ │ │ │ ├── ConversionResultBuilder.java
│ │ │ │ ├── DocumentMetadata.java
│ │ │ │ ├── DocumentMetadataBuilder.java
│ │ │ │ ├── DocumentNode.java
│ │ │ │ ├── DocumentStructure.java
│ │ │ │ ├── GridCell.java
│ │ │ │ ├── HeaderMetadata.java
│ │ │ │ ├── HeadingStyle.java
│ │ │ │ ├── HighlightStyle.java
│ │ │ │ ├── HtmlMetadata.java
│ │ │ │ ├── HtmlMetadataBuilder.java
│ │ │ │ ├── HtmlToMarkdown.java
│ │ │ │ ├── HtmlToMarkdownRs.java
│ │ │ │ ├── HtmlToMarkdownRsException.java
│ │ │ │ ├── HtmlVisitorBridge.java
│ │ │ │ ├── IHtmlVisitor.java
│ │ │ │ ├── ImageMetadata.java
│ │ │ │ ├── ImageType.java
│ │ │ │ ├── InvalidInputException.java
│ │ │ │ ├── IoErrorException.java
│ │ │ │ ├── LinkMetadata.java
│ │ │ │ ├── LinkStyle.java
│ │ │ │ ├── LinkType.java
│ │ │ │ ├── ListIndentType.java
│ │ │ │ ├── NativeLib.java
│ │ │ │ ├── NewlineStyle.java
│ │ │ │ ├── NodeContent.java
│ │ │ │ ├── NodeContext.java
│ │ │ │ ├── NodeType.java
│ │ │ │ ├── OtherException.java
│ │ │ │ ├── OutputFormat.java
│ │ │ │ ├── PanicException.java
│ │ │ │ ├── ParseErrorException.java
│ │ │ │ ├── PreprocessingOptions.java
│ │ │ │ ├── PreprocessingOptionsBuilder.java
│ │ │ │ ├── PreprocessingOptionsUpdate.java
│ │ │ │ ├── PreprocessingOptionsUpdateBuilder.java
│ │ │ │ ├── PreprocessingPreset.java
│ │ │ │ ├── ProcessingWarning.java
│ │ │ │ ├── SanitizationErrorException.java
│ │ │ │ ├── StructuredData.java
│ │ │ │ ├── StructuredDataType.java
│ │ │ │ ├── TableData.java
│ │ │ │ ├── TableGrid.java
│ │ │ │ ├── TableGridBuilder.java
│ │ │ │ ├── TestVisitor.java
│ │ │ │ ├── TestVisitorAdapter.java
│ │ │ │ ├── TextAnnotation.java
│ │ │ │ ├── TextDirection.java
│ │ │ │ ├── VisitContext.java
│ │ │ │ ├── VisitResult.java
│ │ │ │ ├── Visitor.java
│ │ │ │ ├── VisitorBridge.java
│ │ │ │ ├── VisitorHandle.java
│ │ │ │ ├── WarningKind.java
│ │ │ │ ├── WhitespaceMode.java
│ │ │ │ └── package-info.java
│ │ │ └── resources/
│ │ │ └── .gitkeep
│ │ └── versions-rules.xml
│ ├── node/
│ │ ├── .oxfmtrc.json
│ │ ├── .oxlintrc.json
│ │ ├── biome.json
│ │ ├── index.d.ts
│ │ ├── package.json
│ │ ├── src/
│ │ │ └── index.d.ts
│ │ └── tsconfig.json
│ ├── php/
│ │ ├── .gitignore
│ │ ├── .php-cs-fixer.dist.php
│ │ ├── README.md
│ │ ├── composer.json
│ │ ├── php-cs-fixer.php
│ │ ├── phpstan-baseline.neon
│ │ ├── phpstan-test.neon
│ │ ├── phpstan.neon
│ │ ├── phpunit.xml
│ │ ├── src/
│ │ │ ├── HtmlToMarkdown.php
│ │ │ └── functions.php
│ │ ├── stubs/
│ │ │ └── html_to_markdown_extension.php
│ │ └── tests/
│ │ └── .gitkeep
│ ├── python/
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── html_to_markdown/
│ │ │ ├── __init__.py
│ │ │ ├── _html_to_markdown.pyi
│ │ │ ├── api.py
│ │ │ ├── exceptions.py
│ │ │ ├── options.py
│ │ │ └── py.typed
│ │ ├── pyproject.toml
│ │ └── tests/
│ │ └── commonmark_spec.json
│ ├── r/
│ │ ├── .Rbuildignore
│ │ ├── .gitignore
│ │ ├── .lintr
│ │ ├── DESCRIPTION
│ │ ├── LICENSE
│ │ ├── NAMESPACE
│ │ ├── R/
│ │ │ ├── extendr-wrappers.R
│ │ │ ├── htmltomarkdown-package.R
│ │ │ ├── htmltomarkdown.R
│ │ │ ├── options.R
│ │ │ └── version.R
│ │ ├── README.md
│ │ ├── cleanup
│ │ ├── cleanup.win
│ │ ├── configure
│ │ ├── configure.win
│ │ ├── inst/
│ │ │ └── AUTHORS
│ │ ├── man/
│ │ │ ├── conversion_options.Rd
│ │ │ ├── convert.Rd
│ │ │ ├── htmltomarkdown-package.Rd
│ │ │ └── version.Rd
│ │ ├── src/
│ │ │ ├── Makevars.in
│ │ │ ├── Makevars.win.in
│ │ │ ├── entrypoint.c
│ │ │ └── rust/
│ │ │ ├── Cargo.toml
│ │ │ ├── src/
│ │ │ │ ├── lib.rs
│ │ │ │ ├── options.rs
│ │ │ │ └── types.rs
│ │ │ └── vendor-config.toml
│ │ ├── tests/
│ │ │ └── testthat.R
│ │ └── tools/
│ │ ├── config.R
│ │ └── msrv.R
│ ├── ruby/
│ │ ├── .gitignore
│ │ ├── .rubocop.yml
│ │ ├── Gemfile
│ │ ├── README.md
│ │ ├── Rakefile
│ │ ├── Steepfile
│ │ ├── exe/
│ │ │ └── html-to-markdown
│ │ ├── ext/
│ │ │ └── html_to_markdown_rb/
│ │ │ ├── Cargo.toml
│ │ │ ├── Makefile
│ │ │ ├── extconf.rb
│ │ │ ├── native/
│ │ │ │ └── Cargo.toml
│ │ │ └── src/
│ │ │ ├── html-to-markdown/
│ │ │ │ └── version.rb
│ │ │ ├── html-to-markdown.rb
│ │ │ └── lib.rs
│ │ ├── html_to_markdown.gemspec
│ │ ├── lib/
│ │ │ ├── html_to_markdown/
│ │ │ │ └── version.rb
│ │ │ └── html_to_markdown.rb
│ │ ├── sig/
│ │ │ ├── html_to_markdown/
│ │ │ │ ├── cli.rbs
│ │ │ │ └── cli_proxy.rbs
│ │ │ ├── open3.rbs
│ │ │ └── types.rbs
│ │ └── spec/
│ │ ├── html_to_markdown_spec.rb
│ │ └── spec_helper.rb
│ ├── typescript/
│ │ ├── .npmignore
│ │ ├── README.md
│ │ ├── index.d.ts
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── helpers.ts
│ │ │ └── index.ts
│ │ └── tsconfig.json
│ └── wasm/
│ └── src/
│ ├── helpers.ts
│ └── index.ts
├── pnpm-workspace.yaml
├── pyproject.toml
├── readme_templates/
│ ├── language_package.md
│ └── partials/
│ ├── _api_reference.md
│ ├── _badges.md
│ ├── _djot_output.md
│ ├── _footer.md
│ ├── _installation.md
│ ├── _metadata_extraction.md
│ ├── _plain_text_output.md
│ ├── _quick_start.md
│ └── _visitor_pattern.md
├── rust-toolchain.toml
├── rustfmt.toml
├── scripts/
│ ├── build-demo.sh
│ ├── ci/
│ │ ├── elixir/
│ │ │ ├── install-deps.sh
│ │ │ ├── install-hex-rebar.sh
│ │ │ ├── run-credo.sh
│ │ │ └── run-tests.sh
│ │ ├── go/
│ │ │ ├── detect-go-modules.sh
│ │ │ ├── install-golangci-lint.sh
│ │ │ └── run-golangci-lint.sh
│ │ ├── node/
│ │ │ ├── test-napi-cargo.sh
│ │ │ ├── test-napi.sh
│ │ │ └── test-typescript.sh
│ │ ├── php/
│ │ │ ├── run-php-tests.sh
│ │ │ ├── run-phpstan.sh
│ │ │ └── set-php-config.sh
│ │ ├── python/
│ │ │ ├── build-cli.sh
│ │ │ └── run-pytest.sh
│ │ ├── r/
│ │ │ ├── install-deps.sh
│ │ │ ├── run-lintr.sh
│ │ │ ├── run-tests.sh
│ │ │ └── vendor-core-crate.py
│ │ ├── ruby/
│ │ │ ├── run-rbs-validate.sh
│ │ │ ├── run-rspec-unix.sh
│ │ │ ├── run-rspec-windows.ps1
│ │ │ ├── run-rubocop.sh
│ │ │ ├── run-steep.sh
│ │ │ └── vendor-core-crate.py
│ │ ├── rust/
│ │ │ ├── check-fmt.sh
│ │ │ ├── install-cargo-llvm-cov.sh
│ │ │ ├── run-clippy.sh
│ │ │ ├── run-llvm-cov.sh
│ │ │ └── run-tests.sh
│ │ ├── smoke/
│ │ │ ├── capture-php-config.sh
│ │ │ └── install-pnpm-deps.sh
│ │ ├── validate/
│ │ │ ├── install-elixir-deps.sh
│ │ │ ├── install-ruby-deps.sh
│ │ │ ├── run-prek.sh
│ │ │ └── run-rust-checks.sh
│ │ └── wasm/
│ │ ├── run-wasmtime-tests.sh
│ │ ├── test-wasm-bundle.sh
│ │ └── test-wasm-rust.sh
│ ├── common/
│ │ ├── enable-corepack.sh
│ │ ├── ensure-wasm-target.sh
│ │ ├── install-maven-latest.sh
│ │ └── install-wasm-pack.sh
│ ├── generate_visitor_callbacks.py
│ ├── preferred-ruby.sh
│ ├── preferred-rustc.sh
│ ├── prepare_ruby_gem.rb
│ ├── prepare_wheel.py
│ ├── publish/
│ │ ├── cli/
│ │ │ ├── build-cli.sh
│ │ │ ├── configure-cross-linker.sh
│ │ │ ├── install-build-deps-linux.sh
│ │ │ ├── install-cross.sh
│ │ │ ├── package-cli-artifact.ps1
│ │ │ └── package-cli-artifact.sh
│ │ ├── common/
│ │ │ ├── add-rust-target.sh
│ │ │ └── ensure-target-commit.sh
│ │ ├── crates/
│ │ │ ├── package-crates.sh
│ │ │ ├── publish-cli.sh
│ │ │ ├── publish-rs.sh
│ │ │ ├── verify-cargo-version.sh
│ │ │ └── wait-for-indexing.sh
│ │ ├── csharp/
│ │ │ ├── pack.sh
│ │ │ ├── restore.sh
│ │ │ └── stage-ffi.sh
│ │ ├── elixir/
│ │ │ ├── build-hex-package.sh
│ │ │ ├── install-deps.sh
│ │ │ ├── install-hex-rebar.sh
│ │ │ ├── run-tests.sh
│ │ │ ├── stage-rust-core.sh
│ │ │ └── vendor-dependencies.sh
│ │ ├── ensure-github-release-exists.sh
│ │ ├── generate_elixir_checksums.sh
│ │ ├── go/
│ │ │ └── create-module-tag.sh
│ │ ├── java/
│ │ │ └── copy-native-libs.sh
│ │ ├── maven/
│ │ │ ├── patch-legacy-gpg-args.sh
│ │ │ └── prefer-gpg2.sh
│ │ ├── node/
│ │ │ ├── build-native-module.ps1
│ │ │ ├── build-native-module.sh
│ │ │ ├── clean-npm-dir.ps1
│ │ │ ├── clean-npm-dir.sh
│ │ │ ├── create-npm-package-structure.sh
│ │ │ ├── generate-typescript-defs.sh
│ │ │ ├── install-node-deps.sh
│ │ │ ├── pack-platform-packages.sh
│ │ │ ├── package-artifacts.ps1
│ │ │ ├── package-artifacts.sh
│ │ │ ├── prepare-artifact-directory.sh
│ │ │ └── prepublish-main-package.sh
│ │ ├── python/
│ │ │ ├── build-cli-for-sdist.sh
│ │ │ ├── build-sdist.sh
│ │ │ ├── install-build-deps.sh
│ │ │ └── prepare-sdist-with-cli.sh
│ │ ├── r/
│ │ │ ├── already-published-summary.sh
│ │ │ ├── build-cran-package.sh
│ │ │ ├── run-tests.sh
│ │ │ ├── stage-rust-core.sh
│ │ │ └── vendor-dependencies.sh
│ │ ├── ruby/
│ │ │ ├── already-published-summary.sh
│ │ │ ├── build-gem-unix.sh
│ │ │ ├── build-gem-windows.ps1
│ │ │ ├── build-native-gem.rb
│ │ │ ├── configure-bindgen-windows.sh
│ │ │ ├── install-deps-unix.sh
│ │ │ ├── install-deps-windows.ps1
│ │ │ ├── install-msys2-toolchain.ps1
│ │ │ ├── install-rust-gnu.ps1
│ │ │ └── remove-cached-cli.sh
│ │ ├── typescript/
│ │ │ └── build-package.sh
│ │ ├── upload-c-ffi-artifacts.sh
│ │ ├── upload-cli-artifacts.sh
│ │ ├── upload-elixir-package.sh
│ │ ├── upload-go-ffi-artifacts.sh
│ │ ├── upload-homebrew-bottles.sh
│ │ ├── upload-php-pie.sh
│ │ ├── validate-and-compute-metadata.sh
│ │ └── wasm/
│ │ ├── build-bundles.sh
│ │ ├── extract-artifacts.sh
│ │ ├── install-deps.sh
│ │ └── package-artifacts.sh
│ ├── readme_config.yaml
│ ├── readme_templates/
│ │ ├── language_package.md.jinja
│ │ └── partials/
│ │ ├── _api_reference.md.jinja
│ │ ├── _badges.md.jinja
│ │ ├── _djot_output.md.jinja
│ │ ├── _footer.md.jinja
│ │ ├── _installation.md.jinja
│ │ ├── _metadata_extraction.md.jinja
│ │ ├── _plain_text_output.md.jinja
│ │ ├── _quick_start.md.jinja
│ │ └── _visitor_pattern.md.jinja
│ └── update_dotnet_packages.py
├── skills/
│ └── html-to-markdown/
│ ├── SKILL.md
│ └── references/
│ ├── cli-reference.md
│ ├── configuration.md
│ ├── other-bindings.md
│ ├── python-api.md
│ ├── rust-api.md
│ └── typescript-api.md
├── test_apps/
│ ├── README.md
│ ├── bun/
│ │ ├── README.md
│ │ ├── package.json
│ │ └── smoke.test.ts
│ ├── c/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── download_ffi.sh
│ │ ├── htm_test
│ │ ├── main.c
│ │ ├── run_tests
│ │ ├── test_conversion.c
│ │ ├── test_runner.h
│ │ └── test_smoke.c
│ ├── csharp/
│ │ ├── E2eTests.csproj
│ │ ├── KreuzbergDev.HtmlToMarkdown.E2eTests.csproj
│ │ ├── README.md
│ │ └── tests/
│ │ ├── ConversionTests.cs
│ │ └── SmokeTests.cs
│ ├── elixir/
│ │ ├── README.md
│ │ ├── deps/
│ │ │ ├── html_to_markdown/
│ │ │ │ ├── .formatter.exs
│ │ │ │ ├── .hex
│ │ │ │ ├── README.md
│ │ │ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs
│ │ │ │ ├── hex_metadata.config
│ │ │ │ └── mix.exs
│ │ │ ├── jason/
│ │ │ │ ├── .hex
│ │ │ │ ├── CHANGELOG.md
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── hex_metadata.config
│ │ │ │ └── mix.exs
│ │ │ ├── rustler/
│ │ │ │ ├── .hex
│ │ │ │ ├── README.md
│ │ │ │ ├── hex_metadata.config
│ │ │ │ ├── mix.exs
│ │ │ │ └── priv/
│ │ │ │ └── templates/
│ │ │ │ ├── basic/
│ │ │ │ │ ├── Cargo.toml.eex
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── src/
│ │ │ │ │ └── lib.rs
│ │ │ │ └── root/
│ │ │ │ └── Cargo.toml.eex
│ │ │ ├── rustler_precompiled/
│ │ │ │ ├── .hex
│ │ │ │ ├── CHANGELOG.md
│ │ │ │ ├── PRECOMPILATION_GUIDE.md
│ │ │ │ ├── README.md
│ │ │ │ ├── TROUBLESHOOTING.md
│ │ │ │ ├── hex_metadata.config
│ │ │ │ └── mix.exs
│ │ │ └── toml/
│ │ │ ├── .hex
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── hex_metadata.config
│ │ │ └── mix.exs
│ │ ├── mix.exs
│ │ └── test/
│ │ ├── conversion_test.exs
│ │ ├── smoke_test.exs
│ │ └── test_helper.exs
│ ├── fixtures/
│ │ ├── README.md
│ │ ├── basic-html.json
│ │ ├── complex-html.json
│ │ ├── edge-cases.json
│ │ ├── metadata-extraction.json
│ │ └── real-world.json
│ ├── go/
│ │ ├── README.md
│ │ ├── conversion_test.go
│ │ ├── go.mod
│ │ ├── go.sum
│ │ ├── run_tests.sh
│ │ └── smoke_test.go
│ ├── java/
│ │ ├── .mvn/
│ │ │ └── wrapper/
│ │ │ └── maven-wrapper.properties
│ │ ├── README.md
│ │ ├── mvnw
│ │ ├── mvnw.cmd
│ │ ├── pom.xml
│ │ └── src/
│ │ └── test/
│ │ └── java/
│ │ └── dev/
│ │ └── kreuzberg/
│ │ ├── e2e/
│ │ │ ├── ConversionTest.java
│ │ │ └── SmokeTest.java
│ │ └── htmltomarkdown/
│ │ └── e2e/
│ │ ├── ConversionTest.java
│ │ └── SmokeTest.java
│ ├── node/
│ │ ├── .nvmrc
│ │ ├── README.md
│ │ ├── package.json
│ │ ├── tests/
│ │ │ ├── conversion.test.ts
│ │ │ └── smoke.test.ts
│ │ ├── tsconfig.json
│ │ └── vitest.config.ts
│ ├── php/
│ │ ├── README.md
│ │ ├── bootstrap.php
│ │ ├── composer.json
│ │ ├── phpstan.neon
│ │ ├── phpunit.xml
│ │ └── tests/
│ │ ├── ConversionTest.php
│ │ └── SmokeTest.php
│ ├── php-ext/
│ │ ├── README.md
│ │ ├── main.php
│ │ └── run_tests.sh
│ ├── python/
│ │ ├── .python-version
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── pyproject.toml
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_conversion.py
│ │ └── test_smoke.py
│ ├── r/
│ │ ├── DESCRIPTION
│ │ ├── run_tests.R
│ │ └── tests/
│ │ ├── test_conversion.R
│ │ └── test_smoke.R
│ ├── ruby/
│ │ ├── .bundle/
│ │ │ └── config
│ │ ├── .rubocop.yaml
│ │ ├── .ruby-version
│ │ ├── Gemfile
│ │ ├── README.md
│ │ └── spec/
│ │ ├── conversion_spec.rb
│ │ └── smoke_spec.rb
│ ├── rust/
│ │ ├── Cargo.toml
│ │ └── tests/
│ │ ├── conversion_test.rs
│ │ └── smoke_test.rs
│ └── wasm/
│ ├── .nvmrc
│ ├── README.md
│ ├── globalSetup.ts
│ ├── package.json
│ ├── tests/
│ │ ├── conversion.test.ts
│ │ └── smoke.test.ts
│ ├── tsconfig.json
│ └── vitest.config.ts
├── test_documents/
│ └── html/
│ ├── issues/
│ │ ├── gh-121-hacker-news.html
│ │ ├── gh-121-hacker-news.md
│ │ ├── gh-121-minimal-failing.html
│ │ ├── gh-121-spa-app.html
│ │ ├── gh-121-spa-app.md
│ │ ├── gh-127-issue.html
│ │ ├── gh-134-pre-code.html
│ │ ├── gh-134-pre-code.md
│ │ ├── gh-140-table-cell-pipe-with-escape-misc.md
│ │ ├── gh-140-table-cell-pipe.html
│ │ ├── gh-140-table-cell-pipe.md
│ │ ├── gh-143-links-wordwrap.html
│ │ ├── gh-143-links-wordwrap.md
│ │ ├── gh-190/
│ │ │ ├── firsteigen.html
│ │ │ ├── flex2021.html
│ │ │ ├── flex2025.html
│ │ │ ├── insight.html
│ │ │ ├── kimbrain.html
│ │ │ ├── maxkim.html
│ │ │ ├── mitrade.html
│ │ │ ├── ozonekorea.html
│ │ │ ├── plusblog.html
│ │ │ ├── rbloggers.html
│ │ │ ├── sjsu.html
│ │ │ └── vipaarontours.html
│ │ ├── test-nested-simple.html
│ │ ├── test-nested-simple.md
│ │ └── test-with-custom-elements.html
│ ├── visitor/
│ │ ├── baseline.html
│ │ ├── callbacks.html
│ │ ├── complex.html
│ │ └── custom.html
│ └── wikipedia/
│ ├── large_rust.html
│ ├── lists_timeline.html
│ ├── medium_python.html
│ ├── small_html.html
│ └── tables_countries.html
├── tsconfig.base.json
└── zensical.toml
================================================
FILE CONTENTS
================================================
================================================
FILE: .ai-rulez/config.toml
================================================
# AI-Rulez Configuration (migrated to V4 TOML format)
# Documentation: https://github.com/Goldziher/ai-rulez
version = '4.0'
name = 'html-to-markdown'
description = 'High-performance HTML to Markdown converter with Rust core and polyglot bindings (Python, TypeScript, Ruby, PHP, Go, Java, C#, Elixir, R, WebAssembly, C FFI).'
gitignore = true
presets = ['claude', 'copilot', 'cursor', 'antigravity', 'codex']
builtins = ['rust', 'python', 'typescript', 'go', 'java', 'ruby', 'php', 'csharp', 'elixir', 'r', 'default-commands']
[header]
style = 'minimal'
[[includes]]
name = 'kreuzberg-core'
source = 'https://github.com/kreuzberg-dev/ai-rulez.git'
path = 'modules/core'
merge_strategy = 'local-override'
[[includes]]
name = 'kreuzberg-languages'
source = 'https://github.com/kreuzberg-dev/ai-rulez.git'
path = 'modules/languages'
merge_strategy = 'local-override'
[[includes]]
name = 'kreuzberg-cicd'
source = 'https://github.com/kreuzberg-dev/ai-rulez.git'
path = 'modules/cicd'
merge_strategy = 'local-override'
[[includes]]
name = 'kreuzberg-infrastructure'
source = 'https://github.com/kreuzberg-dev/ai-rulez.git'
path = 'modules/infrastructure'
merge_strategy = 'local-override'
[[includes]]
name = 'kreuzberg-e2e-generator'
source = 'https://github.com/kreuzberg-dev/ai-rulez.git'
path = 'modules/e2e-generator'
merge_strategy = 'local-override'
[[installed_skills]]
name = 'alef'
source = 'https://github.com/kreuzberg-dev/alef.git'
[[mcp_servers]]
name = 'playwright'
description = 'Playwright browser automation for E2E testing and docs verification'
command = 'npx'
args = ['-y', '@playwright/mcp@latest']
[defaults]
effort = 'medium'
================================================
FILE: .ai-rulez/context/crate-structure.md
================================================
---
priority: high
---
# Crate & Package Structure
## Workspace crates (`crates/`)
- `html-to-markdown` — core library, primary Rust API, `unsafe_code = "forbid"` at workspace level
- `html-to-markdown-cli` — CLI binary (clap)
- `html-to-markdown-ffi` — C FFI bridge, cbindgen headers, **only crate that overrides unsafe_code lint**
- `html-to-markdown-py` — PyO3 Python binding
- `html-to-markdown-node` — NAPI-RS Node/TypeScript binding
- `html-to-markdown-php` — ext-php-rs PHP binding
- `html-to-markdown-wasm` — wasm-bindgen WebAssembly binding
## Out-of-workspace packages (`packages/`)
- `csharp/`, `elixir/`, `go/`, `java/`, `r/`, `ruby/` — language-native packages wrapping the FFI crate
- `php/`, `python/`, `typescript/`, `wasm/` — distribution packages
## Primary API
- `convert(&str, Option<ConversionOptions>) -> Result<ConversionResult, ConversionError>`
- `ConversionResult`: `content`, `warnings`, optionally `metadata` and `inline_images` (feature-gated)
- Feature flags: `inline-images`, `metadata`, `visitor` (custom traversal), `serde`
- Dual parser: html5ever (spec-compliance) and astral-tl (performance), selectable via `ConversionOptions`
================================================
FILE: .ai-rulez/domains/conversion-algorithms/DOMAIN.md
================================================
# Conversion Algorithms Domain
## Purpose
Core HTML-to-Markdown transformation logic. Converts parsed DOM trees into well-formatted Markdown output for 60+ HTML element types.
## Key Areas
- **Block elements**: headings, paragraphs, blockquotes, lists, tables, code blocks, horizontal rules, semantic HTML5 elements
- **Inline elements**: bold, italic, strikethrough, inline code, links, images, abbreviations
- **Tables**: GFM pipe tables with alignment, colspan/rowspan handling, complex table fallbacks
- **Lists**: ordered, unordered, nested, task lists, definition lists, tight vs loose detection
- **Forms & media**: input fields, textareas, selects, audio, video, iframes, embeds
- **Special elements**: line breaks, comments, SVG text extraction, ruby annotations
## Architecture
Visitor pattern in `visitor.rs` dispatches to per-element converter functions. Conversion behavior is controlled by `ConversionOptions` (heading style, list indent, code block style, newline style, table format).
## Dependencies
- Upstream: HTML Parsing domain (DOM tree), Safety-Sanitization domain (attribute validation)
- Downstream: Output formatting, metadata extraction
================================================
FILE: .ai-rulez/domains/html-parsing/DOMAIN.md
================================================
# HTML Parsing Domain
## Purpose
Foundation of the conversion pipeline: HTML parser selection, DOM tree construction, and tree traversal infrastructure.
## Key Areas
- **Parser backends**: html5ever (HTML5 spec compliance, malformed HTML recovery) and tl/astral-tl (lightweight, fast)
- **DOM traversal**: depth-first tree walking via visitor pattern, parent/child/sibling navigation
- **Node types**: element nodes (60+ tags), text nodes, comment nodes, document/fragment nodes
- **Text extraction**: text content from subtrees, configurable whitespace handling (preserve, minimal, collapse)
- **Attribute access**: by name, iteration, class checking, case-insensitive per HTML spec
- **Safety constraints**: depth limits, size limits, binary data rejection, encoding detection
## Architecture
Parser infrastructure in `converter.rs` and `wrapper.rs`. DOM traversal via `DomWalker` trait in `visitor.rs`. Element classification into Block, Inline, Void, FormControl, Semantic categories. Configuration through `ConversionOptions` (parser type, encoding, whitespace mode, max depth, max size).
## Dependencies
- Upstream: html5ever, astral-tl, encoding_rs
- Downstream: Conversion Algorithms domain, Safety-Sanitization domain
================================================
FILE: .ai-rulez/domains/safety-sanitization/DOMAIN.md
================================================
# Safety & Sanitization Domain
## Purpose
Protects the conversion pipeline from malicious or malformed input. Ensures converted Markdown output cannot be exploited for XSS, code injection, or data exfiltration.
## Key Areas
- **Input validation**: binary data detection (magic numbers, null byte ratios, control char ratios), encoding detection, size/depth limits
- **XSS prevention**: dangerous element removal (script, style, iframe, object, embed), event handler stripping, javascript:/data:/vbscript: URL blocking
- **URL sanitization**: scheme whitelist (http, https, mailto, ftp), protocol normalization, URL-encoded payload detection, case-insensitive scheme matching
- **Attribute filtering**: event handler removal, safe attribute whitelist (id, class, title, alt, href, src), style sanitization
- **SVG handling**: script/style removal within SVG, event handler stripping, xlink:href validation, text extraction fallback
- **Runtime safety**: stack overflow prevention (max nesting depth), memory bounds enforcement, ReDoS prevention
## Architecture
Multi-layer defense: validate_input() -> sanitize -> parse -> convert with URL/attribute sanitization at each element. Configuration via `SafetyConfig` (max document size, max nesting depth, allowed tags/attributes/schemes, strip options).
## Dependencies
- Upstream: url, encoding_rs
- Downstream: HTML Parsing domain (operates on validated input), Conversion Algorithms domain (safe elements only)
================================================
FILE: .ai-rulez/rules/alef-generated-bindings.md
================================================
---
priority: critical
---
- Files in `packages/*/` and binding crates are generated or managed by Alef — check `alef.toml` before editing
- `alef.toml` defines: output paths, module names, rename mappings, e2e call overrides, README templates
- Run `alef generate` after changing `alef.toml` — commit both source and generated files
- Never hand-edit generated files; modify `alef.toml` or the Rust source instead
- Fixtures under `fixtures/` feed `tools/e2e-generator/` — never add tests to `e2e/` directly
================================================
FILE: .cargo/config.toml
================================================
[build]
incremental = true
[target.wasm32-unknown-unknown]
rustflags = ["-C", "target-feature=+bulk-memory", "--cfg", "getrandom_backend=\"wasm_js\""]
[net]
git-fetch-with-cli = true
[registries.crates-io]
protocol = "sparse"
[target.'cfg(target_os = "macos")']
rustflags = ["-C", "link-arg=-Wl,-undefined,dynamic_lookup"]
[target.x86_64-pc-windows-msvc]
linker = "rust-lld"
[target.i686-pc-windows-msvc]
linker = "rust-lld"
[target.x86_64-unknown-linux-musl]
linker = "musl-gcc"
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"
[env]
RUBY = { value = "scripts/preferred-ruby.sh", relative = true }
================================================
FILE: .clang-format
================================================
---
BasedOnStyle: LLVM
IndentWidth: 4
ColumnLimit: 100
BreakBeforeBraces: Attach
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
SortIncludes: true
================================================
FILE: .editorconfig
================================================
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
# All files
[*]
charset = utf-8
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
# Code files
[*.{cs,go,rs,py,js,ts,tsx,jsx,php,rb}]
indent_style = space
# C# files
[*.cs]
indent_size = 4
# Organize usings
dotnet_sort_system_directives_first = true
dotnet_separate_import_directive_groups = false
# this. and Me. preferences
dotnet_style_qualification_for_field = false:warning
dotnet_style_qualification_for_property = false:warning
dotnet_style_qualification_for_method = false:warning
dotnet_style_qualification_for_event = false:warning
# Language keywords vs BCL types preferences
dotnet_style_predefined_type_for_locals_parameters_members = true:warning
dotnet_style_predefined_type_for_member_access = true:warning
# Parentheses preferences
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:suggestion
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:suggestion
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:suggestion
dotnet_style_parentheses_in_other_operators = never_if_unnecessary:suggestion
# Modifier preferences
dotnet_style_require_accessibility_modifiers = always:warning
dotnet_style_readonly_field = true:warning
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion
# Expression-level preferences
dotnet_style_object_initializer = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_explicit_tuple_names = true:warning
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
dotnet_style_prefer_auto_properties = true:suggestion
dotnet_style_prefer_conditional_expression_over_assignment = true:silent
dotnet_style_prefer_conditional_expression_over_return = true:silent
dotnet_style_prefer_compound_assignment = true:suggestion
dotnet_style_prefer_simplified_interpolation = true:suggestion
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
# Null-checking preferences
dotnet_style_coalesce_expression = true:warning
dotnet_style_null_propagation = true:warning
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:warning
# C# Code Style Rules
# var preferences
csharp_style_var_for_built_in_types = true:suggestion
csharp_style_var_when_type_is_apparent = true:suggestion
csharp_style_var_elsewhere = true:suggestion
# Expression-bodied members
csharp_style_expression_bodied_methods = when_on_single_line:suggestion
csharp_style_expression_bodied_constructors = false:silent
csharp_style_expression_bodied_operators = when_on_single_line:suggestion
csharp_style_expression_bodied_properties = when_on_single_line:suggestion
csharp_style_expression_bodied_indexers = when_on_single_line:suggestion
csharp_style_expression_bodied_accessors = when_on_single_line:suggestion
csharp_style_expression_bodied_lambdas = when_on_single_line:suggestion
csharp_style_expression_bodied_local_functions = when_on_single_line:suggestion
# Pattern matching preferences
csharp_style_pattern_matching_over_is_with_cast_check = true:warning
csharp_style_pattern_matching_over_as_with_null_check = true:warning
csharp_style_prefer_switch_expression = true:suggestion
csharp_style_prefer_pattern_matching = true:suggestion
csharp_style_prefer_not_pattern = true:suggestion
# Null-checking preferences
csharp_style_throw_expression = true:suggestion
csharp_style_conditional_delegate_call = true:warning
# Code block preferences
csharp_prefer_braces = true:warning
csharp_prefer_simple_using_statement = true:suggestion
# Expression preferences
csharp_prefer_simple_default_expression = true:suggestion
csharp_style_pattern_local_over_anonymous_function = true:suggestion
csharp_style_inlined_variable_declaration = true:suggestion
csharp_style_deconstructed_variable_declaration = true:suggestion
csharp_style_prefer_index_operator = true:suggestion
csharp_style_prefer_range_operator = true:suggestion
csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion
# C# Formatting Rules
# New line preferences
csharp_new_line_before_open_brace = all
csharp_new_line_before_else = true
csharp_new_line_before_catch = true
csharp_new_line_before_finally = true
csharp_new_line_before_members_in_object_initializers = true
csharp_new_line_before_members_in_anonymous_types = true
csharp_new_line_between_query_expression_clauses = true
# Indentation preferences
csharp_indent_case_contents = true
csharp_indent_switch_labels = true
csharp_indent_labels = no_change
csharp_indent_block_contents = true
csharp_indent_braces = false
csharp_indent_case_contents_when_block = false
# Space preferences
csharp_space_after_cast = false
csharp_space_after_keywords_in_control_flow_statements = true
csharp_space_between_parentheses = false
csharp_space_before_colon_in_inheritance_clause = true
csharp_space_after_colon_in_inheritance_clause = true
csharp_space_around_binary_operators = before_and_after
csharp_space_between_method_declaration_parameter_list_parentheses = false
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
csharp_space_between_method_declaration_name_and_open_parenthesis = false
csharp_space_between_method_call_parameter_list_parentheses = false
csharp_space_between_method_call_empty_parameter_list_parentheses = false
csharp_space_between_method_call_name_and_opening_parenthesis = false
csharp_space_after_comma = true
csharp_space_after_dot = false
csharp_space_after_semicolon_in_for_statement = true
csharp_space_before_semicolon_in_for_statement = false
csharp_space_around_declaration_statements = false
csharp_space_before_open_square_brackets = false
csharp_space_between_empty_square_brackets = false
csharp_space_between_square_brackets = false
# Wrap preferences
csharp_preserve_single_line_statements = false
csharp_preserve_single_line_blocks = true
# Using directive preferences
csharp_using_directive_placement = outside_namespace:warning
# Go files
[*.go]
indent_style = tab
indent_size = 4
# Rust files
[*.rs]
indent_size = 4
# Python files
[*.py]
indent_size = 4
# JavaScript/TypeScript files
[*.{js,ts,tsx,jsx}]
indent_size = 2
# Ruby files
[*.rb]
indent_size = 2
# PHP files
[*.php]
indent_size = 4
# YAML files
[*.{yml,yaml}]
indent_size = 2
# Markdown files
[*.md]
trim_trailing_whitespace = false
================================================
FILE: .github/CODEOWNERS
================================================
# Default owner — everything
* @Goldziher
# Zensical config and documentation
/zensical.toml @Goldziher @pratik-mahalle @v-tan
/docs/ @Goldziher @pratik-mahalle @v-tan
*.md @Goldziher @pratik-mahalle @v-tan
# Rust crates
/crates/ @Goldziher @kh3rld
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug Report
description: Report a bug or unexpected behavior
title: "bug: "
labels: ["bug"]
projects: ["kreuzberg-dev/1"]
body:
- type: textarea
id: description
attributes:
label: Description
description: What happened? What did you expect to happen?
validations:
required: true
- type: textarea
id: steps-to-reproduce
attributes:
label: Steps to reproduce
description: Minimal steps to reproduce the issue.
validations:
required: true
- type: textarea
id: reproduction-files
attributes:
label: Relevant files and configuration
description: >-
Any configuration files, input files, or code snippets needed to
reproduce the issue.
render: text
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
================================================
FILE: .github/ISSUE_TEMPLATE/documentation.yml
================================================
name: Documentation Issue
description: Report missing, unclear, or incorrect documentation
title: "docs: "
labels: ["documentation"]
projects: ["kreuzberg-dev/1"]
body:
- type: textarea
id: what
attributes:
label: What
description: What documentation is missing, unclear, or incorrect?
validations:
required: true
- type: textarea
id: why
attributes:
label: Why
description: Why does this need to change?
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature Request
description: Suggest a new feature or improvement
title: "feat: "
labels: ["enhancement"]
projects: ["kreuzberg-dev/1"]
body:
- type: textarea
id: what
attributes:
label: What is the proposed feature?
validations:
required: true
- type: textarea
id: why
attributes:
label: Why would this be a good addition?
validations:
required: true
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Related
<!-- Link issues or discussions if applicable -->
## Description
<!-- What does this PR do? -->
## Checklist
- [ ] CI passing
- [ ] Tests added where applicable
================================================
FILE: .github/actions/build-typescript/action.yml
================================================
name: Build TypeScript package
description: Builds TypeScript package (requires Node bindings to be built first)
runs:
using: composite
steps:
- name: Build TypeScript package
shell: bash
working-directory: packages/typescript
run: pnpm run build
================================================
FILE: .github/actions/smoke-pie/action.yml
================================================
name: Smoke test PIE install
description: Tests PHP extension installation via PIE
inputs:
pie-artifacts-dir:
description: Directory containing PIE source artifacts
required: true
runs:
using: composite
steps:
- name: Smoke PIE install
shell: bash
env:
COMPOSER_ALLOW_SUPERUSER: 1
run: |
set -euo pipefail
# Download PIE
curl -fsSL https://github.com/php/pie/releases/latest/download/pie.phar -o /tmp/pie.phar
# Find the PIE source archive
pie_archive=$(find "${{ inputs.pie-artifacts-dir }}" -name "php_html_to_markdown-*.tgz" | head -n 1)
if [ -z "$pie_archive" ]; then
echo "PIE source archive not found" >&2
exit 1
fi
# Extract to temp dir and install via PIE
tmp=$(mktemp -d)
tar -xzf "$pie_archive" -C "$tmp"
# Add as local repository and build
php /tmp/pie.phar repository:add path "$tmp"
CARGO_BIN=$(command -v cargo)
php /tmp/pie.phar build kreuzberg-dev/html-to-markdown:*@dev --working-dir "$tmp" --with-cargo-bin="$CARGO_BIN"
# Find the built extension
ext_so=$(find "$tmp" -name "*.so" -path "*/html_to_markdown.so" | head -n 1)
if [ -z "$ext_so" ]; then
echo "Extension .so file not found after PIE build" >&2
exit 1
fi
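# TODO: load and exercise the built extension ($ext_so). For now this step only
# verifies that the PIE build produced html_to_markdown.so (placeholder for smoke test).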
# Note: PHP smoke example directory was removed
# Consider implementing integration tests via packages/php/tests
echo "✓ PIE install smoke test passed"
================================================
FILE: .github/dependabot.yaml
================================================
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
ignore:
# Do not auto-bump the artifact actions across major versions; major upgrades are done by hand.
# v6 and v7 require Actions Runner 2.327.1+ (released Dec 12, 2025)
- dependency-name: "actions/upload-artifact"
update-types: ["version-update:semver-major"]
- dependency-name: "actions/download-artifact"
update-types: ["version-update:semver-major"]
- package-ecosystem: "cargo"
# Explicitly list root only — packages/ruby/ext has a standalone workspace
# with path deps to vendored crates that only exist at build time
directories:
- "/"
schedule:
interval: "weekly"
ignore:
- dependency-name: "html-to-markdown-rs"
- package-ecosystem: "pip"
directories:
- "/"
- "/packages/python"
schedule:
interval: "weekly"
- package-ecosystem: "npm"
directories:
- "/"
- "/crates/html-to-markdown-node"
- "/crates/html-to-markdown-wasm"
- "/packages/typescript"
schedule:
interval: "weekly"
- package-ecosystem: "bundler"
directory: "/packages/ruby"
schedule:
interval: "weekly"
- package-ecosystem: "composer"
directories:
- "/"
- "/packages/php"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/packages/go/v3"
schedule:
interval: "weekly"
- package-ecosystem: "maven"
directory: "/packages/java"
schedule:
interval: "weekly"
- package-ecosystem: "nuget"
directory: "/packages/csharp"
schedule:
interval: "weekly"
- package-ecosystem: "mix"
directory: "/packages/elixir"
schedule:
interval: "weekly"
================================================
FILE: .github/workflows/ci.yaml
================================================
name: CI
on:
push:
branches: [main]
paths:
- "crates/**"
- "packages/**"
- "e2e/**"
- "tools/**"
- "scripts/**"
- "fixtures/**"
- ".github/**"
- ".cargo/config.toml"
- ".pre-commit-config.yaml"
- ".golangci.yml"
- "alef.toml"
- "pyproject.toml"
- "uv.lock"
- "uv.toml"
- "pnpm-lock.yaml"
- "pnpm-workspace.yaml"
- "package.json"
- "Cargo.toml"
- "Cargo.lock"
- "Taskfile.yaml"
- ".task/**"
- "rustfmt.toml"
- "rust-toolchain.toml"
- "Gemfile"
- "Gemfile.lock"
- "composer.json"
- "composer.lock"
- "go.mod"
- "go.sum"
pull_request:
branches: [main]
paths:
- "crates/**"
- "packages/**"
- "e2e/**"
- "tools/**"
- "scripts/**"
- "fixtures/**"
- ".github/**"
- ".cargo/config.toml"
- ".pre-commit-config.yaml"
- ".golangci.yml"
- "alef.toml"
- "pyproject.toml"
- "uv.lock"
- "uv.toml"
- "pnpm-lock.yaml"
- "pnpm-workspace.yaml"
- "package.json"
- "Cargo.toml"
- "Cargo.lock"
- "Taskfile.yaml"
- ".task/**"
- "rustfmt.toml"
- "rust-toolchain.toml"
- "Gemfile"
- "Gemfile.lock"
- "composer.json"
- "composer.lock"
- "go.mod"
- "go.sum"
workflow_dispatch: {}
concurrency:
group: ci-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUST_BACKTRACE: short
BUILD_PROFILE: "ci"
GO_VERSION: "1.26.0"
GO_TOOLCHAIN: "go1.26.0"
GOLANGCI_LINT_VERSION: "latest"
permissions:
contents: read
# --- Stage 1: Validate ---
jobs:
validate:
name: "Validate"
runs-on: ubuntu-24.04-arm
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
- name: Setup Python
uses: kreuzberg-dev/actions/setup-python-env@v1 # v1
with:
python-version: "3.13"
- name: Setup Node Workspace
uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1
- name: Setup Go
uses: actions/setup-go@v6 # v6
with:
go-version: ${{ env.GO_VERSION }}
cache-dependency-path: packages/go/go.sum
- name: Install golangci-lint
uses: golangci/golangci-lint-action@v9
with:
install-only: true
- name: Setup Java
uses: actions/setup-java@v5 # v5
with:
distribution: temurin
java-version: "25"
- name: Setup Elixir
uses: kreuzberg-dev/actions/setup-elixir@v1 # v1
- name: Setup Ruby
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: "3.4"
bundler-cache: false
- name: Setup PHP
uses: kreuzberg-dev/actions/setup-php@v1 # v1
- name: Setup R
uses: kreuzberg-dev/actions/setup-r@v1 # v1
with:
install-deps-script: scripts/ci/r/install-deps.sh
- name: Install C/C++ tools
run: |
sudo apt-get update -qq
sudo apt-get install -y --no-install-recommends cppcheck clang-format
- name: Install Alef
uses: kreuzberg-dev/actions/install-alef@v1
- name: Install All Binding Dependencies
run: alef setup
shell: bash
- name: Run Lint Checks
run: task lint:check
shell: bash
- name: Check Code Formatting
run: task format:check
shell: bash
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1
- name: Run Pre-commit Hooks
uses: j178/prek-action@v2 # v2
with:
extra-args: --all-files
- name: Install Python README Dependencies
run: pip install pyyaml jinja2
shell: bash
- name: Validate READMEs
run: task docs:generate-readme:check
shell: bash
validate-rust:
name: "Validate: Rust"
runs-on: ubuntu-24.04-arm
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
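# NOTE(review): this step runs the same task as "Run Clippy" below; confirm whether a dedicated formatting task was intended here.
- name: Check Rust Formatting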
run: task rust:lint:check
shell: bash
- name: Run Clippy
run: task rust:lint:check
shell: bash
- name: Check feature flags (html-to-markdown)
shell: bash
env:
RUSTFLAGS: "-D warnings"
run: |
cargo check -p html-to-markdown-rs --no-default-features
cargo check -p html-to-markdown-rs --no-default-features --features visitor
cargo check -p html-to-markdown-rs --no-default-features --features metadata
cargo check -p html-to-markdown-rs --no-default-features --features inline-images
changes:
name: "Detect Changes"
runs-on: ubuntu-24.04-arm
outputs:
core: ${{ steps.filter.outputs.core }}
rust: ${{ steps.filter.outputs.rust }}
ffi: ${{ steps.filter.outputs.ffi }}
python: ${{ steps.filter.outputs.python }}
node: ${{ steps.filter.outputs.node }}
ruby: ${{ steps.filter.outputs.ruby }}
php: ${{ steps.filter.outputs.php }}
go: ${{ steps.filter.outputs.go }}
java: ${{ steps.filter.outputs.java }}
elixir: ${{ steps.filter.outputs.elixir }}
r: ${{ steps.filter.outputs.r }}
wasm: ${{ steps.filter.outputs.wasm }}
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Detect changes
uses: dorny/paths-filter@v4 # v4
id: filter
with:
filters: |
core:
- 'crates/html-to-markdown/**'
- 'Cargo.toml'
- 'Cargo.lock'
- 'rust-toolchain.toml'
- '.cargo/config.toml'
- 'fixtures/**'
- 'tools/e2e-generator/**'
rust:
- 'crates/html-to-markdown/**'
- 'crates/html-to-markdown-cli/**'
- 'e2e/rust/**'
- 'Cargo.toml'
- 'Cargo.lock'
- 'rustfmt.toml'
ffi:
- 'crates/html-to-markdown-ffi/**'
- 'crates/html-to-markdown/**'
- 'Cargo.toml'
- 'Cargo.lock'
python:
- 'crates/html-to-markdown-py/**'
- 'packages/python/**'
- 'e2e/python/**'
- 'pyproject.toml'
- 'uv.lock'
- 'uv.toml'
- 'fixtures/**'
node:
- 'crates/html-to-markdown-node/**'
- 'packages/typescript/**'
- 'e2e/node/**'
- 'package.json'
- 'pnpm-lock.yaml'
- 'pnpm-workspace.yaml'
- 'fixtures/**'
ruby:
- 'packages/ruby/**'
- 'e2e/ruby/**'
- 'Gemfile'
- 'Gemfile.lock'
- 'fixtures/**'
php:
- 'crates/html-to-markdown-php/**'
- 'packages/php/**'
- 'packages/php-ext/**'
- 'e2e/php/**'
- 'composer.json'
- 'composer.lock'
- 'fixtures/**'
go:
- 'packages/go/**'
- 'e2e/go/**'
- 'crates/html-to-markdown-ffi/**'
- 'go.mod'
- 'go.sum'
- 'fixtures/**'
java:
- 'packages/java/**'
- 'e2e/java/**'
- 'crates/html-to-markdown-ffi/**'
- 'fixtures/**'
elixir:
- 'packages/elixir/**'
- 'e2e/elixir/**'
- 'crates/html-to-markdown-ffi/**'
- 'fixtures/**'
r:
- 'packages/r/**'
- 'e2e/r/**'
- 'fixtures/**'
wasm:
- 'crates/html-to-markdown-wasm/**'
- 'crates/html-to-markdown-wasm-wasi/**'
- 'packages/wasm/**'
- 'e2e/wasm/**'
- 'fixtures/**'
# --- Stage 2: Core Builds ---
build-ffi:
needs: [validate, validate-rust, changes]
name: "Build: FFI (${{ matrix.runner }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.ffi == 'true'
runs-on: ${{ matrix.runner }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
runner:
- ubuntu-latest
- ubuntu-24.04-arm
- macos-latest
- windows-latest
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Free disk space
if: startsWith(matrix.runner, 'ubuntu')
uses: kreuzberg-dev/actions/free-disk-space-linux@v1 # v1
with:
show-initial: "false"
show-final: "true"
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: ffi-${{ matrix.runner }}
use-sccache: false
- name: Build html-to-markdown-ffi (release, Unix)
if: matrix.runner != 'windows-latest'
run: cargo build --release -p html-to-markdown-ffi
shell: bash
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: "0"
RUST_BACKTRACE: short
- name: Build html-to-markdown-ffi (debug, Windows)
if: matrix.runner == 'windows-latest'
run: cargo build -p html-to-markdown-ffi
shell: bash
env:
CARGO_TERM_COLOR: always
- name: Verify header exists
shell: bash
run: |
HEADER="crates/html-to-markdown-ffi/include/html_to_markdown.h"
test -f "$HEADER"
echo "Header verified: $HEADER"
- name: Upload FFI artifacts
if: matrix.runner != 'windows-latest'
uses: actions/upload-artifact@v7 # v7
with:
name: ffi-${{ matrix.runner }}
path: |
target/release/libhtml_to_markdown_ffi.*
target/release/html_to_markdown_ffi.*
crates/html-to-markdown-ffi/include/html_to_markdown.h
retention-days: 7
if-no-files-found: warn
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
rust-tests:
needs: [validate, validate-rust, changes]
name: "Test: Rust (${{ matrix.os }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.rust == 'true'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Set up Python
uses: actions/setup-python@v6 # v6
with:
python-version: "3.13"
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
cache-key-prefix: rust-tests-${{ matrix.os }}
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Run Rust Tests
env:
RUST_BACKTRACE: full
run: task rust:test:ci
shell: bash
- name: Run E2E Tests
run: task rust:e2e:test
shell: bash
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
rust-coverage:
needs: [validate, validate-rust, changes]
name: "Coverage: Rust"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Generate Rust Coverage
run: task rust:coverage
shell: bash
- name: Upload Coverage Artifacts
if: always()
uses: actions/upload-artifact@v7 # v7
with:
name: coverage-report-${{ github.sha }}
path: rust-coverage.lcov
retention-days: 7
# --- Stage 3: Language Builds ---
build-python:
needs: [rust-tests, changes]
name: "Build: Python (${{ matrix.os }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.python == 'true'
runs-on: ${{ matrix.os }}
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python: ["3.10", "3.12", "3.14"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Install uv
uses: astral-sh/setup-uv@v7 # v7
with:
enable-cache: true
- name: Set up Python
uses: actions/setup-python@v6 # v6
with:
python-version: ${{ matrix.python }}
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
cache-key-prefix: python-${{ matrix.os }}-${{ matrix.python }}
- name: Install Python Dependencies
uses: nick-fields/retry@v4 # v4
with:
timeout_minutes: 5
max_attempts: 3
retry_wait_seconds: 30
command: |
if [[ "${{ runner.os }}" == "Windows" ]] && [[ -d ".venv" ]]; then
echo "Removing existing .venv directory on Windows"
rm -rf .venv
fi
uv sync --all-extras --no-install-workspace
shell: bash
- name: Build Python Bindings
run: |
uv pip install maturin
cd packages/python && uv run maturin develop --release
shell: bash
- name: Build CLI binary
run: cargo build --release -p html-to-markdown-cli
shell: bash
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
build-node:
needs: [rust-tests, changes]
name: "Build: Node (${{ matrix.os }}, ${{ matrix.runtime }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.node == 'true'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runtime: [node, bun]
exclude:
- os: windows-latest
runtime: bun
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: node-${{ matrix.os }}-${{ matrix.runtime }}
- name: Setup Node.js workspace
if: matrix.runtime == 'node'
uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1
- name: Setup Bun
if: matrix.runtime == 'bun'
uses: oven-sh/setup-bun@v2 # v2
with:
bun-version: latest
- name: Build NAPI-RS Bindings (Node.js)
if: matrix.runtime == 'node'
uses: kreuzberg-dev/actions/build-node-napi@v1 # v1
with:
crate-dir: crates/html-to-markdown-node
- name: Install workspace dependencies (Bun)
if: matrix.runtime == 'bun'
run: bun install
shell: bash
- name: Build NAPI-RS Bindings (Bun)
if: matrix.runtime == 'bun'
working-directory: crates/html-to-markdown-node
run: bun run build
shell: bash
- name: Build TypeScript package (Node.js)
if: matrix.runtime == 'node'
uses: ./.github/actions/build-typescript
- name: Build TypeScript package (Bun)
if: matrix.runtime == 'bun'
working-directory: packages/typescript
run: bun x tsc --project tsconfig.json
shell: bash
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
build-ruby:
needs: [rust-tests, changes]
name: "Build: Ruby (${{ matrix.os }}, ruby-${{ matrix.ruby }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.ruby == 'true'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
ruby: ["3.2", "3.3"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: ruby-${{ matrix.os }}-${{ matrix.ruby }}
- name: Setup Ruby (Unix)
if: runner.os != 'Windows'
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: ${{ matrix.ruby }}
bundler: "4.0.3"
bundler-cache: false
working-directory: packages/ruby
- name: Setup Ruby (Windows)
if: runner.os == 'Windows'
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: ${{ matrix.ruby }}
bundler: "4.0.3"
bundler-cache: false
working-directory: packages/ruby
windows-toolchain: UCRT64
- name: Set up Python
uses: actions/setup-python@v6 # v6
with:
python-version: "3.12"
- name: Vendor core crate
run: python3 scripts/ci/ruby/vendor-core-crate.py
shell: bash
- name: Build CLI binary
uses: kreuzberg-dev/actions/build-rust-cli@v1 # v1
with:
package-name: html-to-markdown-cli
binary-name: html-to-markdown
- name: Build Ruby extension
uses: kreuzberg-dev/actions/build-ruby-gem@v1 # v1
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
build-php:
needs: [rust-tests, changes]
name: "Build: PHP"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.php == 'true'
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Setup PHP
uses: shivammathur/setup-php@2.37.0 # 2
with:
php-version: "8.4"
tools: composer:2.9.1
coverage: none
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: php
- name: Capture php-config path
run: scripts/ci/php/set-php-config.sh
shell: bash
# Both Composer installs authenticate against github.com with the workflow token.
- name: Install root Composer dependencies
uses: ramsey/composer-install@4.0.0 # 4
with:
dependency-versions: locked
env:
COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}'
- name: Install PHP package Composer dependencies
uses: ramsey/composer-install@4.0.0 # 4
with:
dependency-versions: locked
working-directory: packages/php
env:
COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}'
- name: Build PHP extension
id: build-php-extension
uses: kreuzberg-dev/actions/build-php-extension@v1 # v1
with:
crate-name: html-to-markdown-php
lib-name: html_to_markdown_php
- name: Upload PHP extension artifact
uses: actions/upload-artifact@v7 # v7
with:
name: php-extension-ubuntu
path: ${{ steps.build-php-extension.outputs.extension-path }}
retention-days: 7
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
build-java:
needs: [rust-tests, changes]
name: "Build: Java (${{ matrix.os }})"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.java == 'true'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
java: ["25"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: java-${{ matrix.os }}
- name: Test Java Panama FFI bindings
uses: kreuzberg-dev/actions/test-java-ffi@v1 # v1
with:
ffi-crate-name: html-to-markdown-ffi
ffi-lib-name: html_to_markdown_ffi
java-version: ${{ matrix.java }}
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
build-wasm:
needs: [rust-tests, changes]
name: "Build: WASM"
if: |
github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.core == 'true' ||
needs.changes.outputs.wasm == 'true'
runs-on: ubuntu-latest
timeout-minutes: 120
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
id: checkout
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
target: wasm32-unknown-unknown
use-sccache: false
cache-key-prefix: wasm
- name: Ensure wasm target installed
run: scripts/common/ensure-wasm-target.sh
shell: bash
- name: Install wasm-pack
run: scripts/common/install-wasm-pack.sh
shell: bash
- name: Setup Node workspace
uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1
- name: Build WASM (all targets)
uses: kreuzberg-dev/actions/build-wasm-package@v1 # v1
with:
crate-dir: crates/html-to-markdown-wasm
- name: Cleanup Rust cache
if: always() && steps.checkout.outcome == 'success'
uses: kreuzberg-dev/actions/cleanup-rust-cache@v1 # v1
# --- Stage 4: Language Tests ---
test-python:
needs: [build-python]
name: "Test: Python (${{ matrix.os }}, py-${{ matrix.python }})"
if: always() && !cancelled() && needs.build-python.result != 'skipped'
runs-on: ${{ matrix.os }}
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python: ["3.10", "3.12", "3.14"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Install uv
uses: astral-sh/setup-uv@v7 # v7
with:
enable-cache: true
- name: Set up Python
uses: actions/setup-python@v6 # v6
with:
python-version: ${{ matrix.python }}
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
components: rustfmt, clippy, llvm-tools-preview
- name: Install Python Dependencies
uses: nick-fields/retry@v4 # v4
with:
timeout_minutes: 5
max_attempts: 3
retry_wait_seconds: 30
command: |
if [[ "${{ runner.os }}" == "Windows" ]] && [[ -d ".venv" ]]; then
echo "Removing existing .venv directory on Windows"
rm -rf .venv
fi
uv sync --all-extras --no-install-workspace
shell: bash
- name: Build Python Bindings
run: |
uv pip install maturin
cd packages/python && uv run maturin develop --release
shell: bash
- name: Build CLI binary
run: cargo build --release -p html-to-markdown-cli
shell: bash
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do before invoking it.
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests
run: alef test --e2e --lang python
shell: bash
test-node:
needs: [build-node]
name: "Test: Node (${{ matrix.os }}, ${{ matrix.runtime }})"
if: always() && !cancelled() && needs.build-node.result != 'skipped'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runtime: [node, bun]
exclude:
- os: windows-latest
runtime: bun
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Setup Node.js workspace
if: matrix.runtime == 'node'
uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1
- name: Setup Bun
if: matrix.runtime == 'bun'
uses: oven-sh/setup-bun@v2 # v2
with:
bun-version: latest
- name: Build NAPI-RS Bindings (Node.js)
if: matrix.runtime == 'node'
uses: kreuzberg-dev/actions/build-node-napi@v1 # v1
with:
crate-dir: crates/html-to-markdown-node
- name: Install workspace dependencies (Bun)
if: matrix.runtime == 'bun'
run: bun install
shell: bash
- name: Build NAPI-RS Bindings (Bun)
if: matrix.runtime == 'bun'
working-directory: crates/html-to-markdown-node
run: bun run build
shell: bash
- name: Run Rust Tests (Node.js only)
if: matrix.runtime == 'node'
run: task rust:test
shell: bash
- name: Build TypeScript package (Node.js)
if: matrix.runtime == 'node'
uses: ./.github/actions/build-typescript
- name: Build TypeScript package (Bun)
if: matrix.runtime == 'bun'
working-directory: packages/typescript
run: bun x tsc --project tsconfig.json
shell: bash
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do. Only the node runtime runs E2E,
# so the install carries the same condition.
- name: Install alef
if: matrix.runtime == 'node'
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests (Node.js only)
if: matrix.runtime == 'node'
run: alef test --e2e --lang node
shell: bash
test-ruby:
needs: [build-ruby]
name: "Test: Ruby (${{ matrix.os }}, ruby-${{ matrix.ruby }})"
if: always() && !cancelled() && needs.build-ruby.result != 'skipped'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
ruby: ["3.2", "3.3"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Setup Ruby (Unix)
if: runner.os != 'Windows'
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: ${{ matrix.ruby }}
bundler: "4.0.3"
bundler-cache: false
working-directory: packages/ruby
- name: Setup Ruby (Windows)
if: runner.os == 'Windows'
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: ${{ matrix.ruby }}
bundler: "4.0.3"
bundler-cache: false
working-directory: packages/ruby
windows-toolchain: UCRT64
- name: Set up Python
uses: actions/setup-python@v6 # v6
with:
python-version: "3.12"
- name: Vendor core crate
run: python3 scripts/ci/ruby/vendor-core-crate.py
shell: bash
- name: Build CLI binary
uses: kreuzberg-dev/actions/build-rust-cli@v1 # v1
with:
package-name: html-to-markdown-cli
binary-name: html-to-markdown
- name: Build Ruby extension
uses: kreuzberg-dev/actions/build-ruby-gem@v1 # v1
# Static analysis runs on a single matrix cell: ubuntu-latest with Ruby 3.3.
# matrix.os == 'ubuntu-latest' already excludes Windows runners.
- name: Run Rubocop (Ubuntu/ruby-3.3 only)
if: matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3'
shell: bash
run: ./scripts/ci/ruby/run-rubocop.sh
- name: Validate RBS signatures (Ubuntu/ruby-3.3 only)
if: matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3'
shell: bash
run: ./scripts/ci/ruby/run-rbs-validate.sh
- name: Run Steep type checking (Ubuntu/ruby-3.3 only)
if: matrix.os == 'ubuntu-latest' && matrix.ruby == '3.3'
working-directory: packages/ruby
shell: bash
run: ../../scripts/ci/ruby/run-steep.sh
- name: Run Ruby specs (Unix)
if: runner.os != 'Windows'
working-directory: packages/ruby
run: ../../scripts/ci/ruby/run-rspec-unix.sh
shell: bash
- name: Run Ruby specs (Windows)
if: runner.os == 'Windows'
working-directory: packages/ruby
shell: pwsh
run: ../../scripts/ci/ruby/run-rspec-windows.ps1
- name: Install Task
if: runner.os != 'Windows'
uses: kreuzberg-dev/actions/install-task@v1 # v1
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do. E2E is skipped on Windows, so the
# install carries the same condition.
- name: Install alef
if: runner.os != 'Windows'
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests (Unix only)
if: runner.os != 'Windows'
run: alef test --e2e --lang ruby
shell: bash
test-php:
needs: [build-php]
name: "Test: PHP"
if: always() && !cancelled() && needs.build-php.result != 'skipped'
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup PHP
uses: shivammathur/setup-php@2.37.0 # 2
with:
php-version: "8.4"
tools: composer:2.9.1
coverage: none
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Capture php-config path
run: scripts/ci/php/set-php-config.sh
shell: bash
# Both Composer installs authenticate against github.com with the workflow token.
- name: Install root Composer dependencies
uses: ramsey/composer-install@4.0.0 # 4
with:
dependency-versions: locked
env:
COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}'
- name: Install PHP package Composer dependencies
uses: ramsey/composer-install@4.0.0 # 4
with:
dependency-versions: locked
working-directory: packages/php
env:
COMPOSER_AUTH: '{"github-oauth":{"github.com":"${{ secrets.GITHUB_TOKEN }}"}}'
- name: Build PHP extension
id: build-php-extension
uses: kreuzberg-dev/actions/build-php-extension@v1 # v1
with:
crate-name: html-to-markdown-php
lib-name: html_to_markdown_php
- name: Run PHP static analysis
run: scripts/ci/php/run-phpstan.sh
shell: bash
- name: Run PHP tests
run: scripts/ci/php/run-php-tests.sh
shell: bash
env:
EXTENSION_PATH: ${{ steps.build-php-extension.outputs.extension-path }}
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do before invoking it.
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests
run: alef test --e2e --lang php
shell: bash
env:
# Path of the extension produced by the build-php-extension step above.
EXTENSION_PATH: ${{ steps.build-php-extension.outputs.extension-path }}
test-go:
needs: [build-ffi, changes]
name: "Test: Go"
if: |
always() && !cancelled() && needs.build-ffi.result != 'skipped' &&
(github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.go == 'true' ||
needs.changes.outputs.ffi == 'true' ||
needs.changes.outputs.core == 'true')
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Setup Go
uses: actions/setup-go@v6 # v6
with:
go-version: ${{ env.GO_VERSION }}
check-latest: true
- name: Build FFI library
run: cargo build --release -p html-to-markdown-ffi
shell: bash
- name: Detect Go modules
id: set-modules
shell: bash
run: scripts/ci/go/detect-go-modules.sh
- name: Install golangci-lint
if: steps.set-modules.outputs.modules != '[]'
env:
GOTOOLCHAIN: ${{ env.GO_TOOLCHAIN }}
run: scripts/ci/go/install-golangci-lint.sh
shell: bash
# Lints every Go module reported by the "Detect Go modules" step.
- name: Run golangci-lint (all modules)
if: steps.set-modules.outputs.modules != '[]'
shell: bash
env:
# Hand the step output to the script through the environment instead of
# interpolating it into the script text, so its content is never parsed
# as shell code.
MODULES: ${{ steps.set-modules.outputs.modules }}
run: |
for module in $(jq -r '.[]' <<<"$MODULES"); do
echo "=== Linting $module ==="
(cd "$module" && "$GITHUB_WORKSPACE/scripts/ci/go/run-golangci-lint.sh")
done
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests
run: alef test --e2e --lang go
shell: bash
test-java:
needs: [build-java]
name: "Test: Java (${{ matrix.os }})"
if: always() && !cancelled() && needs.build-java.result != 'skipped'
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
java: ["25"]
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Test Java Panama FFI bindings
uses: kreuzberg-dev/actions/test-java-ffi@v1 # v1
with:
ffi-crate-name: html-to-markdown-ffi
ffi-lib-name: html_to_markdown_ffi
java-version: ${{ matrix.java }}
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do. E2E only runs on ubuntu-latest,
# so the install carries the same condition.
- name: Install alef
if: matrix.os == 'ubuntu-latest'
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests (Ubuntu only)
if: matrix.os == 'ubuntu-latest'
run: alef test --e2e --lang java
shell: bash
test-elixir:
needs: [build-ffi, changes]
name: "Test: Elixir"
if: |
always() && !cancelled() && needs.build-ffi.result != 'skipped' &&
(github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.elixir == 'true' ||
needs.changes.outputs.ffi == 'true' ||
needs.changes.outputs.core == 'true')
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Elixir
uses: erlef/setup-beam@v1 # v1
with:
elixir-version: "1.19"
otp-version: "28.1"
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Install Hex/Rebar
run: scripts/ci/elixir/install-hex-rebar.sh
shell: bash
- name: Install dependencies
working-directory: packages/elixir
run: ../../scripts/ci/elixir/install-deps.sh
shell: bash
- name: Run tests
working-directory: packages/elixir
run: ../../scripts/ci/elixir/run-tests.sh
shell: bash
- name: Credo lint
working-directory: packages/elixir
run: ../../scripts/ci/elixir/run-credo.sh
shell: bash
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests
run: alef test --e2e --lang elixir
shell: bash
test-r:
needs: [rust-tests, changes]
name: "Test: R"
if: |
always() && !cancelled() && needs.rust-tests.result != 'skipped' &&
(github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.r == 'true' ||
needs.changes.outputs.core == 'true')
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup R
uses: kreuzberg-dev/actions/setup-r@v1 # v1
with:
install-deps-script: scripts/ci/r/install-deps.sh
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Run tests
working-directory: packages/r
run: ../../scripts/ci/r/run-tests.sh
shell: bash
- name: Run lintr
working-directory: packages/r
run: ../../scripts/ci/r/run-lintr.sh
shell: bash
- name: Install Task
uses: kreuzberg-dev/actions/install-task@v1 # v1
# No earlier step in this job provisions the alef CLI, so install it here,
# the same way test-go and test-elixir do before invoking it.
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1 # v1
- name: Run E2E tests
run: alef test --e2e --lang r
shell: bash
test-c-ffi:
needs: [build-ffi, changes]
name: "Test: C FFI (${{ matrix.runner }})"
if: |
always() && !cancelled() && needs.build-ffi.result != 'skipped' &&
(github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.ffi == 'true' ||
needs.changes.outputs.core == 'true')
runs-on: ${{ matrix.runner }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
runner:
- ubuntu-latest
- ubuntu-24.04-arm
- macos-latest
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: c-ffi-${{ matrix.runner }}
use-sccache: false
- name: Build html-to-markdown-ffi
shell: bash
run: cargo build --release -p html-to-markdown-ffi
- name: Run C e2e tests
shell: bash
env:
LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
DYLD_LIBRARY_PATH: ${{ github.workspace }}/target/release
run: cd e2e/c && make test
- name: Verify header exists
shell: bash
run: |
HEADER="crates/html-to-markdown-ffi/include/html_to_markdown.h"
test -f "$HEADER"
echo "Header verified: $HEADER"
- name: Verify pkg-config output
shell: bash
run: |
PC_DIR="$(pwd)/target/release/build"
PC_FILE=$(find "$PC_DIR" -name 'html-to-markdown.pc' -path '*/html-to-markdown-ffi-*/out/*' 2>/dev/null | head -1)
if [ -z "$PC_FILE" ]; then
echo "Warning: html-to-markdown.pc not found in build output"
find "$PC_DIR" -name '*.pc' 2>/dev/null || echo "No .pc files found"
else
echo "Found pkg-config file: $PC_FILE"
cat "$PC_FILE"
fi
test-c-ffi-windows:
needs: [build-ffi, changes]
name: "Test: C FFI (windows-latest)"
if: |
always() && !cancelled() && needs.build-ffi.result != 'skipped' &&
(github.event_name == 'workflow_dispatch' ||
needs.changes.outputs.ffi == 'true' ||
needs.changes.outputs.core == 'true')
runs-on: windows-latest
timeout-minutes: 60
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
cache-key-prefix: c-ffi-windows
use-sccache: false
- name: Build html-to-markdown-ffi
shell: bash
run: cargo build -p html-to-markdown-ffi
- name: Verify header generated
shell: bash
run: |
test -f crates/html-to-markdown-ffi/include/html_to_markdown.h
echo "Header verified on Windows."
test-wasm:
needs: [build-wasm]
name: "Test: WASM"
if: always() && !cancelled() && needs.build-wasm.result != 'skipped'
runs-on: ubuntu-latest
timeout-minutes: 120
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
with:
target: wasm32-unknown-unknown
use-sccache: false
- name: Ensure wasm target installed
run: scripts/common/ensure-wasm-target.sh
shell: bash
- name: Install wasm-pack
run: scripts/common/install-wasm-pack.sh
shell: bash
- name: Setup Node workspace
uses: kreuzberg-dev/actions/setup-node-workspace@v1 # v1
- name: Build WASM (all targets)
uses: kreuzberg-dev/actions/build-wasm-package@v1 # v1
with:
crate-dir: crates/html-to-markdown-wasm
- name: Test WASM bundle
working-directory: crates/html-to-markdown-wasm
run: ../../scripts/ci/wasm/test-wasm-bundle.sh
shell: bash
- name: Run Rust WASM tests
working-directory: crates/html-to-markdown-wasm
run: ../../scripts/ci/wasm/test-wasm-rust.sh
shell: bash
================================================
FILE: .github/workflows/deploy-docs.yaml
================================================
name: Deploy Documentation
on:
push:
branches: [main]
paths:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- '.github/workflows/deploy-docs.yaml'
workflow_dispatch:
# Workflow-wide default is read-only. The deploy job declares the
# pages/id-token write scopes it needs in its own permissions block, so the
# build job no longer inherits write access it does not use.
permissions:
contents: read
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6 # v6
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v6 # v6
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v7 # v7
with:
enable-cache: true
- name: Install dependencies and build docs
run: |
uv sync --group doc --no-editable --no-install-workspace --no-install-project
uv run --no-sync zensical build --clean
# Publishes the built `site` directory as the Pages artifact for the deploy job.
- name: Upload Pages artifact
uses: actions/upload-pages-artifact@v5 # v5
with:
path: site
deploy:
needs: build
permissions:
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
# The step id is referenced by the job's environment url (outputs.page_url).
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v5 # v5
================================================
FILE: .github/workflows/publish.yaml
================================================
name: Publish Release
on:
workflow_dispatch:
inputs:
tag:
description: "Release tag to build (e.g., v2.6.0)"
required: true
type: string
dry_run:
description: "Prepare artifacts without publishing"
required: false
type: boolean
default: false
ref:
description: "Git ref (branch, tag, or commit) to build; defaults to the tag"
required: false
type: string
force_republish_java:
description: "Force republish Java artifacts even if the version exists"
required: false
type: boolean
default: false
force_republish_wasm:
description: "Force republish WASM package even if the version exists"
required: false
type: boolean
default: false
republish:
description: "Delete and re-create the tag on current HEAD before publishing (retag + full republish)"
required: false
type: boolean
default: false
release:
types: [published]
repository_dispatch:
types: [publish-release]
permissions:
contents: write
concurrency:
group: ${{ github.workflow }}-${{ (github.event_name == 'workflow_dispatch' && (github.event.inputs.ref || github.event.inputs.tag)) || github.ref || github.run_id }}
cancel-in-progress: false
jobs:
prepare:
name: Prepare metadata
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.meta.outputs.tag }}
version: ${{ steps.meta.outputs.version }}
ref: ${{ steps.meta.outputs.ref }}
dry_run: ${{ steps.meta.outputs.dry_run }}
checkout_ref: ${{ steps.meta.outputs.checkout_ref }}
target_sha: ${{ steps.meta.outputs.target_sha }}
matrix_ref: ${{ steps.meta.outputs.matrix_ref }}
is_tag: ${{ steps.meta.outputs.is_tag }}
force_republish_java: ${{ steps.republish.outputs.force_republish_java }}
force_republish_wasm: ${{ steps.republish.outputs.force_republish_wasm }}
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ (inputs.republish == true && (inputs.ref || github.event.repository.default_branch)) || inputs.ref || inputs.tag || github.ref }}
fetch-depth: 0
# Moves the release tag to the currently checked-out commit: the remote ref is
# deleted and re-created through the REST API, then the local tag is refreshed.
# NOTE(review): this step also fires for repository_dispatch payloads
# (client_payload.republish), but the Checkout ref expression above only
# looks at inputs.republish — confirm the dispatch path checks out the
# intended commit before retagging.
- name: Retag for republish
if: ${{ inputs.republish == true || github.event.client_payload.republish == true }}
env:
TAG: ${{ inputs.tag || github.event.client_payload.tag }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
if [[ -z "${TAG}" ]]; then
echo "::error::republish requires a tag input"
exit 1
fi
sha="$(git rev-parse HEAD)"
echo "::notice::Republish requested — deleting and re-creating tag ${TAG} on ${sha:0:8}"
# Delete via API (avoids workflows permission issue with git push)
gh api "repos/${GITHUB_REPOSITORY}/git/refs/tags/${TAG}" -X DELETE 2>/dev/null || true
# Create via API
gh api "repos/${GITHUB_REPOSITORY}/git/refs" \
-f "ref=refs/tags/${TAG}" \
-f "sha=${sha}" --silent
# Update local state
git tag -d "${TAG}" 2>/dev/null || true
git tag "${TAG}" "${sha}"
- name: Validate tag and compute version
id: meta
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
INPUT_TAG: ${{ inputs.tag }}
INPUT_DRY_RUN: ${{ inputs.dry_run }}
INPUT_REF: ${{ inputs.republish == true && format('refs/tags/{0}', inputs.tag) || inputs.ref }}
EVENT_RELEASE_TAG: ${{ github.event.release.tag_name }}
EVENT_DISPATCH_TAG: ${{ github.event.client_payload.tag }}
EVENT_DISPATCH_DRY_RUN: ${{ github.event.client_payload.dry_run }}
EVENT_DISPATCH_REF: ${{ github.event.client_payload.ref }}
run: scripts/publish/validate-and-compute-metadata.sh
- name: Resolve republish flags
id: republish
env:
INPUT_FORCE_REPUBLISH_JAVA: ${{ inputs.force_republish_java }}
INPUT_FORCE_REPUBLISH_WASM: ${{ inputs.force_republish_wasm }}
EVENT_DISPATCH_FORCE_REPUBLISH_JAVA: ${{ github.event.client_payload.force_republish_java }}
EVENT_DISPATCH_FORCE_REPUBLISH_WASM: ${{ github.event.client_payload.force_republish_wasm }}
run: |
force_java="${INPUT_FORCE_REPUBLISH_JAVA:-${EVENT_DISPATCH_FORCE_REPUBLISH_JAVA:-false}}"
force_wasm="${INPUT_FORCE_REPUBLISH_WASM:-${EVENT_DISPATCH_FORCE_REPUBLISH_WASM:-false}}"
echo "force_republish_java=${force_java}" >>"$GITHUB_OUTPUT"
echo "force_republish_wasm=${force_wasm}" >>"$GITHUB_OUTPUT"
- name: Install Task
uses: go-task/setup-task@v2 # v2
with:
version: 3.46.4
- name: Upload release metadata
uses: actions/upload-artifact@v7 # v7
with:
name: release-metadata
path: release-metadata.json
retention-days: 14
# Looks up the release version on PyPI and exposes the result as `exists`.
check-pypi:
  name: Check PyPI for existing version
  runs-on: ubuntu-latest
  needs: prepare
  # Registry lookups only run for tag builds.
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check PyPI version
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: pypi
        package: html-to-markdown
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on npm for the primary package plus the extra
# packages listed under `extra-packages` (one `output_name=package` per line).
check-npm:
  name: Check npm for existing versions
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    node_exists: ${{ steps.check.outputs.exists }}
    wasm_exists: ${{ steps.check.outputs.wasm_exists }}
    ts_exists: ${{ steps.check.outputs.ts_exists }}
  steps:
    - id: check
      name: Check npm packages
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: npm
        # NOTE(review): the primary package and the `ts_exists` entry below name
        # the same package, so `node_exists` and `ts_exists` always agree.
        # Confirm whether the Node binding is published under a different name.
        package: "@kreuzberg/html-to-markdown"
        version: ${{ needs.prepare.outputs.version }}
        extra-packages: |
          wasm_exists=@kreuzberg/html-to-markdown-wasm
          ts_exists=@kreuzberg/html-to-markdown
# Looks up the release version on RubyGems and exposes the result as `exists`.
check-rubygems:
  name: Check RubyGems for existing version
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check RubyGems version
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: rubygems
        package: html-to-markdown
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on Hex.pm and exposes the result as `exists`.
check-hex:
  name: Check Hex.pm for existing version
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check Hex version
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: hex
        # Hex package name uses underscores, unlike the other registries.
        package: html_to_markdown
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on Maven Central and exposes the result as `exists`.
check-maven:
  name: Check Maven Central for existing version
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check Maven version
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: maven
        # Coordinates are given as `groupId:artifactId`.
        package: "dev.kreuzberg:html-to-markdown"
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on NuGet; `exists` gates the C# build jobs.
check-nuget:
  name: Check NuGet for existing version
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check NuGet package
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: nuget
        package: KreuzbergDev.HtmlToMarkdown
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on Packagist and exposes the result as `exists`.
check-packagist:
  name: Check Packagist for existing version
  runs-on: ubuntu-latest
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check Packagist version
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: packagist
        package: kreuzberg-dev/html-to-markdown
        version: ${{ needs.prepare.outputs.version }}
# Looks up the release version on crates.io for both published crates.
# Outputs:
#   rs_exists  - html-to-markdown-rs already has this version
#   cli_exists - html-to-markdown-cli already has this version
#   all_exist  - "true" only when both of the above are "true"
check-cratesio:
  name: Check crates.io for existing versions
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  runs-on: ubuntu-latest
  outputs:
    rs_exists: ${{ steps.check.outputs.exists }}
    cli_exists: ${{ steps.check.outputs.cli_exists }}
    all_exist: ${{ steps.derive.outputs.all_exist }}
  steps:
    - name: Query crates.io
      id: check
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: cratesio
        package: html-to-markdown-rs
        version: ${{ needs.prepare.outputs.version }}
        extra-packages: |
          cli_exists=html-to-markdown-cli
    - name: Derive all_exist
      id: derive
      # Step outputs are handed to the script through the environment rather
      # than being expanded into the script text, so their content can never be
      # parsed as shell syntax.
      env:
        RS_EXISTS: ${{ steps.check.outputs.exists }}
        CLI_EXISTS: ${{ steps.check.outputs.cli_exists }}
      run: |
        if [[ "${RS_EXISTS}" == "true" && "${CLI_EXISTS}" == "true" ]]; then
          echo "all_exist=true" >> "$GITHUB_OUTPUT"
        else
          echo "all_exist=false" >> "$GITHUB_OUTPUT"
        fi
# Looks up the release version in the Homebrew tap and exposes the result as `exists`.
check-homebrew:
  name: Check if Homebrew formula already published
  runs-on: ubuntu-latest
  timeout-minutes: 5
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  outputs:
    exists: ${{ steps.check.outputs.exists }}
  steps:
    - id: check
      name: Check Homebrew tap for formula
      uses: kreuzberg-dev/actions/check-registry@v1 # v1
      with:
        registry: homebrew
        package: html-to-markdown
        version: ${{ needs.prepare.outputs.version }}
        tap-repo: kreuzberg-dev/homebrew-tap
# Builds Python wheels on each runner in the matrix and uploads them as
# `python-wheels-<os>` artifacts.
python-wheels:
  name: Build Python wheels (${{ matrix.os }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - ubuntu-24.04-arm
        - windows-latest
        - macos-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Build wheels
      uses: kreuzberg-dev/actions/build-python-wheels@v1 # v1
      with:
        python-version: "3.13"
        package-dir: packages/python
        # Per-platform pre-build hooks; each one ends by running
        # scripts/prepare_wheel.py.
        cibw-before-build-linux: >
          yum install -y openssl-devel &&
          (test -x /usr/bin/aarch64-linux-gnu-gcc ||
          ln -sf "$(command -v gcc)" /usr/local/bin/aarch64-linux-gnu-gcc 2>/dev/null || true) &&
          pip install maturin uv &&
          source ~/.cargo/env &&
          python scripts/prepare_wheel.py
        cibw-before-build-macos: >
          pip install maturin uv &&
          source ~/.cargo/env &&
          python scripts/prepare_wheel.py
        cibw-before-build-windows: >
          pip install maturin uv &&
          set PATH=%USERPROFILE%\.cargo\bin;%PATH% &&
          python scripts\prepare_wheel.py
        # The upload is done by the explicit step below instead.
        upload-artifact: "false"
    - name: Upload wheels
      uses: actions/upload-artifact@v7 # v7
      with:
        name: python-wheels-${{ matrix.os }}
        path: wheelhouse/*.whl
        retention-days: 14
# Builds the Python source distribution (with the CLI prepared alongside it)
# and uploads it as the `python-sdist` artifact.
python-sdist:
  name: Build Python sdist
  runs-on: ubuntu-latest
  needs: prepare
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Set up Python
      uses: actions/setup-python@v6 # v6
      with:
        python-version: "3.13"
    - name: Install build dependencies
      shell: bash
      run: scripts/publish/python/install-build-deps.sh
    - name: Build CLI binary for sdist
      shell: bash
      run: scripts/publish/python/build-cli-for-sdist.sh
    - name: Prepare sdist with CLI
      shell: bash
      run: scripts/publish/python/prepare-sdist-with-cli.sh
    - name: Build sdist
      shell: bash
      run: scripts/publish/python/build-sdist.sh
    - name: Upload sdist
      uses: actions/upload-artifact@v7 # v7
      with:
        name: python-sdist
        path: packages/python/dist/*.tar.gz
        retention-days: 14
# Builds the PHP extension for every PHP version x platform pair and packages
# it as a PIE archive, uploaded as `php-package-<label>-php<version>`.
php-package:
  name: Build PHP PIE binary (php${{ matrix.php }} ${{ matrix.platform.label }})
  runs-on: ${{ matrix.platform.os }}
  timeout-minutes: 60
  needs: prepare
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      php:
        - "8.2"
        - "8.3"
        - "8.4"
        - "8.5"
      platform:
        - label: linux-x86_64
          os: ubuntu-latest
          target: x86_64-unknown-linux-gnu
        - label: linux-arm64
          os: ubuntu-24.04-arm
          target: aarch64-unknown-linux-gnu
        - label: macos-arm64
          os: macos-latest
          target: aarch64-apple-darwin
        - label: windows-x86_64
          os: windows-latest
          target: x86_64-pc-windows-msvc
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup PHP
      uses: kreuzberg-dev/actions/setup-php@v1 # v1
      with:
        php-version: ${{ matrix.php }}
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
      with:
        toolchain: stable
        cache-key-prefix: publish-php-${{ matrix.platform.label }}-php${{ matrix.php }}
    - name: Install alef
      uses: kreuzberg-dev/actions/install-alef@v1 # v1
    - name: Build PHP extension
      uses: kreuzberg-dev/actions/build-php-extension@v1 # v1
      with:
        crate-name: html-to-markdown-php
        lib-name: html_to_markdown_php
        php-version: ${{ matrix.php }}
        php-ts: nts
    # Maps the PHP version to the Visual Studio toolset tag handed to the
    # packaging step (8.2/8.3 -> vs16, everything else -> vs17).
    - id: wincompiler
      name: Determine Windows compiler
      if: runner.os == 'Windows'
      shell: pwsh
      run: |
        $compiler = switch ('${{ matrix.php }}') {
          '8.2' { 'vs16' }
          '8.3' { 'vs16' }
          '8.4' { 'vs17' }
          '8.5' { 'vs17' }
          default { 'vs17' }
        }
        "compiler=$compiler" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append
    - name: Package PIE archive
      uses: kreuzberg-dev/actions/package-php-pie@v1 # v1
      with:
        version: ${{ needs.prepare.outputs.version }}
        php-version: ${{ matrix.php }}
        php-ts: nts
        target: ${{ matrix.platform.target }}
        # Empty on non-Windows runners, where the step above is skipped.
        windows-compiler: ${{ steps.wincompiler.outputs.compiler }}
        output-dir: dist/php-package
    - name: Upload PHP PIE package artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: php-package-${{ matrix.platform.label }}-php${{ matrix.php }}
        path: |
          dist/php-package/php_*.tgz
          dist/php-package/php_*.tgz.sha256
          dist/php-package/php_*.zip
          dist/php-package/php_*.zip.sha256
        retention-days: 14
# Generates the Node TypeScript definitions once and uploads them as the
# `node-typescript-defs` artifact.
node-typescript-defs:
  name: Generate Node TypeScript definitions
  runs-on: ubuntu-latest
  needs: prepare
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Setup Node
      uses: actions/setup-node@v6 # v6
      with:
        node-version: 24
        check-latest: true
    - name: Enable corepack
      shell: bash
      run: scripts/common/enable-corepack.sh
    - name: Install Node dependencies
      shell: bash
      run: scripts/publish/node/install-node-deps.sh
    - name: Generate TypeScript definitions
      shell: bash
      run: scripts/publish/node/generate-typescript-defs.sh
    - name: Upload TypeScript definitions
      uses: actions/upload-artifact@v7 # v7
      with:
        name: node-typescript-defs
        path: typescript-defs/
        retention-days: 14
# Builds the native Node module for each target triple and uploads one
# `node-bindings-<target>` tarball per matrix entry.
#
# Matrix fields:
#   target         - triple passed to the build/package scripts as TARGET
#   rust_target    - triple to add to the Rust toolchain ("" = skip that step)
#   use_cross      - installs `cross` and is forwarded as USE_CROSS
#   use_napi_cross - forwarded as USE_NAPI_CROSS
node-bindings:
  name: Build Node bindings (${{ matrix.target }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      include:
        - target: aarch64-apple-darwin
          os: macos-latest
          rust_target: ""
          use_cross: false
          use_napi_cross: false
        - target: x86_64-unknown-linux-gnu
          os: ubuntu-latest
          rust_target: ""
          use_cross: false
          use_napi_cross: false
        - target: x86_64-unknown-linux-musl
          os: ubuntu-latest
          rust_target: x86_64-unknown-linux-musl
          use_cross: true
          use_napi_cross: false
        - target: aarch64-unknown-linux-gnu
          os: ubuntu-latest
          rust_target: aarch64-unknown-linux-gnu
          use_cross: false
          use_napi_cross: true
        - target: aarch64-unknown-linux-musl
          os: ubuntu-latest
          rust_target: aarch64-unknown-linux-musl
          use_cross: true
          use_napi_cross: false
        - target: armv7-unknown-linux-gnueabihf
          os: ubuntu-latest
          rust_target: armv7-unknown-linux-gnueabihf
          use_cross: false
          use_napi_cross: true
        - target: x86_64-pc-windows-msvc
          os: windows-latest
          rust_target: ""
          use_cross: false
          use_napi_cross: false
        - target: aarch64-pc-windows-msvc
          os: windows-latest
          rust_target: aarch64-pc-windows-msvc
          use_cross: false
          use_napi_cross: false
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: dtolnay/rust-toolchain@stable
    - name: Add Rust target
      if: ${{ matrix.rust_target != '' }}
      shell: bash
      env:
        RUST_TARGET: ${{ matrix.rust_target }}
      run: scripts/publish/common/add-rust-target.sh
    - name: Install cross
      if: ${{ matrix.use_cross }}
      shell: bash
      run: scripts/publish/cli/install-cross.sh
    - name: Setup Node
      uses: actions/setup-node@v6 # v6
      with:
        node-version: 24
        check-latest: true
    - name: Enable corepack
      shell: bash
      run: scripts/common/enable-corepack.sh
    - name: Install Node dependencies
      shell: bash
      run: scripts/publish/node/install-node-deps.sh
    # From here on, Windows runners use the PowerShell variant of each script.
    - name: Clean npm directory
      if: runner.os != 'Windows'
      shell: bash
      run: scripts/publish/node/clean-npm-dir.sh
    - name: Clean npm directory (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: scripts/publish/node/clean-npm-dir.ps1
    - name: Create npm package structure
      shell: bash
      run: scripts/publish/node/create-npm-package-structure.sh
    - name: Build native module
      if: runner.os != 'Windows'
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
        USE_CROSS: ${{ matrix.use_cross }}
        USE_NAPI_CROSS: ${{ matrix.use_napi_cross }}
      run: scripts/publish/node/build-native-module.sh
    - name: Build native module (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      env:
        TARGET: ${{ matrix.target }}
        USE_CROSS: ${{ matrix.use_cross }}
        USE_NAPI_CROSS: ${{ matrix.use_napi_cross }}
      run: scripts/publish/node/build-native-module.ps1
    - name: Package artifacts
      if: runner.os != 'Windows'
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/node/package-artifacts.sh
    - name: Package artifacts (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/node/package-artifacts.ps1
    - name: Upload Node artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: node-bindings-${{ matrix.target }}
        path: node-bindings-${{ matrix.target }}.tar.gz
        retention-days: 14
# Builds the WASM bundles and uploads them as the `wasm-bundles` artifact.
wasm-bindings:
  name: Build WASM bindings
  runs-on: ubuntu-latest
  needs: prepare
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Add wasm32 target
      shell: bash
      run: scripts/common/ensure-wasm-target.sh
    - name: Install wasm-pack
      shell: bash
      run: scripts/common/install-wasm-pack.sh
    - name: Setup Node
      uses: actions/setup-node@v6 # v6
      with:
        node-version: 24
        check-latest: true
    - name: Enable corepack
      shell: bash
      run: scripts/common/enable-corepack.sh
    - name: Install dependencies
      shell: bash
      run: scripts/publish/wasm/install-deps.sh
    - name: Build WASM bundles
      shell: bash
      run: scripts/publish/wasm/build-bundles.sh
    - name: Package WASM artifacts
      shell: bash
      run: scripts/publish/wasm/package-artifacts.sh
    - name: Upload WASM artifacts
      uses: actions/upload-artifact@v7 # v7
      with:
        name: wasm-bundles
        path: wasm-artifacts/*
        retention-days: 14
# Builds the CLI for each target triple and uploads one `cli-<target>` artifact
# per matrix entry (.tar.gz on Unix runners, .zip on Windows).
cli-binaries:
  name: Build CLI binaries (${{ matrix.target }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      # `use_cross` is false for every current entry, so the "Install cross"
      # step below is skipped for all of them.
      include:
        - target: x86_64-unknown-linux-gnu
          os: ubuntu-latest
          use_cross: false
        - target: x86_64-unknown-linux-musl
          os: ubuntu-latest
          use_cross: false
        - target: aarch64-unknown-linux-gnu
          os: ubuntu-latest
          use_cross: false
        - target: aarch64-apple-darwin
          os: macos-latest
          use_cross: false
        - target: x86_64-pc-windows-msvc
          os: windows-latest
          use_cross: false
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Add compilation target
      shell: bash
      env:
        RUST_TARGET: ${{ matrix.target }}
      run: scripts/publish/common/add-rust-target.sh
    - name: Install build dependencies
      if: runner.os == 'Linux'
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/cli/install-build-deps-linux.sh
    - name: Configure cross linker
      if: ${{ matrix.target == 'aarch64-unknown-linux-gnu' }}
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/cli/configure-cross-linker.sh
    - name: Install cross
      if: ${{ matrix.use_cross }}
      shell: bash
      run: scripts/publish/cli/install-cross.sh
    - name: Build CLI
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
        USE_CROSS: ${{ matrix.use_cross }}
      run: scripts/publish/cli/build-cli.sh
    - name: Package CLI artifact
      if: runner.os != 'Windows'
      shell: bash
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/cli/package-cli-artifact.sh
    - name: Package CLI artifact (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      env:
        TARGET: ${{ matrix.target }}
      run: scripts/publish/cli/package-cli-artifact.ps1
    - name: Upload CLI artifact
      if: runner.os != 'Windows'
      uses: actions/upload-artifact@v7 # v7
      with:
        name: cli-${{ matrix.target }}
        path: cli-${{ matrix.target }}.tar.gz
        retention-days: 14
    - name: Upload CLI artifact (Windows)
      if: runner.os == 'Windows'
      uses: actions/upload-artifact@v7 # v7
      with:
        name: cli-${{ matrix.target }}
        path: cli-${{ matrix.target }}.zip
        retention-days: 14
# Builds Ruby gems on each runner in the matrix and uploads them as
# `rubygems-<label>` artifacts.
ruby-gem:
  name: Build Ruby gem (${{ matrix.label }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      include:
        - label: linux
          os: ubuntu-latest
        - label: linux-aarch64
          os: ubuntu-24.04-arm
        - label: macos-arm64
          os: macos-latest
        - label: windows-x64
          os: windows-latest
  env:
    RB_SYS_CARGO_PROFILE: release
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Remove cached CLI binaries
      shell: bash
      run: scripts/publish/ruby/remove-cached-cli.sh
    # Windows-only toolchain preparation.
    - name: Install MSYS2 toolchain
      if: runner.os == 'Windows'
      shell: pwsh
      run: scripts/publish/ruby/install-msys2-toolchain.ps1
    - name: Install Rust (GNU on Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: scripts/publish/ruby/install-rust-gnu.ps1
    - name: Configure bindgen sysroot (Windows)
      if: runner.os == 'Windows'
      shell: bash
      run: scripts/publish/ruby/configure-bindgen-windows.sh
    - name: Set up Ruby
      uses: ruby/setup-ruby@v1 # v1
      with:
        ruby-version: "3.3"
        bundler: "4.0.3"
        bundler-cache: false
    - name: Install Ruby dependencies (Unix)
      if: runner.os != 'Windows'
      shell: bash
      run: scripts/publish/ruby/install-deps-unix.sh
    - name: Install Ruby dependencies (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: scripts/publish/ruby/install-deps-windows.ps1
    - name: Build gem artifacts (Unix)
      if: runner.os != 'Windows'
      shell: bash
      run: scripts/publish/ruby/build-gem-unix.sh
    - name: Build gem artifacts (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: scripts/publish/ruby/build-gem-windows.ps1
    # The source gem is shipped by the canonical `linux` builder only. Source
    # gems from the other matrix entries are byte-different (line endings,
    # ext-rb contents, vendor layout); under merge-multiple they would
    # overwrite one another and yield an invalid .gem at publish time. Native
    # platform gems (.gem with a platform suffix) are kept and still uploaded.
    - name: Drop source gem on non-canonical builders
      if: ${{ matrix.label != 'linux' }}
      shell: bash
      run: |
        shopt -s nullglob
        for f in packages/ruby/pkg/*.gem; do
          base="$(basename "$f")"
          case "$base" in
            *-x86_64-linux.gem|*-aarch64-linux.gem|*-arm64-darwin.gem|*-x86_64-darwin.gem|*-x64-mingw32.gem|*-x64-mingw-ucrt.gem) ;;
            *) echo "Removing non-canonical source gem $base"; rm -f "$f" ;;
          esac
        done
    - name: Upload gem artifacts
      uses: actions/upload-artifact@v7 # v7
      with:
        name: rubygems-${{ matrix.label }}
        path: packages/ruby/pkg/*.gem
        retention-days: 14
# Builds the Elixir NIF for each target and packages the same library once per
# supported NIF version. Tarballs are uploaded as `elixir-<label>` artifacts.
elixir-natives:
  name: Build Elixir native libs (${{ matrix.settings.label }})
  needs: [prepare]
  if: ${{ needs.prepare.outputs.is_tag == 'true' }}
  runs-on: ${{ matrix.settings.os }}
  timeout-minutes: 180
  strategy:
    fail-fast: false
    matrix:
      settings:
        - os: ubuntu-24.04-arm
          label: linux-aarch64
          target: aarch64-unknown-linux-gnu
        - os: ubuntu-latest
          label: linux-x86_64
          target: x86_64-unknown-linux-gnu
        - os: macos-latest
          label: macos-arm64
          target: aarch64-apple-darwin
  steps:
    - name: Checkout
      uses: actions/checkout@v6
      with:
        ref: ${{ needs.prepare.outputs.ref }}
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1
      with:
        target: ${{ matrix.settings.target }}
    - name: Build Elixir NIF
      env:
        CARGO_BUILD_TARGET: ${{ matrix.settings.target }}
      run: cargo build --release --target ${{ matrix.settings.target }} --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml
    # One step packages every NIF version: the library is identical, only the
    # artifact name differs. Values reach the script through `env` so that
    # workflow-derived strings are never expanded into the script text.
    - name: Package NIF (NIF 2.16 and 2.17)
      shell: bash
      env:
        VERSION: ${{ needs.prepare.outputs.version }}
        TARGET: ${{ matrix.settings.target }}
      run: |
        NIF_DIR="packages/elixir/native/html_to_markdown_nif"
        # Cargo emits .dylib on macOS; the packaged artifact keeps the .so
        # suffix on every platform.
        if [[ "${RUNNER_OS}" == "macOS" ]]; then
          LIB_NAME="libhtml_to_markdown_nif.dylib"
        else
          LIB_NAME="libhtml_to_markdown_nif.so"
        fi
        EXT="so"
        # Prefer the target-specific output directory, then the plain release one.
        LIB_PATH="${NIF_DIR}/target/${TARGET}/release/${LIB_NAME}"
        if [[ ! -f "$LIB_PATH" ]]; then
          LIB_PATH="${NIF_DIR}/target/release/${LIB_NAME}"
        fi
        if [[ ! -f "$LIB_PATH" ]]; then
          echo "::error::NIF library ${LIB_NAME} not found under ${NIF_DIR}/target"
          exit 1
        fi
        mkdir -p dist/elixir
        for NIF_VERSION in 2.16 2.17; do
          ARTIFACT="libhtml_to_markdown_nif-v${VERSION}-nif-${NIF_VERSION}-${TARGET}.${EXT}"
          cp "$LIB_PATH" "${ARTIFACT}"
          tar -czf "dist/elixir/${ARTIFACT}.tar.gz" "${ARTIFACT}"
        done
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: elixir-${{ matrix.settings.label }}
        path: dist/elixir/*.tar.gz
        if-no-files-found: error
        retention-days: 1
# Attaches every Elixir NIF tarball produced by `elixir-natives` to the GitHub
# release for the tag. Runs only when that job succeeded.
upload-elixir-release:
  name: Upload Elixir NIF binaries to GitHub Release
  needs: [prepare, elixir-natives]
  if: ${{ always() && needs.prepare.outputs.is_tag == 'true' && needs.elixir-natives.result == 'success' }}
  runs-on: ubuntu-latest
  permissions:
    contents: write
  steps:
    - name: Checkout
      uses: actions/checkout@v6
      with:
        ref: ${{ needs.prepare.outputs.ref }}
    - name: Download Elixir NIF artifacts
      uses: actions/download-artifact@v8
      with:
        pattern: elixir-*
        path: dist/elixir
        merge-multiple: true
    - name: Upload to GitHub Release
      shell: bash
      # TAG is passed through the environment, like the other release-upload
      # steps in this workflow, so the tag text is never expanded into the script.
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        TAG: ${{ needs.prepare.outputs.tag }}
      run: |
        # With nullglob an empty match yields an empty list instead of the
        # literal pattern, so the failure below is explicit.
        shopt -s nullglob
        files=(dist/elixir/*.tar.gz)
        if (( ${#files[@]} == 0 )); then
          echo "::error::no Elixir NIF archives found in dist/elixir"
          exit 1
        fi
        for file in "${files[@]}"; do
          echo "Uploading $(basename "$file")..."
          gh release upload "$TAG" "$file" --clobber
        done
# Runs the Elixir tests on each runner in the matrix; only entries with
# `build_hex: true` also build and upload the Hex package.
elixir-package:
  name: Build Elixir Hex package (${{ matrix.label }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      include:
        - label: linux
          os: ubuntu-latest
          build_hex: true
        - label: macos
          os: macos-latest
          build_hex: false
  env:
    MIX_ENV: dev
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Elixir
      uses: erlef/setup-beam@v1 # v1
      with:
        otp-version: "28.1"
        elixir-version: "1.19"
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Install Hex and Rebar
      shell: bash
      run: scripts/publish/elixir/install-hex-rebar.sh
    - name: Install dependencies
      shell: bash
      run: scripts/publish/elixir/install-deps.sh
    - name: Run Elixir tests
      shell: bash
      run: scripts/publish/elixir/run-tests.sh
    - name: Build Hex package
      if: ${{ matrix.build_hex }}
      shell: bash
      run: scripts/publish/elixir/build-hex-package.sh
    - name: Upload Hex artifact
      if: ${{ matrix.build_hex }}
      uses: actions/upload-artifact@v7 # v7
      with:
        name: elixir-hex-package
        path: packages/elixir/html_to_markdown-*.tar
        retention-days: 14
# Packs the NuGet package from the native libraries built by `csharp-ffi`.
# Skipped on dry runs, on non-tag builds, and when NuGet already has the version.
csharp-package:
  name: Build C# NuGet package
  runs-on: ubuntu-latest
  timeout-minutes: 30
  needs:
    - prepare
    - check-nuget
    - csharp-ffi
  if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
  permissions:
    contents: read
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup .NET
      uses: actions/setup-dotnet@v5 # v5
      with:
        dotnet-version: "8.0.x"
    # All per-RID artifacts are merged into a single dist/csharp-ffi tree.
    - name: Download C# native FFI libraries
      uses: actions/download-artifact@v8 # v8
      with:
        pattern: csharp-ffi-*
        path: dist/csharp-ffi
        merge-multiple: true
    - name: Install dependencies
      shell: bash
      run: scripts/publish/csharp/restore.sh packages/csharp/HtmlToMarkdown.csproj
    - name: Pack NuGet package
      shell: bash
      run: scripts/publish/csharp/pack.sh
    - name: Upload NuGet artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: csharp-nuget
        path: artifacts/csharp/*.nupkg
        retention-days: 14
# Builds the native FFI library for each .NET runtime identifier (RID) and
# stages it under dist/csharp-ffi/<rid>/native for `csharp-package`.
# Skipped on dry runs, on non-tag builds, and when NuGet already has the version.
csharp-ffi:
  name: Build C# native FFI libraries
  needs: [prepare, check-nuget]
  if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
  strategy:
    fail-fast: false
    matrix:
      include:
        - os: ubuntu-latest
          rid: linux-x64
          target: x86_64-unknown-linux-gnu
        - os: ubuntu-24.04-arm
          rid: linux-arm64
          target: aarch64-unknown-linux-gnu
        - os: windows-latest
          rid: win-x64
          target: x86_64-pc-windows-msvc
        - os: macos-latest
          rid: osx-arm64
          target: aarch64-apple-darwin
  runs-on: ${{ matrix.os }}
  timeout-minutes: 60
  permissions:
    contents: read
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        ref: ${{ needs.prepare.outputs.checkout_ref }}
        fetch-depth: 0
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
      shell: bash
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Install alef
      uses: kreuzberg-dev/actions/install-alef@v1
    - name: Build and stage FFI library
      shell: bash
      run: |
        alef publish build --lang ffi --target ${{ matrix.target }}
        mkdir -p dist/csharp-ffi/${{ matrix.rid }}/native
        # `find -exec cp` exits 0 even when nothing matches; the upload step
        # below is what turns an empty staging directory into a failure.
        find target/release -maxdepth 1 -type f \( -name '*.so' -o -name '*.dylib' -o -name '*.dll' \) -name '*html_to_markdown_ffi*' -exec cp {} dist/csharp-ffi/${{ matrix.rid }}/native/ \;
    - name: Upload FFI artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: csharp-ffi-${{ matrix.rid }}
        path: dist/csharp-ffi
        # Fail instead of warning when no library was staged, so the NuGet
        # package cannot be packed without its native binaries.
        if-no-files-found: error
        retention-days: 14
# Builds the FFI library and packages it for Go on each platform; results are
# uploaded as `go-ffi-<platform>` artifacts.
go-ffi:
  name: Build Go native FFI libraries (${{ matrix.platform }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux-x64
          os: ubuntu-latest
          target: x86_64-unknown-linux-gnu
        - platform: linux-arm64
          os: ubuntu-24.04-arm
          target: aarch64-unknown-linux-gnu
        - platform: windows-x64
          os: windows-latest
          target: x86_64-pc-windows-msvc
        - platform: darwin-arm64
          os: macos-latest
          target: aarch64-apple-darwin
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Install alef
      uses: kreuzberg-dev/actions/install-alef@v1
    # Build the shared FFI library first, then wrap it for Go.
    - name: Build and package Go FFI library
      shell: bash
      run: |
        alef publish build --lang ffi --target ${{ matrix.target }}
        alef publish package --lang go --target ${{ matrix.target }} -o dist/go-ffi
    - name: Upload Go FFI artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: go-ffi-${{ matrix.platform }}
        path: dist/go-ffi
        retention-days: 14
# Builds the FFI library and packages the C distribution on each platform;
# results are uploaded as `c-ffi-<platform>` artifacts.
c-ffi-libraries:
  name: Build C FFI distribution packages (${{ matrix.platform }})
  runs-on: ${{ matrix.os }}
  needs: prepare
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux-x64
          os: ubuntu-latest
          target: x86_64-unknown-linux-gnu
        - platform: linux-arm64
          os: ubuntu-24.04-arm
          target: aarch64-unknown-linux-gnu
        - platform: windows-x64
          os: windows-latest
          target: x86_64-pc-windows-msvc
        - platform: darwin-arm64
          os: macos-latest
          target: aarch64-apple-darwin
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.checkout_ref }}
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      shell: bash
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    - name: Install alef
      uses: kreuzberg-dev/actions/install-alef@v1
    # Build the shared FFI library first, then assemble the C distribution.
    - name: Build and package C FFI distribution
      shell: bash
      run: |
        alef publish build --lang ffi --target ${{ matrix.target }}
        alef publish package --lang ffi --target ${{ matrix.target }} -o dist/c-ffi
    - name: Upload C FFI artifact
      uses: actions/upload-artifact@v7 # v7
      with:
        name: c-ffi-${{ matrix.platform }}
        path: dist/c-ffi
        retention-days: 14
# Packages the Rust crates and uploads the resulting .crate files as the
# `cargo-crates` artifact, which `publish-crates` depends on.
cargo-packages:
  name: Package Rust crates
  needs: prepare
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        ref: ${{ needs.prepare.outputs.checkout_ref }}
        fetch-depth: 0
    - name: Ensure target commit
      if: ${{ needs.prepare.outputs.target_sha != '' }}
      env:
        TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
      run: scripts/publish/common/ensure-target-commit.sh
      shell: bash
    - name: Setup Rust
      uses: kreuzberg-dev/actions/setup-rust@v1 # v1
    # The former "Add Windows Rust target" step was gated on
    # `runner.os == 'Windows'` and could never run in this ubuntu-latest job,
    # so it has been dropped.
    - name: Package crates
      env:
        RELEASE_VERSION: ${{ needs.prepare.outputs.version }}
      run: scripts/publish/crates/package-crates.sh
      shell: bash
    - name: Upload crate packages
      uses: actions/upload-artifact@v7 # v7
      with:
        name: cargo-crates
        path: crate-artifacts/*.crate
        retention-days: 14
# Attaches PHP PIE, CLI, Go FFI and C FFI artifacts to the GitHub release and
# creates the Go module tag. Because of `always()`, the job runs on tag,
# non-dry-run builds regardless of how the jobs listed in `needs` finished.
upload-release-artifacts:
  name: Upload Release Artifacts
  runs-on: ubuntu-latest
  needs:
    - prepare
    - python-wheels
    - python-sdist
    - php-package
    - node-typescript-defs
    - node-bindings
    - wasm-bindings
    - cli-binaries
    - ruby-gem
    - go-ffi
    - c-ffi-libraries
    - cargo-packages
  if: ${{ always() && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' }}
  permissions:
    contents: write
  steps:
    - name: Checkout
      uses: actions/checkout@v6 # v6
      with:
        fetch-depth: 0
        ref: ${{ needs.prepare.outputs.tag }}
    # PHP archives are merged into one directory; the other artifact families
    # keep one sub-directory per artifact (merge-multiple: false).
    - name: Download PHP package artifacts
      uses: actions/download-artifact@v8 # v8
      with:
        pattern: php-package-*
        path: dist/php-package
        merge-multiple: true
    - name: Download CLI artifacts
      uses: actions/download-artifact@v8 # v8
      with:
        pattern: cli-*
        path: dist/cli
        merge-multiple: false
    - name: Download Go FFI artifacts
      uses: actions/download-artifact@v8 # v8
      with:
        pattern: go-ffi-*
        path: dist/go-ffi
        merge-multiple: false
    - name: Download C FFI artifacts
      uses: actions/download-artifact@v8 # v8
      with:
        pattern: c-ffi-*
        path: dist/c-ffi
        merge-multiple: false
    - name: Upload PHP PIE packages
      shell: bash
      env:
        TAG: ${{ needs.prepare.outputs.tag }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: scripts/publish/upload-php-pie.sh
    - name: Upload CLI binaries
      shell: bash
      env:
        TAG: ${{ needs.prepare.outputs.tag }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: scripts/publish/upload-cli-artifacts.sh
    - name: Upload Go FFI artifacts
      shell: bash
      env:
        TAG: ${{ needs.prepare.outputs.tag }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: scripts/publish/upload-go-ffi-artifacts.sh
    - name: Upload C FFI artifacts
      shell: bash
      env:
        TAG: ${{ needs.prepare.outputs.tag }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: scripts/publish/upload-c-ffi-artifacts.sh
    - name: Create Go module tag
      shell: bash
      env:
        VERSION: ${{ needs.prepare.outputs.version }}
      run: scripts/publish/go/create-module-tag.sh "v${VERSION}"
# Publishes html-to-markdown-rs and then html-to-markdown-cli to crates.io.
# Runs only for non-dry-run tag builds where packaging succeeded and the pre-check did not report
# every crate as already present.
publish-crates:
name: Publish crates.io packages
needs: [prepare, cargo-packages, check-cratesio]
if: ${{ always() && needs.cargo-packages.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-cratesio.outputs.all_exist != 'true' }}
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Setup Rust
uses: dtolnay/rust-toolchain@stable
- name: Verify Cargo.toml version matches tag
env:
TAG_VERSION: ${{ needs.prepare.outputs.version }}
run: scripts/publish/crates/verify-cargo-version.sh
shell: bash
# Second registry lookup right before publishing: `exists` refers to html-to-markdown-rs and
# `cli_exists` (declared via extra-packages) to html-to-markdown-cli.
- name: Re-check crates.io before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: cratesio
package: html-to-markdown-rs
version: ${{ needs.prepare.outputs.version }}
extra-packages: |
cli_exists=html-to-markdown-cli
- name: Publish html-to-markdown-rs
if: ${{ steps.recheck.outputs.exists != 'true' }}
env:
CARGO_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: scripts/publish/crates/publish-rs.sh
shell: bash
# The wait only runs when the library crate was published in this run.
- name: Wait for indexing
if: ${{ steps.recheck.outputs.exists != 'true' }}
run: scripts/publish/crates/wait-for-indexing.sh
shell: bash
- name: Publish html-to-markdown-cli
if: ${{ steps.recheck.outputs.cli_exists != 'true' }}
env:
CARGO_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: scripts/publish/crates/publish-cli.sh
shell: bash
# Publishes the Python wheels and sdist to PyPI.
# Requires both Python build jobs to have succeeded, a non-dry-run tag build, and the pre-check
# reporting the version as not yet published.
publish-pypi:
name: Publish Python packages to PyPI
needs: [prepare, python-wheels, python-sdist, check-pypi]
if: ${{ always() && needs.python-wheels.result == 'success' && needs.python-sdist.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-pypi.outputs.exists != 'true' }}
runs-on: ubuntu-latest
environment: pypi
permissions:
contents: read
# No API token is passed to the publish step below; id-token: write is requested instead
# (presumably for PyPI trusted publishing via OIDC — confirm against the PyPI project settings).
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
# Wheels from all python-wheels-* artifacts and the sdist end up together in dist/.
- name: Download wheel artifacts
uses: actions/download-artifact@v8 # v8
with:
pattern: python-wheels-*
path: dist
merge-multiple: true
- name: Download sdist artifact
uses: actions/download-artifact@v8 # v8
with:
name: python-sdist
path: dist
# Informational listing only; a missing dist/ prints a message instead of failing the step.
- name: List packages to publish
run: |
echo "Packages in dist:"
ls -lh dist/ 2>/dev/null || echo "No packages found"
shell: bash
- name: Re-check PyPI before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: pypi
package: html-to-markdown
version: ${{ needs.prepare.outputs.version }}
- name: Publish to PyPI
if: ${{ steps.recheck.outputs.exists != 'true' }}
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: dist
skip-existing: true
# Publishes the Ruby gems to RubyGems.
# Unlike the other publish jobs, the job-level condition also admits dry runs on a tag; every
# publishing-related step is then skipped by its own dry_run check.
publish-rubygems:
name: Publish Ruby gems
needs: [prepare, ruby-gem, check-rubygems]
if: ${{ always() && needs.ruby-gem.result == 'success' && needs.prepare.outputs.is_tag == 'true' && (needs.prepare.outputs.dry_run == 'true' || needs.check-rubygems.outputs.exists != 'true') }}
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Download Ruby gem artifacts
uses: actions/download-artifact@v8 # v8
with:
pattern: rubygems-*
path: dist
merge-multiple: true
# Second registry lookup right before publishing; all later steps key off its `exists` output.
- name: Re-check RubyGems before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: rubygems
package: html-to-markdown
version: ${{ needs.prepare.outputs.version }}
- name: Setup Ruby
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: ruby/setup-ruby@v1 # v1
with:
ruby-version: "3.3"
bundler-cache: false
- name: Update RubyGems
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
run: gem update --system
shell: bash
# The trailing version comment now matches the pinned ref (it previously read v1.0.0).
- name: Configure trusted publishing credentials
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: rubygems/configure-rubygems-credentials@v2.0.0 # v2.0.0
- name: Publish gems
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-rubygems@v1 # v1
with:
gems-dir: dist
dry-run: ${{ needs.prepare.outputs.dry_run }}
# The version is passed through an environment variable rather than interpolated into the script
# text, so a crafted tag/version string cannot be evaluated by the shell.
- name: RubyGems already published summary
if: ${{ steps.recheck.outputs.exists == 'true' }}
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: echo "Gem html-to-markdown@${VERSION} already published on RubyGems — skipped." >> "$GITHUB_STEP_SUMMARY"
shell: bash
# Uploads the Elixir package to the GitHub release, regenerates NIF checksums, and publishes the
# package to Hex.pm. Runs only for non-dry-run tag builds where the Elixir package job succeeded
# and the pre-check did not find the version on Hex.pm.
publish-hex:
name: Publish Hex package
needs: [prepare, elixir-package, check-hex, upload-elixir-release]
if: ${{ always() && needs.elixir-package.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-hex.outputs.exists != 'true' }}
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Download Hex artifact
uses: actions/download-artifact@v8 # v8
with:
name: elixir-hex-package
path: dist/elixir
# Runs before the Hex.pm re-check below, so the release upload happens regardless of its result.
- name: Upload Elixir package to GitHub Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TAG: ${{ needs.prepare.outputs.tag }}
run: scripts/publish/upload-elixir-package.sh
shell: bash
- name: Setup Elixir
uses: erlef/setup-beam@v1 # v1
with:
elixir-version: "1.19"
otp-version: "28.1"
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Install Hex/Rebar
run: scripts/publish/elixir/install-hex-rebar.sh
shell: bash
# The version is handed to the shell via VERSION instead of being interpolated into the command
# line, so its content cannot be interpreted as shell syntax.
- name: Generate NIF checksums from GitHub release
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: scripts/publish/generate_elixir_checksums.sh "${VERSION}"
shell: bash
- name: Install dependencies
run: scripts/publish/elixir/install-deps.sh
shell: bash
# Stages the Rust core via the helper script, then generates a Cargo lockfile inside the NIF crate.
- name: Stage Rust core and generate lockfile
shell: bash
run: |
scripts/publish/elixir/stage-rust-core.sh
pushd packages/elixir/native/html_to_markdown_elixir >/dev/null
cargo generate-lockfile
popd >/dev/null
- name: Re-check Hex.pm before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: hex
package: html_to_markdown
version: ${{ needs.prepare.outputs.version }}
- name: Publish to Hex.pm
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-hex@v1 # v1
with:
package-dir: packages/elixir
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
HEX_API_KEY: ${{ secrets.HEX_API_KEY }}
# Same env-var indirection as above for the summary line.
- name: Hex.pm already published summary
if: ${{ steps.recheck.outputs.exists == 'true' }}
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: echo "Package html_to_markdown@${VERSION} already published on Hex.pm — skipped." >> "$GITHUB_STEP_SUMMARY"
shell: bash
# Publishes the C# package to NuGet.
# Runs only for non-dry-run tag builds where the C# package job succeeded and the pre-check did
# not find the version on NuGet.
publish-nuget:
name: Publish NuGet package
needs: [prepare, csharp-package, check-nuget]
if: ${{ always() && needs.csharp-package.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-nuget.outputs.exists != 'true' }}
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Download NuGet artifact
uses: actions/download-artifact@v8 # v8
with:
name: csharp-nuget
path: dist
# Second registry lookup right before publishing; the publish step is skipped if the version exists.
- name: Re-check NuGet before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: nuget
package: KreuzbergDev.HtmlToMarkdown
version: ${{ needs.prepare.outputs.version }}
# dry-run is forwarded, but the job-level condition already excludes dry runs.
- name: Publish to NuGet
if: ${{ steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-nuget@v1 # v1
with:
packages-dir: dist
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }}
# Triggers a Packagist update for the PHP package.
# Runs only when `prepare` succeeded, the build is a non-dry-run tag build, and the pre-check did
# not find the version on Packagist. No build artifact is downloaded in this job.
publish-packagist:
name: Publish to Packagist
runs-on: ubuntu-latest
needs: [prepare, check-packagist]
if: |
always() &&
needs.prepare.result == 'success' &&
needs.prepare.outputs.is_tag == 'true' &&
needs.prepare.outputs.dry_run != 'true' &&
needs.check-packagist.outputs.exists != 'true'
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
# NOTE(review): this re-check queries "kreuzberg-dev/html-to-markdown", while the publish step
# below uses package-name "kreuzberg/html-to-markdown". If the two are meant to be the same
# Packagist package, one of the names is wrong and this guard may never report `exists`.
# Confirm against composer.json.
- name: Re-check Packagist before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: packagist
package: kreuzberg-dev/html-to-markdown
version: ${{ needs.prepare.outputs.version }}
- name: Trigger Packagist Update
if: ${{ steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-packagist@v1 # v1
with:
packagist-username: kreuzberg-dev
package-name: kreuzberg/html-to-markdown
version: ${{ needs.prepare.outputs.version }}
repository-url: https://github.com/kreuzberg-dev/html-to-markdown
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
PACKAGIST_API_TOKEN: ${{ secrets.PACKAGIST_API_TOKEN }}
# Builds the native FFI library for each Java target platform and uploads it as a
# java-ffi-<platform> artifact for publish-maven to consume.
# Runs on non-dry-run tag builds when the Maven version is not yet published, or when
# force_republish_java is set.
java-ffi:
name: Build Java native FFI libraries
needs: [prepare, check-maven]
if: ${{ needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-maven.outputs.exists != 'true' || needs.prepare.outputs.force_republish_java == 'true') }}
strategy:
# One platform failing does not cancel the remaining matrix entries.
fail-fast: false
matrix:
include:
- os: ubuntu-latest
platform: linux-x86_64
target: x86_64-unknown-linux-gnu
- os: ubuntu-24.04-arm
platform: linux-aarch64
target: aarch64-unknown-linux-gnu
- os: windows-latest
platform: windows-x86_64
target: x86_64-pc-windows-msvc
- os: macos-latest
platform: osx-aarch64
target: aarch64-apple-darwin
runs-on: ${{ matrix.os }}
timeout-minutes: 60
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.checkout_ref }}
fetch-depth: 0
# Only runs when prepare produced a target_sha; the SHA reaches the script via TARGET_SHA.
- name: Ensure target commit
if: ${{ needs.prepare.outputs.target_sha != '' }}
env:
TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
run: scripts/publish/common/ensure-target-commit.sh
shell: bash
- name: Setup Rust
uses: kreuzberg-dev/actions/setup-rust@v1 # v1
- name: Install alef
uses: kreuzberg-dev/actions/install-alef@v1
# Copies every shared library (.so/.dylib/.dll) whose name contains html_to_markdown_ffi from the
# top level of target/release into dist/java-ffi/<platform>/native.
# NOTE(review): the build is invoked with --target <triple> but the copy looks in target/release,
# not target/<triple>/release — confirm where `alef publish build` places its output.
- name: Build and stage FFI library
shell: bash
run: |
alef publish build --lang ffi --target ${{ matrix.target }}
mkdir -p dist/java-ffi/${{ matrix.platform }}/native
find target/release -maxdepth 1 -type f \( -name '*.so' -o -name '*.dylib' -o -name '*.dll' \) -name '*html_to_markdown_ffi*' -exec cp {} dist/java-ffi/${{ matrix.platform }}/native/ \;
- name: Upload FFI artifact
uses: actions/upload-artifact@v7 # v7
with:
name: java-ffi-${{ matrix.platform }}
path: dist/java-ffi
retention-days: 14
# Bundles the per-platform native libraries built by java-ffi into the Java package and releases
# it through the publish-maven action.
# Runs only for non-dry-run tag builds where java-ffi succeeded and either the version is not yet
# published or force_republish_java is set.
publish-maven:
name: Publish Maven package
needs: [prepare, check-maven, java-ffi]
if: |
always() &&
needs.prepare.outputs.dry_run != 'true' &&
needs.prepare.outputs.is_tag == 'true' &&
(needs.check-maven.outputs.exists != 'true' || needs.prepare.outputs.force_republish_java == 'true') &&
needs.java-ffi.result == 'success'
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
with:
ref: ${{ needs.prepare.outputs.ref }}
# NOTE(review): every following step is gated on this check alone, so force_republish_java lets
# the job start but does not override a positive `exists` result here — confirm that is intended.
- name: Check Maven Central for existing release
id: maven_check
uses: kreuzberg-dev/actions/check-registry@v1
with:
registry: maven
package: "dev.kreuzberg:html-to-markdown"
version: ${{ needs.prepare.outputs.version }}
- name: Download Java FFI artifacts
if: ${{ steps.maven_check.outputs.exists != 'true' }}
uses: actions/download-artifact@v8
with:
pattern: java-ffi-*
path: java-ffi-artifacts
merge-multiple: true
- name: Setup Rust
if: ${{ steps.maven_check.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/setup-rust@v1
# server-username / server-password / gpg-passphrase are given as environment variable NAMES;
# the values are supplied through env on this step and again on the release step.
- name: Setup Java
if: ${{ steps.maven_check.outputs.exists != 'true' }}
env:
MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
MAVEN_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }}
MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
uses: actions/setup-java@v5
with:
distribution: temurin
java-version: '25'
cache: maven
server-id: ossrh
server-username: MAVEN_USERNAME
server-password: MAVEN_PASSWORD
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
gpg-passphrase: MAVEN_GPG_PASSPHRASE
- name: Setup Maven
if: ${{ steps.maven_check.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/setup-maven@v1
- name: Prefer gpg2 binary
if: ${{ steps.maven_check.outputs.exists != 'true' }}
run: scripts/publish/maven/prefer-gpg2.sh
shell: bash
- name: Copy native libraries into resources
if: ${{ steps.maven_check.outputs.exists != 'true' }}
shell: bash
run: scripts/publish/java/copy-native-libs.sh java-ffi-artifacts
- name: Release Maven package
if: ${{ steps.maven_check.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-maven@v1
with:
pom-file: packages/java/pom.xml
maven-profile: publish
extra-args: -DskipTests
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
MAVEN_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }}
MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
# The version is passed through an environment variable rather than interpolated into the script
# text, so its content cannot be interpreted as shell syntax.
- name: Maven already published summary
if: ${{ steps.maven_check.outputs.exists == 'true' }}
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: echo "Maven package version ${VERSION} already published; skipping." >> "$GITHUB_STEP_SUMMARY"
shell: bash
# Publishes the Node packages to npm in three stages: the per-platform native binary packages,
# the main Node package (crates/html-to-markdown-node), and the TypeScript wrapper
# (packages/typescript). Runs only for non-dry-run tag builds where both Node build jobs succeeded
# and the pre-check's node_exists output is not 'true'.
publish-node:
name: Publish Node packages
needs: [prepare, node-bindings, node-typescript-defs, check-npm]
if: ${{ always() && needs.node-bindings.result == 'success' && needs.node-typescript-defs.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && needs.check-npm.outputs.node_exists != 'true' }}
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Download Node artifacts
uses: actions/download-artifact@v8 # v8
with:
pattern: node-bindings-*
path: node-artifacts
merge-multiple: true
- name: Download TypeScript definitions
uses: actions/download-artifact@v8 # v8
with:
name: node-typescript-defs
path: typescript-defs
- name: Setup Node
uses: actions/setup-node@v6 # v6
with:
node-version: 24
registry-url: https://registry.npmjs.org/
# Installs whatever npm version is `latest` at run time (unpinned).
- name: Update NPM
run: npm install -g npm@latest
shell: bash
- name: Enable corepack
run: scripts/common/enable-corepack.sh
shell: bash
- name: Prepare artifact directory
run: scripts/publish/node/prepare-artifact-directory.sh
shell: bash
- name: Install workspace dependencies
if: ${{ needs.prepare.outputs.dry_run != 'true' }}
run: scripts/publish/node/install-node-deps.sh
shell: bash
- name: Pack platform packages
run: scripts/publish/node/pack-platform-packages.sh
shell: bash
# NOTE(review): `exists` and `ts_exists` are both derived from the same package name
# ("@kreuzberg/html-to-markdown"), so the two outputs should always agree. If the main Node
# package and the TypeScript wrapper are published under different names, one of these lookups
# is pointing at the wrong package — confirm against the package.json files.
- name: Re-check npm before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: npm
package: "@kreuzberg/html-to-markdown"
version: ${{ needs.prepare.outputs.version }}
extra-packages: |
ts_exists=@kreuzberg/html-to-markdown
# Stage 1: platform-specific binary packages from crates/html-to-markdown-node/npm.
- name: Publish native binary packages
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-npm@v1 # v1
with:
packages-dir: crates/html-to-markdown-node/npm
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
# Only the linux-x64-gnu and linux-arm64-gnu packages are waited for before continuing.
- name: Wait for npm indexing (x64)
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/wait-for-package@v1 # v1
with:
registry: npm
package: "@kreuzberg/html-to-markdown-linux-x64-gnu"
version: ${{ needs.prepare.outputs.version }}
max-attempts: "25"
- name: Wait for npm indexing (arm64)
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/wait-for-package@v1 # v1
with:
registry: npm
package: "@kreuzberg/html-to-markdown-linux-arm64-gnu"
version: ${{ needs.prepare.outputs.version }}
max-attempts: "25"
# Stage 2: the main Node package.
- name: Prepare main Node package metadata
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
run: scripts/publish/node/prepublish-main-package.sh crates/html-to-markdown-node
shell: bash
- name: Publish main Node package
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-npm@v1 # v1
with:
package-dir: crates/html-to-markdown-node
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- name: Wait for main Node package indexing
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.exists != 'true' && steps.recheck.outputs.ts_exists != 'true' }}
uses: kreuzberg-dev/actions/wait-for-package@v1 # v1
with:
registry: npm
package: "@kreuzberg/html-to-markdown"
version: ${{ needs.prepare.outputs.version }}
max-attempts: "25"
# Stage 3: the TypeScript wrapper; dependencies are installed without a frozen lockfile.
- name: Install TypeScript wrapper dependencies from npm
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }}
working-directory: packages/typescript
run: pnpm install --no-frozen-lockfile
shell: bash
- name: Build TypeScript wrapper package
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }}
run: scripts/publish/typescript/build-package.sh
shell: bash
- name: Publish TypeScript wrapper package
if: ${{ needs.prepare.outputs.dry_run != 'true' && steps.recheck.outputs.ts_exists != 'true' }}
uses: kreuzberg-dev/actions/publish-npm@v1 # v1
with:
package-dir: packages/typescript
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
# Publishes the WASM package (crates/html-to-markdown-wasm) to npm.
# Runs only for non-dry-run tag builds where the WASM build succeeded and either the pre-check's
# wasm_exists output is not 'true' or force_republish_wasm is set.
publish-wasm:
name: Publish WASM package
needs: [prepare, wasm-bindings, check-npm]
if: ${{ always() && needs.wasm-bindings.result == 'success' && needs.prepare.outputs.dry_run != 'true' && needs.prepare.outputs.is_tag == 'true' && (needs.check-npm.outputs.wasm_exists != 'true' || needs.prepare.outputs.force_republish_wasm == 'true') }}
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
- name: Download WASM artifacts
uses: actions/download-artifact@v8 # v8
with:
name: wasm-bundles
path: wasm-artifacts
- name: Extract WASM artifacts
run: scripts/publish/wasm/extract-artifacts.sh
shell: bash
# Deletes any .gitignore inside the three dist directories before packaging
# (presumably so npm does not use them to exclude the built files — confirm).
- name: Remove .gitignore files from dist directories
run: |
rm -f crates/html-to-markdown-wasm/dist/.gitignore
rm -f crates/html-to-markdown-wasm/dist-node/.gitignore
rm -f crates/html-to-markdown-wasm/dist-web/.gitignore
shell: bash
- name: Setup Node
uses: actions/setup-node@v6 # v6
with:
node-version: 24
registry-url: https://registry.npmjs.org/
# Installs whatever npm version is `latest` at run time (unpinned).
- name: Update NPM
run: npm install -g npm@latest
shell: bash
# NOTE(review): the publish step is gated on this re-check only, so force_republish_wasm lets the
# job start but does not override a positive `exists` result here — confirm that is intended.
- name: Re-check npm before publish
id: recheck
uses: kreuzberg-dev/actions/check-registry@v1 # v1
with:
registry: npm
package: "@kreuzberg/html-to-markdown-wasm"
version: ${{ needs.prepare.outputs.version }}
- name: Publish WASM package
if: ${{ steps.recheck.outputs.exists != 'true' }}
uses: kreuzberg-dev/actions/publish-npm@v1 # v1
with:
package-dir: crates/html-to-markdown-wasm
dry-run: ${{ needs.prepare.outputs.dry_run }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
# Builds the CLI in release mode on each runner and packs it into a Homebrew bottle tarball
# (html-to-markdown/<version>/bin/html-to-markdown), uploaded as a homebrew-bottle-<tag> artifact.
# Runs on tag builds when the Homebrew pre-check did not find the version; dry_run is not checked
# at this level.
homebrew-bottles:
name: Build Homebrew bottles (${{ matrix.bottle_tag }})
needs: [prepare, check-homebrew]
if: |
needs.prepare.outputs.is_tag == 'true' &&
(needs.check-homebrew.outputs.exists != 'true')
runs-on: ${{ matrix.runner }}
timeout-minutes: 180
permissions:
# Least privilege: this job only checks out the repository and uploads a workflow artifact;
# nothing here writes to the repository or to a release.
contents: read
strategy:
# One runner failing does not cancel the remaining bottle builds.
fail-fast: false
matrix:
include:
- runner: macos-latest
bottle_tag: arm64_sequoia
- runner: macos-15-intel
bottle_tag: sequoia
- runner: ubuntu-latest
bottle_tag: x86_64_linux
- runner: ubuntu-24.04-arm
bottle_tag: arm64_linux
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.checkout_ref }}
fetch-depth: 0
# The SHA is passed through TARGET_SHA (as java-ffi does) instead of being interpolated into the
# command line, so its content cannot be interpreted as shell syntax.
- name: Ensure target commit
if: ${{ needs.prepare.outputs.target_sha != '' }}
env:
TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
run: git checkout --progress --force "${TARGET_SHA}"
shell: bash
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
# NOTE(review): the bottle below is assembled with tar only; no later step in this job invokes
# brew. Confirm this step is still needed and that `brew` is on PATH on the Linux runners.
- name: Setup Homebrew
run: |
brew tap homebrew/core
brew update
# Strips a leading "v" from the tag to obtain the version.
- name: Extract version
id: version
env:
TAG: ${{ needs.prepare.outputs.tag }}
run: |
VERSION="${TAG#v}"
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
- name: Build CLI for bottle
run: |
cargo build --release \
-p html-to-markdown-cli
# Outputs: sha256 and filename of the created tarball; the tarball is also copied into the
# workspace root so the upload step can find it by relative path.
- name: Create Homebrew bottle
id: bottle
env:
VERSION: ${{ steps.version.outputs.version }}
TAG: ${{ needs.prepare.outputs.tag }}
run: |
# Homebrew bottles require {formula}/{version}/ prefix in the tarball
bottle_root="/tmp/html-to-markdown-bottle"
bottle_dir="${bottle_root}/html-to-markdown/${VERSION}"
mkdir -p "${bottle_dir}/bin"
# Copy the built binary
cp target/release/html-to-markdown "${bottle_dir}/bin/"
# Create bottle tarball with correct prefix
cd "${bottle_root}"
bottle_filename="html-to-markdown-${VERSION}.${{ matrix.bottle_tag }}.bottle.tar.gz"
tar -czf "${bottle_filename}" html-to-markdown/
# Calculate SHA256
sha256=$(shasum -a 256 "${bottle_filename}" | cut -d' ' -f1)
echo "sha256=${sha256}" >> "$GITHUB_OUTPUT"
echo "filename=${bottle_filename}" >> "$GITHUB_OUTPUT"
# Copy to workspace for artifact upload
cp "${bottle_filename}" "${{ github.workspace }}/"
echo "Bottle created: ${bottle_filename}"
echo "SHA256: ${sha256}"
# Fails the job early (ls exits non-zero) if no bottle tarball reached the workspace.
- name: Verify bottle file in workspace
run: |
cd "${{ github.workspace }}"
ls -lh html-to-markdown-*.bottle.tar.gz
echo "Files in workspace:"
ls -la
shell: bash
- name: Upload bottle artifact
uses: actions/upload-artifact@v7 # v7
with:
name: homebrew-bottle-${{ matrix.bottle_tag }}
path: html-to-markdown-${{ steps.version.outputs.version }}.${{ matrix.bottle_tag }}.bottle.tar.gz
retention-days: 14
if-no-files-found: error
# Collects the bottle tarballs built by homebrew-bottles and passes them to the upload script for
# the release tag. Runs only for non-dry-run tag builds where all bottle builds succeeded and the
# Homebrew pre-check did not find the version.
upload-homebrew-bottles:
name: Upload Homebrew bottles to GitHub Release
needs: [prepare, check-homebrew, homebrew-bottles]
if: |
always() &&
needs.prepare.outputs.dry_run != 'true' &&
needs.prepare.outputs.is_tag == 'true' &&
(needs.check-homebrew.outputs.exists != 'true') &&
needs.homebrew-bottles.result == 'success'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.ref }}
# The tag is passed through TAG (as the other release-upload steps in this workflow do) instead
# of being interpolated into the command line, so it cannot be interpreted as shell syntax.
- name: Ensure GitHub release exists
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TAG: ${{ needs.prepare.outputs.tag }}
run: scripts/publish/ensure-github-release-exists.sh "${TAG}"
shell: bash
- name: Download bottle artifacts
uses: actions/download-artifact@v8 # v8
with:
pattern: homebrew-bottle-*
path: dist/homebrew
merge-multiple: true
# Diagnostic listing only; missing directories print a message instead of failing the step.
- name: Verify downloaded artifacts
run: |
echo "Contents of dist/homebrew:"
ls -laR dist/homebrew || echo "dist/homebrew not found"
echo "All dist contents:"
ls -laR dist || echo "dist not found"
shell: bash
- name: Upload bottles (idempotent)
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TAG: ${{ needs.prepare.outputs.tag }}
run: scripts/publish/upload-homebrew-bottles.sh "${TAG}" dist/homebrew
shell: bash
# Updates the Homebrew formula in the kreuzberg-dev/homebrew-tap repository with the new bottles.
# Runs only for non-dry-run tag builds after upload-homebrew-bottles succeeded and when the
# Homebrew pre-check did not find the version.
publish-homebrew:
name: Update Homebrew formula
needs: [prepare, check-homebrew, upload-homebrew-bottles]
if: |
always() &&
needs.prepare.outputs.dry_run != 'true' &&
needs.prepare.outputs.is_tag == 'true' &&
(needs.check-homebrew.outputs.exists != 'true') &&
needs.upload-homebrew-bottles.result == 'success'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
# Uses outputs.checkout_ref, whereas upload-homebrew-bottles checks out outputs.ref.
- name: Checkout
uses: actions/checkout@v6 # v6
with:
ref: ${{ needs.prepare.outputs.checkout_ref }}
- name: Download bottle artifacts
uses: actions/download-artifact@v8 # v8
with:
pattern: homebrew-bottle-*
path: dist/homebrew
merge-multiple: true
# Stores HOMEBREW_TOKEN in ~/.git-credentials (plain text, via git's `store` helper) and sets the
# bot identity globally for subsequent git operations on this runner.
- name: Setup Git credentials
env:
GH_TOKEN: ${{ secrets.HOMEBREW_TOKEN }}
run: |
git config --global credential.helper store
echo "https://x-access-token:${GH_TOKEN}@github.com" > ~/.git-credentials
git config --global user.name "html-to-markdown-bot"
git config --global user.email "bot@kreuzberg.dev"
# dry-run is forwarded, but the job-level condition already excludes dry runs.
- name: Update Homebrew formula with bottles
uses: kreuzberg-dev/actions/publish-homebrew@v1 # v1
with:
bottles-dir: dist/homebrew
formula-name: html-to-markdown
tap-repo: kreuzberg-dev/homebrew-tap
tag: ${{ needs.prepare.outputs.tag }}
version: ${{ needs.prepare.outputs.version }}
github-repo: kreuzberg-dev/html-to-markdown
dry-run: ${{ needs.prepare.outputs.dry_run }}
================================================
FILE: .github/workflows/validate-issues.yml
================================================
# Delegates issue validation to the shared reusable workflow in kreuzberg-dev/actions.
name: Validate Issues
on:
# Fires when an issue is opened or edited.
issues:
types: [opened, edited]
jobs:
validate:
uses: kreuzberg-dev/actions/.github/workflows/reusable-validate-issues.yml@v1
# Forwards all of this repository's secrets to the called workflow.
secrets: inherit
================================================
FILE: .github/workflows/validate-pr.yml
================================================
# Delegates pull-request validation to the shared reusable workflow in kreuzberg-dev/actions.
name: Validate PR
on:
# Fires when a pull request is opened, edited, or receives new commits (synchronize).
pull_request:
types: [opened, edited, synchronize]
jobs:
validate:
uses: kreuzberg-dev/actions/.github/workflows/reusable-validate-pr.yml@v1
# Forwards all of this repository's secrets to the called workflow.
secrets: inherit
================================================
FILE: .gitignore
================================================
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.mypy_cache/
.ruff_cache/
htmlcov/
.coverage
.coverage.*
coverage.lcov
# Rust
target/
Cargo.lock
rust-coverage.lcov
*.node
examples/**/.cargo/config.toml
# Node.js / TypeScript
node_modules/
**/node_modules/
dist/
dist-node/
dist-web/
*.tsbuildinfo
.pnpm-debug.log
packages/html-to-markdown-ts/bin/
# Ruby gem build outputs
# The negations re-include the Ruby sources that the generic `lib/` rule in the Python section
# would otherwise ignore.
!packages/ruby/lib/
!packages/ruby/lib/**/*.rb
packages/ruby/lib/bin/
packages/ruby/lib/*.bundle
packages/ruby/tmp/
packages/ruby/vendor/html-to-markdown-rs/
packages/ruby/vendor/Cargo.toml
# PHP extension workspace
packages/php-ext/workspace/
# Elixir: re-include sources hidden by the generic `lib/` rule; ignore crash dumps
!packages/elixir/lib/
!packages/elixir/lib/**/*.ex
erl_crash.dump
# R package build artifacts
!packages/r/R/
!packages/r/R/**/*.R
packages/r/src/*.o
packages/r/src/*.so
packages/r/src/*.dll
packages/r/src/*.dylib
packages/r/*.tar.gz
# Elixir test application dependencies and builds
tests/test_apps/elixir/deps/
tests/test_apps/elixir/_build/
# Example dependency directories
vendor/
**/vendor/
**/vendor/bundle/
.wrangler/
# IDEs & AI tool configs
.vscode/
.idea/
*.swp
*.swo
*~
.cursorrules
.windsurfrules
# MkDocs
site/
# OS
.DS_Store
Thumbs.db
# Benchmarks
.benchmarks/
benchmark-harness-results-*/
tools/runtime-bench/results/
tools/benchmark-harness/results/
tools/benchmark-harness/artifacts/
tools/benchmark-harness/artifacts-*/
tools/benchmark-harness/results-consolidated/
tools/benchmark-harness/results-local-*/
artifacts/
# Cache files
*.cache
.cache/
packages/php/.php-cs-fixer.cache
# Temporary files
.tmp/
[Tt][Oo][Dd][Oo]*
# Environment & virtualenvs
.env
.env.local
.venv/
**/.venv/
# C# / .NET
bin/
obj/
*.dll
*.exe
*.pdb
# Allow benchmark harness entrypoint under packages/go/v2/bin
!packages/go/v2/bin/
!packages/go/v2/bin/benchmark.go
# C FFI test binaries and build artifacts
crates/html-to-markdown-ffi/tests/c/test_*
!crates/html-to-markdown-ffi/tests/c/test_*.c
crates/html-to-markdown-ffi/tests/c/*.o
crates/html-to-markdown-ffi/tests/c/*.dSYM/
# Additional generated artifacts
.remote-cache/
.alef/
.gemini/
GEMINI.md
*.pyd
vendor/bundle/
*.h.bak
*.test
*.class
*.nupkg
pkg/
.gems/
# BEGIN ai-rulez (DO NOT EDIT - managed by ai-rulez)
.agents/
.claude/
.codex/
.cursor/
.github/agents/
.github/commands/
.github/copilot-instructions.md
.github/skills/
.mcp.json
AGENTS.md
CLAUDE.md
# END ai-rulez
================================================
FILE: .gitmodules
================================================
# Homebrew tap checked out as a submodule at ./homebrew-tap.
# NOTE(review): the URL points at Goldziher/homebrew-tap, while the release workflow updates
# kreuzberg-dev/homebrew-tap — confirm which repository this submodule should track.
[submodule "homebrew-tap"]
path = homebrew-tap
url = https://github.com/Goldziher/homebrew-tap.git
================================================
FILE: .golangci.yml
================================================
version: "2"
run:
timeout: 5m
issues-exit-code: 1
tests: true
concurrency: 4
modules-download-mode: readonly
allow-serial-runners: false
allow-parallel-runners: true
linters:
default: none
enable:
- errcheck
- govet
- ineffassign
- staticcheck
- unused
- revive
- gocyclo
- goconst
- gocritic
- gosec
- misspell
- nakedret
settings:
errcheck:
check-type-assertions: true
check-blank: true
exclude-functions:
- (net/http.ResponseWriter).Write
- (io.Closer).Close
- fmt.Fprintf
- fmt.Printf
- fmt.Println
- os.Setenv
- os.Unsetenv
goconst:
min-len: 3
min-occurrences: 3
gocyclo:
min-complexity: 25
gosec:
excludes:
- G101 # hardcoded-credentials check is disabled (too many false positives)
govet:
enable-all: true
disable:
- shadow
misspell:
locale: US
nakedret:
max-func-lines: 30
revive:
confidence: 0.8
severity: warning
enable-all-rules: false
rules:
- name: blank-imports
- name: context-keys-type
- name: time-naming
- name: var-declaration
- name: unexported-return
- name: errorf
- name: context-as-argument
- name: dot-imports
- name: error-return
- name: error-strings
- name: error-naming
- name: if-return
- name: increment-decrement
- name: var-naming
- name: range
- name: receiver-naming
- name: indent-error-flow
- name: exported
disabled: true
- name: package-comments
disabled: true
exclusions:
generated: lax
rules:
- linters:
- goconst
path: _test\.go
- linters:
- gocyclo
path: _test\.go
- linters:
- gosec
path: _test\.go
- linters:
- revive
path: _test\.go
text: "context-as-argument"
- linters:
- goconst
- revive
- errcheck
- govet
path: _test\.go
text: "unusedwrite:"
- linters:
- govet
text: "fieldalignment:"
- linters:
- errcheck
path: _test\.go
paths:
- vendor
- build
- deployments
- third_party$
- builtin$
- examples$
# Issue reporting limits and message-based exclusions.
# NOTE(review): this file declares version "2"; confirm that `issues.exclude` is still honoured by
# the golangci-lint version in use (the v2 migration guide moves text exclusions to
# linters.exclusions.rules).
issues:
# 0 disables the per-linter and per-message caps, so every finding is reported.
max-issues-per-linter: 0
max-same-issues: 0
uniq-by-line: true
new: false
exclude:
- "Error return value of `\\(\\*github\\.com/goccy/go-json\\.Encoder\\)\\.Encode` is not checked"
- "Error return value of `w\\.Write` is not checked"
- "Error return value of `resp\\.Body\\.Close` is not checked"
- "Error return value of `res\\.Body\\.Close` is not checked"
- "Error return value of `r\\.Body\\.Read` is not checked"
- "Error return value of `os\\.Setenv` is not checked"
- "Error return value of `os\\.Unsetenv` is not checked"
- 'shadow: declaration of "err" shadows declaration'
- "unusedwrite: unused write to field"
- "Error return value of `c\\.provider\\.Delete` is not checked"
- "Error return value of `provider\\.Close` is not checked"
- "Error return value of `natsClient\\.Close` is not checked"
- "Error return value of `cacheProvider\\.Close` is not checked"
- "Error return value of `processor\\.Close` is not checked"
- "Error return value of `sub\\.Unsubscribe` is not checked"
- "Error return value of `json\\.Marshal` is not checked"
# Prefix pattern: matches any unchecked-error message for a function in package strconv.
- "Error return value of `strconv\\."
- "Error return value of `fmt\\.Sscanf` is not checked"
# NOTE(review): this generic message has no function name, so it is much broader than the
# entries above — confirm it is intentional.
- "Error return value is not checked"
formatters:
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$
================================================
FILE: .mailmap
================================================
Na'aman Hirschfeld <nhirschfeld@gmail.com> Na'aman Hischfeld <nhirschfeld@gmail.com>
Na'aman Hirschfeld <nhirschfeld@gmail.com> Test User <nhirschfeld@gmail.com>
================================================
FILE: .markdownlint.yaml
================================================
# Enable every rule by default, then adjust or disable individual rules below.
default: true
# MD007 (ul-indent): nested unordered lists are indented by 4 spaces.
MD007:
indent: 4
# MD033 (no-inline-html): disabled.
MD033: false
# MD041 (first-line-heading): disabled.
MD041: false
# MD013 (line-length): disabled.
MD013: false
# MD014 (commands-show-output): disabled.
MD014: false
# MD024 (no-duplicate-heading): only flag duplicate headings among siblings.
MD024:
siblings_only: true
# MD046 (code-block-style): disabled.
MD046: false
================================================
FILE: .mvn/wrapper/MavenWrapperDownloader.java
================================================
/*
* Copyright 2007-present the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.net.*;
import java.io.*;
import java.nio.channels.*;
import java.util.Properties;
/**
 * Standalone bootstrap program that downloads {@code maven-wrapper.jar} into
 * {@code .mvn/wrapper/} below a project base directory.
 *
 * <p>Usage: {@code java MavenWrapperDownloader <baseDirectory>}. The process exits with
 * status 0 after a successful download and with status 1 on any failure.
 */
public class MavenWrapperDownloader {

    private static final String WRAPPER_VERSION = "3.3.4";

    /**
     * Default URL to download the maven-wrapper.jar from, if no 'wrapperUrl' is provided.
     */
    private static final String DEFAULT_DOWNLOAD_URL =
        "https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/"
            + WRAPPER_VERSION
            + "/maven-wrapper-"
            + WRAPPER_VERSION
            + ".jar";

    /**
     * Path to the maven-wrapper.properties file, which might contain a wrapperUrl property to
     * use instead of the default one.
     */
    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
        ".mvn/wrapper/maven-wrapper.properties";

    /**
     * Path where the maven-wrapper.jar will be saved to.
     */
    private static final String MAVEN_WRAPPER_JAR_PATH =
        ".mvn/wrapper/maven-wrapper.jar";

    /**
     * Name of the property which should be used to override the default download url for the wrapper.
     */
    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";

    /**
     * Entry point: resolves the download URL, ensures the output directory exists and
     * downloads the wrapper jar.
     *
     * @param args {@code args[0]} is the project base directory
     */
    public static void main(String args[]) {
        System.out.println("- Downloader started");
        if (args.length < 1) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.out.println("- ERROR missing base directory argument");
            System.exit(1);
            return;
        }
        File baseDirectory = new File(args[0]);
        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());

        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
        String url = DEFAULT_DOWNLOAD_URL;
        if (mavenWrapperPropertyFile.exists()) {
            // try-with-resources closes the stream on every path.
            try (FileInputStream propertiesStream = new FileInputStream(mavenWrapperPropertyFile)) {
                Properties mavenWrapperProperties = new Properties();
                mavenWrapperProperties.load(propertiesStream);
                // Fall back to the default URL when the property is absent.
                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
            } catch (IOException e) {
                // Best effort: keep going with whatever URL has been resolved so far.
                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
            }
        }
        System.out.println("- Downloading from: " + url);

        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
        File outputDirectory = outputFile.getParentFile();
        if (!outputDirectory.exists() && !outputDirectory.mkdirs()) {
            // Not fatal here: the download below fails (exit 1) if the directory is unusable.
            System.out.println(
                "- ERROR creating output directory '" + outputDirectory.getAbsolutePath() + "'");
        }
        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
        try {
            downloadFileFromURL(url, outputFile);
            System.out.println("Done");
            System.exit(0);
        } catch (Throwable e) {
            System.out.println("- Error downloading");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Streams the content at {@code urlString} into {@code destination}.
     *
     * <p>When both the MVNW_USERNAME and MVNW_PASSWORD environment variables are set, they are
     * installed as the default {@link Authenticator} credentials before the connection is opened.
     *
     * @param urlString   URL of the wrapper jar
     * @param destination file the downloaded bytes are written to
     * @throws Exception if the URL is malformed or any I/O step fails
     */
    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
        if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
            String username = System.getenv("MVNW_USERNAME");
            char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(username, password);
                }
            });
        }
        URL website = new URL(urlString);
        // Both resources are closed even when the transfer throws; they are released in
        // reverse declaration order (file first, then the network channel).
        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
             FileOutputStream fos = new FileOutputStream(destination)) {
            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
        }
    }
}
================================================
FILE: .mvn/wrapper/maven-wrapper.properties
================================================
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.3.4/maven-wrapper-3.3.4.jar
maven.mainClass=org.apache.maven.cli.MavenCli
================================================
FILE: .php-cs-fixer.dist.php
================================================
<?php
declare(strict_types=1);
use PhpCsFixer\Config;
use PhpCsFixer\Finder;
// PHP-CS-Fixer configuration returned to the tool: risky fixers are not allowed,
// the '@auto' rule set is enabled, and files are discovered by a Finder rooted at
// the directory containing this file.
return (new Config())
->setRiskyAllowed(false)
->setRules([
'@auto' => true
])
// 💡 by default, Fixer looks for `*.php` files excluding `./vendor/` - here, you can groom this config
->setFinder(
(new Finder())
// 💡 root folder to check
->in(__DIR__)
// 💡 additional files, eg bin entry file
// ->append([__DIR__.'/bin-entry-file'])
// 💡 folders to exclude, if any
// ->exclude([/* ... */])
// 💡 path patterns to exclude, if any
// ->notPath([/* ... */])
// 💡 extra configs
// ->ignoreDotFiles(false) // true by default in v3, false in v4 or future mode
// ->ignoreVCS(true) // true by default
)
;
================================================
FILE: .pre-commit-config.yaml
================================================
default_install_hook_types:
- pre-commit
- commit-msg
exclude: ^docs/snippets/|vendor/|node_modules/|target/|dist/|artifacts/|scripts/ci/|\.cache/|rust-vendor/|\.venv/
repos:
# AI-Rulez: auto-generate AI assistant configuration files
- repo: https://github.com/Goldziher/ai-rulez
rev: v4.1.5
hooks:
- id: ai-rulez-generate
# Commit message linting
- repo: https://github.com/Goldziher/gitfluff
rev: v0.8.0
hooks:
- id: gitfluff-lint
args: ["--write"]
stages: [commit-msg]
# General file checks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: trailing-whitespace
exclude: \.github/copilot-instructions\.md
- id: end-of-file-fixer
exclude: \.github/copilot-instructions\.md
- id: check-merge-conflict
- id: check-added-large-files
exclude: uv.lock
- id: detect-private-key
- id: check-json
exclude: tsconfig\.base\.json
- id: check-yaml
args: ["--allow-multiple-documents", "--unsafe"]
- id: check-toml
- id: check-case-conflict
# TOML formatting
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.21.1"
hooks:
- id: pyproject-fmt
- repo: https://github.com/DevinR528/cargo-sort
rev: "v2.1.4"
hooks:
- id: cargo-sort
args: [-w]
# Python: ruff (linting + formatting); mypy (type checking) runs from the shared kreuzberg-dev hooks below
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.12
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
# Rust: formatting and linting (core workspace only — bindings handled by alef)
- repo: https://github.com/AndrejOrsula/pre-commit-cargo
rev: 0.5.0
hooks:
- id: cargo-fmt
args: ["--all"]
- id: cargo-clippy
args:
[
"--fix",
"--allow-dirty",
"--allow-staged",
"--workspace",
"--exclude=html-to-markdown-php",
"--exclude=html-to-markdown-py",
"--exclude=html-to-markdown-node",
"--exclude=html-to-markdown-e2e-rust",
"--all-features",
"--all-targets",
"--",
"-D",
"warnings",
]
- repo: https://github.com/bnjbvr/cargo-machete
rev: v0.9.2
hooks:
- id: cargo-machete
args: [crates/]
exclude: ^(e2e/|test_apps/)
- repo: https://github.com/EmbarkStudios/cargo-deny
rev: 0.19.4
hooks:
- id: cargo-deny
args: ["check"]
# Node/TS/WASM: oxlint
- repo: https://github.com/oxc-project/mirrors-oxlint
rev: v1.62.0
hooks:
- id: oxlint
args: ["--fix"]
exclude: ^(docs/demo/|e2e/)
# cppcheck — kept upstream until shared repo ships it
- repo: https://github.com/pocc/pre-commit-hooks
rev: v1.3.5
hooks:
- id: cppcheck
args:
[
"--std=c11",
"--enable=warning,style,performance",
"--suppress=missingIncludeSystem",
"--suppress=unusedStructMember",
"--suppress=normalCheckLevelMaxBranches",
]
files: ^crates/html-to-markdown-ffi/tests/c/
# Markdown linting
- repo: https://github.com/rvben/rumdl-pre-commit
rev: "v0.1.86"
hooks:
- id: rumdl-fmt
exclude: 'test_documents/|\.ai-rulez/|\.remote-cache/|e2e/|fixtures/|test_apps/|\.github/copilot-instructions\.md|CLAUDE\.md|\.claude/|\.agents/|\.codex/'
# Shared kreuzberg-dev polyglot hooks (shell, C/C++, Java checkstyle, Go, Python, Ruby, C#, PHP, Elixir)
- repo: https://github.com/kreuzberg-dev/pre-commit-hooks
rev: v0.1.0
hooks:
- id: shfmt
args: ["-w", "-i", "2"]
- id: shellcheck
- id: clang-format
args: ["--style=file"]
files: ^crates/html-to-markdown-ffi/tests/c/
- id: clang-tidy
files: ^crates/html-to-markdown-ffi/tests/c/
- id: checkstyle
args: ["-c", "packages/java/checkstyle.xml", "-p", "packages/java/checkstyle.properties"]
exclude: ^(\.mvn/wrapper/|e2e/|test_apps/|packages/java/src/)
- id: mypy
exclude: "e2e/|tests/|scripts/"
- id: go-fmt
exclude: ^(e2e/|test_apps/)
- id: golangci-lint
exclude: ^(e2e/|test_apps/)
env:
KREUZBERG_GO_MOD_DIRS: "packages/go"
- id: govulncheck
exclude: ^(e2e/|test_apps/)
- id: rubocop
files: ^packages/ruby/.*\.rb$
exclude: ^packages/ruby/ext/
- id: rubocop-lint
files: ^packages/ruby/.*\.(rb|rbs)$
exclude: ^packages/ruby/ext/
- id: steep
files: ^packages/ruby/.*\.(rb|rbs)$
exclude: ^packages/ruby/ext/
- id: dotnet-format
files: ^packages/csharp/.*\.cs$
- id: dotnet-format-check
files: ^packages/csharp/.*\.cs$
- id: php-cs-fixer
files: ^packages/php/.*\.php$
- id: phpstan
files: ^packages/php/(src|tests|stubs)/.*\.php$
args: ["analyse", "--no-progress", "--configuration", "packages/php/phpstan.neon"]
- id: mix-format
files: ^packages/elixir/
- id: mix-credo
files: ^packages/elixir/
- id: java-verify
files: ^packages/java/
# Alef: verify bindings and sync versions
- repo: https://github.com/kreuzberg-dev/alef
rev: v0.13.6
hooks:
- id: alef-verify
- id: alef-sync-versions
# GitHub Actions: linting
- repo: https://github.com/rhysd/actionlint
rev: v1.7.12
hooks:
- id: actionlint
# Java cpd — kept upstream (not yet in shared repo)
- repo: https://github.com/gherynos/pre-commit-java
rev: v0.6.37
hooks:
- id: cpd
exclude: ^(\.mvn/wrapper/|e2e/|test_apps/|packages/java/src/)
# Spelling (last — runs after all formatters and generators)
- repo: https://github.com/crate-ci/typos
rev: v1.46.0
hooks:
- id: typos
args: [--force-exclude]
================================================
FILE: .ruby-version
================================================
3.4.8
================================================
FILE: .rumdl.toml
================================================
# rumdl — Rust-based markdown linter
# https://github.com/rvben/rumdl
respect-gitignore = true
exclude = ["node_modules", "target", "dist", "vendor"]
# Disabled rules:
# MD012: Multiple consecutive blank lines
# MD013: Line length (tables and code blocks can be long)
# MD024: Duplicate heading names
#        NOTE(review): the [MD024] siblings_only setting further down presumably
#        has no effect while the rule is listed here — confirm which is intended
# MD033: Inline HTML (MkDocs uses HTML extensively)
# MD036: Emphasis as heading (intentional style in docs/READMEs)
# MD041: First line H1 not required
# MD046: Code block style — MkDocs tabs/admonitions indent fenced
#        blocks, which rumdl misidentifies as indented code blocks
# MD051: Link fragment checking (incompatible with MkDocs anchor generation —
#        MkDocs strips <span> tags from heading IDs)
# MD076: Blank lines between list items (intentional formatting in READMEs)
disable = [
"MD012",
"MD013",
"MD024",
"MD033",
"MD036",
"MD041",
"MD046",
"MD051",
"MD076",
]
# MD024: Allow duplicate heading names if they are not siblings
[MD024]
siblings_only = true
================================================
FILE: .sdkmanrc
================================================
java=25.0.2-tem
maven=3.9.9
================================================
FILE: .task/README.md
================================================
# .task/ Directory - Modular Task Organization
This directory contains the modular Task configuration for the html-to-markdown project, following the **Kreuzberg pattern** for maintainable, scalable build automation.
## Purpose
The `.task/` directory structure reduces the root `Taskfile.yaml` from 838 lines to ~250 lines (a reduction of roughly 70%) by organizing tasks into logical modules. This approach:
- **Improves Maintainability**: Each language/workflow lives in its own file
- **Enables Reusability**: Common patterns defined once, reused everywhere
- **Simplifies Testing**: Test individual modules independently
- **Supports Cross-Platform**: Platform-specific logic isolated in config/
- **Scales Gracefully**: Adding new languages doesn't bloat the root Taskfile
## Directory Structure
```text
.task/
├── config/
│ ├── vars.yml # Global variables, version detection, paths
│ └── platforms.yml # OS/arch detection, library extensions, target triples
│
├── languages/ # Language-specific task modules (11 total)
│ ├── rust.yml # Rust core library tasks
│ ├── python.yml # PyO3 Python bindings
│ ├── node.yml # NAPI-RS Node.js bindings
│ ├── typescript.yml # TypeScript wrapper package
│ ├── wasm.yml # WebAssembly bindings
│ ├── ruby.yml # Magnus Ruby bindings
│ ├── php.yml # ext-php-rs PHP extension
│ ├── go.yml # Go FFI wrapper
│ ├── java.yml # Java JNI bindings
│ ├── csharp.yml # C# P/Invoke wrapper
│ └── elixir.yml # Elixir NIF bindings
│
├── workflows/ # Aggregated workflow tasks (internal)
│ ├── build.yml # Build all languages with profile support
│ ├── test.yml # Test all languages (parallel/sequential)
│ └── lint.yml # Lint all languages with auto-fix
│
└── tools/ # Utility and automation tasks
├── version-sync.yml # Version synchronization across manifests
├── general.yml # TOML formatting, shell linting
└── pre-commit.yml # Prek pre-commit hook management (future)
```
## Configuration Files
### `config/vars.yml`
**Purpose**: Global variables shared across all task modules.
**Key Variables**:
```yaml
VERSION: # Extracted from Cargo.toml
BUILD_PROFILE: # dev/release/ci (default: release)
OS: # darwin/linux/windows
ARCH: # x86_64/arm64/armv7
NUM_CPUS: # Detected CPU count for parallel builds
ROOT: # Project root directory
CRATES_DIR: # crates/ directory
PACKAGES_DIR: # packages/ directory
TARGET_DIR: # target/ directory (Rust build outputs)
```
**Example Usage**:
```yaml
# In any language module:
dir: "{{.PACKAGES_DIR}}/python"
cmds:
- cargo build --profile {{.BUILD_PROFILE}}
```
### `config/platforms.yml`
**Purpose**: Platform-specific detection and configuration.
**Key Variables**:
```yaml
EXE_EXT: # .exe on Windows, empty on Unix
LIB_EXT: # dylib/so/dll based on OS
LIB_PREFIX: # lib on Unix, empty on Windows
RUST_TARGET: # Target triple (x86_64-apple-darwin, etc.)
RUBY_FULL_PATH: # Full path to Ruby binary (handles Homebrew ARM64)
IS_WINDOWS: # Boolean: true on Windows
IS_MACOS: # Boolean: true on macOS
IS_LINUX: # Boolean: true on Linux
```
**Example Usage**:
```yaml
# Cross-platform library path configuration:
env:
LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}'
PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}'
```
## Language Modules
Each language module follows a **consistent pattern**:
### Standard Tasks (All Languages)
```yaml
install: # Install dependencies/toolchain
build: # Build with profile support (uses BUILD_PROFILE)
build:dev: # Debug build (fast, unoptimized)
build:release: # Release build (optimized)
build:ci: # CI build (release + debug symbols)
test: # Run tests
test:ci: # Run tests with coverage (CI mode)
coverage: # Generate coverage reports (lcov format)
lint: # Lint + auto-fix (format + linters)
lint:check: # Check-only (no modifications, for CI)
format: # Format code
format:check: # Check formatting without changes
update: # Update dependencies
clean: # Remove build artifacts
```
### Example: `languages/python.yml`
```yaml
version: "3"
internal: true
includes:
platforms: ../config/platforms.yml
vars:
BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}"
PYTHON_WORK_DIR: "{{.PACKAGES_DIR}}/python"
tasks:
install:
desc: "Install Python dependencies with uv"
dir: "{{.PYTHON_WORK_DIR}}"
cmds:
- uv sync
- uv pip install -e .
build:
desc: "Build Python bindings with maturin ({{.BUILD_PROFILE}} profile)"
dir: "{{.PYTHON_WORK_DIR}}"
cmds:
- maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}}
test:
desc: "Run Python tests with pytest"
dir: "{{.PYTHON_WORK_DIR}}"
cmds:
- pytest -v tests/
# ... additional tasks
```
### Cross-Platform Patterns
**DO Use** (Cross-Platform Compatible):
```yaml
# Python for file operations:
- cmd: |
python -c "
import shutil, glob
for d in ['build', 'dist']:
shutil.rmtree(d, ignore_errors=True)
"
# Conditional environment variables:
env:
LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}'
PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}'
# Task's built-in ignore_error:
- cmd: some-command-that-might-fail
ignore_error: true
```
**DON'T Use** (Platform-Specific):
```yaml
# ❌ Unix-only commands:
- rm -rf build/ dist/
- find . -name "*.pyc" -delete
# ❌ Hardcoded paths:
- /opt/homebrew/bin/ruby
- C:\Program Files\Tool\bin
# ❌ Bash-specific syntax:
- cmd: test -d .venv && source .venv/bin/activate
```
## Workflow Modules
Workflow modules aggregate language tasks into unified operations. These are **internal** (not exposed to users directly).
### `workflows/build.yml`
```yaml
version: "3"
internal: true
tasks:
all:
desc: "Build all language bindings"
cmds:
- task: rust:build
- task: python:build
- task: node:build
# ... (11 languages)
all:dev:
desc: "Build all in debug mode"
cmds:
- task: rust:build:dev
- task: python:build:dev
# ...
core:
desc: "Build Rust core only"
cmds:
- task: rust:build
bindings:
desc: "Build all bindings (skip core)"
cmds:
- task: python:build
- task: node:build
# ... (exclude rust)
```
### `workflows/test.yml`
```yaml
version: "3"
internal: true
tasks:
all:
desc: "Run all tests"
cmds:
- task: rust:test
- task: python:test
# ... (sequential)
all:parallel:
desc: "Run tests in parallel"
deps:
- rust:test
- python:test
# ... (parallel execution)
all:ci:
desc: "Run CI tests with coverage"
cmds:
- task: rust:test:ci
- task: python:test:ci
# ...
```
## Tools Modules
### `tools/version-sync.yml`
**Purpose**: Synchronize version across all package manifests.
```yaml
version: "3"
tasks:
sync:
desc: "Sync version from Cargo.toml to all manifests"
cmds:
- python {{.ROOT}}/scripts/sync_versions.py
```
**Updates**:
- Cargo workspace members (crates/*/Cargo.toml)
- Python (packages/python/pyproject.toml)
- Node.js (packages/typescript/package.json)
- Ruby (packages/ruby/lib/html_to_markdown/version.rb)
- PHP (packages/php/composer.json)
- Go (packages/go/v3/version.go)
- Java (packages/java/pom.xml)
- C# (packages/csharp/HtmlToMarkdown.csproj)
- Elixir (packages/elixir/mix.exs)
- **test_apps manifests** (tests/test_apps/*/pyproject.toml, package.json, etc.)
### `tools/general.yml`
**Purpose**: General-purpose linting and validation tasks.
```yaml
version: "3"
tasks:
toml:format:
desc: "Format TOML files"
cmds:
- taplo format **/*.toml
toml:format:check:
desc: "Check TOML formatting"
cmds:
- taplo format --check **/*.toml
```
## How to Add a New Language
Let's add **Swift** as an example:
### Step 1: Create Language Module
**File**: `.task/languages/swift.yml`
```yaml
version: "3"
internal: true
includes:
platforms: ../config/platforms.yml
vars:
BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}"
SWIFT_WORK_DIR: "{{.PACKAGES_DIR}}/swift"
tasks:
install:
desc: "Install Swift dependencies"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift package resolve
build:
desc: "Build Swift package ({{.BUILD_PROFILE}} profile)"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- cmd: swift build{{if eq .BUILD_PROFILE "release"}} -c release{{else}} -c debug{{end}}
ignore_error: false
build:dev:
desc: "Build Swift package in debug mode"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift build -c debug
build:release:
desc: "Build Swift package in release mode"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift build -c release
test:
desc: "Run Swift tests"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift test
test:ci:
desc: "Run Swift tests with coverage (CI mode)"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift test --enable-code-coverage
lint:
desc: "Lint Swift code with auto-fix"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swiftlint --fix
- swiftformat .
lint:check:
desc: "Lint Swift code without auto-fix"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swiftlint
- swiftformat --lint .
format:
desc: "Format Swift code"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swiftformat .
format:check:
desc: "Check Swift formatting"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swiftformat --lint .
update:
desc: "Update Swift dependencies"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift package update
clean:
desc: "Clean Swift build artifacts"
dir: "{{.SWIFT_WORK_DIR}}"
cmds:
- swift package clean
```
### Step 2: Include in Root Taskfile
**File**: `Taskfile.yaml`
```yaml
includes:
# ... existing includes
swift:
taskfile: .task/languages/swift.yml
```
### Step 3: Add to Workflow Aggregators
**File**: `.task/workflows/build.yml`
```yaml
tasks:
all:
cmds:
- task: rust:build
# ... existing languages
- task: swift:build # ADD THIS
```
**File**: `.task/workflows/test.yml`
```yaml
tasks:
all:
cmds:
- task: rust:test
# ... existing languages
- task: swift:test # ADD THIS
```
**File**: `.task/workflows/lint.yml`
```yaml
tasks:
all:
cmds:
- task: rust:lint
# ... existing languages
- task: swift:lint # ADD THIS
```
### Step 4: Update Root Taskfile Aggregates
**File**: `Taskfile.yaml`
```yaml
tasks:
setup:
cmds:
- task: rust:install
# ... existing installs
- task: swift:install # ADD THIS
```
Now users can run:
```bash
task swift:build
task swift:test
task swift:lint
```
## Internal vs Public Tasks
### Internal Tasks
Defined with `internal: true` at the file level:
```yaml
version: "3"
internal: true # This file's tasks are not listed in `task --list`
```
**Characteristics**:
- Not visible in `task --list`
- Only callable from other tasks
- Used for: config files, workflow aggregators
**Examples**:
- `.task/config/vars.yml` (internal)
- `.task/workflows/build.yml` (internal)
- `.task/workflows/test.yml` (internal)
### Public Tasks
Included without `internal: true` or via root Taskfile:
```yaml
includes:
rust:
taskfile: .task/languages/rust.yml
# No internal flag = public
```
**Characteristics**:
- Visible in `task --list`
- Directly callable by users
- Used for: language modules, tool modules
**Examples**:
- `rust:build` (public)
- `python:test` (public)
- `version:sync` (public)
## Best Practices
### 1. Always Use Template Variables
```yaml
# ✅ Good:
dir: "{{.PACKAGES_DIR}}/python"
cmds:
- cargo build --profile {{.BUILD_PROFILE}}
# ❌ Bad:
dir: "packages/python"
cmds:
- cargo build --release
```
### 2. Support All Build Profiles
```yaml
# ✅ Good: Profile-aware command
- cmd: maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}}
# ❌ Bad: Hardcoded profile
- cmd: maturin develop --release
```
### 3. Use Cross-Platform Commands
```yaml
# ✅ Good: Python for file operations
- cmd: |
python -c "
import shutil
shutil.rmtree('build', ignore_errors=True)
"
# ❌ Bad: Unix-only command
- cmd: rm -rf build/
```
### 4. Include Platform Config
```yaml
# ✅ Good: Include platforms for cross-platform logic
includes:
platforms: ../config/platforms.yml
env:
LD_LIBRARY_PATH: '{{if ne .OS "windows"}}{{.TARGET_DIR}}/release{{end}}'
# ❌ Bad: Hardcoded Unix assumption
env:
LD_LIBRARY_PATH: "{{.TARGET_DIR}}/release"
```
### 5. Consistent Task Naming
```yaml
# ✅ Good: Consistent naming with colons
install:
build:
build:dev:
build:release:
build:ci:
test:
test:ci:
lint:
lint:check:
format:
format:check:
# ❌ Bad: Inconsistent naming
install_deps:
make_build:
run_tests:
check-format:
```
### 6. Document Descriptions
```yaml
# ✅ Good: Clear, actionable description
install:
desc: "Install Python dependencies with uv"
# ❌ Bad: Vague or missing description
install:
desc: "Install stuff"
```
### 7. Error Handling
```yaml
# ✅ Good: Explicit error handling
- cmd: pytest -v tests/
ignore_error: false # Fail on errors
- cmd: python -c "import shutil; shutil.rmtree('.cache')"
ignore_error: true # OK to fail (directory may not exist)
# ❌ Bad: Implicit behavior
- cmd: pytest -v tests/
```
## Troubleshooting
### Task Not Found
**Error**: `Task "foo:bar" not found`
**Solution**: Ensure the include is in root `Taskfile.yaml`:
```yaml
includes:
foo:
taskfile: .task/languages/foo.yml
```
### Variable Not Defined
**Error**: `template: :1:2: executing "" at <.SOME_VAR>: map has no entry for key "SOME_VAR"`
**Solution**: Define variable in `.task/config/vars.yml` or include platforms:
```yaml
includes:
platforms: ../config/platforms.yml
```
### Cross-Platform Failures
**Error**: Task works on macOS but fails on Windows
**Solution**: Use conditional environment variables and cross-platform commands:
```yaml
env:
PATH: '{{if eq .OS "windows"}}{{.TARGET_DIR}}/release;{{end}}{{.PATH}}'
cmds:
- cmd: |
python -c "import shutil; shutil.rmtree('build', ignore_errors=True)"
```
### Circular Dependencies
**Error**: `task: import cycle not allowed`
**Solution**: Avoid including files that include each other. Use internal workflow aggregators instead.
## References
- **Task Documentation**: <https://taskfile.dev>
- **Kreuzberg Pattern**: ../kreuzberg/ (sibling project)
- **Root Taskfile**: ../Taskfile.yaml
- **Platform Config**: config/platforms.yml
- **Global Variables**: config/vars.yml
---
**Last Updated**: 2025-12-28
**Maintainers**: html-to-markdown contributors
================================================
FILE: .task/checksum/_lint-typescript-lint
================================================
5185d264d62b8f691570c5e0c226b22
================================================
FILE: .task/checksum/_test-typescript-test
================================================
b93fe0d03a54250e90b23f1a50fb35ec
================================================
FILE: .task/checksum/typescript-typecheck
================================================
99aa06d3014798d86001c324468d497f
================================================
FILE: .task/config/platforms.yml
================================================
version: "3"
internal: true
includes:
vars: ./vars.yml
vars:
# NOTE: these values use Task's built-in OS template function (Go's GOOS:
# "windows", "darwin", "linux", ...) rather than $OSTYPE. OSTYPE is a bash/zsh
# shell variable that is normally not exported, so when it is empty every
# check silently fell through to the Unix/Linux answer.
# Executable extension - empty for Unix, .exe for Windows
EXE_EXT: '{{if eq OS "windows"}}.exe{{end}}'
# Library extension - platform specific shared library suffix
LIB_EXT: '{{if eq OS "darwin"}}dylib{{else if eq OS "windows"}}dll{{else}}so{{end}}'
# Library prefix - lib for Unix-like systems, empty for Windows
LIB_PREFIX: '{{if ne OS "windows"}}lib{{end}}'
# Platform string for Rust targets.
# The CPU architecture comes from `uname -m`; the OS part comes from Task's
# built-in OS template function instead of $OSTYPE, which is usually not
# exported and would otherwise yield "<arch>-unknown-unknown" everywhere.
RUST_TARGET:
  sh: |
    ARCH=$(uname -m)
    case "$ARCH" in
      x86_64|x64)
        ARCH_STR="x86_64"
        ;;
      aarch64|arm64)
        ARCH_STR="aarch64"
        ;;
      armv7l|armv7)
        ARCH_STR="armv7"
        ;;
      *)
        ARCH_STR="$ARCH"
        ;;
    esac
    case "{{OS}}" in
      darwin)
        echo "${ARCH_STR}-apple-darwin"
        ;;
      linux)
        echo "${ARCH_STR}-unknown-linux-gnu"
        ;;
      windows)
        echo "${ARCH_STR}-pc-windows-msvc"
        ;;
      *)
        echo "${ARCH_STR}-unknown-unknown"
        ;;
    esac
# Boolean platform checks (imported from vars.yml)
IS_WINDOWS: "{{.IS_WINDOWS}}"
IS_MACOS: "{{.IS_MACOS}}"
IS_LINUX: "{{.IS_LINUX}}"
# Ruby path detection - handles Homebrew ARM64 and standard installations
RUBY_FULL_PATH:
sh: |
if command -v ruby >/dev/null 2>&1; then
command -v ruby
elif [[ "$OSTYPE" == "darwin"* ]] && [[ -f "/opt/homebrew/opt/ruby/bin/ruby" ]]; then
echo "/opt/homebrew/opt/ruby/bin/ruby"
else
echo "ruby"
fi
# Convenient binary paths for platform-specific tools
CARGO_BIN:
sh: command -v cargo 2>/dev/null || echo "cargo"
RUSTC_BIN:
sh: command -v rustc 2>/dev/null || echo "rustc"
# Shell script extension for platform-specific scripts (.ps1 on Windows, .sh elsewhere).
# Uses Task's built-in OS template function; $OSTYPE is usually not exported.
SHELL_EXT: '{{if eq OS "windows"}}.ps1{{else}}.sh{{end}}'
================================================
FILE: .task/config/vars.yml
================================================
version: "3"
internal: true
vars:
# Version extraction from Cargo.toml (workspace.package.version)
VERSION:
sh: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/'
# Build profile (dev/release/ci) - default to release
BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}"
# Toolchain versions
GOLANGCI_LINT_VERSION: "latest"
GO_TOOLCHAIN: "go1.26.0"
BUNDLER_VERSION: "4.0.0"
RUBY_BIN:
sh: |
if command -v ruby >/dev/null 2>&1; then
dirname "$(command -v ruby)"
elif [[ "$OSTYPE" == "darwin"* ]] && [[ -d "/opt/homebrew/opt/ruby/bin" ]]; then
echo "/opt/homebrew/opt/ruby/bin"
else
echo "ruby"
fi
# Logging
RUST_LOG: "info"
# Root project directories (absolute paths)
ROOT: "{{.ROOT_DIR}}"
CRATES_DIR: "{{.ROOT_DIR}}/crates"
PACKAGES_DIR: "{{.ROOT_DIR}}/packages"
SCRIPTS_DIR: "{{.ROOT_DIR}}/scripts"
TOOLS_DIR: "{{.ROOT_DIR}}/tools"
TARGET_DIR: "{{.ROOT_DIR}}/target"
EXAMPLES_DIR: "{{.ROOT_DIR}}/examples"
# OS Detection - determine operating system
OS:
sh: |
case "$(uname -s 2>/dev/null || echo 'unknown')" in
Darwin*)
echo "darwin"
;;
Linux*)
echo "linux"
;;
MINGW*|MSYS*|CYGWIN*)
echo "windows"
;;
*)
if [[ "$OSTYPE" == "darwin"* ]]; then
echo "darwin"
elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "linux"* ]]; then
echo "linux"
elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "win32" ]]; then
echo "windows"
else
echo "unknown"
fi
;;
esac
# OS Boolean helpers ("true"/"false" strings).
# Derived from Task's built-in OS template function (Go's GOOS) instead of
# $OSTYPE: OSTYPE is a bash/zsh shell variable that is normally not exported,
# so all three helpers reported "false" whenever it was empty.
IS_WINDOWS: '{{if eq OS "windows"}}true{{else}}false{{end}}'
IS_MACOS: '{{if eq OS "darwin"}}true{{else}}false{{end}}'
IS_LINUX: '{{if eq OS "linux"}}true{{else}}false{{end}}'
# Architecture detection - determine CPU architecture
ARCH:
sh: |
ARCH=$(uname -m)
case "$ARCH" in
x86_64|x64)
echo "x86_64"
;;
aarch64|arm64)
echo "arm64"
;;
armv7l|armv7)
echo "armv7"
;;
armv6l|armv6)
echo "armv6"
;;
i686|i386)
echo "i386"
;;
*)
echo "$ARCH"
;;
esac
# Number of CPUs available.
# Prefers `nproc`; otherwise picks a per-OS fallback using Task's built-in OS
# template function (the previous $OSTYPE checks never matched when OSTYPE was
# not exported, so macOS/Windows hosts without nproc always got 4).
NUM_CPUS:
  sh: |
    if command -v nproc >/dev/null 2>&1; then
      nproc
    elif [ "{{OS}}" = "darwin" ]; then
      sysctl -n hw.ncpu
    elif [ "{{OS}}" = "windows" ]; then
      echo "${NUMBER_OF_PROCESSORS:-4}"
    else
      echo "4"
    fi
# GNU Make parallel flag for optimal builds
MAKE_JOBS: "{{.NUM_CPUS}}"
================================================
FILE: .task/languages/python.yml
================================================
version: "3"
internal: true
vars:
BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}"
PYTHON_PKG: "packages/python"
tasks:
install:
desc: "Install Python dependencies with uv"
cmds:
- cd {{.PYTHON_PKG}} && uv sync
- cd {{.PYTHON_PKG}} && uv pip install -e .
build:
desc: "Build Python bindings with maturin ({{.BUILD_PROFILE}} profile)"
cmds:
- cd {{.PYTHON_PKG}} && maturin develop{{if eq .BUILD_PROFILE "release"}} --release{{end}}
build:dev:
desc: "Build Python bindings in debug mode"
cmds:
- cd {{.PYTHON_PKG}} && maturin develop
build:release:
desc: "Build Python bindings in release mode"
cmds:
- cd {{.PYTHON_PKG}} && maturin develop --release
build:ci:
  desc: "Build Python bindings for CI (release with debug info)"
  cmds:
    # Same Cargo profile overrides as the Rust build:ci task, so the release
    # build actually keeps debug info as the description promises.
    - cd {{.PYTHON_PKG}} && CARGO_PROFILE_RELEASE_DEBUG=2 CARGO_PROFILE_RELEASE_STRIP=none maturin develop --release
wheel:
desc: "Build Python wheel distribution"
cmds:
- cd {{.PYTHON_PKG}} && maturin build --release
coverage:
desc: "Generate Python code coverage report (lcov format)"
cmds:
- cd {{.PYTHON_PKG}} && uv run pytest -v --cov=. --cov-report=lcov:coverage.lcov tests/
clean:
desc: "Clean Python build artifacts"
cmds:
- cmd: |
cd {{.PYTHON_PKG}} && python -c "
import shutil, glob
dirs = ['__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache', 'dist', 'build', '.maturin']
for d in dirs:
shutil.rmtree(d, ignore_errors=True)
for f in glob.glob('*.egg-info'):
shutil.rmtree(f, ignore_errors=True)
"
ignore_error: true
================================================
FILE: .task/languages/rust.yml
================================================
# Tasks for the Rust workspace: toolchain setup, builds, tests, coverage,
# linting, dependency maintenance, docs and Rust E2E tests.
#
# NOTE(review): {{.NUM_CPUS}} is not defined in this file; presumably it is
# supplied by the platform/vars configuration of the including Taskfile.
# Verify it is always set, since `-j` with an empty value fails.
version: "3"
internal: true

includes:
  platforms: ../config/platforms.yml

vars:
  # Each variable keeps a caller-supplied value and only falls back to the default.
  RUST_LOG: "{{.RUST_LOG | default \"info\"}}"
  BUILD_PROFILE: "{{.BUILD_PROFILE | default \"release\"}}"
  RUST_BACKTRACE: "{{.RUST_BACKTRACE | default \"1\"}}"
  CARGO_TERM_COLOR: "always"

tasks:
  install:
    desc: "Install Rust toolchain and components (rustup, cargo)"
    silent: false
    cmds:
      - rustup update stable
      - rustup component add rustfmt clippy
      - rustup component add llvm-tools-preview
      - cargo install cargo-llvm-cov --locked
      - cargo install cargo-upgrades --locked
      # cargo-edit provides the `cargo upgrade` subcommand that the `upgrade`
      # task invokes; cargo-upgrades only provides `cargo upgrades` (plural).
      - cargo install cargo-edit --locked
      - cargo --version
      - rustc --version

  build:
    desc: "Build all Rust crates with {{.BUILD_PROFILE}} profile"
    silent: false
    cmds:
      # BUILD_PROFILE is passed straight to `--profile`, so it must be a
      # profile name Cargo accepts.
      - cmd: |
          cargo build --workspace --profile {{.BUILD_PROFILE}} -j {{.NUM_CPUS}}
        ignore_error: false

  build:dev:
    desc: "Build all Rust crates in debug mode"
    silent: false
    cmds:
      - cmd: |
          cargo build --workspace -j {{.NUM_CPUS}}
        ignore_error: false

  build:release:
    desc: "Build all Rust crates in release mode"
    silent: false
    cmds:
      - cmd: |
          cargo build --workspace --release -j {{.NUM_CPUS}}
        ignore_error: false

  build:ci:
    desc: "Build for CI with debug info enabled (no strip)"
    silent: false
    cmds:
      # Release build with the profile overridden via environment variables
      # (full debug info, no stripping); the PHP crate is excluded.
      - cmd: |
          CARGO_PROFILE_RELEASE_DEBUG=2 CARGO_PROFILE_RELEASE_STRIP=none cargo build --workspace --exclude html-to-markdown-php --release -j {{.NUM_CPUS}}
        ignore_error: false

  test:
    desc: "Run Rust test suite"
    silent: false
    cmds:
      # Runs in release mode without default features; the Python and PHP
      # binding crates are excluded.
      - cmd: |
          RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo test --release --no-default-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php -j {{.NUM_CPUS}}
        ignore_error: false

  test:ci:
    desc: "Run tests with coverage for CI (generates lcov)"
    silent: false
    cmds:
      # Step 1: write rust-coverage.lcov. Windows uses an explicit feature
      # list where other platforms use --all-features.
      - cmd: |
          {{if eq OS "windows"}}
          RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo llvm-cov --features metadata,visitor,inline-images --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}}
          {{else}}
          RUST_LOG={{.RUST_LOG}} RUST_BACKTRACE={{.RUST_BACKTRACE}} cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}}
          {{end}}
        ignore_error: false
      # Step 2: print a summary with the same feature/exclude selection.
      # NOTE(review): this is a second full `cargo llvm-cov` invocation, so it
      # appears to run the tests again; confirm whether that is intended.
      - cmd: |
          {{if eq OS "windows"}}
          cargo llvm-cov --features metadata,visitor,inline-images --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --summary-only
          {{else}}
          cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --summary-only
          {{end}}
        ignore_error: false

  coverage:
    desc: "Generate code coverage report (lcov format)"
    silent: false
    cmds:
      # Unlike test:ci, this also excludes html-to-markdown-wasm-wasmtime-tests.
      - cmd: |
          RUST_LOG={{.RUST_LOG}} cargo llvm-cov --all-features --workspace --exclude html-to-markdown-py --exclude html-to-markdown-php --exclude benchmark-harness --exclude html-to-markdown-wasm-wasmtime-tests --lcov --output-path rust-coverage.lcov -j {{.NUM_CPUS}}
        ignore_error: false

  lint:
    desc: "Lint Rust code WITH auto-fix (cargo fmt + cargo clippy --fix)"
    silent: false
    cmds:
      # Both steps rewrite files in the working tree.
      - cmd: cargo fmt --all
        ignore_error: false
      - cmd: |
          cargo clippy --workspace --fix --allow-dirty --allow-staged -j {{.NUM_CPUS}}
        ignore_error: false

  lint:check:
    desc: "Lint Rust code WITHOUT auto-fix (check-only)"
    silent: false
    cmds:
      - cmd: cargo fmt --all --check
        ignore_error: false
      # Warnings are promoted to errors.
      - cmd: |
          cargo clippy -j {{.NUM_CPUS}} --workspace -- -D warnings
        ignore_error: false

  format:
    desc: "Format Rust code (with modifications)"
    silent: false
    cmds:
      - cmd: cargo fmt --all
        ignore_error: false

  format:check:
    desc: "Check Rust formatting without modifications"
    silent: false
    cmds:
      - cmd: cargo fmt --all --check
        ignore_error: false

  update:
    desc: "Update Rust dependencies within major versions (cargo update)"
    silent: false
    cmds:
      # Root workspace first, then three manifests updated separately via
      # --manifest-path (Ruby extension, Elixir NIF, R package).
      - cmd: cargo update
        ignore_error: false
      - cmd: cargo update --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml
        ignore_error: false
      - cmd: cargo update --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml
        ignore_error: false
      - cmd: cargo update --manifest-path packages/r/src/rust/Cargo.toml
        ignore_error: false

  upgrade:
    desc: "Upgrade Rust dependencies to latest including breaking changes (cargo upgrade --incompatible + cargo update)"
    silent: false
    cmds:
      # For each manifest: raise version requirements (cargo-edit's
      # `cargo upgrade`), then refresh the lockfile with `cargo update`.
      - cmd: cargo upgrade --incompatible
        ignore_error: false
      - cmd: cargo update
        ignore_error: false
      - cmd: cargo upgrade --incompatible --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml
        ignore_error: false
      - cmd: cargo update --manifest-path packages/ruby/ext/html_to_markdown_rb/Cargo.toml
        ignore_error: false
      - cmd: cargo upgrade --incompatible --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml
        ignore_error: false
      - cmd: cargo update --manifest-path packages/elixir/native/html_to_markdown_nif/Cargo.toml
        ignore_error: false
      - cmd: cargo upgrade --incompatible --manifest-path packages/r/src/rust/Cargo.toml
        ignore_error: false
      - cmd: cargo update --manifest-path packages/r/src/rust/Cargo.toml
        ignore_error: false

  clean:
    desc: "Clean Rust build artifacts"
    silent: false
    cmds:
      - cmd: cargo clean
        ignore_error: false

  doc:
    desc: "Generate and open Rust documentation"
    silent: false
    cmds:
      # --open launches a browser, so this is meant for interactive use.
      - cmd: |
          cargo doc --workspace --all-features --no-deps --open
        ignore_error: false

  e2e:generate:
    desc: "Generate E2E tests from fixtures using alef"
    silent: false
    cmds:
      - cmd: alef e2e generate --lang rust
        ignore_error: false

  e2e:test:
    desc: "Run Rust E2E tests in e2e/rust directory"
    silent: false
    cmds:
      - cmd: cargo test --manifest-path e2e/rust/Cargo.toml
        ignore_error: false
================================================
FILE: .task/tools/docs.yml
================================================
# Documentation tasks. All generation is delegated to the `alef` CLI; the
# `:check` variants regenerate and then fail if the tracked files changed.
version: "3"

tasks:
  generate-readme:
    desc: "Generate package READMEs using alef"
    cmds:
      - cmd: alef readme

  generate-readme:check:
    desc: "Validate READMEs match generated output (CI mode)"
    cmds:
      - cmd: alef readme
      # Non-zero exit when regeneration modified any tracked README.
      - cmd: git diff --exit-code -- packages/*/README.md crates/*/README.md

  generate-docs:
    desc: "Generate API reference documentation using alef"
    cmds:
      - cmd: alef docs

  generate-docs:check:
    desc: "Validate API docs match generated output (CI mode)"
    cmds:
      - cmd: alef docs
      # Non-zero exit when regeneration modified anything under docs/reference/.
      - cmd: git diff --exit-code -- docs/reference/
================================================
FILE: .task/tools/general.yml
================================================
# General project tooling: prek pre-commit hook management and validation of
# the Taskfile YAML configuration.
version: "3"
internal: true

includes:
  platforms: ../config/platforms.yml

vars:
  # Resolved relative to this Taskfile's own directory.
  SCRIPTS_DIR: "{{.TASKFILE_DIR}}/../../scripts"

tasks:
  pre-commit:install:
    desc: "Install prek pre-commit hooks for commit and commit-msg"
    silent: false
    cmds:
      - cmd: prek install
        ignore_error: false
      - cmd: prek install --hook-type commit-msg
        ignore_error: false

  pre-commit:run:
    desc: "Run prek pre-commit hooks on all files"
    silent: false
    cmds:
      - cmd: prek run --all-files
        ignore_error: false

  pre-commit:uninstall:
    desc: "Uninstall prek hooks"
    silent: false
    cmds:
      # Best-effort: failures are ignored (ignore_error: true).
      - cmd: prek uninstall
        ignore_error: true
      - cmd: prek uninstall --hook-type commit-msg
        ignore_error: true

  validate:config:
    desc: "Validate YAML task configuration files"
    silent: false
    cmds:
      - cmd: |
          # Probe for yamllint once, before announcing any file, so a missing
          # tool never prints a misleading "Validating ..." line.
          if ! command -v yamllint >/dev/null 2>&1; then
            echo "yamllint not found, skipping validation"
            exit 0
          fi
          # `find` recurses regardless of the shell's globstar setting and
          # yields nothing when no file matches, instead of handing a literal
          # glob pattern to yamllint.
          find "{{.TASKFILE_DIR}}" -type f -name '*.yml' | while IFS= read -r file; do
            echo "Validating $file..."
            # Stop at the first failing file; the loop's status becomes the
            # status of the whole command.
            yamllint "$file" || exit 1
          done
        ignore_error: false

  validate:all:
    desc: "Validate all project configurations"
    silent: false
    cmds:
      - task: validate:config
================================================
FILE: .task/tools/version-sync.yml
================================================
# Version management tasks. All synchronisation work is delegated to the
# `alef` CLI through the ALEF variable.
version: "3"
internal: true
includes:
platforms: ../config/platforms.yml
vars:
# Use installed alef binary. Install via: cargo binstall alef-cli
# For local dev with sibling repo: cargo run --manifest-path ../alef/Cargo.toml --
ALEF: "alef"
tasks:
# Runs six alef subcommands in order: sync-versions, readme, docs,
# generate --clean, stubs, e2e generate.
sync:
desc: "Synchronize version across all package manifests and regenerate everything"
cmds:
- "{{.ALEF}} sync-versions"
- "{{.ALEF}} readme"
- "{{.ALEF}} docs"
- "{{.ALEF}} generate --clean"
- "{{.ALEF}} stubs"
- "{{.ALEF}} e2e generate"
# NOTE(review): despite the description, this command only prints the first
# `version = ` value found in Cargo.toml (the same command as `show`); it does
# not compare versions across manifests.
check:
desc: "Check if versions are synchronized (dry-run)"
cmds:
- cmd: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/'
silent: false
# Each bump:* task bumps via `sync-versions --bump <level>` and then runs the
# `sync` task, which calls `sync-versions` a second time without --bump.
bump:major:
desc: "Bump major version (X.0.0) and sync"
cmds:
- "{{.ALEF}} sync-versions --bump major"
- task: sync
bump:minor:
desc: "Bump minor version (0.X.0) and sync"
cmds:
- "{{.ALEF}} sync-versions --bump minor"
- task: sync
bump:patch:
desc: "Bump patch version (0.0.X) and sync"
cmds:
- "{{.ALEF}} sync-versions --bump patch"
- task: sync
# Prints the first `version = "..."` value found in the root Cargo.toml.
show:
desc: "Show current version from Cargo.toml"
cmds:
- cmd: grep -m 1 'version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/'
silent: false
================================================
FILE: .task/workflows/e2e.yml
================================================
# End-to-end test workflows. The *:all tasks call the `alef` CLI directly;
# the Rust-specific test tasks delegate to a `rust:e2e:test` task.
version: "3"
tasks:
generate:all:
desc: Generate all E2E tests from fixtures across all supported languages
cmds:
- alef e2e generate
test:all:
desc: Run all E2E tests across all supported languages
cmds:
- alef test --e2e
lint:all:
desc: Lint all generated E2E test code
cmds:
- alef lint
# Runs the same three commands as generate:all, lint:all and test:all, in
# that order.
verify:all:
desc: Full E2E pipeline - generate, lint, and test all suites
cmds:
- alef e2e generate
- alef lint
- alef test --e2e
generate:rust:
desc: Generate Rust E2E tests from fixtures
cmds:
- alef e2e generate --lang rust
# NOTE(review): `rust:e2e:test` is not defined in this file and there is no
# `includes:` section here; presumably it resolves through the Taskfile that
# includes this one. Verify that the reference resolves from this namespace.
test:rust:
desc: Run Rust E2E tests
cmds:
- task: rust:e2e:test
# Currently identical to test:rust.
quick:
desc: Run quick E2E tests (Rust only)
cmds:
- task: rust:e2e:test
================================================
FILE: .typos.toml
================================================
[files]
# Paths and glob patterns added to the exclusion list: build output, the
# .alef directory, lockfiles and minified JavaScript.
extend-exclude = ["target/", ".alef/", "*.lock", "*.min.js"]
[default.extend-words]
# Add project-specific words here
# Mapping a word to itself (as in the example below) presumably marks it as
# correctly spelled; confirm against the typos documentation.
# crate_name = "crate_name"
================================================
FILE: ATTRIBUTIONS.md
================================================
# Attributions
This project includes vendored code from third-party libraries. This file
provides the required attribution and license information.
## markup5ever_rcdom
- **Version vendored**: 0.36.0+unofficial
- **Original authors**: The html5ever Project Developers
- **Repository**: <https://github.com/servo/html5ever>
- **Vendored into**: `crates/html-to-markdown/src/rcdom.rs`
- **License**: MIT OR Apache-2.0
### MIT License
```text
Copyright (c) 2014 The html5
gitextract_5vwofxnz/ ├── .ai-rulez/ │ ├── config.toml │ ├── context/ │ │ └── crate-structure.md │ ├── domains/ │ │ ├── conversion-algorithms/ │ │ │ └── DOMAIN.md │ │ ├── html-parsing/ │ │ │ └── DOMAIN.md │ │ └── safety-sanitization/ │ │ └── DOMAIN.md │ └── rules/ │ └── alef-generated-bindings.md ├── .cargo/ │ └── config.toml ├── .clang-format ├── .editorconfig ├── .github/ │ ├── CODEOWNERS │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ ├── documentation.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── actions/ │ │ ├── build-typescript/ │ │ │ └── action.yml │ │ └── smoke-pie/ │ │ └── action.yml │ ├── dependabot.yaml │ └── workflows/ │ ├── ci.yaml │ ├── deploy-docs.yaml │ ├── publish.yaml │ ├── validate-issues.yml │ └── validate-pr.yml ├── .gitignore ├── .gitmodules ├── .golangci.yml ├── .mailmap ├── .markdownlint.yaml ├── .mvn/ │ └── wrapper/ │ ├── MavenWrapperDownloader.java │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── .php-cs-fixer.dist.php ├── .pre-commit-config.yaml ├── .ruby-version ├── .rumdl.toml ├── .sdkmanrc ├── .task/ │ ├── README.md │ ├── checksum/ │ │ ├── _lint-typescript-lint │ │ ├── _test-typescript-test │ │ └── typescript-typecheck │ ├── config/ │ │ ├── platforms.yml │ │ └── vars.yml │ ├── languages/ │ │ ├── python.yml │ │ └── rust.yml │ ├── tools/ │ │ ├── docs.yml │ │ ├── general.yml │ │ └── version-sync.yml │ └── workflows/ │ └── e2e.yml ├── .typos.toml ├── ATTRIBUTIONS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── Taskfile.yaml ├── _typos.toml ├── alef.toml ├── composer.json ├── crates/ │ ├── html-to-markdown/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── examples/ │ │ │ ├── basic.rs │ │ │ ├── table.rs │ │ │ ├── test_deser.rs │ │ │ ├── test_escape.rs │ │ │ ├── test_inline_formatting.rs │ │ │ ├── test_lists.rs │ │ │ ├── test_semantic_tags.rs │ │ │ ├── test_tables.rs │ │ │ ├── test_task_lists.rs │ │ │ └── test_whitespace.rs │ │ ├── src/ │ │ │ ├── 
convert_api.rs │ │ │ ├── converter/ │ │ │ │ ├── block/ │ │ │ │ │ ├── blockquote.rs │ │ │ │ │ ├── container.rs │ │ │ │ │ ├── div.rs │ │ │ │ │ ├── heading.rs │ │ │ │ │ ├── horizontal_rule.rs │ │ │ │ │ ├── line_break.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── paragraph.rs │ │ │ │ │ ├── preformatted.rs │ │ │ │ │ ├── table/ │ │ │ │ │ │ ├── builder.rs │ │ │ │ │ │ ├── caption.rs │ │ │ │ │ │ ├── cell.rs │ │ │ │ │ │ ├── cells.rs │ │ │ │ │ │ ├── layout.rs │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ ├── scanner.rs │ │ │ │ │ │ └── utils.rs │ │ │ │ │ └── unknown.rs │ │ │ │ ├── context.rs │ │ │ │ ├── dom_context.rs │ │ │ │ ├── form/ │ │ │ │ │ ├── elements.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── format/ │ │ │ │ │ ├── djot.rs │ │ │ │ │ ├── markdown.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── handlers/ │ │ │ │ │ ├── blockquote.rs │ │ │ │ │ ├── code_block.rs │ │ │ │ │ ├── graphic.rs │ │ │ │ │ ├── image.rs │ │ │ │ │ ├── link.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── inline/ │ │ │ │ │ ├── code.rs │ │ │ │ │ ├── emphasis.rs │ │ │ │ │ ├── link.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── ruby.rs │ │ │ │ │ └── semantic/ │ │ │ │ │ ├── marks.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── typography.rs │ │ │ │ ├── list/ │ │ │ │ │ ├── definition.rs │ │ │ │ │ ├── item.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── ordered.rs │ │ │ │ │ ├── unordered.rs │ │ │ │ │ └── utils.rs │ │ │ │ ├── main.rs │ │ │ │ ├── main_helpers.rs │ │ │ │ ├── media/ │ │ │ │ │ ├── embedded.rs │ │ │ │ │ ├── graphic.rs │ │ │ │ │ ├── image.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── svg.rs │ │ │ │ ├── metadata.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── plain_text.rs │ │ │ │ ├── preprocessing_helpers.rs │ │ │ │ ├── reference_collector.rs │ │ │ │ ├── semantic/ │ │ │ │ │ ├── attributes.rs │ │ │ │ │ ├── definition_list.rs │ │ │ │ │ ├── figure.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── sectioning.rs │ │ │ │ │ └── summary.rs │ │ │ │ ├── text/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── processing.rs │ │ │ │ ├── text_node.rs │ │ │ │ ├── utility/ │ │ │ │ │ ├── attributes.rs │ │ │ │ │ ├── caching.rs │ │ │ │ │ ├── 
content.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── preprocessing.rs │ │ │ │ │ ├── serialization.rs │ │ │ │ │ └── siblings.rs │ │ │ │ └── visitor_hooks.rs │ │ │ ├── error.rs │ │ │ ├── exports.rs │ │ │ ├── inline_images.rs │ │ │ ├── lib.rs │ │ │ ├── metadata/ │ │ │ │ ├── collector.rs │ │ │ │ ├── config.rs │ │ │ │ ├── extraction.rs │ │ │ │ ├── mod.rs │ │ │ │ └── types.rs │ │ │ ├── options/ │ │ │ │ ├── conversion.rs │ │ │ │ ├── inline_image.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── preprocessing.rs │ │ │ │ └── validation.rs │ │ │ ├── prelude.rs │ │ │ ├── rcdom.rs │ │ │ ├── text.rs │ │ │ ├── types/ │ │ │ │ ├── document.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── result.rs │ │ │ │ ├── structure_builder.rs │ │ │ │ ├── structure_collector.rs │ │ │ │ ├── tables.rs │ │ │ │ └── warnings.rs │ │ │ ├── validation.rs │ │ │ ├── visitor/ │ │ │ │ ├── default_impl.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── traits.rs │ │ │ │ └── types.rs │ │ │ ├── visitor_helpers/ │ │ │ │ └── helpers/ │ │ │ │ ├── callbacks/ │ │ │ │ │ └── mod.rs │ │ │ │ ├── content.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── state.rs │ │ │ │ └── traversal.rs │ │ │ ├── visitor_helpers.rs │ │ │ ├── wrapper/ │ │ │ │ ├── sync.rs │ │ │ │ └── utils.rs │ │ │ └── wrapper.rs │ │ └── tests/ │ │ ├── br_in_inline_test.rs │ │ ├── commonmark_compliance_test.rs │ │ ├── djot_output_test.rs │ │ ├── exclude_selectors_test.rs │ │ ├── integration_test.rs │ │ ├── issue_121_regressions.rs │ │ ├── issue_127_regressions.rs │ │ ├── issue_128_regressions.rs │ │ ├── issue_131_regressions.rs │ │ ├── issue_134_regressions.rs │ │ ├── issue_139_regressions.rs │ │ ├── issue_140_regressions.rs │ │ ├── issue_143_regressions.rs │ │ ├── issue_145_regressions.rs │ │ ├── issue_146_regressions.rs │ │ ├── issue_176_regressions.rs │ │ ├── issue_190_regressions.rs │ │ ├── issue_199_regressions.rs │ │ ├── issue_200_regressions.rs │ │ ├── issue_212_regressions.rs │ │ ├── issue_216_217_regressions.rs │ │ ├── json_ld_script_extraction.rs │ │ ├── lists_test.rs │ │ ├── plain_output_test.rs │ │ ├── 
preprocessing_tests.rs │ │ ├── reference_links_test.rs │ │ ├── sectioning_elements_test.rs │ │ ├── skip_images_test.rs │ │ ├── tables_test.rs │ │ ├── test_custom_elements.rs │ │ ├── test_issue_187.rs │ │ ├── test_issue_218.rs │ │ ├── test_issue_277.rs │ │ ├── test_max_depth.rs │ │ ├── test_nested_simple.rs │ │ ├── test_script_style_stripping.rs │ │ ├── test_spa_bisect.rs │ │ ├── visitor_code_integration_test.rs │ │ ├── visitor_integration_test.rs │ │ └── xml_tables_test.rs │ ├── html-to-markdown-cli/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ ├── args.rs │ │ │ ├── convert.rs │ │ │ ├── main.rs │ │ │ ├── output.rs │ │ │ ├── utils.rs │ │ │ └── validators.rs │ │ └── tests/ │ │ └── cli_test.rs │ ├── html-to-markdown-ffi/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── cbindgen.toml │ │ ├── cmake/ │ │ │ └── html-to-markdown-ffi-config.cmake │ │ ├── include/ │ │ │ └── html_to_markdown.h │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-node/ │ │ ├── Cargo.toml │ │ ├── index.d.ts │ │ ├── index.js │ │ ├── npm/ │ │ │ ├── darwin-arm64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── darwin-x64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm-gnueabihf/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── win32-arm64-msvc/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ └── win32-x64-msvc/ │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── package.json │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-php/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-py/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── html-to-markdown-rs-ffi/ │ │ └── README.md │ ├── html-to-markdown-rs-wasm/ │ │ └── README.md │ └── html-to-markdown-wasm/ │ ├── 
Cargo.toml │ ├── package.json │ ├── scripts/ │ │ ├── cleanup-gitignore.js │ │ └── patch-bundler-entry.js │ └── src/ │ └── lib.rs ├── deny.toml ├── docs/ │ ├── CNAME │ ├── api-reference.md │ ├── architecture.md │ ├── cli.md │ ├── configuration.md │ ├── contributing.md │ ├── css/ │ │ └── extra.css │ ├── demo/ │ │ ├── html_to_markdown_wasm.js │ │ ├── html_to_markdown_wasm_bg.wasm │ │ ├── index.html │ │ ├── script.js │ │ └── style.css │ ├── errors.md │ ├── index.md │ ├── installation.md │ ├── language-guides.md │ ├── llms.txt │ ├── migration.md │ ├── overrides/ │ │ └── main.html │ ├── reference/ │ │ ├── api-c.md │ │ ├── api-csharp.md │ │ ├── api-elixir.md │ │ ├── api-go.md │ │ ├── api-java.md │ │ ├── api-php.md │ │ ├── api-python.md │ │ ├── api-r.md │ │ ├── api-ruby.md │ │ ├── api-rust.md │ │ ├── api-typescript.md │ │ ├── api-wasm.md │ │ ├── configuration.md │ │ ├── errors.md │ │ └── types.md │ ├── snippets/ │ │ ├── c/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── csharp/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── elixir/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── feedback.md │ │ ├── go/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── java/ │ │ │ ├── getting-started/ │ │ │ 
│ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── php/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── python/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── r/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── ruby/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── rust/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ ├── typescript/ │ │ │ ├── getting-started/ │ │ │ │ ├── basic_usage.md │ │ │ │ └── with_options.md │ │ │ ├── metadata/ │ │ │ │ └── basic_extraction.md │ │ │ ├── table-extraction/ │ │ │ │ └── basic_extraction.md │ │ │ └── visitor/ │ │ │ └── basic_visitor.md │ │ └── wasm/ │ │ ├── getting-started/ │ │ │ ├── basic_usage.md │ │ │ └── with_options.md │ │ ├── metadata/ │ │ │ └── basic_extraction.md │ │ ├── table-extraction/ │ │ │ └── basic_extraction.md │ │ └── visitor/ │ │ └── basic_visitor.md │ ├── tables.md │ ├── 
usage.md │ └── visitor.md ├── e2e/ │ ├── c/ │ │ ├── Makefile │ │ ├── download_ffi.sh │ │ ├── main.c │ │ ├── test_conversion.c │ │ ├── test_edge_cases.c │ │ ├── test_metadata.c │ │ ├── test_options.c │ │ ├── test_real_world.c │ │ ├── test_result.c │ │ ├── test_runner.h │ │ ├── test_smoke.c │ │ └── test_structure.c │ ├── csharp/ │ │ ├── HtmlToMarkdown.E2eTests.csproj │ │ └── tests/ │ │ ├── ConversionTests.cs │ │ ├── EdgeCasesTests.cs │ │ ├── MetadataTests.cs │ │ ├── OptionsTests.cs │ │ ├── RealWorldTests.cs │ │ ├── ResultTests.cs │ │ ├── SmokeTests.cs │ │ ├── StructureTests.cs │ │ └── VisitorTests.cs │ ├── dart/ │ │ └── pubspec.yaml │ ├── elixir/ │ │ ├── mix.exs │ │ └── test/ │ │ ├── conversion_test.exs │ │ ├── edge_cases_test.exs │ │ ├── metadata_test.exs │ │ ├── options_test.exs │ │ ├── real_world_test.exs │ │ ├── result_test.exs │ │ ├── smoke_test.exs │ │ ├── structure_test.exs │ │ ├── test_helper.exs │ │ └── visitor_test.exs │ ├── gleam/ │ │ └── gleam.toml │ ├── go/ │ │ ├── conversion_test.go │ │ ├── edge_cases_test.go │ │ ├── go.mod │ │ ├── go.sum │ │ ├── metadata_test.go │ │ ├── options_test.go │ │ ├── real_world_test.go │ │ ├── result_test.go │ │ ├── smoke_test.go │ │ ├── structure_test.go │ │ └── visitor_test.go │ ├── java/ │ │ ├── pom.xml │ │ └── src/ │ │ └── test/ │ │ └── java/ │ │ └── dev/ │ │ └── kreuzberg/ │ │ └── htmltomarkdown/ │ │ └── e2e/ │ │ ├── ConversionTest.java │ │ ├── EdgeCasesTest.java │ │ ├── MetadataTest.java │ │ ├── OptionsTest.java │ │ ├── RealWorldTest.java │ │ ├── ResultTest.java │ │ ├── SmokeTest.java │ │ ├── StructureTest.java │ │ └── VisitorTest.java │ ├── kotlin/ │ │ └── build.gradle.kts │ ├── node/ │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ ├── edge_cases.test.ts │ │ │ ├── metadata.test.ts │ │ │ ├── options.test.ts │ │ │ ├── real_world.test.ts │ │ │ ├── result.test.ts │ │ │ ├── smoke.test.ts │ │ │ ├── structure.test.ts │ │ │ └── visitor.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ ├── php/ │ │ 
├── bootstrap.php │ │ ├── composer.json │ │ ├── phpunit.xml │ │ └── tests/ │ │ ├── ConversionTest.php │ │ ├── EdgeCasesTest.php │ │ ├── MetadataTest.php │ │ ├── OptionsTest.php │ │ ├── RealWorldTest.php │ │ ├── ResultTest.php │ │ ├── SmokeTest.php │ │ ├── StructureTest.php │ │ └── VisitorTest.php │ ├── python/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── pyproject.toml │ │ └── tests/ │ │ ├── __init__.py │ │ ├── test_conversion.py │ │ ├── test_edge_cases.py │ │ ├── test_metadata.py │ │ ├── test_options.py │ │ ├── test_real_world.py │ │ ├── test_result.py │ │ ├── test_smoke.py │ │ ├── test_structure.py │ │ └── test_visitor.py │ ├── r/ │ │ ├── DESCRIPTION │ │ ├── run_tests.R │ │ └── tests/ │ │ ├── test_conversion.R │ │ ├── test_edge_cases.R │ │ ├── test_metadata.R │ │ ├── test_options.R │ │ ├── test_real_world.R │ │ ├── test_result.R │ │ ├── test_smoke.R │ │ ├── test_structure.R │ │ └── test_visitor.R │ ├── ruby/ │ │ ├── .rubocop.yaml │ │ ├── Gemfile │ │ └── spec/ │ │ ├── conversion_spec.rb │ │ ├── edge_cases_spec.rb │ │ ├── metadata_spec.rb │ │ ├── options_spec.rb │ │ ├── real_world_spec.rb │ │ ├── result_spec.rb │ │ ├── smoke_spec.rb │ │ ├── structure_spec.rb │ │ └── visitor_spec.rb │ ├── rust/ │ │ ├── Cargo.toml │ │ └── tests/ │ │ ├── conversion_test.rs │ │ ├── edge_cases_test.rs │ │ ├── metadata_test.rs │ │ ├── options_test.rs │ │ ├── real_world_test.rs │ │ ├── result_test.rs │ │ ├── smoke_test.rs │ │ ├── structure_test.rs │ │ └── visitor_test.rs │ ├── swift/ │ │ └── Package.swift │ ├── wasm/ │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ ├── edge_cases.test.ts │ │ │ ├── metadata.test.ts │ │ │ ├── options.test.ts │ │ │ ├── real_world.test.ts │ │ │ ├── result.test.ts │ │ │ ├── smoke.test.ts │ │ │ ├── structure.test.ts │ │ │ └── visitor.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ └── zig/ │ ├── build.zig │ └── build.zig.zon ├── fixtures/ │ ├── conversion/ │ │ ├── blockquotes.json │ │ ├── code.json │ │ ├── emphasis.json │ │ ├── 
forms.json │ │ ├── headings.json │ │ ├── images.json │ │ ├── line_breaks.json │ │ ├── links.json │ │ ├── lists.json │ │ ├── paragraphs.json │ │ ├── semantic.json │ │ └── tables.json │ ├── edge-cases/ │ │ ├── empty.json │ │ ├── encoding.json │ │ ├── malformed.json │ │ ├── visitor_errors.json │ │ └── xss.json │ ├── metadata/ │ │ ├── basic.json │ │ ├── document_properties.json │ │ ├── links_and_images.json │ │ ├── open_graph.json │ │ └── structured_data.json │ ├── options/ │ │ ├── br_in_tables.json │ │ ├── code_block_style.json │ │ ├── code_options.json │ │ ├── escape_ascii.json │ │ ├── escaping.json │ │ ├── exclude_selectors.json │ │ ├── heading_style.json │ │ ├── highlight_style.json │ │ ├── inline_and_newlines.json │ │ ├── list_options.json │ │ ├── max_depth.json │ │ ├── newline_style.json │ │ ├── output_format.json │ │ ├── preprocessing.json │ │ ├── remaining_options.json │ │ ├── strong_em_symbol.json │ │ ├── sub_sup_symbols.json │ │ ├── tag_control.json │ │ ├── whitespace_mode.json │ │ └── wrapping.json │ ├── real-world/ │ │ └── articles.json │ ├── result/ │ │ ├── tables.json │ │ └── warnings.json │ ├── smoke/ │ │ └── basic.json │ ├── structure/ │ │ ├── basic.json │ │ └── nesting.json │ └── visitor/ │ ├── advanced_elements.json │ ├── basic.json │ ├── elements.json │ ├── formatting.json │ ├── forms_and_semantics.json │ ├── headings.json │ ├── images.json │ ├── links.json │ └── media.json ├── just ├── package.json ├── packages/ │ ├── csharp/ │ │ ├── .editorconfig │ │ ├── Directory.Build.props │ │ ├── HtmlToMarkdown/ │ │ │ ├── AnnotationKind.cs │ │ │ ├── CodeBlockStyle.cs │ │ │ ├── ConfigErrorException.cs │ │ │ ├── ConversionErrorException.cs │ │ │ ├── ConversionOptions.cs │ │ │ ├── ConversionOptionsBuilder.cs │ │ │ ├── ConversionOptionsUpdate.cs │ │ │ ├── ConversionResult.cs │ │ │ ├── DocumentMetadata.cs │ │ │ ├── DocumentNode.cs │ │ │ ├── DocumentStructure.cs │ │ │ ├── GridCell.cs │ │ │ ├── HeaderMetadata.cs │ │ │ ├── HeadingStyle.cs │ │ │ ├── HighlightStyle.cs │ 
│ │ ├── HtmlMetadata.cs │ │ │ ├── HtmlToMarkdown.csproj │ │ │ ├── HtmlToMarkdownRs.cs │ │ │ ├── HtmlToMarkdownRsException.cs │ │ │ ├── IVisitor.cs │ │ │ ├── ImageMetadata.cs │ │ │ ├── ImageType.cs │ │ │ ├── InvalidInputException.cs │ │ │ ├── IoErrorException.cs │ │ │ ├── LinkMetadata.cs │ │ │ ├── LinkStyle.cs │ │ │ ├── LinkType.cs │ │ │ ├── ListIndentType.cs │ │ │ ├── NativeMethods.cs │ │ │ ├── NewlineStyle.cs │ │ │ ├── NodeContent.cs │ │ │ ├── NodeContext.cs │ │ │ ├── NodeType.cs │ │ │ ├── OtherException.cs │ │ │ ├── OutputFormat.cs │ │ │ ├── PanicException.cs │ │ │ ├── ParseErrorException.cs │ │ │ ├── PreprocessingOptions.cs │ │ │ ├── PreprocessingOptionsUpdate.cs │ │ │ ├── PreprocessingPreset.cs │ │ │ ├── ProcessingWarning.cs │ │ │ ├── SanitizationErrorException.cs │ │ │ ├── StructuredData.cs │ │ │ ├── StructuredDataType.cs │ │ │ ├── TableData.cs │ │ │ ├── TableGrid.cs │ │ │ ├── TextAnnotation.cs │ │ │ ├── TextDirection.cs │ │ │ ├── TraitBridges.cs │ │ │ ├── VisitResult.cs │ │ │ ├── VisitorCallbacks.cs │ │ │ ├── VisitorHandle.cs │ │ │ ├── WarningKind.cs │ │ │ └── WhitespaceMode.cs │ │ ├── HtmlToMarkdown.Tests/ │ │ │ └── HtmlToMarkdown.Tests.csproj │ │ ├── HtmlToMarkdown.csproj │ │ └── README.md │ ├── elixir/ │ │ ├── .credo.exs │ │ ├── .formatter.exs │ │ ├── .gitignore │ │ ├── README.md │ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs │ │ ├── config/ │ │ │ └── config.exs │ │ ├── lib/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── annotation_kind.ex │ │ │ │ ├── code_block_style.ex │ │ │ │ ├── conversion_options.ex │ │ │ │ ├── conversion_options_update.ex │ │ │ │ ├── conversion_result.ex │ │ │ │ ├── document_metadata.ex │ │ │ │ ├── document_node.ex │ │ │ │ ├── document_structure.ex │ │ │ │ ├── grid_cell.ex │ │ │ │ ├── header_metadata.ex │ │ │ │ ├── heading_style.ex │ │ │ │ ├── highlight_style.ex │ │ │ │ ├── html_metadata.ex │ │ │ │ ├── html_visitor_bridge.ex │ │ │ │ ├── image_metadata.ex │ │ │ │ ├── image_type.ex │ │ │ │ ├── link_metadata.ex │ │ │ │ ├── link_style.ex │ │ 
│ │ ├── link_type.ex │ │ │ │ ├── list_indent_type.ex │ │ │ │ ├── native.ex │ │ │ │ ├── newline_style.ex │ │ │ │ ├── node_content.ex │ │ │ │ ├── node_context.ex │ │ │ │ ├── node_type.ex │ │ │ │ ├── output_format.ex │ │ │ │ ├── preprocessing_options.ex │ │ │ │ ├── preprocessing_options_update.ex │ │ │ │ ├── preprocessing_preset.ex │ │ │ │ ├── processing_warning.ex │ │ │ │ ├── structured_data.ex │ │ │ │ ├── structured_data_type.ex │ │ │ │ ├── table_data.ex │ │ │ │ ├── table_grid.ex │ │ │ │ ├── text_annotation.ex │ │ │ │ ├── text_direction.ex │ │ │ │ ├── visit_result.ex │ │ │ │ ├── warning_kind.ex │ │ │ │ └── whitespace_mode.ex │ │ │ └── html_to_markdown.ex │ │ ├── mix.exs │ │ ├── native/ │ │ │ └── html_to_markdown_nif/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ └── lib.rs │ │ └── test/ │ │ └── test_helper.exs │ ├── go/ │ │ ├── .golangci.yml │ │ ├── README.md │ │ ├── binding.go │ │ ├── go.mod │ │ └── v3/ │ │ └── README.md │ ├── java/ │ │ ├── README.md │ │ ├── checkstyle-suppressions.xml │ │ ├── checkstyle.properties │ │ ├── checkstyle.xml │ │ ├── eclipse-formatter.xml │ │ ├── pmd-ruleset.xml │ │ ├── pom.xml │ │ ├── pom.xml.versionsBackup │ │ ├── src/ │ │ │ └── main/ │ │ │ ├── java/ │ │ │ │ └── dev/ │ │ │ │ └── kreuzberg/ │ │ │ │ └── htmltomarkdown/ │ │ │ │ ├── AnnotationKind.java │ │ │ │ ├── CodeBlockStyle.java │ │ │ │ ├── ConfigErrorException.java │ │ │ │ ├── ConversionErrorException.java │ │ │ │ ├── ConversionOptions.java │ │ │ │ ├── ConversionOptionsBuilder.java │ │ │ │ ├── ConversionOptionsUpdate.java │ │ │ │ ├── ConversionOptionsUpdateBuilder.java │ │ │ │ ├── ConversionResult.java │ │ │ │ ├── ConversionResultBuilder.java │ │ │ │ ├── DocumentMetadata.java │ │ │ │ ├── DocumentMetadataBuilder.java │ │ │ │ ├── DocumentNode.java │ │ │ │ ├── DocumentStructure.java │ │ │ │ ├── GridCell.java │ │ │ │ ├── HeaderMetadata.java │ │ │ │ ├── HeadingStyle.java │ │ │ │ ├── HighlightStyle.java │ │ │ │ ├── HtmlMetadata.java │ │ │ │ ├── HtmlMetadataBuilder.java │ │ │ │ ├── 
HtmlToMarkdown.java │ │ │ │ ├── HtmlToMarkdownRs.java │ │ │ │ ├── HtmlToMarkdownRsException.java │ │ │ │ ├── HtmlVisitorBridge.java │ │ │ │ ├── IHtmlVisitor.java │ │ │ │ ├── ImageMetadata.java │ │ │ │ ├── ImageType.java │ │ │ │ ├── InvalidInputException.java │ │ │ │ ├── IoErrorException.java │ │ │ │ ├── LinkMetadata.java │ │ │ │ ├── LinkStyle.java │ │ │ │ ├── LinkType.java │ │ │ │ ├── ListIndentType.java │ │ │ │ ├── NativeLib.java │ │ │ │ ├── NewlineStyle.java │ │ │ │ ├── NodeContent.java │ │ │ │ ├── NodeContext.java │ │ │ │ ├── NodeType.java │ │ │ │ ├── OtherException.java │ │ │ │ ├── OutputFormat.java │ │ │ │ ├── PanicException.java │ │ │ │ ├── ParseErrorException.java │ │ │ │ ├── PreprocessingOptions.java │ │ │ │ ├── PreprocessingOptionsBuilder.java │ │ │ │ ├── PreprocessingOptionsUpdate.java │ │ │ │ ├── PreprocessingOptionsUpdateBuilder.java │ │ │ │ ├── PreprocessingPreset.java │ │ │ │ ├── ProcessingWarning.java │ │ │ │ ├── SanitizationErrorException.java │ │ │ │ ├── StructuredData.java │ │ │ │ ├── StructuredDataType.java │ │ │ │ ├── TableData.java │ │ │ │ ├── TableGrid.java │ │ │ │ ├── TableGridBuilder.java │ │ │ │ ├── TestVisitor.java │ │ │ │ ├── TestVisitorAdapter.java │ │ │ │ ├── TextAnnotation.java │ │ │ │ ├── TextDirection.java │ │ │ │ ├── VisitContext.java │ │ │ │ ├── VisitResult.java │ │ │ │ ├── Visitor.java │ │ │ │ ├── VisitorBridge.java │ │ │ │ ├── VisitorHandle.java │ │ │ │ ├── WarningKind.java │ │ │ │ ├── WhitespaceMode.java │ │ │ │ └── package-info.java │ │ │ └── resources/ │ │ │ └── .gitkeep │ │ └── versions-rules.xml │ ├── node/ │ │ ├── .oxfmtrc.json │ │ ├── .oxlintrc.json │ │ ├── biome.json │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── src/ │ │ │ └── index.d.ts │ │ └── tsconfig.json │ ├── php/ │ │ ├── .gitignore │ │ ├── .php-cs-fixer.dist.php │ │ ├── README.md │ │ ├── composer.json │ │ ├── php-cs-fixer.php │ │ ├── phpstan-baseline.neon │ │ ├── phpstan-test.neon │ │ ├── phpstan.neon │ │ ├── phpunit.xml │ │ ├── src/ │ │ │ ├── HtmlToMarkdown.php 
│ │ │ └── functions.php │ │ ├── stubs/ │ │ │ └── html_to_markdown_extension.php │ │ └── tests/ │ │ └── .gitkeep │ ├── python/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── html_to_markdown/ │ │ │ ├── __init__.py │ │ │ ├── _html_to_markdown.pyi │ │ │ ├── api.py │ │ │ ├── exceptions.py │ │ │ ├── options.py │ │ │ └── py.typed │ │ ├── pyproject.toml │ │ └── tests/ │ │ └── commonmark_spec.json │ ├── r/ │ │ ├── .Rbuildignore │ │ ├── .gitignore │ │ ├── .lintr │ │ ├── DESCRIPTION │ │ ├── LICENSE │ │ ├── NAMESPACE │ │ ├── R/ │ │ │ ├── extendr-wrappers.R │ │ │ ├── htmltomarkdown-package.R │ │ │ ├── htmltomarkdown.R │ │ │ ├── options.R │ │ │ └── version.R │ │ ├── README.md │ │ ├── cleanup │ │ ├── cleanup.win │ │ ├── configure │ │ ├── configure.win │ │ ├── inst/ │ │ │ └── AUTHORS │ │ ├── man/ │ │ │ ├── conversion_options.Rd │ │ │ ├── convert.Rd │ │ │ ├── htmltomarkdown-package.Rd │ │ │ └── version.Rd │ │ ├── src/ │ │ │ ├── Makevars.in │ │ │ ├── Makevars.win.in │ │ │ ├── entrypoint.c │ │ │ └── rust/ │ │ │ ├── Cargo.toml │ │ │ ├── src/ │ │ │ │ ├── lib.rs │ │ │ │ ├── options.rs │ │ │ │ └── types.rs │ │ │ └── vendor-config.toml │ │ ├── tests/ │ │ │ └── testthat.R │ │ └── tools/ │ │ ├── config.R │ │ └── msrv.R │ ├── ruby/ │ │ ├── .gitignore │ │ ├── .rubocop.yml │ │ ├── Gemfile │ │ ├── README.md │ │ ├── Rakefile │ │ ├── Steepfile │ │ ├── exe/ │ │ │ └── html-to-markdown │ │ ├── ext/ │ │ │ └── html_to_markdown_rb/ │ │ │ ├── Cargo.toml │ │ │ ├── Makefile │ │ │ ├── extconf.rb │ │ │ ├── native/ │ │ │ │ └── Cargo.toml │ │ │ └── src/ │ │ │ ├── html-to-markdown/ │ │ │ │ └── version.rb │ │ │ ├── html-to-markdown.rb │ │ │ └── lib.rs │ │ ├── html_to_markdown.gemspec │ │ ├── lib/ │ │ │ ├── html_to_markdown/ │ │ │ │ └── version.rb │ │ │ └── html_to_markdown.rb │ │ ├── sig/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── cli.rbs │ │ │ │ └── cli_proxy.rbs │ │ │ ├── open3.rbs │ │ │ └── types.rbs │ │ └── spec/ │ │ ├── html_to_markdown_spec.rb │ │ └── spec_helper.rb │ ├── typescript/ │ │ ├── .npmignore │ │ ├── 
README.md │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── src/ │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ └── wasm/ │ └── src/ │ ├── helpers.ts │ └── index.ts ├── pnpm-workspace.yaml ├── pyproject.toml ├── readme_templates/ │ ├── language_package.md │ └── partials/ │ ├── _api_reference.md │ ├── _badges.md │ ├── _djot_output.md │ ├── _footer.md │ ├── _installation.md │ ├── _metadata_extraction.md │ ├── _plain_text_output.md │ ├── _quick_start.md │ └── _visitor_pattern.md ├── rust-toolchain.toml ├── rustfmt.toml ├── scripts/ │ ├── build-demo.sh │ ├── ci/ │ │ ├── elixir/ │ │ │ ├── install-deps.sh │ │ │ ├── install-hex-rebar.sh │ │ │ ├── run-credo.sh │ │ │ └── run-tests.sh │ │ ├── go/ │ │ │ ├── detect-go-modules.sh │ │ │ ├── install-golangci-lint.sh │ │ │ └── run-golangci-lint.sh │ │ ├── node/ │ │ │ ├── test-napi-cargo.sh │ │ │ ├── test-napi.sh │ │ │ └── test-typescript.sh │ │ ├── php/ │ │ │ ├── run-php-tests.sh │ │ │ ├── run-phpstan.sh │ │ │ └── set-php-config.sh │ │ ├── python/ │ │ │ ├── build-cli.sh │ │ │ └── run-pytest.sh │ │ ├── r/ │ │ │ ├── install-deps.sh │ │ │ ├── run-lintr.sh │ │ │ ├── run-tests.sh │ │ │ └── vendor-core-crate.py │ │ ├── ruby/ │ │ │ ├── run-rbs-validate.sh │ │ │ ├── run-rspec-unix.sh │ │ │ ├── run-rspec-windows.ps1 │ │ │ ├── run-rubocop.sh │ │ │ ├── run-steep.sh │ │ │ └── vendor-core-crate.py │ │ ├── rust/ │ │ │ ├── check-fmt.sh │ │ │ ├── install-cargo-llvm-cov.sh │ │ │ ├── run-clippy.sh │ │ │ ├── run-llvm-cov.sh │ │ │ └── run-tests.sh │ │ ├── smoke/ │ │ │ ├── capture-php-config.sh │ │ │ └── install-pnpm-deps.sh │ │ ├── validate/ │ │ │ ├── install-elixir-deps.sh │ │ │ ├── install-ruby-deps.sh │ │ │ ├── run-prek.sh │ │ │ └── run-rust-checks.sh │ │ └── wasm/ │ │ ├── run-wasmtime-tests.sh │ │ ├── test-wasm-bundle.sh │ │ └── test-wasm-rust.sh │ ├── common/ │ │ ├── enable-corepack.sh │ │ ├── ensure-wasm-target.sh │ │ ├── install-maven-latest.sh │ │ └── install-wasm-pack.sh │ ├── generate_visitor_callbacks.py │ ├── 
preferred-ruby.sh │ ├── preferred-rustc.sh │ ├── prepare_ruby_gem.rb │ ├── prepare_wheel.py │ ├── publish/ │ │ ├── cli/ │ │ │ ├── build-cli.sh │ │ │ ├── configure-cross-linker.sh │ │ │ ├── install-build-deps-linux.sh │ │ │ ├── install-cross.sh │ │ │ ├── package-cli-artifact.ps1 │ │ │ └── package-cli-artifact.sh │ │ ├── common/ │ │ │ ├── add-rust-target.sh │ │ │ └── ensure-target-commit.sh │ │ ├── crates/ │ │ │ ├── package-crates.sh │ │ │ ├── publish-cli.sh │ │ │ ├── publish-rs.sh │ │ │ ├── verify-cargo-version.sh │ │ │ └── wait-for-indexing.sh │ │ ├── csharp/ │ │ │ ├── pack.sh │ │ │ ├── restore.sh │ │ │ └── stage-ffi.sh │ │ ├── elixir/ │ │ │ ├── build-hex-package.sh │ │ │ ├── install-deps.sh │ │ │ ├── install-hex-rebar.sh │ │ │ ├── run-tests.sh │ │ │ ├── stage-rust-core.sh │ │ │ └── vendor-dependencies.sh │ │ ├── ensure-github-release-exists.sh │ │ ├── generate_elixir_checksums.sh │ │ ├── go/ │ │ │ └── create-module-tag.sh │ │ ├── java/ │ │ │ └── copy-native-libs.sh │ │ ├── maven/ │ │ │ ├── patch-legacy-gpg-args.sh │ │ │ └── prefer-gpg2.sh │ │ ├── node/ │ │ │ ├── build-native-module.ps1 │ │ │ ├── build-native-module.sh │ │ │ ├── clean-npm-dir.ps1 │ │ │ ├── clean-npm-dir.sh │ │ │ ├── create-npm-package-structure.sh │ │ │ ├── generate-typescript-defs.sh │ │ │ ├── install-node-deps.sh │ │ │ ├── pack-platform-packages.sh │ │ │ ├── package-artifacts.ps1 │ │ │ ├── package-artifacts.sh │ │ │ ├── prepare-artifact-directory.sh │ │ │ └── prepublish-main-package.sh │ │ ├── python/ │ │ │ ├── build-cli-for-sdist.sh │ │ │ ├── build-sdist.sh │ │ │ ├── install-build-deps.sh │ │ │ └── prepare-sdist-with-cli.sh │ │ ├── r/ │ │ │ ├── already-published-summary.sh │ │ │ ├── build-cran-package.sh │ │ │ ├── run-tests.sh │ │ │ ├── stage-rust-core.sh │ │ │ └── vendor-dependencies.sh │ │ ├── ruby/ │ │ │ ├── already-published-summary.sh │ │ │ ├── build-gem-unix.sh │ │ │ ├── build-gem-windows.ps1 │ │ │ ├── build-native-gem.rb │ │ │ ├── configure-bindgen-windows.sh │ │ │ ├── 
install-deps-unix.sh │ │ │ ├── install-deps-windows.ps1 │ │ │ ├── install-msys2-toolchain.ps1 │ │ │ ├── install-rust-gnu.ps1 │ │ │ └── remove-cached-cli.sh │ │ ├── typescript/ │ │ │ └── build-package.sh │ │ ├── upload-c-ffi-artifacts.sh │ │ ├── upload-cli-artifacts.sh │ │ ├── upload-elixir-package.sh │ │ ├── upload-go-ffi-artifacts.sh │ │ ├── upload-homebrew-bottles.sh │ │ ├── upload-php-pie.sh │ │ ├── validate-and-compute-metadata.sh │ │ └── wasm/ │ │ ├── build-bundles.sh │ │ ├── extract-artifacts.sh │ │ ├── install-deps.sh │ │ └── package-artifacts.sh │ ├── readme_config.yaml │ ├── readme_templates/ │ │ ├── language_package.md.jinja │ │ └── partials/ │ │ ├── _api_reference.md.jinja │ │ ├── _badges.md.jinja │ │ ├── _djot_output.md.jinja │ │ ├── _footer.md.jinja │ │ ├── _installation.md.jinja │ │ ├── _metadata_extraction.md.jinja │ │ ├── _plain_text_output.md.jinja │ │ ├── _quick_start.md.jinja │ │ └── _visitor_pattern.md.jinja │ └── update_dotnet_packages.py ├── skills/ │ └── html-to-markdown/ │ ├── SKILL.md │ └── references/ │ ├── cli-reference.md │ ├── configuration.md │ ├── other-bindings.md │ ├── python-api.md │ ├── rust-api.md │ └── typescript-api.md ├── test_apps/ │ ├── README.md │ ├── bun/ │ │ ├── README.md │ │ ├── package.json │ │ └── smoke.test.ts │ ├── c/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── download_ffi.sh │ │ ├── htm_test │ │ ├── main.c │ │ ├── run_tests │ │ ├── test_conversion.c │ │ ├── test_runner.h │ │ └── test_smoke.c │ ├── csharp/ │ │ ├── E2eTests.csproj │ │ ├── KreuzbergDev.HtmlToMarkdown.E2eTests.csproj │ │ ├── README.md │ │ └── tests/ │ │ ├── ConversionTests.cs │ │ └── SmokeTests.cs │ ├── elixir/ │ │ ├── README.md │ │ ├── deps/ │ │ │ ├── html_to_markdown/ │ │ │ │ ├── .formatter.exs │ │ │ │ ├── .hex │ │ │ │ ├── README.md │ │ │ │ ├── checksum-Elixir.HtmlToMarkdown.Native.exs │ │ │ │ ├── hex_metadata.config │ │ │ │ └── mix.exs │ │ │ ├── jason/ │ │ │ │ ├── .hex │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── LICENSE │ │ │ │ ├── 
README.md │ │ │ │ ├── hex_metadata.config │ │ │ │ └── mix.exs │ │ │ ├── rustler/ │ │ │ │ ├── .hex │ │ │ │ ├── README.md │ │ │ │ ├── hex_metadata.config │ │ │ │ ├── mix.exs │ │ │ │ └── priv/ │ │ │ │ └── templates/ │ │ │ │ ├── basic/ │ │ │ │ │ ├── Cargo.toml.eex │ │ │ │ │ ├── README.md │ │ │ │ │ └── src/ │ │ │ │ │ └── lib.rs │ │ │ │ └── root/ │ │ │ │ └── Cargo.toml.eex │ │ │ ├── rustler_precompiled/ │ │ │ │ ├── .hex │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── PRECOMPILATION_GUIDE.md │ │ │ │ ├── README.md │ │ │ │ ├── TROUBLESHOOTING.md │ │ │ │ ├── hex_metadata.config │ │ │ │ └── mix.exs │ │ │ └── toml/ │ │ │ ├── .hex │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── hex_metadata.config │ │ │ └── mix.exs │ │ ├── mix.exs │ │ └── test/ │ │ ├── conversion_test.exs │ │ ├── smoke_test.exs │ │ └── test_helper.exs │ ├── fixtures/ │ │ ├── README.md │ │ ├── basic-html.json │ │ ├── complex-html.json │ │ ├── edge-cases.json │ │ ├── metadata-extraction.json │ │ └── real-world.json │ ├── go/ │ │ ├── README.md │ │ ├── conversion_test.go │ │ ├── go.mod │ │ ├── go.sum │ │ ├── run_tests.sh │ │ └── smoke_test.go │ ├── java/ │ │ ├── .mvn/ │ │ │ └── wrapper/ │ │ │ └── maven-wrapper.properties │ │ ├── README.md │ │ ├── mvnw │ │ ├── mvnw.cmd │ │ ├── pom.xml │ │ └── src/ │ │ └── test/ │ │ └── java/ │ │ └── dev/ │ │ └── kreuzberg/ │ │ ├── e2e/ │ │ │ ├── ConversionTest.java │ │ │ └── SmokeTest.java │ │ └── htmltomarkdown/ │ │ └── e2e/ │ │ ├── ConversionTest.java │ │ └── SmokeTest.java │ ├── node/ │ │ ├── .nvmrc │ │ ├── README.md │ │ ├── package.json │ │ ├── tests/ │ │ │ ├── conversion.test.ts │ │ │ └── smoke.test.ts │ │ ├── tsconfig.json │ │ └── vitest.config.ts │ ├── php/ │ │ ├── README.md │ │ ├── bootstrap.php │ │ ├── composer.json │ │ ├── phpstan.neon │ │ ├── phpunit.xml │ │ └── tests/ │ │ ├── ConversionTest.php │ │ └── SmokeTest.php │ ├── php-ext/ │ │ ├── README.md │ │ ├── main.php │ │ └── run_tests.sh │ ├── python/ │ │ ├── .python-version │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conftest.py │ 
│ ├── pyproject.toml │ │ └── tests/ │ │ ├── __init__.py │ │ ├── test_conversion.py │ │ └── test_smoke.py │ ├── r/ │ │ ├── DESCRIPTION │ │ ├── run_tests.R │ │ └── tests/ │ │ ├── test_conversion.R │ │ └── test_smoke.R │ ├── ruby/ │ │ ├── .bundle/ │ │ │ └── config │ │ ├── .rubocop.yaml │ │ ├── .ruby-version │ │ ├── Gemfile │ │ ├── README.md │ │ └── spec/ │ │ ├── conversion_spec.rb │ │ └── smoke_spec.rb │ ├── rust/ │ │ ├── Cargo.toml │ │ └── tests/ │ │ ├── conversion_test.rs │ │ └── smoke_test.rs │ └── wasm/ │ ├── .nvmrc │ ├── README.md │ ├── globalSetup.ts │ ├── package.json │ ├── tests/ │ │ ├── conversion.test.ts │ │ └── smoke.test.ts │ ├── tsconfig.json │ └── vitest.config.ts ├── test_documents/ │ └── html/ │ ├── issues/ │ │ ├── gh-121-hacker-news.html │ │ ├── gh-121-hacker-news.md │ │ ├── gh-121-minimal-failing.html │ │ ├── gh-121-spa-app.html │ │ ├── gh-121-spa-app.md │ │ ├── gh-127-issue.html │ │ ├── gh-134-pre-code.html │ │ ├── gh-134-pre-code.md │ │ ├── gh-140-table-cell-pipe-with-escape-misc.md │ │ ├── gh-140-table-cell-pipe.html │ │ ├── gh-140-table-cell-pipe.md │ │ ├── gh-143-links-wordwrap.html │ │ ├── gh-143-links-wordwrap.md │ │ ├── gh-190/ │ │ │ ├── firsteigen.html │ │ │ ├── flex2021.html │ │ │ ├── flex2025.html │ │ │ ├── insight.html │ │ │ ├── kimbrain.html │ │ │ ├── maxkim.html │ │ │ ├── mitrade.html │ │ │ ├── ozonekorea.html │ │ │ ├── plusblog.html │ │ │ ├── rbloggers.html │ │ │ ├── sjsu.html │ │ │ └── vipaarontours.html │ │ ├── test-nested-simple.html │ │ ├── test-nested-simple.md │ │ └── test-with-custom-elements.html │ ├── visitor/ │ │ ├── baseline.html │ │ ├── callbacks.html │ │ ├── complex.html │ │ └── custom.html │ └── wikipedia/ │ ├── large_rust.html │ ├── lists_timeline.html │ ├── medium_python.html │ ├── small_html.html │ └── tables_countries.html ├── tsconfig.base.json └── zensical.toml
Showing preview only (757K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (8340 symbols across 451 files)
FILE: .mvn/wrapper/MavenWrapperDownloader.java
class MavenWrapperDownloader (line 21) | public class MavenWrapperDownloader {
method main (line 52) | public static void main(String args[]) {
method downloadFileFromURL (line 98) | private static void downloadFileFromURL(String urlString, File destina...
FILE: crates/html-to-markdown-cli/src/args.rs
type Cli (line 53) | pub struct Cli {
type Shell (line 410) | pub enum Shell {
FILE: crates/html-to-markdown-cli/src/convert.rs
function base64_encode (line 9) | fn base64_encode(data: &[u8]) -> String {
function build_conversion_options (line 13) | pub fn build_conversion_options(cli: &Cli) -> ConversionOptions {
function perform_conversion (line 71) | pub fn perform_conversion(
FILE: crates/html-to-markdown-cli/src/main.rs
function generate_completions (line 18) | fn generate_completions(shell: Shell) {
function generate_man_page (line 34) | fn generate_man_page() -> Result<(), String> {
function read_input (line 50) | fn read_input(cli: &Cli) -> Result<String, Box<dyn std::error::Error>> {
function main (line 85) | fn main() -> Result<(), Box<dyn std::error::Error>> {
FILE: crates/html-to-markdown-cli/src/output.rs
function write_output (line 7) | pub fn write_output(output_path: Option<PathBuf>, content: &str) -> Resu...
function output_debug_info (line 20) | pub fn output_debug_info(cli: &Cli, msg: &str) {
FILE: crates/html-to-markdown-cli/src/utils.rs
constant DEFAULT_USER_AGENT (line 8) | pub const DEFAULT_USER_AGENT: &str =
function decode_bytes (line 11) | pub fn decode_bytes(bytes: &[u8], encoding_name: &str) -> Result<String,...
function extract_charset (line 37) | pub fn extract_charset(content_type: &str) -> Option<String> {
function fetch_url (line 44) | pub fn fetch_url(url: &str, user_agent: &str, default_encoding: &str) ->...
FILE: crates/html-to-markdown-cli/src/validators.rs
type CliHeadingStyle (line 10) | pub enum CliHeadingStyle {
method from (line 20) | fn from(style: CliHeadingStyle) -> Self {
type CliListIndentType (line 30) | pub enum CliListIndentType {
method from (line 38) | fn from(indent_type: CliListIndentType) -> Self {
type CliNewlineStyle (line 47) | pub enum CliNewlineStyle {
method from (line 55) | fn from(style: CliNewlineStyle) -> Self {
type CliCodeBlockStyle (line 64) | pub enum CliCodeBlockStyle {
method from (line 74) | fn from(style: CliCodeBlockStyle) -> Self {
type CliHighlightStyle (line 84) | pub enum CliHighlightStyle {
method from (line 96) | fn from(style: CliHighlightStyle) -> Self {
type CliWhitespaceMode (line 107) | pub enum CliWhitespaceMode {
method from (line 115) | fn from(mode: CliWhitespaceMode) -> Self {
type CliPreprocessingPreset (line 124) | pub enum CliPreprocessingPreset {
method from (line 134) | fn from(preset: CliPreprocessingPreset) -> Self {
type CliOutputFormat (line 144) | pub enum CliOutputFormat {
method from (line 154) | fn from(format: CliOutputFormat) -> Self {
type CliLinkStyle (line 164) | pub enum CliLinkStyle {
method from (line 172) | fn from(style: CliLinkStyle) -> Self {
function validate_bullets (line 180) | pub fn validate_bullets(s: &str) -> Result<String, String> {
function validate_strong_em_symbol (line 190) | pub fn validate_strong_em_symbol(s: &str) -> Result<char, String> {
FILE: crates/html-to-markdown-cli/tests/cli_test.rs
function cli (line 15) | fn cli() -> Command {
function test_basic_stdin (line 20) | fn test_basic_stdin() {
function test_file_input (line 29) | fn test_file_input() {
function test_file_output (line 42) | fn test_file_output() {
function test_dash_reads_stdin (line 58) | fn test_dash_reads_stdin() {
function test_url_fetches_html (line 68) | fn test_url_fetches_html() {
function test_url_conflicts_with_file_input (line 78) | fn test_url_conflicts_with_file_input() {
function test_url_custom_user_agent (line 93) | fn test_url_custom_user_agent() {
function test_url_handles_quirky_markup (line 115) | fn test_url_handles_quirky_markup() {
function test_url_handles_frameset_with_noframes (line 131) | fn test_url_handles_frameset_with_noframes() {
function test_url_handles_windows_1252_charset (line 158) | fn test_url_handles_windows_1252_charset() {
function test_heading_style_atx (line 174) | fn test_heading_style_atx() {
function test_heading_style_underlined (line 185) | fn test_heading_style_underlined() {
function test_heading_style_atx_closed (line 197) | fn test_heading_style_atx_closed() {
function test_list_indent_width (line 208) | fn test_list_indent_width() {
function test_bullets_option (line 219) | fn test_bullets_option() {
function test_strong_em_symbol_asterisk (line 230) | fn test_strong_em_symbol_asterisk() {
function test_strong_em_symbol_underscore (line 241) | fn test_strong_em_symbol_underscore() {
function test_strong_em_symbol_invalid (line 252) | fn test_strong_em_symbol_invalid() {
function test_escape_asterisks (line 263) | fn test_escape_asterisks() {
function test_escape_underscores (line 273) | fn test_escape_underscores() {
function test_escape_misc (line 283) | fn test_escape_misc() {
function test_sub_symbol (line 293) | fn test_sub_symbol() {
function test_sup_symbol (line 304) | fn test_sup_symbol() {
function test_newline_style_spaces (line 315) | fn test_newline_style_spaces() {
function test_newline_style_backslash (line 326) | fn test_newline_style_backslash() {
function test_code_language (line 337) | fn test_code_language() {
function test_code_block_style_indented (line 350) | fn test_code_block_style_indented() {
function test_code_block_style_backticks (line 361) | fn test_code_block_style_backticks() {
function test_code_block_style_tildes (line 372) | fn test_code_block_style_tildes() {
function test_autolinks (line 383) | fn test_autolinks() {
function test_no_autolinks (line 393) | fn test_no_autolinks() {
function test_default_title (line 403) | fn test_default_title() {
function test_keep_inline_images_in (line 413) | fn test_keep_inline_images_in() {
function test_br_in_tables (line 424) | fn test_br_in_tables() {
function test_highlight_style_double_equal (line 436) | fn test_highlight_style_double_equal() {
function test_highlight_style_html (line 447) | fn test_highlight_style_html() {
function test_highlight_style_bold (line 458) | fn test_highlight_style_bold() {
function test_highlight_style_none (line 469) | fn test_highlight_style_none() {
function test_extract_metadata (line 480) | fn test_extract_metadata() {
function test_whitespace_mode_normalized (line 490) | fn test_whitespace_mode_normalized() {
function test_strip_newlines (line 501) | fn test_strip_newlines() {
function test_wrap (line 511) | fn test_wrap() {
function test_wrap_width_validation (line 522) | fn test_wrap_width_validation() {
function test_convert_as_inline (line 532) | fn test_convert_as_inline() {
function test_strip_tags (line 541) | fn test_strip_tags() {
function test_preprocess (line 553) | fn test_preprocess() {
function test_preset_requires_preprocess (line 562) | fn test_preset_requires_preprocess() {
function test_preprocess_with_preset_minimal (line 574) | fn test_preprocess_with_preset_minimal() {
function test_preprocess_with_preset_aggressive (line 585) | fn test_preprocess_with_preset_aggressive() {
function test_keep_navigation (line 596) | fn test_keep_navigation() {
function test_keep_forms (line 606) | fn test_keep_forms() {
function test_debug_flag (line 616) | fn test_debug_flag() {
function test_encoding_utf8 (line 621) | fn test_encoding_utf8() {
function test_encoding_invalid (line 636) | fn test_encoding_invalid() {
function test_list_indent_width_validation_min (line 647) | fn test_list_indent_width_validation_min() {
function test_list_indent_width_validation_max (line 657) | fn test_list_indent_width_validation_max() {
function test_bullets_validation_empty (line 667) | fn test_bullets_validation_empty() {
function test_bullets_validation_too_long (line 678) | fn test_bullets_validation_too_long() {
function test_nonexistent_file (line 689) | fn test_nonexistent_file() {
function test_invalid_html (line 698) | fn test_invalid_html() {
function test_empty_input (line 703) | fn test_empty_input() {
function test_complex_document (line 708) | fn test_complex_document() {
function test_version_flag (line 743) | fn test_version_flag() {
function test_help_flag (line 752) | fn test_help_flag() {
function test_generate_completion_bash (line 762) | fn test_generate_completion_bash() {
function test_generate_completion_zsh (line 772) | fn test_generate_completion_zsh() {
function test_generate_man (line 782) | fn test_generate_man() {
function test_multiple_options_combined (line 792) | fn test_multiple_options_combined() {
function test_metadata_flags_work_without_extract_metadata (line 810) | fn test_metadata_flags_work_without_extract_metadata() {
function test_metadata_flags_work_with_json (line 820) | fn test_metadata_flags_work_with_json() {
function serve_once (line 830) | fn serve_once(body: &'static str, content_type: Option<&'static str>) ->...
function serve_once_with_capture (line 835) | fn serve_once_with_capture(
function serve_once_bytes (line 842) | fn serve_once_bytes(
FILE: crates/html-to-markdown-ffi/build.rs
function main (line 3) | fn main() {
FILE: crates/html-to-markdown-ffi/include/html_to_markdown.h
type HTMAnnotationKind (line 13) | typedef struct HTMAnnotationKind HTMAnnotationKind;
type HTMCodeBlockStyle (line 14) | typedef struct HTMCodeBlockStyle HTMCodeBlockStyle;
type HTMConversionOptions (line 15) | typedef struct HTMConversionOptions HTMConversionOptions;
type HTMConversionOptionsBuilder (line 16) | typedef struct HTMConversionOptionsBuilder HTMConversionOptionsBuilder;
type HTMConversionOptionsUpdate (line 17) | typedef struct HTMConversionOptionsUpdate HTMConversionOptionsUpdate;
type HTMConversionResult (line 18) | typedef struct HTMConversionResult HTMConversionResult;
type HTMDocumentMetadata (line 19) | typedef struct HTMDocumentMetadata HTMDocumentMetadata;
type HTMDocumentNode (line 20) | typedef struct HTMDocumentNode HTMDocumentNode;
type HTMDocumentStructure (line 21) | typedef struct HTMDocumentStructure HTMDocumentStructure;
type HTMGridCell (line 22) | typedef struct HTMGridCell HTMGridCell;
type HTMHeaderMetadata (line 23) | typedef struct HTMHeaderMetadata HTMHeaderMetadata;
type HTMHeadingStyle (line 24) | typedef struct HTMHeadingStyle HTMHeadingStyle;
type HTMHighlightStyle (line 25) | typedef struct HTMHighlightStyle HTMHighlightStyle;
type HTMHtmlMetadata (line 26) | typedef struct HTMHtmlMetadata HTMHtmlMetadata;
type HTMHtmlVisitor (line 27) | typedef struct HTMHtmlVisitor HTMHtmlVisitor;
type HTMImageMetadata (line 28) | typedef struct HTMImageMetadata HTMImageMetadata;
type HTMImageType (line 29) | typedef struct HTMImageType HTMImageType;
type HTMLinkMetadata (line 30) | typedef struct HTMLinkMetadata HTMLinkMetadata;
type HTMLinkStyle (line 31) | typedef struct HTMLinkStyle HTMLinkStyle;
type HTMLinkType (line 32) | typedef struct HTMLinkType HTMLinkType;
type HTMListIndentType (line 33) | typedef struct HTMListIndentType HTMListIndentType;
type HTMNewlineStyle (line 34) | typedef struct HTMNewlineStyle HTMNewlineStyle;
type HTMNodeContent (line 35) | typedef struct HTMNodeContent HTMNodeContent;
type HTMNodeContext (line 36) | typedef struct HTMNodeContext HTMNodeContext;
type HTMNodeType (line 37) | typedef struct HTMNodeType HTMNodeType;
type HTMOutputFormat (line 38) | typedef struct HTMOutputFormat HTMOutputFormat;
type HTMPreprocessingOptions (line 39) | typedef struct HTMPreprocessingOptions HTMPreprocessingOptions;
type HTMPreprocessingOptionsUpdate (line 40) | typedef struct HTMPreprocessingOptionsUpdate HTMPreprocessingOptionsUpdate;
type HTMPreprocessingPreset (line 41) | typedef struct HTMPreprocessingPreset HTMPreprocessingPreset;
type HTMProcessingWarning (line 42) | typedef struct HTMProcessingWarning HTMProcessingWarning;
type HTMStructuredData (line 43) | typedef struct HTMStructuredData HTMStructuredData;
type HTMStructuredDataType (line 44) | typedef struct HTMStructuredDataType HTMStructuredDataType;
type HTMTableData (line 45) | typedef struct HTMTableData HTMTableData;
type HTMTableGrid (line 46) | typedef struct HTMTableGrid HTMTableGrid;
type HTMTextAnnotation (line 47) | typedef struct HTMTextAnnotation HTMTextAnnotation;
type HTMTextDirection (line 48) | typedef struct HTMTextDirection HTMTextDirection;
type HTMVisitResult (line 49) | typedef struct HTMVisitResult HTMVisitResult;
type HTMVisitorHandle (line 50) | typedef struct HTMVisitorHandle HTMVisitorHandle;
type HTMWarningKind (line 51) | typedef struct HTMWarningKind HTMWarningKind;
type HTMWhitespaceMode (line 52) | typedef struct HTMWhitespaceMode HTMWhitespaceMode;
type HTMHtmHtmlVisitorBridge (line 60) | typedef struct HTMHtmHtmlVisitorBridge HTMHtmHtmlVisitorBridge;
type HTMHtmHtmlVisitorBridge (line 2161) | struct HTMHtmHtmlVisitorBridge
FILE: crates/html-to-markdown-ffi/src/lib.rs
function set_last_error (line 30) | fn set_last_error(code: i32, message: &str) {
function clear_last_error (line 35) | fn clear_last_error() {
function htm_last_error_code (line 45) | pub unsafe extern "C" fn htm_last_error_code() -> i32 {
function htm_last_error_context (line 54) | pub unsafe extern "C" fn htm_last_error_context() -> *const c_char {
function htm_free_string (line 62) | pub unsafe extern "C" fn htm_free_string(ptr: *mut c_char) {
function htm_version (line 76) | pub unsafe extern "C" fn htm_version() -> *const c_char {
function htm_document_metadata_from_json (line 86) | pub unsafe extern "C" fn htm_document_metadata_from_json(
function htm_document_metadata_to_json (line 116) | pub unsafe extern "C" fn htm_document_metadata_to_json(
function htm_document_metadata_free (line 145) | pub unsafe extern "C" fn htm_document_metadata_free(ptr: *mut html_to_ma...
function htm_document_metadata_title (line 158) | pub unsafe extern "C" fn htm_document_metadata_title(
function htm_document_metadata_description (line 179) | pub unsafe extern "C" fn htm_document_metadata_description(
function htm_document_metadata_keywords (line 200) | pub unsafe extern "C" fn htm_document_metadata_keywords(
function htm_document_metadata_author (line 221) | pub unsafe extern "C" fn htm_document_metadata_author(
function htm_document_metadata_canonical_url (line 242) | pub unsafe extern "C" fn htm_document_metadata_canonical_url(
function htm_document_metadata_base_href (line 263) | pub unsafe extern "C" fn htm_document_metadata_base_href(
function htm_document_metadata_language (line 284) | pub unsafe extern "C" fn htm_document_metadata_language(
function htm_document_metadata_text_direction (line 305) | pub unsafe extern "C" fn htm_document_metadata_text_direction(
function htm_document_metadata_open_graph (line 323) | pub unsafe extern "C" fn htm_document_metadata_open_graph(
function htm_document_metadata_twitter_card (line 344) | pub unsafe extern "C" fn htm_document_metadata_twitter_card(
function htm_document_metadata_meta_tags (line 365) | pub unsafe extern "C" fn htm_document_metadata_meta_tags(
function htm_header_metadata_from_json (line 387) | pub unsafe extern "C" fn htm_header_metadata_from_json(
function htm_header_metadata_to_json (line 417) | pub unsafe extern "C" fn htm_header_metadata_to_json(
function htm_header_metadata_free (line 446) | pub unsafe extern "C" fn htm_header_metadata_free(ptr: *mut html_to_mark...
function htm_header_metadata_level (line 459) | pub unsafe extern "C" fn htm_header_metadata_level(ptr: *const html_to_m...
function htm_header_metadata_text (line 472) | pub unsafe extern "C" fn htm_header_metadata_text(
function htm_header_metadata_id (line 490) | pub unsafe extern "C" fn htm_header_metadata_id(
function htm_header_metadata_depth (line 511) | pub unsafe extern "C" fn htm_header_metadata_depth(ptr: *const html_to_m...
function htm_header_metadata_html_offset (line 524) | pub unsafe extern "C" fn htm_header_metadata_html_offset(
function htm_header_metadata_is_valid (line 567) | pub unsafe extern "C" fn htm_header_metadata_is_valid(
function htm_link_metadata_from_json (line 590) | pub unsafe extern "C" fn htm_link_metadata_from_json(
function htm_link_metadata_to_json (line 620) | pub unsafe extern "C" fn htm_link_metadata_to_json(
function htm_link_metadata_free (line 649) | pub unsafe extern "C" fn htm_link_metadata_free(ptr: *mut html_to_markdo...
function htm_link_metadata_href (line 662) | pub unsafe extern "C" fn htm_link_metadata_href(
function htm_link_metadata_text (line 680) | pub unsafe extern "C" fn htm_link_metadata_text(
function htm_link_metadata_title (line 698) | pub unsafe extern "C" fn htm_link_metadata_title(
function htm_link_metadata_link_type (line 719) | pub unsafe extern "C" fn htm_link_metadata_link_type(
function htm_link_metadata_rel (line 734) | pub unsafe extern "C" fn htm_link_metadata_rel(
function htm_link_metadata_attributes (line 755) | pub unsafe extern "C" fn htm_link_metadata_attributes(
function htm_link_metadata_classify_link (line 795) | pub unsafe extern "C" fn htm_link_metadata_classify_link(
function htm_image_metadata_from_json (line 820) | pub unsafe extern "C" fn htm_image_metadata_from_json(
function htm_image_metadata_to_json (line 850) | pub unsafe extern "C" fn htm_image_metadata_to_json(
function htm_image_metadata_free (line 879) | pub unsafe extern "C" fn htm_image_metadata_free(ptr: *mut html_to_markd...
function htm_image_metadata_src (line 892) | pub unsafe extern "C" fn htm_image_metadata_src(
function htm_image_metadata_alt (line 910) | pub unsafe extern "C" fn htm_image_metadata_alt(
function htm_image_metadata_title (line 931) | pub unsafe extern "C" fn htm_image_metadata_title(
function htm_image_metadata_image_type (line 952) | pub unsafe extern "C" fn htm_image_metadata_image_type(
function htm_image_metadata_attributes (line 967) | pub unsafe extern "C" fn htm_image_metadata_attributes(
function htm_structured_data_from_json (line 989) | pub unsafe extern "C" fn htm_structured_data_from_json(
function htm_structured_data_to_json (line 1019) | pub unsafe extern "C" fn htm_structured_data_to_json(
function htm_structured_data_free (line 1048) | pub unsafe extern "C" fn htm_structured_data_free(ptr: *mut html_to_mark...
function htm_structured_data_data_type (line 1061) | pub unsafe extern "C" fn htm_structured_data_data_type(
function htm_structured_data_raw_json (line 1076) | pub unsafe extern "C" fn htm_structured_data_raw_json(
function htm_structured_data_schema_type (line 1094) | pub unsafe extern "C" fn htm_structured_data_schema_type(
function htm_html_metadata_from_json (line 1116) | pub unsafe extern "C" fn htm_html_metadata_from_json(
function htm_html_metadata_to_json (line 1146) | pub unsafe extern "C" fn htm_html_metadata_to_json(
function htm_html_metadata_free (line 1175) | pub unsafe extern "C" fn htm_html_metadata_free(ptr: *mut html_to_markdo...
function htm_html_metadata_document (line 1188) | pub unsafe extern "C" fn htm_html_metadata_document(
function htm_html_metadata_headers (line 1203) | pub unsafe extern "C" fn htm_html_metadata_headers(
function htm_html_metadata_links (line 1224) | pub unsafe extern "C" fn htm_html_metadata_links(
function htm_html_metadata_images (line 1245) | pub unsafe extern "C" fn htm_html_metadata_images(
function htm_html_metadata_structured_data (line 1266) | pub unsafe extern "C" fn htm_html_metadata_structured_data(
function htm_conversion_options_to_json (line 1288) | pub unsafe extern "C" fn htm_conversion_options_to_json(
function htm_conversion_options_free (line 1317) | pub unsafe extern "C" fn htm_conversion_options_free(ptr: *mut html_to_m...
function htm_conversion_options_heading_style (line 1330) | pub unsafe extern "C" fn htm_conversion_options_heading_style(
function htm_conversion_options_list_indent_type (line 1345) | pub unsafe extern "C" fn htm_conversion_options_list_indent_type(
function htm_conversion_options_list_indent_width (line 1360) | pub unsafe extern "C" fn htm_conversion_options_list_indent_width(
function htm_conversion_options_bullets (line 1375) | pub unsafe extern "C" fn htm_conversion_options_bullets(
function htm_conversion_options_strong_em_symbol (line 1393) | pub unsafe extern "C" fn htm_conversion_options_strong_em_symbol(
function htm_conversion_options_escape_asterisks (line 1411) | pub unsafe extern "C" fn htm_conversion_options_escape_asterisks(
function htm_conversion_options_escape_underscores (line 1426) | pub unsafe extern "C" fn htm_conversion_options_escape_underscores(
function htm_conversion_options_escape_misc (line 1441) | pub unsafe extern "C" fn htm_conversion_options_escape_misc(
function htm_conversion_options_escape_ascii (line 1456) | pub unsafe extern "C" fn htm_conversion_options_escape_ascii(
function htm_conversion_options_code_language (line 1471) | pub unsafe extern "C" fn htm_conversion_options_code_language(
function htm_conversion_options_autolinks (line 1489) | pub unsafe extern "C" fn htm_conversion_options_autolinks(
function htm_conversion_options_default_title (line 1504) | pub unsafe extern "C" fn htm_conversion_options_default_title(
function htm_conversion_options_br_in_tables (line 1519) | pub unsafe extern "C" fn htm_conversion_options_br_in_tables(
function htm_conversion_options_highlight_style (line 1534) | pub unsafe extern "C" fn htm_conversion_options_highlight_style(
function htm_conversion_options_extract_metadata (line 1549) | pub unsafe extern "C" fn htm_conversion_options_extract_metadata(
function htm_conversion_options_whitespace_mode (line 1564) | pub unsafe extern "C" fn htm_conversion_options_whitespace_mode(
function htm_conversion_options_strip_newlines (line 1579) | pub unsafe extern "C" fn htm_conversion_options_strip_newlines(
function htm_conversion_options_wrap (line 1594) | pub unsafe extern "C" fn htm_conversion_options_wrap(
function htm_conversion_options_wrap_width (line 1609) | pub unsafe extern "C" fn htm_conversion_options_wrap_width(
function htm_conversion_options_convert_as_inline (line 1624) | pub unsafe extern "C" fn htm_conversion_options_convert_as_inline(
function htm_conversion_options_sub_symbol (line 1639) | pub unsafe extern "C" fn htm_conversion_options_sub_symbol(
function htm_conversion_options_sup_symbol (line 1657) | pub unsafe extern "C" fn htm_conversion_options_sup_symbol(
function htm_conversion_options_newline_style (line 1675) | pub unsafe extern "C" fn htm_conversion_options_newline_style(
function htm_conversion_options_code_block_style (line 1690) | pub unsafe extern "C" fn htm_conversion_options_code_block_style(
function htm_conversion_options_keep_inline_images_in (line 1705) | pub unsafe extern "C" fn htm_conversion_options_keep_inline_images_in(
function htm_conversion_options_preprocessing (line 1726) | pub unsafe extern "C" fn htm_conversion_options_preprocessing(
function htm_conversion_options_encoding (line 1741) | pub unsafe extern "C" fn htm_conversion_options_encoding(
function htm_conversion_options_debug (line 1759) | pub unsafe extern "C" fn htm_conversion_options_debug(
function htm_conversion_options_strip_tags (line 1774) | pub unsafe extern "C" fn htm_conversion_options_strip_tags(
function htm_conversion_options_preserve_tags (line 1795) | pub unsafe extern "C" fn htm_conversion_options_preserve_tags(
function htm_conversion_options_skip_images (line 1816) | pub unsafe extern "C" fn htm_conversion_options_skip_images(
function htm_conversion_options_link_style (line 1831) | pub unsafe extern "C" fn htm_conversion_options_link_style(
function htm_conversion_options_output_format (line 1846) | pub unsafe extern "C" fn htm_conversion_options_output_format(
function htm_conversion_options_include_document_structure (line 1861) | pub unsafe extern "C" fn htm_conversion_options_include_document_structure(
function htm_conversion_options_extract_images (line 1876) | pub unsafe extern "C" fn htm_conversion_options_extract_images(
function htm_conversion_options_max_image_size (line 1891) | pub unsafe extern "C" fn htm_conversion_options_max_image_size(
function htm_conversion_options_capture_svg (line 1906) | pub unsafe extern "C" fn htm_conversion_options_capture_svg(
function htm_conversion_options_infer_dimensions (line 1921) | pub unsafe extern "C" fn htm_conversion_options_infer_dimensions(
function htm_conversion_options_max_depth (line 1936) | pub unsafe extern "C" fn htm_conversion_options_max_depth(
function htm_conversion_options_exclude_selectors (line 1954) | pub unsafe extern "C" fn htm_conversion_options_exclude_selectors(
function htm_conversion_options_visitor (line 1975) | pub unsafe extern "C" fn htm_conversion_options_visitor(
function htm_conversion_options_default (line 1993) | pub unsafe extern "C" fn htm_conversion_options_default() -> *mut html_t...
function htm_conversion_options_builder (line 2004) | pub unsafe extern "C" fn htm_conversion_options_builder() -> *mut html_t...
function htm_conversion_options_apply_update (line 2016) | pub unsafe extern "C" fn htm_conversion_options_apply_update(
function htm_conversion_options_from_update (line 2041) | pub unsafe extern "C" fn htm_conversion_options_from_update(
function htm_conversion_options_from (line 2059) | pub unsafe extern "C" fn htm_conversion_options_from(
function htm_conversion_options_builder_free (line 2077) | pub unsafe extern "C" fn htm_conversion_options_builder_free(
function htm_conversion_options_builder_strip_tags (line 2093) | pub unsafe extern "C" fn htm_conversion_options_builder_strip_tags(
function htm_conversion_options_builder_preserve_tags (line 2132) | pub unsafe extern "C" fn htm_conversion_options_builder_preserve_tags(
function htm_conversion_options_builder_keep_inline_images_in (line 2171) | pub unsafe extern "C" fn htm_conversion_options_builder_keep_inline_imag...
function htm_conversion_options_builder_exclude_selectors (line 2210) | pub unsafe extern "C" fn htm_conversion_options_builder_exclude_selectors(
function htm_conversion_options_builder_visitor (line 2249) | pub unsafe extern "C" fn htm_conversion_options_builder_visitor(
function htm_conversion_options_builder_preprocessing (line 2275) | pub unsafe extern "C" fn htm_conversion_options_builder_preprocessing(
function htm_conversion_options_builder_build (line 2301) | pub unsafe extern "C" fn htm_conversion_options_builder_build(
function htm_conversion_options_update_from_json (line 2320) | pub unsafe extern "C" fn htm_conversion_options_update_from_json(
function htm_conversion_options_update_free (line 2349) | pub unsafe extern "C" fn htm_conversion_options_update_free(
function htm_conversion_options_update_heading_style (line 2364) | pub unsafe extern "C" fn htm_conversion_options_update_heading_style(
function htm_conversion_options_update_list_indent_type (line 2382) | pub unsafe extern "C" fn htm_conversion_options_update_list_indent_type(
function htm_conversion_options_update_list_indent_width (line 2400) | pub unsafe extern "C" fn htm_conversion_options_update_list_indent_width(
function htm_conversion_options_update_bullets (line 2418) | pub unsafe extern "C" fn htm_conversion_options_update_bullets(
function htm_conversion_options_update_strong_em_symbol (line 2439) | pub unsafe extern "C" fn htm_conversion_options_update_strong_em_symbol(
function htm_conversion_options_update_escape_asterisks (line 2460) | pub unsafe extern "C" fn htm_conversion_options_update_escape_asterisks(
function htm_conversion_options_update_escape_underscores (line 2478) | pub unsafe extern "C" fn htm_conversion_options_update_escape_underscores(
function htm_conversion_options_update_escape_misc (line 2496) | pub unsafe extern "C" fn htm_conversion_options_update_escape_misc(
function htm_conversion_options_update_escape_ascii (line 2514) | pub unsafe extern "C" fn htm_conversion_options_update_escape_ascii(
function htm_conversion_options_update_code_language (line 2532) | pub unsafe extern "C" fn htm_conversion_options_update_code_language(
function htm_conversion_options_update_autolinks (line 2553) | pub unsafe extern "C" fn htm_conversion_options_update_autolinks(
function htm_conversion_options_update_default_title (line 2571) | pub unsafe extern "C" fn htm_conversion_options_update_default_title(
function htm_conversion_options_update_br_in_tables (line 2589) | pub unsafe extern "C" fn htm_conversion_options_update_br_in_tables(
function htm_conversion_options_update_highlight_style (line 2607) | pub unsafe extern "C" fn htm_conversion_options_update_highlight_style(
function htm_conversion_options_update_extract_metadata (line 2625) | pub unsafe extern "C" fn htm_conversion_options_update_extract_metadata(
function htm_conversion_options_update_whitespace_mode (line 2643) | pub unsafe extern "C" fn htm_conversion_options_update_whitespace_mode(
function htm_conversion_options_update_strip_newlines (line 2661) | pub unsafe extern "C" fn htm_conversion_options_update_strip_newlines(
function htm_conversion_options_update_wrap (line 2679) | pub unsafe extern "C" fn htm_conversion_options_update_wrap(
function htm_conversion_options_update_wrap_width (line 2697) | pub unsafe extern "C" fn htm_conversion_options_update_wrap_width(
function htm_conversion_options_update_convert_as_inline (line 2715) | pub unsafe extern "C" fn htm_conversion_options_update_convert_as_inline(
function htm_conversion_options_update_sub_symbol (line 2733) | pub unsafe extern "C" fn htm_conversion_options_update_sub_symbol(
function htm_conversion_options_update_sup_symbol (line 2754) | pub unsafe extern "C" fn htm_conversion_options_update_sup_symbol(
function htm_conversion_options_update_newline_style (line 2775) | pub unsafe extern "C" fn htm_conversion_options_update_newline_style(
function htm_conversion_options_update_code_block_style (line 2793) | pub unsafe extern "C" fn htm_conversion_options_update_code_block_style(
function htm_conversion_options_update_keep_inline_images_in (line 2811) | pub unsafe extern "C" fn htm_conversion_options_update_keep_inline_image...
function htm_conversion_options_update_preprocessing (line 2835) | pub unsafe extern "C" fn htm_conversion_options_update_preprocessing(
function htm_conversion_options_update_encoding (line 2853) | pub unsafe extern "C" fn htm_conversion_options_update_encoding(
function htm_conversion_options_update_debug (line 2874) | pub unsafe extern "C" fn htm_conversion_options_update_debug(
function htm_conversion_options_update_strip_tags (line 2892) | pub unsafe extern "C" fn htm_conversion_options_update_strip_tags(
function htm_conversion_options_update_preserve_tags (line 2916) | pub unsafe extern "C" fn htm_conversion_options_update_preserve_tags(
function htm_conversion_options_update_skip_images (line 2940) | pub unsafe extern "C" fn htm_conversion_options_update_skip_images(
function htm_conversion_options_update_link_style (line 2958) | pub unsafe extern "C" fn htm_conversion_options_update_link_style(
function htm_conversion_options_update_output_format (line 2976) | pub unsafe extern "C" fn htm_conversion_options_update_output_format(
function htm_conversion_options_update_include_document_structure (line 2994) | pub unsafe extern "C" fn htm_conversion_options_update_include_document_...
function htm_conversion_options_update_extract_images (line 3012) | pub unsafe extern "C" fn htm_conversion_options_update_extract_images(
function htm_conversion_options_update_max_image_size (line 3030) | pub unsafe extern "C" fn htm_conversion_options_update_max_image_size(
function htm_conversion_options_update_capture_svg (line 3048) | pub unsafe extern "C" fn htm_conversion_options_update_capture_svg(
function htm_conversion_options_update_infer_dimensions (line 3066) | pub unsafe extern "C" fn htm_conversion_options_update_infer_dimensions(
function htm_conversion_options_update_max_depth (line 3084) | pub unsafe extern "C" fn htm_conversion_options_update_max_depth(
function htm_conversion_options_update_exclude_selectors (line 3103) | pub unsafe extern "C" fn htm_conversion_options_update_exclude_selectors(
function htm_conversion_options_update_visitor (line 3127) | pub unsafe extern "C" fn htm_conversion_options_update_visitor(
function htm_preprocessing_options_from_json (line 3146) | pub unsafe extern "C" fn htm_preprocessing_options_from_json(
function htm_preprocessing_options_to_json (line 3176) | pub unsafe extern "C" fn htm_preprocessing_options_to_json(
function htm_preprocessing_options_free (line 3205) | pub unsafe extern "C" fn htm_preprocessing_options_free(ptr: *mut html_t...
function htm_preprocessing_options_enabled (line 3218) | pub unsafe extern "C" fn htm_preprocessing_options_enabled(
function htm_preprocessing_options_preset (line 3233) | pub unsafe extern "C" fn htm_preprocessing_options_preset(
function htm_preprocessing_options_remove_navigation (line 3248) | pub unsafe extern "C" fn htm_preprocessing_options_remove_navigation(
function htm_preprocessing_options_remove_forms (line 3263) | pub unsafe extern "C" fn htm_preprocessing_options_remove_forms(
function htm_preprocessing_options_default (line 3278) | pub unsafe extern "C" fn htm_preprocessing_options_default() -> *mut htm...
function htm_preprocessing_options_apply_update (line 3297) | pub unsafe extern "C" fn htm_preprocessing_options_apply_update(
function htm_preprocessing_options_from_update (line 3333) | pub unsafe extern "C" fn htm_preprocessing_options_from_update(
function htm_preprocessing_options_from (line 3351) | pub unsafe extern "C" fn htm_preprocessing_options_from(
function htm_preprocessing_options_update_from_json (line 3370) | pub unsafe extern "C" fn htm_preprocessing_options_update_from_json(
function htm_preprocessing_options_update_free (line 3399) | pub unsafe extern "C" fn htm_preprocessing_options_update_free(
function htm_preprocessing_options_update_enabled (line 3414) | pub unsafe extern "C" fn htm_preprocessing_options_update_enabled(
function htm_preprocessing_options_update_preset (line 3432) | pub unsafe extern "C" fn htm_preprocessing_options_update_preset(
function htm_preprocessing_options_update_remove_navigation (line 3450) | pub unsafe extern "C" fn htm_preprocessing_options_update_remove_navigat...
function htm_preprocessing_options_update_remove_forms (line 3468) | pub unsafe extern "C" fn htm_preprocessing_options_update_remove_forms(
function htm_document_structure_from_json (line 3487) | pub unsafe extern "C" fn htm_document_structure_from_json(
function htm_document_structure_to_json (line 3517) | pub unsafe extern "C" fn htm_document_structure_to_json(
function htm_document_structure_free (line 3546) | pub unsafe extern "C" fn htm_document_structure_free(ptr: *mut html_to_m...
function htm_document_structure_nodes (line 3559) | pub unsafe extern "C" fn htm_document_structure_nodes(
function htm_document_structure_source_format (line 3580) | pub unsafe extern "C" fn htm_document_structure_source_format(
function htm_document_node_from_json (line 3602) | pub unsafe extern "C" fn htm_document_node_from_json(json: *const c_char...
function htm_document_node_to_json (line 3630) | pub unsafe extern "C" fn htm_document_node_to_json(ptr: *const html_to_m...
function htm_document_node_free (line 3657) | pub unsafe extern "C" fn htm_document_node_free(ptr: *mut html_to_markdo...
function htm_document_node_id (line 3670) | pub unsafe extern "C" fn htm_document_node_id(ptr: *const html_to_markdo...
function htm_document_node_content (line 3686) | pub unsafe extern "C" fn htm_document_node_content(
function htm_document_node_parent (line 3701) | pub unsafe extern "C" fn htm_document_node_parent(ptr: *const html_to_ma...
function htm_document_node_children (line 3717) | pub unsafe extern "C" fn htm_document_node_children(
function htm_document_node_annotations (line 3738) | pub unsafe extern "C" fn htm_document_node_annotations(
function htm_document_node_attributes (line 3759) | pub unsafe extern "C" fn htm_document_node_attributes(
function htm_text_annotation_from_json (line 3784) | pub unsafe extern "C" fn htm_text_annotation_from_json(
function htm_text_annotation_to_json (line 3814) | pub unsafe extern "C" fn htm_text_annotation_to_json(ptr: *const html_to...
function htm_text_annotation_free (line 3841) | pub unsafe extern "C" fn htm_text_annotation_free(ptr: *mut html_to_mark...
function htm_text_annotation_start (line 3854) | pub unsafe extern "C" fn htm_text_annotation_start(ptr: *const html_to_m...
function htm_text_annotation_end (line 3867) | pub unsafe extern "C" fn htm_text_annotation_end(ptr: *const html_to_mar...
function htm_text_annotation_kind (line 3880) | pub unsafe extern "C" fn htm_text_annotation_kind(
function htm_conversion_result_from_json (line 3896) | pub unsafe extern "C" fn htm_conversion_result_from_json(
function htm_conversion_result_to_json (line 3926) | pub unsafe extern "C" fn htm_conversion_result_to_json(
function htm_conversion_result_free (line 3955) | pub unsafe extern "C" fn htm_conversion_result_free(ptr: *mut html_to_ma...
function htm_conversion_result_content (line 3968) | pub unsafe extern "C" fn htm_conversion_result_content(
function htm_conversion_result_document (line 3989) | pub unsafe extern "C" fn htm_conversion_result_document(
function htm_conversion_result_metadata (line 4007) | pub unsafe extern "C" fn htm_conversion_result_metadata(
function htm_conversion_result_tables (line 4022) | pub unsafe extern "C" fn htm_conversion_result_tables(
function htm_conversion_result_warnings (line 4043) | pub unsafe extern "C" fn htm_conversion_result_warnings(
function htm_table_grid_from_json (line 4065) | pub unsafe extern "C" fn htm_table_grid_from_json(json: *const c_char) -...
function htm_table_grid_to_json (line 4093) | pub unsafe extern "C" fn htm_table_grid_to_json(ptr: *const html_to_mark...
function htm_table_grid_free (line 4120) | pub unsafe extern "C" fn htm_table_grid_free(ptr: *mut html_to_markdown_...
function htm_table_grid_rows (line 4133) | pub unsafe extern "C" fn htm_table_grid_rows(ptr: *const html_to_markdow...
function htm_table_grid_cols (line 4146) | pub unsafe extern "C" fn htm_table_grid_cols(ptr: *const html_to_markdow...
function htm_table_grid_cells (line 4159) | pub unsafe extern "C" fn htm_table_grid_cells(ptr: *const html_to_markdo...
function htm_grid_cell_from_json (line 4179) | pub unsafe extern "C" fn htm_grid_cell_from_json(json: *const c_char) ->...
function htm_grid_cell_to_json (line 4207) | pub unsafe extern "C" fn htm_grid_cell_to_json(ptr: *const html_to_markd...
function htm_grid_cell_free (line 4234) | pub unsafe extern "C" fn htm_grid_cell_free(ptr: *mut html_to_markdown_r...
function htm_grid_cell_content (line 4247) | pub unsafe extern "C" fn htm_grid_cell_content(ptr: *const html_to_markd...
function htm_grid_cell_row (line 4263) | pub unsafe extern "C" fn htm_grid_cell_row(ptr: *const html_to_markdown_...
function htm_grid_cell_col (line 4276) | pub unsafe extern "C" fn htm_grid_cell_col(ptr: *const html_to_markdown_...
function htm_grid_cell_row_span (line 4289) | pub unsafe extern "C" fn htm_grid_cell_row_span(ptr: *const html_to_mark...
function htm_grid_cell_col_span (line 4302) | pub unsafe extern "C" fn htm_grid_cell_col_span(ptr: *const html_to_mark...
function htm_grid_cell_is_header (line 4315) | pub unsafe extern "C" fn htm_grid_cell_is_header(ptr: *const html_to_mar...
function htm_table_data_from_json (line 4329) | pub unsafe extern "C" fn htm_table_data_from_json(json: *const c_char) -...
function htm_table_data_to_json (line 4357) | pub unsafe extern "C" fn htm_table_data_to_json(ptr: *const html_to_mark...
function htm_table_data_free (line 4384) | pub unsafe extern "C" fn htm_table_data_free(ptr: *mut html_to_markdown_...
function htm_table_data_grid (line 4397) | pub unsafe extern "C" fn htm_table_data_grid(
function htm_table_data_markdown (line 4412) | pub unsafe extern "C" fn htm_table_data_markdown(ptr: *const html_to_mar...
function htm_processing_warning_from_json (line 4429) | pub unsafe extern "C" fn htm_processing_warning_from_json(
function htm_processing_warning_to_json (line 4459) | pub unsafe extern "C" fn htm_processing_warning_to_json(
function htm_processing_warning_free (line 4488) | pub unsafe extern "C" fn htm_processing_warning_free(ptr: *mut html_to_m...
function htm_processing_warning_message (line 4501) | pub unsafe extern "C" fn htm_processing_warning_message(
function htm_processing_warning_kind (line 4519) | pub unsafe extern "C" fn htm_processing_warning_kind(
function htm_visitor_handle_free (line 4534) | pub unsafe extern "C" fn htm_visitor_handle_free(ptr: *mut html_to_markd...
function htm_node_context_from_json (line 4548) | pub unsafe extern "C" fn htm_node_context_from_json(json: *const c_char)...
function htm_node_context_to_json (line 4576) | pub unsafe extern "C" fn htm_node_context_to_json(ptr: *const html_to_ma...
function htm_node_context_free (line 4603) | pub unsafe extern "C" fn htm_node_context_free(ptr: *mut html_to_markdow...
function htm_node_context_node_type (line 4616) | pub unsafe extern "C" fn htm_node_context_node_type(
function htm_node_context_tag_name (line 4631) | pub unsafe extern "C" fn htm_node_context_tag_name(
function htm_node_context_attributes (line 4649) | pub unsafe extern "C" fn htm_node_context_attributes(
function htm_node_context_depth (line 4670) | pub unsafe extern "C" fn htm_node_context_depth(ptr: *const html_to_mark...
function htm_node_context_index_in_parent (line 4683) | pub unsafe extern "C" fn htm_node_context_index_in_parent(ptr: *const ht...
function htm_node_context_parent_tag (line 4696) | pub unsafe extern "C" fn htm_node_context_parent_tag(
function htm_node_context_is_inline (line 4717) | pub unsafe extern "C" fn htm_node_context_is_inline(ptr: *const html_to_...
function htm_text_direction_from_i32 (line 4731) | pub unsafe extern "C" fn htm_text_direction_from_i32(value: i32) -> i32 {
function htm_text_direction_from_str (line 4747) | pub unsafe extern "C" fn htm_text_direction_from_str(name: *const c_char...
function htm_link_type_from_i32 (line 4776) | pub unsafe extern "C" fn htm_link_type_from_i32(value: i32) -> i32 {
function htm_link_type_from_str (line 4795) | pub unsafe extern "C" fn htm_link_type_from_str(name: *const c_char) -> ...
function htm_image_type_from_i32 (line 4827) | pub unsafe extern "C" fn htm_image_type_from_i32(value: i32) -> i32 {
function htm_image_type_from_str (line 4844) | pub unsafe extern "C" fn htm_image_type_from_str(name: *const c_char) ->...
function htm_structured_data_type_from_i32 (line 4874) | pub unsafe extern "C" fn htm_structured_data_type_from_i32(value: i32) -...
function htm_structured_data_type_from_str (line 4890) | pub unsafe extern "C" fn htm_structured_data_type_from_str(name: *const ...
function htm_preprocessing_preset_from_i32 (line 4919) | pub unsafe extern "C" fn htm_preprocessing_preset_from_i32(value: i32) -...
function htm_preprocessing_preset_from_str (line 4935) | pub unsafe extern "C" fn htm_preprocessing_preset_from_str(name: *const ...
function htm_heading_style_from_i32 (line 4964) | pub unsafe extern "C" fn htm_heading_style_from_i32(value: i32) -> i32 {
function htm_heading_style_from_str (line 4980) | pub unsafe extern "C" fn htm_heading_style_from_str(name: *const c_char)...
function htm_list_indent_type_from_i32 (line 5009) | pub unsafe extern "C" fn htm_list_indent_type_from_i32(value: i32) -> i32 {
function htm_list_indent_type_from_str (line 5024) | pub unsafe extern "C" fn htm_list_indent_type_from_str(name: *const c_ch...
function htm_whitespace_mode_from_i32 (line 5052) | pub unsafe extern "C" fn htm_whitespace_mode_from_i32(value: i32) -> i32 {
function htm_whitespace_mode_from_str (line 5067) | pub unsafe extern "C" fn htm_whitespace_mode_from_str(name: *const c_cha...
function htm_newline_style_from_i32 (line 5095) | pub unsafe extern "C" fn htm_newline_style_from_i32(value: i32) -> i32 {
function htm_newline_style_from_str (line 5110) | pub unsafe extern "C" fn htm_newline_style_from_str(name: *const c_char)...
function htm_code_block_style_from_i32 (line 5138) | pub unsafe extern "C" fn htm_code_block_style_from_i32(value: i32) -> i32 {
function htm_code_block_style_from_str (line 5154) | pub unsafe extern "C" fn htm_code_block_style_from_str(name: *const c_ch...
function htm_highlight_style_from_i32 (line 5183) | pub unsafe extern "C" fn htm_highlight_style_from_i32(value: i32) -> i32 {
function htm_highlight_style_from_str (line 5200) | pub unsafe extern "C" fn htm_highlight_style_from_str(name: *const c_cha...
function htm_link_style_from_i32 (line 5230) | pub unsafe extern "C" fn htm_link_style_from_i32(value: i32) -> i32 {
function htm_link_style_from_str (line 5245) | pub unsafe extern "C" fn htm_link_style_from_str(name: *const c_char) ->...
function htm_output_format_from_i32 (line 5273) | pub unsafe extern "C" fn htm_output_format_from_i32(value: i32) -> i32 {
function htm_output_format_from_str (line 5289) | pub unsafe extern "C" fn htm_output_format_from_str(name: *const c_char)...
function htm_node_content_from_i32 (line 5318) | pub unsafe extern "C" fn htm_node_content_from_i32(value: i32) -> i32 {
function htm_node_content_from_str (line 5344) | pub unsafe extern "C" fn htm_node_content_from_str(name: *const c_char) ...
function htm_annotation_kind_from_i32 (line 5383) | pub unsafe extern "C" fn htm_annotation_kind_from_i32(value: i32) -> i32 {
function htm_annotation_kind_from_str (line 5405) | pub unsafe extern "C" fn htm_annotation_kind_from_str(name: *const c_cha...
function htm_warning_kind_from_i32 (line 5440) | pub unsafe extern "C" fn htm_warning_kind_from_i32(value: i32) -> i32 {
function htm_warning_kind_from_str (line 5459) | pub unsafe extern "C" fn htm_warning_kind_from_str(name: *const c_char) ...
function htm_node_type_from_i32 (line 5491) | pub unsafe extern "C" fn htm_node_type_from_i32(value: i32) -> i32 {
function htm_node_type_from_str (line 5592) | pub unsafe extern "C" fn htm_node_type_from_str(name: *const c_char) -> ...
function htm_visit_result_from_i32 (line 5706) | pub unsafe extern "C" fn htm_visit_result_from_i32(value: i32) -> i32 {
function htm_visit_result_from_str (line 5724) | pub unsafe extern "C" fn htm_visit_result_from_str(name: *const c_char) ...
function htm_link_type_free (line 5754) | pub unsafe extern "C" fn htm_link_type_free(ptr: *mut html_to_markdown_r...
function htm_link_type_to_json (line 5768) | pub unsafe extern "C" fn htm_link_type_to_json(ptr: *const html_to_markd...
function ffi_set_out_error (line 5792) | unsafe fn ffi_set_out_error(out_error: *mut *mut std::ffi::c_char, msg: ...
type HtmHtmlVisitorVTable (line 5812) | pub struct HtmHtmlVisitorVTable {
type HtmHtmlVisitorBridge (line 6251) | pub struct HtmHtmlVisitorBridge {
method fmt (line 6259) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method new (line 6290) | pub unsafe fn new(name: String, vtable: HtmHtmlVisitorVTable, user_dat...
method visit_element_start (line 6301) | fn visit_element_start(&mut self, _ctx: &html_to_markdown_rs::NodeCont...
method visit_element_end (line 6326) | fn visit_element_end(
method visit_text (line 6362) | fn visit_text(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_link (line 6394) | fn visit_link(
method visit_image (line 6450) | fn visit_image(
method visit_heading (line 6506) | fn visit_heading(
method visit_code_block (line 6546) | fn visit_code_block(
method visit_code_inline (line 6585) | fn visit_code_inline(
method visit_list_item (line 6621) | fn visit_list_item(
method visit_list_start (line 6675) | fn visit_list_start(
method visit_list_end (line 6704) | fn visit_list_end(
method visit_table_start (line 6741) | fn visit_table_start(&mut self, _ctx: &html_to_markdown_rs::NodeContex...
method visit_table_row (line 6766) | fn visit_table_row(
method visit_table_end (line 6812) | fn visit_table_end(
method visit_blockquote (line 6848) | fn visit_blockquote(
method visit_strong (line 6885) | fn visit_strong(
method visit_emphasis (line 6921) | fn visit_emphasis(
method visit_strikethrough (line 6957) | fn visit_strikethrough(
method visit_underline (line 6993) | fn visit_underline(
method visit_subscript (line 7029) | fn visit_subscript(
method visit_superscript (line 7065) | fn visit_superscript(
method visit_mark (line 7101) | fn visit_mark(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_line_break (line 7133) | fn visit_line_break(&mut self, _ctx: &html_to_markdown_rs::NodeContext...
method visit_horizontal_rule (line 7158) | fn visit_horizontal_rule(&mut self, _ctx: &html_to_markdown_rs::NodeCo...
method visit_custom_element (line 7183) | fn visit_custom_element(
method visit_definition_list_start (line 7227) | fn visit_definition_list_start(
method visit_definition_term (line 7255) | fn visit_definition_term(
method visit_definition_description (line 7291) | fn visit_definition_description(
method visit_definition_list_end (line 7327) | fn visit_definition_list_end(
method visit_form (line 7363) | fn visit_form(
method visit_input (line 7397) | fn visit_input(
method visit_button (line 7448) | fn visit_button(
method visit_audio (line 7484) | fn visit_audio(
method visit_video (line 7515) | fn visit_video(
method visit_iframe (line 7546) | fn visit_iframe(
method visit_details (line 7577) | fn visit_details(
method visit_summary (line 7606) | fn visit_summary(
method visit_figure_start (line 7642) | fn visit_figure_start(&mut self, _ctx: &html_to_markdown_rs::NodeConte...
method visit_figcaption (line 7667) | fn visit_figcaption(
method visit_figure_end (line 7703) | fn visit_figure_end(
method drop (line 6273) | fn drop(&mut self) {
function htm_options_set_visitor (line 7756) | pub unsafe extern "C" fn htm_options_set_visitor(
function htm_convert (line 8130) | pub unsafe extern "C" fn htm_convert(
FILE: crates/html-to-markdown-node/index.d.ts
type JsAnnotationKind (line 40) | type JsAnnotationKind =
type JsCodeBlockStyle (line 56) | enum JsCodeBlockStyle {
type JsConversionOptions (line 82) | interface JsConversionOptions {
class JsConversionOptionsBuilder (line 192) | class JsConversionOptionsBuilder {
type JsConversionOptionsUpdate (line 215) | interface JsConversionOptionsUpdate {
type JsConversionResult (line 316) | interface JsConversionResult {
type JsDocumentMetadata (line 358) | interface JsDocumentMetadata {
type JsDocumentNode (line 393) | interface JsDocumentNode {
type JsDocumentStructure (line 413) | interface JsDocumentStructure {
type JsGridCell (line 421) | interface JsGridCell {
type JsHeaderMetadata (line 458) | interface JsHeaderMetadata {
type JsHeadingStyle (line 476) | enum JsHeadingStyle {
type JsHighlightStyle (line 490) | enum JsHighlightStyle {
type JsHtmlMetadata (line 522) | interface JsHtmlMetadata {
class JsHtmlVisitor (line 563) | class JsHtmlVisitor {
type JsImageMetadata (line 750) | interface JsImageMetadata {
type JsImageType (line 768) | enum JsImageType {
type JsLinkMetadata (line 801) | interface JsLinkMetadata {
type JsLinkStyle (line 822) | enum JsLinkStyle {
type JsLinkType (line 834) | enum JsLinkType {
type JsListIndentType (line 854) | enum JsListIndentType {
type JsNewlineStyle (line 866) | enum JsNewlineStyle {
type JsNodeContent (line 878) | type JsNodeContent =
type JsNodeContext (line 899) | interface JsNodeContext {
type JsNodeType (line 922) | enum JsNodeType {
type JsOutputFormat (line 1106) | enum JsOutputFormat {
type JsPreprocessingOptions (line 1116) | interface JsPreprocessingOptions {
type JsPreprocessingOptionsUpdate (line 1134) | interface JsPreprocessingOptionsUpdate {
type JsPreprocessingPreset (line 1150) | enum JsPreprocessingPreset {
type JsProcessingWarning (line 1160) | interface JsProcessingWarning {
type JsStructuredData (line 1186) | interface JsStructuredData {
type JsStructuredDataType (line 1200) | enum JsStructuredDataType {
type JsTableData (line 1210) | interface JsTableData {
type JsTableGrid (line 1218) | interface JsTableGrid {
type JsTextAnnotation (line 1232) | interface JsTextAnnotation {
type JsTextDirection (line 1246) | enum JsTextDirection {
class JsVisitorHandle (line 1260) | class JsVisitorHandle {
type JsVisitResult (line 1270) | enum JsVisitResult {
type JsWarningKind (line 1301) | enum JsWarningKind {
type JsWhitespaceMode (line 1321) | enum JsWhitespaceMode {
FILE: crates/html-to-markdown-node/index.js
function requireNative (line 65) | function requireNative() {
FILE: crates/html-to-markdown-node/src/lib.rs
type JsDocumentMetadata (line 49) | pub struct JsDocumentMetadata {
method from (line 3622) | fn from(val: html_to_markdown_rs::metadata::DocumentMetadata) -> Self {
type JsHeaderMetadata (line 71) | pub struct JsHeaderMetadata {
method from (line 3654) | fn from(val: html_to_markdown_rs::metadata::HeaderMetadata) -> Self {
type JsLinkMetadata (line 82) | pub struct JsLinkMetadata {
method from (line 3681) | fn from(val: html_to_markdown_rs::metadata::LinkMetadata) -> Self {
type JsImageMetadata (line 94) | pub struct JsImageMetadata {
method from (line 3709) | fn from(val: html_to_markdown_rs::metadata::ImageMetadata) -> Self {
type JsStructuredData (line 106) | pub struct JsStructuredData {
method from (line 3737) | fn from(val: html_to_markdown_rs::metadata::StructuredData) -> Self {
type JsHtmlMetadata (line 117) | pub struct JsHtmlMetadata {
method from (line 3773) | fn from(val: html_to_markdown_rs::metadata::HtmlMetadata) -> Self {
type JsConversionOptions (line 128) | pub struct JsConversionOptions {
method from (line 3837) | fn from(val: html_to_markdown_rs::options::ConversionOptions) -> Self {
type JsConversionOptionsBuilder (line 208) | pub struct JsConversionOptionsBuilder {
method strip_tags (line 215) | pub fn strip_tags(&self, tags: Vec<String>) -> JsConversionOptionsBuil...
method preserve_tags (line 222) | pub fn preserve_tags(&self, tags: Vec<String>) -> JsConversionOptionsB...
method keep_inline_images_in (line 229) | pub fn keep_inline_images_in(&self, tags: Vec<String>) -> JsConversion...
method exclude_selectors (line 236) | pub fn exclude_selectors(&self, selectors: Vec<String>) -> JsConversio...
method preprocessing (line 243) | pub fn preprocessing(&self, preprocessing: JsPreprocessingOptions) -> ...
method build (line 251) | pub fn build(&self) -> JsConversionOptions {
type JsConversionOptionsUpdate (line 258) | pub struct JsConversionOptionsUpdate {
method from (line 3937) | fn from(val: html_to_markdown_rs::options::ConversionOptionsUpdate) ->...
type JsPreprocessingOptions (line 338) | pub struct JsPreprocessingOptions {
method from (line 3998) | fn from(val: html_to_markdown_rs::options::PreprocessingOptions) -> Se...
type JsPreprocessingOptionsUpdate (line 349) | pub struct JsPreprocessingOptionsUpdate {
method from (line 4022) | fn from(val: html_to_markdown_rs::options::PreprocessingOptionsUpdate)...
type JsDocumentStructure (line 360) | pub struct JsDocumentStructure {
method from (line 4044) | fn from(val: html_to_markdown_rs::DocumentStructure) -> Self {
type JsDocumentNode (line 368) | pub struct JsDocumentNode {
method from (line 4068) | fn from(val: html_to_markdown_rs::DocumentNode) -> Self {
type JsTextAnnotation (line 379) | pub struct JsTextAnnotation {
method from (line 4093) | fn from(val: html_to_markdown_rs::TextAnnotation) -> Self {
type JsConversionResult (line 387) | pub struct JsConversionResult {
method from (line 4126) | fn from(val: html_to_markdown_rs::ConversionResult) -> Self {
type JsTableGrid (line 398) | pub struct JsTableGrid {
method from (line 4154) | fn from(val: html_to_markdown_rs::TableGrid) -> Self {
type JsGridCell (line 406) | pub struct JsGridCell {
method from (line 4179) | fn from(val: html_to_markdown_rs::GridCell) -> Self {
type JsTableData (line 420) | pub struct JsTableData {
method from (line 4203) | fn from(val: html_to_markdown_rs::TableData) -> Self {
type JsProcessingWarning (line 427) | pub struct JsProcessingWarning {
method from (line 4223) | fn from(val: html_to_markdown_rs::ProcessingWarning) -> Self {
type JsVisitorHandle (line 434) | pub struct JsVisitorHandle {
type JsNodeContext (line 443) | pub struct JsNodeContext {
method from (line 4233) | fn from(val: html_to_markdown_rs::NodeContext) -> Self {
type JsTextDirection (line 460) | pub enum JsTextDirection {
method from (line 4257) | fn from(val: html_to_markdown_rs::metadata::TextDirection) -> Self {
method default (line 468) | fn default() -> Self {
type JsLinkType (line 475) | pub enum JsLinkType {
method from (line 4280) | fn from(val: html_to_markdown_rs::metadata::LinkType) -> Self {
method default (line 486) | fn default() -> Self {
type JsImageType (line 493) | pub enum JsImageType {
method from (line 4304) | fn from(val: html_to_markdown_rs::metadata::ImageType) -> Self {
method default (line 502) | fn default() -> Self {
type JsStructuredDataType (line 509) | pub enum JsStructuredDataType {
method from (line 4325) | fn from(val: html_to_markdown_rs::metadata::StructuredDataType) -> Self {
method default (line 517) | fn default() -> Self {
type JsPreprocessingPreset (line 524) | pub enum JsPreprocessingPreset {
method from (line 4345) | fn from(val: html_to_markdown_rs::options::PreprocessingPreset) -> Self {
method default (line 532) | fn default() -> Self {
type JsHeadingStyle (line 539) | pub enum JsHeadingStyle {
method from (line 4365) | fn from(val: html_to_markdown_rs::options::HeadingStyle) -> Self {
method default (line 547) | fn default() -> Self {
type JsListIndentType (line 554) | pub enum JsListIndentType {
method from (line 4384) | fn from(val: html_to_markdown_rs::options::ListIndentType) -> Self {
method default (line 561) | fn default() -> Self {
type JsWhitespaceMode (line 568) | pub enum JsWhitespaceMode {
method from (line 4402) | fn from(val: html_to_markdown_rs::options::WhitespaceMode) -> Self {
method default (line 575) | fn default() -> Self {
type JsNewlineStyle (line 582) | pub enum JsNewlineStyle {
method from (line 4420) | fn from(val: html_to_markdown_rs::options::NewlineStyle) -> Self {
method default (line 589) | fn default() -> Self {
type JsCodeBlockStyle (line 596) | pub enum JsCodeBlockStyle {
method from (line 4439) | fn from(val: html_to_markdown_rs::options::CodeBlockStyle) -> Self {
method default (line 604) | fn default() -> Self {
type JsHighlightStyle (line 611) | pub enum JsHighlightStyle {
method from (line 4460) | fn from(val: html_to_markdown_rs::options::HighlightStyle) -> Self {
method default (line 620) | fn default() -> Self {
type JsLinkStyle (line 627) | pub enum JsLinkStyle {
method from (line 4480) | fn from(val: html_to_markdown_rs::options::LinkStyle) -> Self {
method default (line 634) | fn default() -> Self {
type JsOutputFormat (line 641) | pub enum JsOutputFormat {
method from (line 4499) | fn from(val: html_to_markdown_rs::options::OutputFormat) -> Self {
method default (line 649) | fn default() -> Self {
type JsNodeContent (line 656) | pub struct JsNodeContent {
method from (line 4563) | fn from(val: html_to_markdown_rs::NodeContent) -> Self {
method default (line 682) | fn default() -> Self {
type JsAnnotationKind (line 707) | pub struct JsAnnotationKind {
method from (line 4845) | fn from(val: html_to_markdown_rs::AnnotationKind) -> Self {
method default (line 716) | fn default() -> Self {
type JsWarningKind (line 727) | pub enum JsWarningKind {
method from (line 4910) | fn from(val: html_to_markdown_rs::WarningKind) -> Self {
method default (line 738) | fn default() -> Self {
type JsNodeType (line 745) | pub enum JsNodeType {
method from (line 4923) | fn from(val: html_to_markdown_rs::NodeType) -> Self {
method default (line 838) | fn default() -> Self {
type JsVisitResult (line 845) | pub enum JsVisitResult {
method from (line 5018) | fn from(val: html_to_markdown_rs::VisitResult) -> Self {
method default (line 855) | fn default() -> Self {
function convert (line 862) | pub fn convert(html: String, options: Option<JsConversionOptions>) -> Re...
function nodecontext_to_js_object (line 888) | fn nodecontext_to_js_object<'e>(
type JsHtmlVisitorBridge (line 916) | pub struct JsHtmlVisitorBridge {
method fmt (line 921) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method new (line 927) | pub fn new(js_obj: napi::bindgen_prelude::Object<'_>) -> Self {
method env (line 935) | fn env(&self) -> napi::Env {
method visit_element_start (line 943) | fn visit_element_start(&mut self, _ctx: &html_to_markdown_rs::NodeCont...
method visit_element_end (line 987) | fn visit_element_end(
method visit_text (line 1048) | fn visit_text(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_link (line 1105) | fn visit_link(
method visit_image (line 1205) | fn visit_image(
method visit_heading (line 1305) | fn visit_heading(
method visit_code_block (line 1405) | fn visit_code_block(
method visit_code_inline (line 1492) | fn visit_code_inline(
method visit_list_item (line 1553) | fn visit_list_item(
method visit_list_start (line 1637) | fn visit_list_start(
method visit_list_end (line 1692) | fn visit_list_end(
method visit_table_start (line 1763) | fn visit_table_start(&mut self, _ctx: &html_to_markdown_rs::NodeContex...
method visit_table_row (line 1807) | fn visit_table_row(
method visit_table_end (line 1878) | fn visit_table_end(
method visit_blockquote (line 1939) | fn visit_blockquote(
method visit_strong (line 2016) | fn visit_strong(
method visit_emphasis (line 2077) | fn visit_emphasis(
method visit_strikethrough (line 2138) | fn visit_strikethrough(
method visit_underline (line 2199) | fn visit_underline(
method visit_subscript (line 2260) | fn visit_subscript(
method visit_superscript (line 2321) | fn visit_superscript(
method visit_mark (line 2382) | fn visit_mark(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_line_break (line 2439) | fn visit_line_break(&mut self, _ctx: &html_to_markdown_rs::NodeContext...
method visit_horizontal_rule (line 2483) | fn visit_horizontal_rule(&mut self, _ctx: &html_to_markdown_rs::NodeCo...
method visit_custom_element (line 2527) | fn visit_custom_element(
method visit_definition_list_start (line 2604) | fn visit_definition_list_start(
method visit_definition_term (line 2651) | fn visit_definition_term(
method visit_definition_description (line 2712) | fn visit_definition_description(
method visit_definition_list_end (line 2776) | fn visit_definition_list_end(
method visit_form (line 2837) | fn visit_form(
method visit_input (line 2934) | fn visit_input(
method visit_button (line 3044) | fn visit_button(
method visit_audio (line 3105) | fn visit_audio(
method visit_video (line 3176) | fn visit_video(
method visit_iframe (line 3247) | fn visit_iframe(
method visit_details (line 3318) | fn visit_details(
method visit_summary (line 3373) | fn visit_summary(
method visit_figure_start (line 3434) | fn visit_figure_start(&mut self, _ctx: &html_to_markdown_rs::NodeConte...
method visit_figcaption (line 3478) | fn visit_figcaption(
method visit_figure_end (line 3539) | fn visit_figure_end(
function from (line 3603) | fn from(val: JsDocumentMetadata) -> Self {
function from (line 3641) | fn from(val: JsHeaderMetadata) -> Self {
function from (line 3667) | fn from(val: JsLinkMetadata) -> Self {
function from (line 3695) | fn from(val: JsImageMetadata) -> Self {
function from (line 3726) | fn from(val: JsStructuredData) -> Self {
function from (line 3748) | fn from(val: JsHtmlMetadata) -> Self {
function from (line 3787) | fn from(val: JsConversionOptions) -> Self {
function from (line 3887) | fn from(val: JsConversionOptionsUpdate) -> Self {
function from (line 3986) | fn from(val: JsPreprocessingOptions) -> Self {
function from (line 4010) | fn from(val: JsPreprocessingOptionsUpdate) -> Self {
function from (line 4034) | fn from(val: JsDocumentStructure) -> Self {
function from (line 4054) | fn from(val: JsDocumentNode) -> Self {
function from (line 4082) | fn from(val: JsTextAnnotation) -> Self {
function from (line 4105) | fn from(val: JsConversionResult) -> Self {
function from (line 4140) | fn from(val: JsTableGrid) -> Self {
function from (line 4165) | fn from(val: JsGridCell) -> Self {
function from (line 4193) | fn from(val: JsTableData) -> Self {
function from (line 4213) | fn from(val: JsProcessingWarning) -> Self {
function from (line 4247) | fn from(val: JsTextDirection) -> Self {
function from (line 4267) | fn from(val: JsLinkType) -> Self {
function from (line 4293) | fn from(val: JsImageType) -> Self {
function from (line 4315) | fn from(val: JsStructuredDataType) -> Self {
function from (line 4335) | fn from(val: JsPreprocessingPreset) -> Self {
function from (line 4355) | fn from(val: JsHeadingStyle) -> Self {
function from (line 4375) | fn from(val: JsListIndentType) -> Self {
function from (line 4393) | fn from(val: JsWhitespaceMode) -> Self {
function from (line 4411) | fn from(val: JsNewlineStyle) -> Self {
function from (line 4429) | fn from(val: JsCodeBlockStyle) -> Self {
function from (line 4449) | fn from(val: JsHighlightStyle) -> Self {
function from (line 4471) | fn from(val: JsLinkStyle) -> Self {
function from (line 4489) | fn from(val: JsOutputFormat) -> Self {
function from (line 4509) | fn from(val: JsNodeContent) -> Self {
function from (line 4825) | fn from(val: JsAnnotationKind) -> Self {
function from (line 4897) | fn from(val: JsWarningKind) -> Self {
constant CONVERSION_ERROR_ERROR_PARSE_ERROR (line 5030) | pub const CONVERSION_ERROR_ERROR_PARSE_ERROR: &str = "ParseError";
constant CONVERSION_ERROR_ERROR_SANITIZATION_ERROR (line 5031) | pub const CONVERSION_ERROR_ERROR_SANITIZATION_ERROR: &str = "Sanitizatio...
constant CONVERSION_ERROR_ERROR_CONFIG_ERROR (line 5032) | pub const CONVERSION_ERROR_ERROR_CONFIG_ERROR: &str = "ConfigError";
constant CONVERSION_ERROR_ERROR_IO_ERROR (line 5033) | pub const CONVERSION_ERROR_ERROR_IO_ERROR: &str = "IoError";
constant CONVERSION_ERROR_ERROR_PANIC (line 5034) | pub const CONVERSION_ERROR_ERROR_PANIC: &str = "Panic";
constant CONVERSION_ERROR_ERROR_INVALID_INPUT (line 5035) | pub const CONVERSION_ERROR_ERROR_INVALID_INPUT: &str = "InvalidInput";
constant CONVERSION_ERROR_ERROR_OTHER (line 5036) | pub const CONVERSION_ERROR_ERROR_OTHER: &str = "Other";
function conversion_error_to_napi_err (line 5040) | fn conversion_error_to_napi_err(e: html_to_markdown_rs::error::Conversio...
FILE: crates/html-to-markdown-php/src/lib.rs
type DocumentMetadata (line 27) | pub struct DocumentMetadata {
method from_json (line 65) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_open_graph (line 70) | pub fn get_open_graph(&self) -> HashMap<String, String> {
method get_twitter_card (line 75) | pub fn get_twitter_card(&self) -> HashMap<String, String> {
method get_meta_tags (line 80) | pub fn get_meta_tags(&self) -> HashMap<String, String> {
method from (line 2771) | fn from(val: html_to_markdown_rs::metadata::DocumentMetadata) -> Self {
type HeaderMetadata (line 88) | pub struct HeaderMetadata {
method __construct (line 108) | pub fn __construct(level: u8, text: String, depth: i64, html_offset: i...
method is_valid (line 118) | pub fn is_valid(&self) -> bool {
method from (line 2808) | fn from(val: html_to_markdown_rs::metadata::HeaderMetadata) -> Self {
type LinkMetadata (line 133) | pub struct LinkMetadata {
method from_json (line 155) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_attributes (line 160) | pub fn get_attributes(&self) -> HashMap<String, String> {
method classify_link (line 185) | pub fn classify_link(href: String) -> String {
method from (line 2828) | fn from(val: html_to_markdown_rs::metadata::LinkMetadata) -> Self {
type ImageMetadata (line 196) | pub struct ImageMetadata {
method from_json (line 219) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_attributes (line 224) | pub fn get_attributes(&self) -> HashMap<String, String> {
method from (line 2852) | fn from(val: html_to_markdown_rs::metadata::ImageMetadata) -> Self {
type StructuredData (line 232) | pub struct StructuredData {
method from_json (line 246) | pub fn from_json(json: String) -> PhpResult<Self> {
method from (line 2879) | fn from(val: html_to_markdown_rs::metadata::StructuredData) -> Self {
type HtmlMetadata (line 254) | pub struct HtmlMetadata {
method from_json (line 269) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_document (line 274) | pub fn get_document(&self) -> DocumentMetadata {
method get_headers (line 279) | pub fn get_headers(&self) -> Vec<HeaderMetadata> {
method get_links (line 284) | pub fn get_links(&self) -> Vec<LinkMetadata> {
method get_images (line 289) | pub fn get_images(&self) -> Vec<ImageMetadata> {
method get_structured_data (line 294) | pub fn get_structured_data(&self) -> Vec<StructuredData> {
method from (line 2900) | fn from(val: html_to_markdown_rs::metadata::HtmlMetadata) -> Self {
type ConversionOptions (line 303) | pub struct ConversionOptions {
method from_json (line 444) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_preprocessing (line 449) | pub fn get_preprocessing(&self) -> PreprocessingOptions {
method get_visitor (line 454) | pub fn get_visitor(&self) -> Option<VisitorHandle> {
method default (line 459) | pub fn default() -> ConversionOptions {
method builder (line 466) | pub fn builder() -> ConversionOptionsBuilder {
method from (line 2920) | fn from(val: html_to_markdown_rs::options::ConversionOptions) -> Self {
type ConversionOptionsBuilder (line 476) | pub struct ConversionOptionsBuilder {
method strip_tags (line 485) | pub fn strip_tags(&self, tags: Vec<String>) -> ConversionOptionsBuilder {
method preserve_tags (line 494) | pub fn preserve_tags(&self, tags: Vec<String>) -> ConversionOptionsBui...
method keep_inline_images_in (line 503) | pub fn keep_inline_images_in(&self, tags: Vec<String>) -> ConversionOp...
method exclude_selectors (line 512) | pub fn exclude_selectors(&self, selectors: Vec<String>) -> ConversionO...
method visitor (line 521) | pub fn visitor(&self, visitor: Option<&VisitorHandle>) -> ConversionOp...
method preprocessing (line 530) | pub fn preprocessing(&self, preprocessing: &PreprocessingOptions) -> C...
method build (line 539) | pub fn build(&self) -> ConversionOptions {
type ConversionOptionsUpdate (line 548) | pub struct ConversionOptionsUpdate {
method from_json (line 675) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_preprocessing (line 680) | pub fn get_preprocessing(&self) -> Option<PreprocessingOptionsUpdate> {
method get_visitor (line 685) | pub fn get_visitor(&self) -> Option<VisitorHandle> {
method from (line 3000) | fn from(val: html_to_markdown_rs::options::ConversionOptionsUpdate) ->...
type PreprocessingOptions (line 693) | pub struct PreprocessingOptions {
method from_json (line 710) | pub fn from_json(json: String) -> PhpResult<Self> {
method default (line 715) | pub fn default() -> PreprocessingOptions {
method from (line 3096) | fn from(val: html_to_markdown_rs::options::PreprocessingOptions) -> Se...
type PreprocessingOptionsUpdate (line 723) | pub struct PreprocessingOptionsUpdate {
method from_json (line 740) | pub fn from_json(json: String) -> PhpResult<Self> {
method from (line 3118) | fn from(val: html_to_markdown_rs::options::PreprocessingOptionsUpdate)...
type DocumentStructure (line 748) | pub struct DocumentStructure {
method from_json (line 758) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_nodes (line 763) | pub fn get_nodes(&self) -> Vec<DocumentNode> {
method from (line 3145) | fn from(val: html_to_markdown_rs::DocumentStructure) -> Self {
type DocumentNode (line 771) | pub struct DocumentNode {
method from_json (line 791) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_content (line 796) | pub fn get_content(&self) -> NodeContent {
method get_annotations (line 801) | pub fn get_annotations(&self) -> Vec<TextAnnotation> {
method get_attributes (line 806) | pub fn get_attributes(&self) -> Option<HashMap<String, String>> {
method from (line 3169) | fn from(val: html_to_markdown_rs::DocumentNode) -> Self {
type TextAnnotation (line 814) | pub struct TextAnnotation {
method from_json (line 827) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_kind (line 832) | pub fn get_kind(&self) -> AnnotationKind {
method from (line 3194) | fn from(val: html_to_markdown_rs::TextAnnotation) -> Self {
type ConversionResult (line 840) | pub struct ConversionResult {
method from_json (line 867) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_document (line 872) | pub fn get_document(&self) -> Option<DocumentStructure> {
method get_metadata (line 877) | pub fn get_metadata(&self) -> HtmlMetadata {
method get_tables (line 882) | pub fn get_tables(&self) -> Vec<TableData> {
method get_warnings (line 887) | pub fn get_warnings(&self) -> Vec<ProcessingWarning> {
method from (line 3212) | fn from(val: html_to_markdown_rs::ConversionResult) -> Self {
type TableGrid (line 896) | pub struct TableGrid {
method from_json (line 909) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_cells (line 914) | pub fn get_cells(&self) -> Vec<GridCell> {
method from (line 3237) | fn from(val: html_to_markdown_rs::TableGrid) -> Self {
type GridCell (line 923) | pub struct GridCell {
method __construct (line 946) | pub fn __construct(content: String, row: u32, col: u32, row_span: u32,...
method from (line 3262) | fn from(val: html_to_markdown_rs::GridCell) -> Self {
type TableData (line 961) | pub struct TableData {
method from_json (line 971) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_grid (line 976) | pub fn get_grid(&self) -> TableGrid {
method from (line 3286) | fn from(val: html_to_markdown_rs::TableData) -> Self {
type ProcessingWarning (line 984) | pub struct ProcessingWarning {
method from_json (line 995) | pub fn from_json(json: String) -> PhpResult<Self> {
method from (line 3303) | fn from(val: html_to_markdown_rs::ProcessingWarning) -> Self {
type VisitorHandle (line 1003) | pub struct VisitorHandle {
type NodeContext (line 1013) | pub struct NodeContext {
method from_json (line 1038) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_attributes (line 1043) | pub fn get_attributes(&self) -> HashMap<String, String> {
method from (line 3316) | fn from(val: html_to_markdown_rs::NodeContext) -> Self {
constant TEXTDIRECTION_LEFTTORIGHT (line 1049) | pub const TEXTDIRECTION_LEFTTORIGHT: &str = "LeftToRight";
constant TEXTDIRECTION_RIGHTTOLEFT (line 1050) | pub const TEXTDIRECTION_RIGHTTOLEFT: &str = "RightToLeft";
constant TEXTDIRECTION_AUTO (line 1051) | pub const TEXTDIRECTION_AUTO: &str = "Auto";
constant LINKTYPE_ANCHOR (line 1054) | pub const LINKTYPE_ANCHOR: &str = "Anchor";
constant LINKTYPE_INTERNAL (line 1055) | pub const LINKTYPE_INTERNAL: &str = "Internal";
constant LINKTYPE_EXTERNAL (line 1056) | pub const LINKTYPE_EXTERNAL: &str = "External";
constant LINKTYPE_EMAIL (line 1057) | pub const LINKTYPE_EMAIL: &str = "Email";
constant LINKTYPE_PHONE (line 1058) | pub const LINKTYPE_PHONE: &str = "Phone";
constant LINKTYPE_OTHER (line 1059) | pub const LINKTYPE_OTHER: &str = "Other";
constant IMAGETYPE_DATAURI (line 1062) | pub const IMAGETYPE_DATAURI: &str = "DataUri";
constant IMAGETYPE_INLINESVG (line 1063) | pub const IMAGETYPE_INLINESVG: &str = "InlineSvg";
constant IMAGETYPE_EXTERNAL (line 1064) | pub const IMAGETYPE_EXTERNAL: &str = "External";
constant IMAGETYPE_RELATIVE (line 1065) | pub const IMAGETYPE_RELATIVE: &str = "Relative";
constant STRUCTUREDDATATYPE_JSONLD (line 1068) | pub const STRUCTUREDDATATYPE_JSONLD: &str = "JsonLd";
constant STRUCTUREDDATATYPE_MICRODATA (line 1069) | pub const STRUCTUREDDATATYPE_MICRODATA: &str = "Microdata";
constant STRUCTUREDDATATYPE_RDFA (line 1070) | pub const STRUCTUREDDATATYPE_RDFA: &str = "RDFa";
constant PREPROCESSINGPRESET_MINIMAL (line 1073) | pub const PREPROCESSINGPRESET_MINIMAL: &str = "Minimal";
constant PREPROCESSINGPRESET_STANDARD (line 1074) | pub const PREPROCESSINGPRESET_STANDARD: &str = "Standard";
constant PREPROCESSINGPRESET_AGGRESSIVE (line 1075) | pub const PREPROCESSINGPRESET_AGGRESSIVE: &str = "Aggressive";
constant HEADINGSTYLE_UNDERLINED (line 1078) | pub const HEADINGSTYLE_UNDERLINED: &str = "Underlined";
constant HEADINGSTYLE_ATX (line 1079) | pub const HEADINGSTYLE_ATX: &str = "Atx";
constant HEADINGSTYLE_ATXCLOSED (line 1080) | pub const HEADINGSTYLE_ATXCLOSED: &str = "AtxClosed";
constant LISTINDENTTYPE_SPACES (line 1083) | pub const LISTINDENTTYPE_SPACES: &str = "Spaces";
constant LISTINDENTTYPE_TABS (line 1084) | pub const LISTINDENTTYPE_TABS: &str = "Tabs";
constant WHITESPACEMODE_NORMALIZED (line 1087) | pub const WHITESPACEMODE_NORMALIZED: &str = "Normalized";
constant WHITESPACEMODE_STRICT (line 1088) | pub const WHITESPACEMODE_STRICT: &str = "Strict";
constant NEWLINESTYLE_SPACES (line 1091) | pub const NEWLINESTYLE_SPACES: &str = "Spaces";
constant NEWLINESTYLE_BACKSLASH (line 1092) | pub const NEWLINESTYLE_BACKSLASH: &str = "Backslash";
constant CODEBLOCKSTYLE_INDENTED (line 1095) | pub const CODEBLOCKSTYLE_INDENTED: &str = "Indented";
constant CODEBLOCKSTYLE_BACKTICKS (line 1096) | pub const CODEBLOCKSTYLE_BACKTICKS: &str = "Backticks";
constant CODEBLOCKSTYLE_TILDES (line 1097) | pub const CODEBLOCKSTYLE_TILDES: &str = "Tildes";
constant HIGHLIGHTSTYLE_DOUBLEEQUAL (line 1100) | pub const HIGHLIGHTSTYLE_DOUBLEEQUAL: &str = "DoubleEqual";
constant HIGHLIGHTSTYLE_HTML (line 1101) | pub const HIGHLIGHTSTYLE_HTML: &str = "Html";
constant HIGHLIGHTSTYLE_BOLD (line 1102) | pub const HIGHLIGHTSTYLE_BOLD: &str = "Bold";
constant HIGHLIGHTSTYLE_NONE (line 1103) | pub const HIGHLIGHTSTYLE_NONE: &str = "None";
constant LINKSTYLE_INLINE (line 1106) | pub const LINKSTYLE_INLINE: &str = "Inline";
constant LINKSTYLE_REFERENCE (line 1107) | pub const LINKSTYLE_REFERENCE: &str = "Reference";
constant OUTPUTFORMAT_MARKDOWN (line 1110) | pub const OUTPUTFORMAT_MARKDOWN: &str = "Markdown";
constant OUTPUTFORMAT_DJOT (line 1111) | pub const OUTPUTFORMAT_DJOT: &str = "Djot";
constant OUTPUTFORMAT_PLAIN (line 1112) | pub const OUTPUTFORMAT_PLAIN: &str = "Plain";
type NodeContent (line 1117) | pub struct NodeContent {
method from_json (line 1157) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_node_type_tag (line 1162) | pub fn get_node_type_tag(&self) -> String {
method get_level (line 1167) | pub fn get_level(&self) -> Option<u8> {
method get_text (line 1172) | pub fn get_text(&self) -> Option<String> {
method get_ordered (line 1177) | pub fn get_ordered(&self) -> Option<bool> {
method get_grid (line 1182) | pub fn get_grid(&self) -> Option<TableGrid> {
method get_description (line 1187) | pub fn get_description(&self) -> Option<String> {
method get_src (line 1192) | pub fn get_src(&self) -> Option<String> {
method get_image_index (line 1197) | pub fn get_image_index(&self) -> Option<u32> {
method get_language (line 1202) | pub fn get_language(&self) -> Option<String> {
method get_term (line 1207) | pub fn get_term(&self) -> Option<String> {
method get_definition (line 1212) | pub fn get_definition(&self) -> Option<String> {
method get_format (line 1217) | pub fn get_format(&self) -> Option<String> {
method get_content (line 1222) | pub fn get_content(&self) -> Option<String> {
method get_entries (line 1227) | pub fn get_entries(&self) -> Option<Vec<String>> {
method get_label (line 1232) | pub fn get_label(&self) -> Option<String> {
method get_heading_level (line 1237) | pub fn get_heading_level(&self) -> Option<u8> {
method get_heading_text (line 1242) | pub fn get_heading_text(&self) -> Option<String> {
method from (line 3333) | fn from(val: html_to_markdown_rs::NodeContent) -> Self {
type AnnotationKind (line 1250) | pub struct AnnotationKind {
method from_json (line 1262) | pub fn from_json(json: String) -> PhpResult<Self> {
method get_annotation_type_tag (line 1267) | pub fn get_annotation_type_tag(&self) -> String {
method get_url (line 1272) | pub fn get_url(&self) -> Option<String> {
method get_title (line 1277) | pub fn get_title(&self) -> Option<String> {
method from (line 3473) | fn from(val: html_to_markdown_rs::AnnotationKind) -> Self {
constant WARNINGKIND_IMAGEEXTRACTIONFAILED (line 1283) | pub const WARNINGKIND_IMAGEEXTRACTIONFAILED: &str = "ImageExtractionFail...
constant WARNINGKIND_ENCODINGFALLBACK (line 1284) | pub const WARNINGKIND_ENCODINGFALLBACK: &str = "EncodingFallback";
constant WARNINGKIND_TRUNCATEDINPUT (line 1285) | pub const WARNINGKIND_TRUNCATEDINPUT: &str = "TruncatedInput";
constant WARNINGKIND_MALFORMEDHTML (line 1286) | pub const WARNINGKIND_MALFORMEDHTML: &str = "MalformedHtml";
constant WARNINGKIND_SANITIZATIONAPPLIED (line 1287) | pub const WARNINGKIND_SANITIZATIONAPPLIED: &str = "SanitizationApplied";
constant WARNINGKIND_DEPTHLIMITEXCEEDED (line 1288) | pub const WARNINGKIND_DEPTHLIMITEXCEEDED: &str = "DepthLimitExceeded";
constant NODETYPE_TEXT (line 1291) | pub const NODETYPE_TEXT: &str = "Text";
constant NODETYPE_ELEMENT (line 1292) | pub const NODETYPE_ELEMENT: &str = "Element";
constant NODETYPE_HEADING (line 1293) | pub const NODETYPE_HEADING: &str = "Heading";
constant NODETYPE_PARAGRAPH (line 1294) | pub const NODETYPE_PARAGRAPH: &str = "Paragraph";
constant NODETYPE_DIV (line 1295) | pub const NODETYPE_DIV: &str = "Div";
constant NODETYPE_BLOCKQUOTE (line 1296) | pub const NODETYPE_BLOCKQUOTE: &str = "Blockquote";
constant NODETYPE_PRE (line 1297) | pub const NODETYPE_PRE: &str = "Pre";
constant NODETYPE_HR (line 1298) | pub const NODETYPE_HR: &str = "Hr";
constant NODETYPE_LIST (line 1299) | pub const NODETYPE_LIST: &str = "List";
constant NODETYPE_LISTITEM (line 1300) | pub const NODETYPE_LISTITEM: &str = "ListItem";
constant NODETYPE_DEFINITIONLIST (line 1301) | pub const NODETYPE_DEFINITIONLIST: &str = "DefinitionList";
constant NODETYPE_DEFINITIONTERM (line 1302) | pub const NODETYPE_DEFINITIONTERM: &str = "DefinitionTerm";
constant NODETYPE_DEFINITIONDESCRIPTION (line 1303) | pub const NODETYPE_DEFINITIONDESCRIPTION: &str = "DefinitionDescription";
constant NODETYPE_TABLE (line 1304) | pub const NODETYPE_TABLE: &str = "Table";
constant NODETYPE_TABLEROW (line 1305) | pub const NODETYPE_TABLEROW: &str = "TableRow";
constant NODETYPE_TABLECELL (line 1306) | pub const NODETYPE_TABLECELL: &str = "TableCell";
constant NODETYPE_TABLEHEADER (line 1307) | pub const NODETYPE_TABLEHEADER: &str = "TableHeader";
constant NODETYPE_TABLEBODY (line 1308) | pub const NODETYPE_TABLEBODY: &str = "TableBody";
constant NODETYPE_TABLEHEAD (line 1309) | pub const NODETYPE_TABLEHEAD: &str = "TableHead";
constant NODETYPE_TABLEFOOT (line 1310) | pub const NODETYPE_TABLEFOOT: &str = "TableFoot";
constant NODETYPE_LINK (line 1311) | pub const NODETYPE_LINK: &str = "Link";
constant NODETYPE_IMAGE (line 1312) | pub const NODETYPE_IMAGE: &str = "Image";
constant NODETYPE_STRONG (line 1313) | pub const NODETYPE_STRONG: &str = "Strong";
constant NODETYPE_EM (line 1314) | pub const NODETYPE_EM: &str = "Em";
constant NODETYPE_CODE (line 1315) | pub const NODETYPE_CODE: &str = "Code";
constant NODETYPE_STRIKETHROUGH (line 1316) | pub const NODETYPE_STRIKETHROUGH: &str = "Strikethrough";
constant NODETYPE_UNDERLINE (line 1317) | pub const NODETYPE_UNDERLINE: &str = "Underline";
constant NODETYPE_SUBSCRIPT (line 1318) | pub const NODETYPE_SUBSCRIPT: &str = "Subscript";
constant NODETYPE_SUPERSCRIPT (line 1319) | pub const NODETYPE_SUPERSCRIPT: &str = "Superscript";
constant NODETYPE_MARK (line 1320) | pub const NODETYPE_MARK: &str = "Mark";
constant NODETYPE_SMALL (line 1321) | pub const NODETYPE_SMALL: &str = "Small";
constant NODETYPE_BR (line 1322) | pub const NODETYPE_BR: &str = "Br";
constant NODETYPE_SPAN (line 1323) | pub const NODETYPE_SPAN: &str = "Span";
constant NODETYPE_ARTICLE (line 1324) | pub const NODETYPE_ARTICLE: &str = "Article";
constant NODETYPE_SECTION (line 1325) | pub const NODETYPE_SECTION: &str = "Section";
constant NODETYPE_NAV (line 1326) | pub const NODETYPE_NAV: &str = "Nav";
constant NODETYPE_ASIDE (line 1327) | pub const NODETYPE_ASIDE: &str = "Aside";
constant NODETYPE_HEADER (line 1328) | pub const NODETYPE_HEADER: &str = "Header";
constant NODETYPE_FOOTER (line 1329) | pub const NODETYPE_FOOTER: &str = "Footer";
constant NODETYPE_MAIN (line 1330) | pub const NODETYPE_MAIN: &str = "Main";
constant NODETYPE_FIGURE (line 1331) | pub const NODETYPE_FIGURE: &str = "Figure";
constant NODETYPE_FIGCAPTION (line 1332) | pub const NODETYPE_FIGCAPTION: &str = "Figcaption";
constant NODETYPE_TIME (line 1333) | pub const NODETYPE_TIME: &str = "Time";
constant NODETYPE_DETAILS (line 1334) | pub const NODETYPE_DETAILS: &str = "Details";
constant NODETYPE_SUMMARY (line 1335) | pub const NODETYPE_SUMMARY: &str = "Summary";
constant NODETYPE_FORM (line 1336) | pub const NODETYPE_FORM: &str = "Form";
constant NODETYPE_INPUT (line 1337) | pub const NODETYPE_INPUT: &str = "Input";
constant NODETYPE_SELECT (line 1338) | pub const NODETYPE_SELECT: &str = "Select";
constant NODETYPE_OPTION (line 1339) | pub const NODETYPE_OPTION: &str = "Option";
constant NODETYPE_BUTTON (line 1340) | pub const NODETYPE_BUTTON: &str = "Button";
constant NODETYPE_TEXTAREA (line 1341) | pub const NODETYPE_TEXTAREA: &str = "Textarea";
constant NODETYPE_LABEL (line 1342) | pub const NODETYPE_LABEL: &str = "Label";
constant NODETYPE_FIELDSET (line 1343) | pub const NODETYPE_FIELDSET: &str = "Fieldset";
constant NODETYPE_LEGEND (line 1344) | pub const NODETYPE_LEGEND: &str = "Legend";
constant NODETYPE_AUDIO (line 1345) | pub const NODETYPE_AUDIO: &str = "Audio";
constant NODETYPE_VIDEO (line 1346) | pub const NODETYPE_VIDEO: &str = "Video";
constant NODETYPE_PICTURE (line 1347) | pub const NODETYPE_PICTURE: &str = "Picture";
constant NODETYPE_SOURCE (line 1348) | pub const NODETYPE_SOURCE: &str = "Source";
constant NODETYPE_IFRAME (line 1349) | pub const NODETYPE_IFRAME: &str = "Iframe";
constant NODETYPE_SVG (line 1350) | pub const NODETYPE_SVG: &str = "Svg";
constant NODETYPE_CANVAS (line 1351) | pub const NODETYPE_CANVAS: &str = "Canvas";
constant NODETYPE_RUBY (line 1352) | pub const NODETYPE_RUBY: &str = "Ruby";
constant NODETYPE_RT (line 1353) | pub const NODETYPE_RT: &str = "Rt";
constant NODETYPE_RP (line 1354) | pub const NODETYPE_RP: &str = "Rp";
constant NODETYPE_ABBR (line 1355) | pub const NODETYPE_ABBR: &str = "Abbr";
constant NODETYPE_KBD (line 1356) | pub const NODETYPE_KBD: &str = "Kbd";
constant NODETYPE_SAMP (line 1357) | pub const NODETYPE_SAMP: &str = "Samp";
constant NODETYPE_VAR (line 1358) | pub const NODETYPE_VAR: &str = "Var";
constant NODETYPE_CITE (line 1359) | pub const NODETYPE_CITE: &str = "Cite";
constant NODETYPE_Q (line 1360) | pub const NODETYPE_Q: &str = "Q";
constant NODETYPE_DEL (line 1361) | pub const NODETYPE_DEL: &str = "Del";
constant NODETYPE_INS (line 1362) | pub const NODETYPE_INS: &str = "Ins";
constant NODETYPE_DATA (line 1363) | pub const NODETYPE_DATA: &str = "Data";
constant NODETYPE_METER (line 1364) | pub const NODETYPE_METER: &str = "Meter";
constant NODETYPE_PROGRESS (line 1365) | pub const NODETYPE_PROGRESS: &str = "Progress";
constant NODETYPE_OUTPUT (line 1366) | pub const NODETYPE_OUTPUT: &str = "Output";
constant NODETYPE_TEMPLATE (line 1367) | pub const NODETYPE_TEMPLATE: &str = "Template";
constant NODETYPE_SLOT (line 1368) | pub const NODETYPE_SLOT: &str = "Slot";
constant NODETYPE_HTML (line 1369) | pub const NODETYPE_HTML: &str = "Html";
constant NODETYPE_HEAD (line 1370) | pub const NODETYPE_HEAD: &str = "Head";
constant NODETYPE_BODY (line 1371) | pub const NODETYPE_BODY: &str = "Body";
constant NODETYPE_TITLE (line 1372) | pub const NODETYPE_TITLE: &str = "Title";
constant NODETYPE_META (line 1373) | pub const NODETYPE_META: &str = "Meta";
constant NODETYPE_LINKTAG (line 1374) | pub const NODETYPE_LINKTAG: &str = "LinkTag";
constant NODETYPE_STYLE (line 1375) | pub const NODETYPE_STYLE: &str = "Style";
constant NODETYPE_SCRIPT (line 1376) | pub const NODETYPE_SCRIPT: &str = "Script";
constant NODETYPE_BASE (line 1377) | pub const NODETYPE_BASE: &str = "Base";
constant NODETYPE_CUSTOM (line 1378) | pub const NODETYPE_CUSTOM: &str = "Custom";
constant VISITRESULT_CONTINUE (line 1381) | pub const VISITRESULT_CONTINUE: &str = "Continue";
constant VISITRESULT_CUSTOM (line 1382) | pub const VISITRESULT_CUSTOM: &str = "Custom";
constant VISITRESULT_SKIP (line 1383) | pub const VISITRESULT_SKIP: &str = "Skip";
constant VISITRESULT_PRESERVEHTML (line 1384) | pub const VISITRESULT_PRESERVEHTML: &str = "PreserveHtml";
constant VISITRESULT_ERROR (line 1385) | pub const VISITRESULT_ERROR: &str = "Error";
type HtmlToMarkdownApi (line 1389) | pub struct HtmlToMarkdownApi;
method convert (line 1394) | pub fn convert(
function nodecontext_to_php_array (line 1419) | fn nodecontext_to_php_array(
type PhpHtmlVisitorBridge (line 1470) | pub struct PhpHtmlVisitorBridge {
method fmt (line 1490) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method new (line 1496) | pub fn new(php_obj: &mut ext_php_rs::types::ZendObject) -> Self {
method visit_element_start (line 1511) | fn visit_element_start(&mut self, _ctx: &html_to_markdown_rs::NodeCont...
method visit_element_end (line 1536) | fn visit_element_end(
method visit_text (line 1566) | fn visit_text(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_link (line 1592) | fn visit_link(
method visit_image (line 1629) | fn visit_image(
method visit_heading (line 1666) | fn visit_heading(
method visit_code_block (line 1703) | fn visit_code_block(
method visit_code_inline (line 1738) | fn visit_code_inline(
method visit_list_item (line 1768) | fn visit_list_item(
method visit_list_start (line 1806) | fn visit_list_start(
method visit_list_end (line 1840) | fn visit_list_end(
method visit_table_start (line 1876) | fn visit_table_start(&mut self, _ctx: &html_to_markdown_rs::NodeContex...
method visit_table_row (line 1901) | fn visit_table_row(
method visit_table_end (line 1937) | fn visit_table_end(
method visit_blockquote (line 1967) | fn visit_blockquote(
method visit_strong (line 1999) | fn visit_strong(
method visit_emphasis (line 2029) | fn visit_emphasis(
method visit_strikethrough (line 2059) | fn visit_strikethrough(
method visit_underline (line 2089) | fn visit_underline(
method visit_subscript (line 2119) | fn visit_subscript(
method visit_superscript (line 2149) | fn visit_superscript(
method visit_mark (line 2179) | fn visit_mark(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_line_break (line 2205) | fn visit_line_break(&mut self, _ctx: &html_to_markdown_rs::NodeContext...
method visit_horizontal_rule (line 2230) | fn visit_horizontal_rule(&mut self, _ctx: &html_to_markdown_rs::NodeCo...
method visit_custom_element (line 2255) | fn visit_custom_element(
method visit_definition_list_start (line 2287) | fn visit_definition_list_start(
method visit_definition_term (line 2315) | fn visit_definition_term(
method visit_definition_description (line 2345) | fn visit_definition_description(
method visit_definition_list_end (line 2375) | fn visit_definition_list_end(
method visit_form (line 2405) | fn visit_form(
method visit_input (line 2443) | fn visit_input(
method visit_button (line 2483) | fn visit_button(
method visit_audio (line 2513) | fn visit_audio(
method visit_video (line 2546) | fn visit_video(
method visit_iframe (line 2579) | fn visit_iframe(
method visit_details (line 2612) | fn visit_details(
method visit_summary (line 2646) | fn visit_summary(
method visit_figure_start (line 2676) | fn visit_figure_start(&mut self, _ctx: &html_to_markdown_rs::NodeConte...
method visit_figcaption (line 2701) | fn visit_figcaption(
method visit_figure_end (line 2731) | fn visit_figure_end(
method clone (line 1481) | fn clone(&self) -> Self {
function from (line 2763) | fn from(val: DocumentMetadata) -> Self {
function from (line 2795) | fn from(val: HeaderMetadata) -> Self {
function from (line 2820) | fn from(val: LinkMetadata) -> Self {
function from (line 2844) | fn from(val: ImageMetadata) -> Self {
function from (line 2871) | fn from(val: StructuredData) -> Self {
function from (line 2892) | fn from(val: HtmlMetadata) -> Self {
function from (line 2912) | fn from(val: ConversionOptions) -> Self {
function from (line 2992) | fn from(val: ConversionOptionsUpdate) -> Self {
function from (line 3088) | fn from(val: PreprocessingOptions) -> Self {
function from (line 3110) | fn from(val: PreprocessingOptionsUpdate) -> Self {
function from (line 3135) | fn from(val: DocumentStructure) -> Self {
function from (line 3155) | fn from(val: DocumentNode) -> Self {
function from (line 3183) | fn from(val: TextAnnotation) -> Self {
function from (line 3204) | fn from(val: ConversionResult) -> Self {
function from (line 3226) | fn from(val: TableGrid) -> Self {
function from (line 3248) | fn from(val: GridCell) -> Self {
function from (line 3276) | fn from(val: TableData) -> Self {
function from (line 3295) | fn from(val: ProcessingWarning) -> Self {
function from (line 3419) | fn from(val: NodeContent) -> Self {
function from (line 3517) | fn from(val: AnnotationKind) -> Self {
function conversion_error_to_php_err (line 3538) | fn conversion_error_to_php_err(e: html_to_markdown_rs::error::Conversion...
function get_module (line 3568) | pub fn get_module(module: ModuleBuilder) -> ModuleBuilder {
FILE: crates/html-to-markdown-py/src/lib.rs
type DocumentMetadata (line 50) | pub struct DocumentMetadata {
method new (line 95) | pub fn new(
method from (line 3594) | fn from(val: html_to_markdown_rs::metadata::DocumentMetadata) -> Self {
type HeaderMetadata (line 126) | pub struct HeaderMetadata {
method new (line 149) | pub fn new(level: u8, text: String, depth: usize, html_offset: usize, ...
method is_valid (line 160) | pub fn is_valid(&self) -> bool {
method from (line 3626) | fn from(val: html_to_markdown_rs::metadata::HeaderMetadata) -> Self {
type LinkMetadata (line 174) | pub struct LinkMetadata {
method new (line 200) | pub fn new(
method classify_link (line 220) | pub fn classify_link(href: String) -> LinkType {
method from (line 3653) | fn from(val: html_to_markdown_rs::metadata::LinkMetadata) -> Self {
type ImageMetadata (line 227) | pub struct ImageMetadata {
method new (line 254) | pub fn new(
method from (line 3681) | fn from(val: html_to_markdown_rs::metadata::ImageMetadata) -> Self {
type StructuredData (line 275) | pub struct StructuredData {
method new (line 292) | pub fn new(data_type: StructuredDataType, raw_json: String, schema_typ...
method from (line 3709) | fn from(val: html_to_markdown_rs::metadata::StructuredData) -> Self {
type HtmlMetadata (line 303) | pub struct HtmlMetadata {
method new (line 326) | pub fn new(
method from (line 3733) | fn from(val: html_to_markdown_rs::metadata::HtmlMetadata) -> Self {
type ConversionOptions (line 346) | pub struct ConversionOptions {
method new (line 541) | pub fn new(
method apply_update (line 630) | pub fn apply_update(&self, update: ConversionOptionsUpdate) -> Self {
method default (line 684) | pub fn default() -> ConversionOptions {
method builder (line 690) | pub fn builder() -> ConversionOptionsBuilder {
method from_update (line 698) | pub fn from_update(update: ConversionOptionsUpdate) -> ConversionOptio...
method from (line 706) | pub fn from(update: ConversionOptionsUpdate) -> ConversionOptions {
method from (line 3797) | fn from(val: html_to_markdown_rs::options::ConversionOptions) -> Self {
method clone (line 488) | fn clone(&self) -> Self {
type ConversionOptionsBuilder (line 714) | pub struct ConversionOptionsBuilder {
method strip_tags (line 721) | pub fn strip_tags(&self, tags: Vec<String>) -> ConversionOptionsBuilder {
method preserve_tags (line 728) | pub fn preserve_tags(&self, tags: Vec<String>) -> ConversionOptionsBui...
method keep_inline_images_in (line 735) | pub fn keep_inline_images_in(&self, tags: Vec<String>) -> ConversionOp...
method exclude_selectors (line 742) | pub fn exclude_selectors(&self, selectors: Vec<String>) -> ConversionO...
method visitor (line 749) | pub fn visitor(&self, visitor: Option<VisitorHandle>) -> ConversionOpt...
method preprocessing (line 756) | pub fn preprocessing(&self, preprocessing: PreprocessingOptions) -> Co...
method build (line 764) | pub fn build(&self) -> ConversionOptions {
type ConversionOptionsUpdate (line 772) | pub struct ConversionOptionsUpdate {
method new (line 953) | pub fn new(
method from (line 3897) | fn from(val: html_to_markdown_rs::options::ConversionOptionsUpdate) ->...
method clone (line 900) | fn clone(&self) -> Self {
type PreprocessingOptions (line 1044) | pub struct PreprocessingOptions {
method new (line 1064) | pub fn new(
method apply_update (line 1079) | pub fn apply_update(&self, update: PreprocessingOptionsUpdate) -> Self {
method default (line 1094) | pub fn default() -> PreprocessingOptions {
method from_update (line 1100) | pub fn from_update(update: PreprocessingOptionsUpdate) -> Preprocessin...
method from (line 1108) | pub fn from(update: PreprocessingOptionsUpdate) -> PreprocessingOptions {
method from (line 3958) | fn from(val: html_to_markdown_rs::options::PreprocessingOptions) -> Se...
type PreprocessingOptionsUpdate (line 1116) | pub struct PreprocessingOptionsUpdate {
method new (line 1136) | pub fn new(
method from (line 3982) | fn from(val: html_to_markdown_rs::options::PreprocessingOptionsUpdate)...
type DocumentStructure (line 1153) | pub struct DocumentStructure {
method new (line 1168) | pub fn new(nodes: Vec<DocumentNode>, source_format: Option<String>) ->...
method from (line 4004) | fn from(val: html_to_markdown_rs::DocumentStructure) -> Self {
type DocumentNode (line 1175) | pub struct DocumentNode {
method new (line 1203) | pub fn new(
method from (line 4028) | fn from(val: html_to_markdown_rs::DocumentNode) -> Self {
type TextAnnotation (line 1224) | pub struct TextAnnotation {
method new (line 1242) | pub fn new(start: u32, end: u32, kind: AnnotationKind) -> Self {
method from (line 4053) | fn from(val: html_to_markdown_rs::TextAnnotation) -> Self {
type ConversionResult (line 1249) | pub struct ConversionResult {
method new (line 1284) | pub fn new(
method from (line 4080) | fn from(val: html_to_markdown_rs::ConversionResult) -> Self {
type TableGrid (line 1306) | pub struct TableGrid {
method new (line 1323) | pub fn new(rows: Option<u32>, cols: Option<u32>, cells: Option<Vec<Gri...
method from (line 4105) | fn from(val: html_to_markdown_rs::TableGrid) -> Self {
type GridCell (line 1335) | pub struct GridCell {
method new (line 1361) | pub fn new(content: String, row: u32, col: u32, row_span: u32, col_spa...
method from (line 4130) | fn from(val: html_to_markdown_rs::GridCell) -> Self {
type TableData (line 1375) | pub struct TableData {
method new (line 1389) | pub fn new(grid: TableGrid, markdown: String) -> Self {
method from (line 4154) | fn from(val: html_to_markdown_rs::TableData) -> Self {
type ProcessingWarning (line 1396) | pub struct ProcessingWarning {
method new (line 1410) | pub fn new(message: String, kind: WarningKind) -> Self {
method from (line 4174) | fn from(val: html_to_markdown_rs::ProcessingWarning) -> Self {
type VisitorHandle (line 1417) | pub struct VisitorHandle {
type NodeContext (line 1423) | pub struct NodeContext {
method new (line 1452) | pub fn new(
method from (line 4184) | fn from(val: html_to_markdown_rs::NodeContext) -> Self {
type TextDirection (line 1475) | pub enum TextDirection {
method from (line 4208) | fn from(val: html_to_markdown_rs::metadata::TextDirection) -> Self {
type LinkType (line 1484) | pub enum LinkType {
method from (line 4231) | fn from(val: html_to_markdown_rs::metadata::LinkType) -> Self {
type ImageType (line 1496) | pub enum ImageType {
method from (line 4255) | fn from(val: html_to_markdown_rs::metadata::ImageType) -> Self {
type StructuredDataType (line 1506) | pub enum StructuredDataType {
method from (line 4276) | fn from(val: html_to_markdown_rs::metadata::StructuredDataType) -> Self {
type PreprocessingPreset (line 1515) | pub enum PreprocessingPreset {
method from (line 4296) | fn from(val: html_to_markdown_rs::options::PreprocessingPreset) -> Self {
type HeadingStyle (line 1524) | pub enum HeadingStyle {
method from (line 4316) | fn from(val: html_to_markdown_rs::options::HeadingStyle) -> Self {
type ListIndentType (line 1533) | pub enum ListIndentType {
method from (line 4335) | fn from(val: html_to_markdown_rs::options::ListIndentType) -> Self {
type WhitespaceMode (line 1541) | pub enum WhitespaceMode {
method from (line 4353) | fn from(val: html_to_markdown_rs::options::WhitespaceMode) -> Self {
type NewlineStyle (line 1549) | pub enum NewlineStyle {
method from (line 4371) | fn from(val: html_to_markdown_rs::options::NewlineStyle) -> Self {
type CodeBlockStyle (line 1557) | pub enum CodeBlockStyle {
method from (line 4390) | fn from(val: html_to_markdown_rs::options::CodeBlockStyle) -> Self {
type HighlightStyle (line 1566) | pub enum HighlightStyle {
method from (line 4411) | fn from(val: html_to_markdown_rs::options::HighlightStyle) -> Self {
type LinkStyle (line 1577) | pub enum LinkStyle {
method from (line 4431) | fn from(val: html_to_markdown_rs::options::LinkStyle) -> Self {
type OutputFormat (line 1585) | pub enum OutputFormat {
method from (line 4450) | fn from(val: html_to_markdown_rs::options::OutputFormat) -> Self {
type NodeContent (line 1594) | pub struct NodeContent {
method from (line 1608) | fn from(val: html_to_markdown_rs::NodeContent) -> Self {
method serialize (line 1614) | fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::...
method deserialize (line 1628) | fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result...
function from (line 1602) | fn from(val: NodeContent) -> Self {
method default (line 1620) | fn default() -> Self {
type AnnotationKind (line 1636) | pub struct AnnotationKind {
method new (line 1643) | fn new(py: Python<'_>, value: &Bound<'_, pyo3::types::PyDict>) -> PyRe...
method from (line 1659) | fn from(val: html_to_markdown_rs::AnnotationKind) -> Self {
method serialize (line 1665) | fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::...
method deserialize (line 1679) | fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result...
function from (line 1653) | fn from(val: AnnotationKind) -> Self {
method default (line 1671) | fn default() -> Self {
type WarningKind (line 1687) | pub enum WarningKind {
method from (line 4473) | fn from(val: html_to_markdown_rs::WarningKind) -> Self {
type NodeType (line 1699) | pub enum NodeType {
method from (line 4486) | fn from(val: html_to_markdown_rs::NodeType) -> Self {
type VisitResult (line 1793) | pub struct VisitResult {
method new (line 1800) | fn new(py: Python<'_>, value: &Bound<'_, pyo3::types::PyDict>) -> PyRe...
method from (line 1816) | fn from(val: html_to_markdown_rs::VisitResult) -> Self {
method serialize (line 1822) | fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::...
method deserialize (line 1836) | fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result...
function from (line 1810) | fn from(val: VisitResult) -> Self {
method default (line 1828) | fn default() -> Self {
function convert (line 1845) | pub fn convert(html: String, options: Option<ConversionOptions>) -> PyRe...
function nodecontext_to_py_dict (line 1873) | fn nodecontext_to_py_dict<'py>(
type PyHtmlVisitorBridge (line 1893) | pub struct PyHtmlVisitorBridge {
method new (line 1898) | pub fn new(python_obj: Py<PyAny>) -> Self {
method visit_element_start (line 1904) | fn visit_element_start(&mut self, _ctx: &html_to_markdown_rs::NodeCont...
method visit_element_end (line 1941) | fn visit_element_end(
method visit_text (line 1982) | fn visit_text(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_link (line 2019) | fn visit_link(
method visit_image (line 2062) | fn visit_image(
method visit_heading (line 2105) | fn visit_heading(
method visit_code_block (line 2148) | fn visit_code_block(
method visit_code_inline (line 2190) | fn visit_code_inline(
method visit_list_item (line 2231) | fn visit_list_item(
method visit_list_start (line 2277) | fn visit_list_start(
method visit_list_end (line 2318) | fn visit_list_end(
method visit_table_start (line 2360) | fn visit_table_start(&mut self, _ctx: &html_to_markdown_rs::NodeContex...
method visit_table_row (line 2397) | fn visit_table_row(
method visit_table_end (line 2442) | fn visit_table_end(
method visit_blockquote (line 2483) | fn visit_blockquote(
method visit_strong (line 2525) | fn visit_strong(
method visit_emphasis (line 2566) | fn visit_emphasis(
method visit_strikethrough (line 2607) | fn visit_strikethrough(
method visit_underline (line 2648) | fn visit_underline(
method visit_subscript (line 2689) | fn visit_subscript(
method visit_superscript (line 2730) | fn visit_superscript(
method visit_mark (line 2771) | fn visit_mark(&mut self, _ctx: &html_to_markdown_rs::NodeContext, _tex...
method visit_line_break (line 2808) | fn visit_line_break(&mut self, _ctx: &html_to_markdown_rs::NodeContext...
method visit_horizontal_rule (line 2845) | fn visit_horizontal_rule(&mut self, _ctx: &html_to_markdown_rs::NodeCo...
method visit_custom_element (line 2882) | fn visit_custom_element(
method visit_definition_list_start (line 2927) | fn visit_definition_list_start(
method visit_definition_term (line 2967) | fn visit_definition_term(
method visit_definition_description (line 3008) | fn visit_definition_description(
method visit_definition_list_end (line 3052) | fn visit_definition_list_end(
method visit_form (line 3093) | fn visit_form(
method visit_input (line 3135) | fn visit_input(
method visit_button (line 3181) | fn visit_button(
method visit_audio (line 3222) | fn visit_audio(
method visit_video (line 3263) | fn visit_video(
method visit_iframe (line 3304) | fn visit_iframe(
method visit_details (line 3345) | fn visit_details(
method visit_summary (line 3386) | fn visit_summary(
method visit_figure_start (line 3427) | fn visit_figure_start(&mut self, _ctx: &html_to_markdown_rs::NodeConte...
method visit_figcaption (line 3464) | fn visit_figcaption(
method visit_figure_end (line 3505) | fn visit_figure_end(
function conversion_error_to_py_err (line 3558) | fn conversion_error_to_py_err(e: html_to_markdown_rs::ConversionError) -...
function from (line 3575) | fn from(val: DocumentMetadata) -> Self {
function from (line 3613) | fn from(val: HeaderMetadata) -> Self {
function from (line 3639) | fn from(val: LinkMetadata) -> Self {
function from (line 3667) | fn from(val: ImageMetadata) -> Self {
function from (line 3698) | fn from(val: StructuredData) -> Self {
function from (line 3720) | fn from(val: HtmlMetadata) -> Self {
function from (line 3747) | fn from(val: ConversionOptions) -> Self {
function from (line 3847) | fn from(val: ConversionOptionsUpdate) -> Self {
function from (line 3946) | fn from(val: PreprocessingOptions) -> Self {
function from (line 3970) | fn from(val: PreprocessingOptionsUpdate) -> Self {
function from (line 3994) | fn from(val: DocumentStructure) -> Self {
function from (line 4014) | fn from(val: DocumentNode) -> Self {
function from (line 4042) | fn from(val: TextAnnotation) -> Self {
function from (line 4065) | fn from(val: ConversionResult) -> Self {
function from (line 4094) | fn from(val: TableGrid) -> Self {
function from (line 4116) | fn from(val: GridCell) -> Self {
function from (line 4144) | fn from(val: TableData) -> Self {
function from (line 4164) | fn from(val: ProcessingWarning) -> Self {
function from (line 4198) | fn from(val: TextDirection) -> Self {
function from (line 4218) | fn from(val: LinkType) -> Self {
function from (line 4244) | fn from(val: ImageType) -> Self {
function from (line 4266) | fn from(val: StructuredDataType) -> Self {
function from (line 4286) | fn from(val: PreprocessingPreset) -> Self {
function from (line 4306) | fn from(val: HeadingStyle) -> Self {
function from (line 4326) | fn from(val: ListIndentType) -> Self {
function from (line 4344) | fn from(val: WhitespaceMode) -> Self {
function from (line 4362) | fn from(val: NewlineStyle) -> Self {
function from (line 4380) | fn from(val: CodeBlockStyle) -> Self {
function from (line 4400) | fn from(val: HighlightStyle) -> Self {
function from (line 4422) | fn from(val: LinkStyle) -> Self {
function from (line 4440) | fn from(val: OutputFormat) -> Self {
function from (line 4460) | fn from(val: WarningKind) -> Self {
function _html_to_markdown (line 4581) | pub fn _html_to_markdown(m: &Bound<'_, PyModule>) -> PyResult<()> {
FILE: crates/html-to-markdown-wasm/scripts/patch-bundler-entry.js
function injectTypedef (line 149) | function injectTypedef(content, specifier) {
function patchJsDoc (line 160) | function patchJsDoc(targetPath, typeSpecifier) {
FILE: crates/html-to-markdown-wasm/src/lib.rs
type WasmDocumentMetadata (line 43) | pub struct WasmDocumentMetadata {
method new (line 61) | pub fn new(
method title (line 90) | pub fn title(&self) -> Option<String> {
method set_title (line 95) | pub fn set_title(&mut self, value: Option<String>) {
method description (line 100) | pub fn description(&self) -> Option<String> {
method set_description (line 105) | pub fn set_description(&mut self, value: Option<String>) {
method keywords (line 110) | pub fn keywords(&self) -> Vec<String> {
method set_keywords (line 115) | pub fn set_keywords(&mut self, value: Vec<String>) {
method author (line 120) | pub fn author(&self) -> Option<String> {
method set_author (line 125) | pub fn set_author(&mut self, value: Option<String>) {
method canonical_url (line 130) | pub fn canonical_url(&self) -> Option<String> {
method set_canonical_url (line 135) | pub fn set_canonical_url(&mut self, value: Option<String>) {
method base_href (line 140) | pub fn base_href(&self) -> Option<String> {
method set_base_href (line 145) | pub fn set_base_href(&mut self, value: Option<String>) {
method language (line 150) | pub fn language(&self) -> Option<String> {
method set_language (line 155) | pub fn set_language(&mut self, value: Option<String>) {
method text_direction (line 160) | pub fn text_direction(&self) -> Option<WasmTextDirection> {
method set_text_direction (line 165) | pub fn set_text_direction(&mut self, value: Option<WasmTextDirection>) {
method open_graph (line 170) | pub fn open_graph(&self) -> JsValue {
method set_open_graph (line 175) | pub fn set_open_graph(&mut self, value: JsValue) {
method twitter_card (line 180) | pub fn twitter_card(&self) -> JsValue {
method set_twitter_card (line 185) | pub fn set_twitter_card(&mut self, value: JsValue) {
method meta_tags (line 190) | pub fn meta_tags(&self) -> JsValue {
method set_meta_tags (line 195) | pub fn set_meta_tags(&mut self, value: JsValue) {
method from (line 4904) | fn from(val: html_to_markdown_rs::metadata::DocumentMetadata) -> Self {
type WasmHeaderMetadata (line 222) | pub struct WasmHeaderMetadata {
method new (line 233) | pub fn new(level: u8, text: String, depth: usize, html_offset: usize, ...
method level (line 244) | pub fn level(&self) -> u8 {
method set_level (line 249) | pub fn set_level(&mut self, value: u8) {
method text (line 254) | pub fn text(&self) -> String {
method set_text (line 259) | pub fn set_text(&mut self, value: String) {
method id (line 264) | pub fn id(&self) -> Option<String> {
method set_id (line 269) | pub fn set_id(&mut self, value: Option<String>) {
method depth (line 274) | pub fn depth(&self) -> usize {
method set_depth (line 279) | pub fn set_depth(&mut self, value: usize) {
method html_offset (line 284) | pub fn html_offset(&self) -> usize {
method set_html_offset (line 289) | pub fn set_html_offset(&mut self, value: usize) {
method is_valid (line 322) | pub fn is_valid(&self) -> bool {
method from (line 4936) | fn from(val: html_to_markdown_rs::metadata::HeaderMetadata) -> Self {
type WasmLinkMetadata (line 349) | pub struct WasmLinkMetadata {
method new (line 361) | pub fn new(
method href (line 380) | pub fn href(&self) -> String {
method set_href (line 385) | pub fn set_href(&mut self, value: String) {
method text (line 390) | pub fn text(&self) -> String {
method set_text (line 395) | pub fn set_text(&mut self, value: String) {
method title (line 400) | pub fn title(&self) -> Option<String> {
method set_title (line 405) | pub fn set_title(&mut self, value: Option<String>) {
method link_type (line 410) | pub fn link_type(&self) -> WasmLinkType {
method set_link_type (line 415) | pub fn set_link_type(&mut self, value: WasmLinkType) {
method rel (line 420) | pub fn rel(&self) -> Vec<String> {
method set_rel (line 425) | pub fn set_rel(&mut self, value: Vec<String>) {
method attributes (line 430) | pub fn attributes(&self) -> JsValue {
method set_attributes (line 435) | pub fn set_attributes(&mut self, value: JsValue) {
method classify_link (line 459) | pub fn classify_link(href: String) -> WasmLinkType {
method from (line 4963) | fn from(val: html_to_markdown_rs::metadata::LinkMetadata) -> Self {
type WasmImageMetadata (line 486) | pub struct WasmImageMetadata {
method new (line 498) | pub fn new(
method src (line 517) | pub fn src(&self) -> String {
method set_src (line 522) | pub fn set_src(&mut self, value: String) {
method alt (line 527) | pub fn alt(&self) -> Option<String> {
method set_alt (line 532) | pub fn set_alt(&mut self, value: Option<String>) {
method title (line 537) | pub fn title(&self) -> Option<String> {
method set_title (line 542) | pub fn set_title(&mut self, value: Option<String>) {
method dimensions (line 547) | pub fn dimensions(&self) -> Option<Vec<u32>> {
method set_dimensions (line 552) | pub fn set_dimensions(&mut self, value: Option<Vec<u32>>) {
method image_type (line 557) | pub fn image_type(&self) -> WasmImageType {
method set_image_type (line 562) | pub fn set_image_type(&mut self, value: WasmImageType) {
method attributes (line 567) | pub fn attributes(&self) -> JsValue {
method set_attributes (line 572) | pub fn set_attributes(&mut self, value: JsValue) {
method from (line 4991) | fn from(val: html_to_markdown_rs::metadata::ImageMetadata) -> Self {
type WasmStructuredData (line 596) | pub struct WasmStructuredData {
method new (line 605) | pub fn new(data_type: WasmStructuredDataType, raw_json: String, schema...
method data_type (line 614) | pub fn data_type(&self) -> WasmStructuredDataType {
method set_data_type (line 619) | pub fn set_data_type(&mut self, value: WasmStructuredDataType) {
method raw_json (line 624) | pub fn raw_json(&self) -> String {
method set_raw_json (line 629) | pub fn set_raw_json(&mut self, value: String) {
method schema_type (line 634) | pub fn schema_type(&self) -> Option<String> {
method set_schema_type (line 639) | pub fn set_schema_type(&mut self, value: Option<String>) {
method from (line 5019) | fn from(val: html_to_markdown_rs::metadata::StructuredData) -> Self {
type WasmHtmlMetadata (line 665) | pub struct WasmHtmlMetadata {
method new (line 676) | pub fn new(
method document (line 693) | pub fn document(&self) -> WasmDocumentMetadata {
method set_document (line 698) | pub fn set_document(&mut self, value: WasmDocumentMetadata) {
method headers (line 703) | pub fn headers(&self) -> Vec<WasmHeaderMetadata> {
method set_headers (line 708) | pub fn set_headers(&mut self, value: Vec<WasmHeaderMetadata>) {
method links (line 713) | pub fn links(&self) -> Vec<WasmLinkMetadata> {
method set_links (line 718) | pub fn set_links(&mut self, value: Vec<WasmLinkMetadata>) {
method images (line 723) | pub fn images(&self) -> Vec<WasmImageMetadata> {
method set_images (line 728) | pub fn set_images(&mut self, value: Vec<WasmImageMetadata>) {
method structured_data (line 733) | pub fn structured_data(&self) -> Vec<WasmStructuredData> {
method set_structured_data (line 738) | pub fn set_structured_data(&mut self, value: Vec<WasmStructuredData>) {
method from (line 5043) | fn from(val: html_to_markdown_rs::metadata::HtmlMetadata) -> Self {
type WasmConversionOptions (line 760) | pub struct WasmConversionOptions {
method new (line 808) | pub fn new(
method heading_style (line 896) | pub fn heading_style(&self) -> WasmHeadingStyle {
method set_heading_style (line 901) | pub fn set_heading_style(&mut self, value: WasmHeadingStyle) {
method list_indent_type (line 906) | pub fn list_indent_type(&self) -> WasmListIndentType {
method set_list_indent_type (line 911) | pub fn set_list_indent_type(&mut self, value: WasmListIndentType) {
method list_indent_width (line 916) | pub fn list_indent_width(&self) -> usize {
method set_list_indent_width (line 921) | pub fn set_list_indent_width(&mut self, value: usize) {
method bullets (line 926) | pub fn bullets(&self) -> String {
method set_bullets (line 931) | pub fn set_bullets(&mut self, value: String) {
method strong_em_symbol (line 936) | pub fn strong_em_symbol(&self) -> String {
method set_strong_em_symbol (line 941) | pub fn set_strong_em_symbol(&mut self, value: String) {
method escape_asterisks (line 946) | pub fn escape_asterisks(&self) -> bool {
method set_escape_asterisks (line 951) | pub fn set_escape_asterisks(&mut self, value: bool) {
method escape_underscores (line 956) | pub fn escape_underscores(&self) -> bool {
method set_escape_underscores (line 961) | pub fn set_escape_underscores(&mut self, value: bool) {
method escape_misc (line 966) | pub fn escape_misc(&self) -> bool {
method set_escape_misc (line 971) | pub fn set_escape_misc(&mut self, value: bool) {
method escape_ascii (line 976) | pub fn escape_ascii(&self) -> bool {
method set_escape_ascii (line 981) | pub fn set_escape_ascii(&mut self, value: bool) {
method code_language (line 986) | pub fn code_language(&self) -> String {
method set_code_language (line 991) | pub fn set_code_language(&mut self, value: String) {
method autolinks (line 996) | pub fn autolinks(&self) -> bool {
method set_autolinks (line 1001) | pub fn set_autolinks(&mut self, value: bool) {
method default_title (line 1006) | pub fn default_title(&self) -> bool {
method set_default_title (line 1011) | pub fn set_default_title(&mut self, value: bool) {
method br_in_tables (line 1016) | pub fn br_in_tables(&self) -> bool {
method set_br_in_tables (line 1021) | pub fn set_br_in_tables(&mut self, value: bool) {
method highlight_style (line 1026) | pub fn highlight_style(&self) -> WasmHighlightStyle {
method set_highlight_style (line 1031) | pub fn set_highlight_style(&mut self, value: WasmHighlightStyle) {
method extract_metadata (line 1036) | pub fn extract_metadata(&self) -> bool {
method set_extract_metadata (line 1041) | pub fn set_extract_metadata(&mut self, value: bool) {
method whitespace_mode (line 1046) | pub fn whitespace_mode(&self) -> WasmWhitespaceMode {
method set_whitespace_mode (line 1051) | pub fn set_whitespace_mode(&mut self, value: WasmWhitespaceMode) {
method strip_newlines (line 1056) | pub fn strip_newlines(&self) -> bool {
method set_strip_newlines (line 1061) | pub fn set_strip_newlines(&mut self, value: bool) {
method wrap (line 1066) | pub fn wrap(&self) -> bool {
method set_wrap (line 1071) | pub fn set_wrap(&mut self, value: bool) {
method wrap_width (line 1076) | pub fn wrap_width(&self) -> usize {
method set_wrap_width (line 1081) | pub fn set_wrap_width(&mut self, value: usize) {
method convert_as_inline (line 1086) | pub fn convert_as_inline(&self) -> bool {
method set_convert_as_inline (line 1091) | pub fn set_convert_as_inline(&mut self, value: bool) {
method sub_symbol (line 1096) | pub fn sub_symbol(&self) -> String {
method set_sub_symbol (line 1101) | pub fn set_sub_symbol(&mut self, value: String) {
method sup_symbol (line 1106) | pub fn sup_symbol(&self) -> String {
method set_sup_symbol (line 1111) | pub fn set_sup_symbol(&mut self, value: String) {
method newline_style (line 1116) | pub fn newline_style(&self) -> WasmNewlineStyle {
method set_newline_style (line 1121) | pub fn set_newline_style(&mut self, value: WasmNewlineStyle) {
method code_block_style (line 1126) | pub fn code_block_style(&self) -> WasmCodeBlockStyle {
method set_code_block_style (line 1131) | pub fn set_code_block_style(&mut self, value: WasmCodeBlockStyle) {
method keep_inline_images_in (line 1136) | pub fn keep_inline_images_in(&self) -> Vec<String> {
method set_keep_inline_images_in (line 1141) | pub fn set_keep_inline_images_in(&mut self, value: Vec<String>) {
method preprocessing (line 1146) | pub fn preprocessing(&self) -> WasmPreprocessingOptions {
method set_preprocessing (line 1151) | pub fn set_preprocessing(&mut self, value: WasmPreprocessingOptions) {
method encoding (line 1156) | pub fn encoding(&self) -> String {
method set_encoding (line 1161) | pub fn set_encoding(&mut self, value: String) {
method debug (line 1166) | pub fn debug(&self) -> bool {
method set_debug (line 1171) | pub fn set_debug(&mut self, value: bool) {
method strip_tags (line 1176) | pub fn strip_tags(&self) -> Vec<String> {
method set_strip_tags (line 1181) | pub fn set_strip_tags(&mut self, value: Vec<String>) {
method preserve_tags (line 1186) | pub fn preserve_tags(&self) -> Vec<String> {
method set_preserve_tags (line 1191) | pub fn set_preserve_tags(&mut self, value: Vec<String>) {
method skip_images (line 1196) | pub fn skip_images(&self) -> bool {
method set_skip_images (line 1201) | pub fn set_skip_images(&mut self, value: bool) {
method link_style (line 1206) | pub fn link_style(&self) -> WasmLinkStyle {
method set_link_style (line 1211) | pub fn set_link_style(&mut self, value: WasmLinkStyle) {
method output_format (line 1216) | pub fn output_format(&self) -> WasmOutputFormat {
method set_output_format (line 1221) | pub fn set_output_format(&mut self, value: WasmOutputFormat) {
method include_document_structure (line 1226) | pub fn include_document_structure(&self) -> bool {
method set_include_document_structure (line 1231) | pub fn set_include_document_structure(&mut self, value: bool) {
method extract_images (line 1236) | pub fn extract_images(&self) -> bool {
method set_extract_images (line 1241) | pub fn set_extract_images(&mut self, value: bool) {
method max_image_size (line 1246) | pub fn max_image_size(&self) -> u64 {
method set_max_image_size (line 1251) | pub fn set_max_image_size(&mut self, value: u64) {
method capture_svg (line 1256) | pub fn capture_svg(&self) -> bool {
method set_capture_svg (line 1261) | pub fn set_capture_svg(&mut self, value: bool) {
method infer_dimensions (line 1266) | pub fn infer_dimensions(&self) -> bool {
method set_infer_dimensions (line 1271) | pub fn set_infer_dimensions(&mut self, value: bool) {
method max_depth (line 1276) | pub fn max_depth(&self) -> Option<usize> {
method set_max_depth (line 1281) | pub fn set_max_depth(&mut self, value: Option<usize>) {
method exclude_selectors (line 1286) | pub fn exclude_selectors(&self) -> Vec<String> {
method set_exclude_selectors (line 1291) | pub fn set_exclude_selectors(&mut self, value: Vec<String>) {
method visitor (line 1296) | pub fn visitor(&self) -> Option<wasm_bindgen::JsValue> {
method set_visitor (line 1301) | pub fn set_visitor(&mut self, value: Option<wasm_bindgen::JsValue>) {
method default (line 1307) | pub fn default() -> WasmConversionOptions {
method builder (line 1313) | pub fn builder() -> WasmConversionOptionsBuilder {
method apply_update (line 1321) | pub fn apply_update(&self, _update: WasmConversionOptionsUpdate) -> () {
method from_update (line 1327) | pub fn from_update(update: WasmConversionOptionsUpdate) -> WasmConvers...
method from (line 1334) | pub fn from(update: WasmConversionOptionsUpdate) -> WasmConversionOpti...
method from (line 5107) | fn from(val: html_to_markdown_rs::options::ConversionOptions) -> Self {
type WasmConversionOptionsBuilder (line 1345) | pub struct WasmConversionOptionsBuilder {
method strip_tags (line 1353) | pub fn strip_tags(&self, tags: Vec<String>) -> WasmConversionOptionsBu...
method preserve_tags (line 1361) | pub fn preserve_tags(&self, tags: Vec<String>) -> WasmConversionOption...
method keep_inline_images_in (line 1369) | pub fn keep_inline_images_in(&self, tags: Vec<String>) -> WasmConversi...
method exclude_selectors (line 1377) | pub fn exclude_selectors(&self, selectors: Vec<String>) -> WasmConvers...
method preprocessing (line 1385) | pub fn preprocessing(&self, preprocessing: WasmPreprocessingOptions) -...
method build (line 1393) | pub fn build(&self) -> WasmConversionOptions {
type WasmConversionOptionsUpdate (line 1404) | pub struct WasmConversionOptionsUpdate {
method new (line 1452) | pub fn new(
method heading_style (line 1540) | pub fn heading_style(&self) -> Option<WasmHeadingStyle> {
method set_heading_style (line 1545) | pub fn set_heading_style(&mut self, value: Option<WasmHeadingStyle>) {
method list_indent_type (line 1550) | pub fn list_indent_type(&self) -> Option<WasmListIndentType> {
method set_list_indent_type (line 1555) | pub fn set_list_indent_type(&mut self, value: Option<WasmListIndentTyp...
method list_indent_width (line 1560) | pub fn list_indent_width(&self) -> Option<usize> {
method set_list_indent_width (line 1565) | pub fn set_list_indent_width(&mut self, value: Option<usize>) {
method bullets (line 1570) | pub fn bullets(&self) -> Option<String> {
method set_bullets (line 1575) | pub fn set_bullets(&mut self, value: Option<String>) {
method strong_em_symbol (line 1580) | pub fn strong_em_symbol(&self) -> Option<String> {
method set_strong_em_symbol (line 1585) | pub fn set_strong_em_symbol(&mut self, value: Option<String>) {
method escape_asterisks (line 1590) | pub fn escape_asterisks(&self) -> Option<bool> {
method set_escape_asterisks (line 1595) | pub fn set_escape_asterisks(&mut self, value: Option<bool>) {
method escape_underscores (line 1600) | pub fn escape_underscores(&self) -> Option<bool> {
method set_escape_underscores (line 1605) | pub fn set_escape_underscores(&mut self, value: Option<bool>) {
method escape_misc (line 1610) | pub fn escape_misc(&self) -> Option<bool> {
method set_escape_misc (line 1615) | pub fn set_escape_misc(&mut self, value: Option<bool>) {
method escape_ascii (line 1620) | pub fn escape_ascii(&self) -> Option<bool> {
method set_escape_ascii (line 1625) | pub fn set_escape_ascii(&mut self, value: Option<bool>) {
method code_language (line 1630) | pub fn code_language(&self) -> Option<String> {
method set_code_language (line 1635) | pub fn set_code_language(&mut self, value: Option<String>) {
method autolinks (line 1640) | pub fn autolinks(&self) -> Option<bool> {
method set_autolinks (line 1645) | pub fn set_autolinks(&mut self, value: Option<bool>) {
method default_title (line 1650) | pub fn default_title(&self) -> Option<bool> {
method set_default_title (line 1655) | pub fn set_default_title(&mut self, value: Option<bool>) {
method br_in_tables (line 1660) | pub fn br_in_tables(&self) -> Option<bool> {
method set_br_in_tables (line 1665) | pub fn set_br_in_tables(&mut self, value: Option<bool>) {
method highlight_style (line 1670) | pub fn highlight_style(&self) -> Option<WasmHighlightStyle> {
method set_highlight_style (line 1675) | pub fn set_highlight_style(&mut self, value: Option<WasmHighlightStyle...
method extract_metadata (line 1680) | pub fn extract_metadata(&self) -> Option<bool> {
method set_extract_metadata (line 1685) | pub fn set_extract_metadata(&mut self, value: Option<bool>) {
method whitespace_mode (line 1690) | pub fn whitespace_mode(&self) -> Option<WasmWhitespaceMode> {
method set_whitespace_mode (line 1695) | pub fn set_whitespace_mode(&mut self, value: Option<WasmWhitespaceMode...
method strip_newlines (line 1700) | pub fn strip_newlines(&self) -> Option<bool> {
method set_strip_newlines (line 1705) | pub fn set_strip_newlines(&mut self, value: Option<bool>) {
method wrap (line 1710) | pub fn wrap(&self) -> Option<bool> {
method set_wrap (line 1715) | pub fn set_wrap(&mut self, value: Option<bool>) {
method wrap_width (line 1720) | pub fn wrap_width(&self) -> Option<usize> {
method set_wrap_width (line 1725) | pub fn set_wrap_width(&mut self, value: Option<usize>) {
method convert_as_inline (line 1730) | pub fn convert_as_inline(&self) -> Option<bool> {
method set_convert_as_inline (line 1735) | pub fn set_convert_as_inline(&mut self, value: Option<bool>) {
method sub_symbol (line 1740) | pub fn sub_symbol(&self) -> Option<String> {
method set_sub_symbol (line 1745) | pub fn set_sub_symbol(&mut self, value: Option<String>) {
method sup_symbol (line 1750) | pub fn sup_symbol(&self) -> Option<String> {
method set_sup_symbol (line 1755) | pub fn set_sup_symbol(&mut self, value: Option<String>) {
method newline_style (line 1760) | pub fn newline_style(&self) -> Option<WasmNewlineStyle> {
method set_newline_style (line 1765) | pub fn set_newline_style(&mut self, value: Option<WasmNewlineStyle>) {
method code_block_style (line 1770) | pub fn code_block_style(&self) -> Option<WasmCodeBlockStyle> {
method set_code_block_style (line 1775) | pub fn set_code_block_style(&mut self, value: Option<WasmCodeBlockStyl...
method keep_inline_images_in (line 1780) | pub fn keep_inline_images_in(&self) -> Option<Vec<String>> {
method set_keep_inline_images_in (line 1785) | pub fn set_keep_inline_images_in(&mut self, value: Option<Vec<String>>) {
method preprocessing (line 1790) | pub fn preprocessing(&self) -> Option<WasmPreprocessingOptionsUpdate> {
method set_preprocessing (line 1795) | pub fn set_preprocessing(&mut self, value: Option<WasmPreprocessingOpt...
method encoding (line 1800) | pub fn encoding(&self) -> Option<String> {
method set_encoding (line 1805) | pub fn set_encoding(&mut self, value: Option<String>) {
method debug (line 1810) | pub fn debug(&self) -> Option<bool> {
method set_debug (line 1815) | pub fn set_debug(&mut self, value: Option<bool>) {
method strip_tags (line 1820) | pub fn strip_tags(&self) -> Option<Vec<String>> {
method set_strip_tags (line 1825) | pub fn set_strip_tags(&mut self, value: Option<Vec<String>>) {
method preserve_tags (line 1830) | pub fn preserve_tags(&self) -> Option<Vec<String>> {
method set_preserve_tags (line 1835) | pub fn set_preserve_tags(&mut self, value: Option<Vec<String>>) {
method skip_images (line 1840) | pub fn skip_images(&self) -> Option<bool> {
method set_skip_images (line 1845) | pub fn set_skip_images(&mut self, value: Option<bool>) {
method link_style (line 1850) | pub fn link_style(&self) -> Option<WasmLinkStyle> {
method set_link_style (line 1855) | pub fn set_link_style(&mut self, value: Option<WasmLinkStyle>) {
method output_format (line 1860) | pub fn output_format(&self) -> Option<WasmOutputFormat> {
method set_output_format (line 1865) | pub fn set_output_format(&mut self, value: Option<WasmOutputFormat>) {
method include_document_structure (line 1870) | pub fn include_document_structure(&self) -> Option<bool> {
method set_include_document_structure (line 1875) | pub fn set_include_document_structure(&mut self, value: Option<bool>) {
method extract_images (line 1880) | pub fn extract_images(&self) -> Option<bool> {
method set_extract_images (line 1885) | pub fn set_extract_images(&mut self, value: Option<bool>) {
method max_image_size (line 1890) | pub fn max_image_size(&self) -> Option<u64> {
method set_max_image_size (line 1895) | pub fn set_max_image_size(&mut self, value: Option<u64>) {
method capture_svg (line 1900) | pub fn capture_svg(&self) -> Option<bool> {
method set_capture_svg (line 1905) | pub fn set_capture_svg(&mut self, value: Option<bool>) {
method infer_dimensions (line 1910) | pub fn infer_dimensions(&self) -> Option<bool> {
method set_infer_dimensions (line 1915) | pub fn set_infer_dimensions(&mut self, value: Option<bool>) {
method max_depth (line 1920) | pub fn max_depth(&self) -> Option<usize> {
method set_max_depth (line 1925) | pub fn set_max_depth(&mut self, value: Option<usize>) {
method exclude_selectors (line 1930) | pub fn exclude_selectors(&self) -> Option<Vec<String>> {
method set_exclude_selectors (line 1935) | pub fn set_exclude_selectors(&mut self, value: Option<Vec<String>>) {
method visitor (line 1940) | pub fn visitor(&self) -> Option<wasm_bindgen::JsValue> {
method set_visitor (line 1945) | pub fn set_visitor(&mut self, value: Option<wasm_bindgen::JsValue>) {
method from (line 5207) | fn from(val: html_to_markdown_rs::options::ConversionOptionsUpdate) ->...
type WasmPreprocessingOptions (line 1953) | pub struct WasmPreprocessingOptions {
method new (line 1963) | pub fn new(
method enabled (line 1978) | pub fn enabled(&self) -> bool {
method set_enabled (line 1983) | pub fn set_enabled(&mut self, value: bool) {
method preset (line 1988) | pub fn preset(&self) -> WasmPreprocessingPreset {
method set_preset (line 1993) | pub fn set_preset(&mut self, value: WasmPreprocessingPreset) {
method remove_navigation (line 1998) | pub fn remove_navigation(&self) -> bool {
method set_remove_navigation (line 2003) | pub fn set_remove_navigation(&mut self, value: bool) {
method remove_forms (line 2008) | pub fn remove_forms(&self) -> bool {
method set_remove_forms (line 2013) | pub fn set_remove_forms(&mut self, value: bool) {
method default (line 2019) | pub fn default() -> WasmPreprocessingOptions {
method apply_update (line 2032) | pub fn apply_update(&self, _update: WasmPreprocessingOptionsUpdate) ->...
method from_update (line 2049) | pub fn from_update(update: WasmPreprocessingOptionsUpdate) -> WasmPrep...
method from (line 2056) | pub fn from(update: WasmPreprocessingOptionsUpdate) -> WasmPreprocessi...
method from (line 5268) | fn from(val: html_to_markdown_rs::options::PreprocessingOptions) -> Se...
type WasmPreprocessingOptionsUpdate (line 2069) | pub struct WasmPreprocessingOptionsUpdate {
method new (line 2079) | pub fn new(
method enabled (line 2094) | pub fn enabled(&self) -> Option<bool> {
method set_enabled (line 2099) | pub fn set_enabled(&mut self, value: Option<bool>) {
method preset (line 2104) | pub fn preset(&self) -> Option<WasmPreprocessingPreset> {
method set_preset (line 2109) | pub fn set_preset(&mut self, value: Option<WasmPreprocessingPreset>) {
method remove_navigation (line 2114) | pub fn remove_navigation(&self) -> Option<bool> {
method set_remove_navigation (line 2119) | pub fn set_remove_navigation(&mut self, value: Option<bool>) {
method remove_forms (line 2124) | pub fn remove_forms(&self) -> Option<bool> {
method set_remove_forms (line 2129) | pub fn set_remove_forms(&mut self, value: Option<bool>) {
method from (line 5292) | fn from(val: html_to_markdown_rs::options::PreprocessingOptionsUpdate)...
type WasmDocumentStructure (line 2139) | pub struct WasmDocumentStructure {
method new (line 2147) | pub fn new(nodes: Vec<WasmDocumentNode>, source_format: Option<String>...
method nodes (line 2152) | pub fn nodes(&self) -> Vec<WasmDocumentNode> {
method set_nodes (line 2157) | pub fn set_nodes(&mut self, value: Vec<WasmDocumentNode>) {
method source_format (line 2162) | pub fn source_format(&self) -> Option<String> {
method set_source_format (line 2167) | pub fn set_source_format(&mut self, value: Option<String>) {
method from (line 5314) | fn from(val: html_to_markdown_rs::DocumentStructure) -> Self {
type WasmDocumentNode (line 2175) | pub struct WasmDocumentNode {
method new (line 2187) | pub fn new(
method id (line 2206) | pub fn id(&self) -> String {
method set_id (line 2211) | pub fn set_id(&mut self, value: String) {
method content (line 2216) | pub fn content(&self) -> WasmNodeContent {
method set_content (line 2221) | pub fn set_content(&mut self, value: WasmNodeContent) {
method parent (line 2226) | pub fn parent(&self) -> Option<u32> {
method set_parent (line 2231) | pub fn set_parent(&mut self, value: Option<u32>) {
method children (line 2236) | pub fn children(&self) -> Vec<u32> {
method set_children (line 2241) | pub fn set_children(&mut self, value: Vec<u32>) {
method annotations (line 2246) | pub fn annotations(&self) -> Vec<WasmTextAnnotation> {
method set_annotations (line 2251) | pub fn set_annotations(&mut self, value: Vec<WasmTextAnnotation>) {
method attributes (line 2256) | pub fn attributes(&self) -> Option<JsValue> {
method set_attributes (line 2261) | pub fn set_attributes(&mut self, value: Option<JsValue>) {
method from (line 5341) | fn from(val: html_to_markdown_rs::DocumentNode) -> Self {
type WasmTextAnnotation (line 2271) | pub struct WasmTextAnnotation {
method new (line 2280) | pub fn new(start: u32, end: u32, kind: WasmAnnotationKind) -> WasmText...
method start (line 2285) | pub fn start(&self) -> u32 {
method set_start (line 2290) | pub fn set_start(&mut self, value: u32) {
method end (line 2295) | pub fn end(&self) -> u32 {
method set_end (line 2300) | pub fn set_end(&mut self, value: u32) {
method kind (line 2305) | pub fn kind(&self) -> WasmAnnotationKind {
method set_kind (line 2310) | pub fn set_kind(&mut self, value: WasmAnnotationKind) {
method from (line 5369) | fn from(val: html_to_markdown_rs::TextAnnotation) -> Self {
type WasmConversionResult (line 2331) | pub struct WasmConversionResult {
method new (line 2343) | pub fn new(
method content (line 2362) | pub fn content(&self) -> Option<String> {
method set_content (line 2367) | pub fn set_content(&mut self, value: Option<String>) {
method document (line 2372) | pub fn document(&self) -> Option<WasmDocumentStructure> {
method set_document (line 2377) | pub fn set_document(&mut self, value: Option<WasmDocumentStructure>) {
method metadata (line 2382) | pub fn metadata(&self) -> WasmHtmlMetadata {
method set_metadata (line 2387) | pub fn set_metadata(&mut self, value: WasmHtmlMetadata) {
method tables (line 2392) | pub fn tables(&self) -> Vec<WasmTableData> {
method set_tables (line 2397) | pub fn set_tables(&mut self, value: Vec<WasmTableData>) {
method images (line 2402) | pub fn images(&self) -> Vec<String> {
method set_images (line 2407) | pub fn set_images(&mut self, value: Vec<String>) {
method warnings (line 2412) | pub fn warnings(&self) -> Vec<WasmProcessingWarning> {
method set_warnings (line 2417) | pub fn set_warnings(&mut self, value: Vec<WasmProcessingWarning>) {
method from (line 5396) | fn from(val: html_to_markdown_rs::ConversionResult) -> Self {
type WasmTableGrid (line 2425) | pub struct WasmTableGrid {
method new (line 2434) | pub fn new(rows: Option<u32>, cols: Option<u32>, cells: Option<Vec<Was...
method rows (line 2443) | pub fn rows(&self) -> u32 {
method set_rows (line 2448) | pub fn set_rows(&mut self, value: u32) {
method cols (line 2453) | pub fn cols(&self) -> u32 {
method set_cols (line 2458) | pub fn set_cols(&mut self, value: u32) {
method cells (line 2463) | pub fn cells(&self) -> Vec<WasmGridCell> {
method set_cells (line 2468) | pub fn set_cells(&mut self, value: Vec<WasmGridCell>) {
method from (line 5421) | fn from(val: html_to_markdown_rs::TableGrid) -> Self {
type WasmGridCell (line 2476) | pub struct WasmGridCell {
method new (line 2488) | pub fn new(content: String, row: u32, col: u32, row_span: u32, col_spa...
method content (line 2500) | pub fn content(&self) -> String {
method set_content (line 2505) | pub fn set_content(&mut self, value: String) {
method row (line 2510) | pub fn row(&self) -> u32 {
method set_row (line 2515) | pub fn set_row(&mut self, value: u32) {
method col (line 2520) | pub fn col(&self) -> u32 {
method set_col (line 2525) | pub fn set_col(&mut self, value: u32) {
method row_span (line 2530) | pub fn row_span(&self) -> u32 {
method set_row_span (line 2535) | pub fn set_row_span(&mut self, value: u32) {
method col_span (line 2540) | pub fn col_span(&self) -> u32 {
method set_col_span (line 2545) | pub fn set_col_span(&mut self, value: u32) {
method is_header (line 2550) | pub fn is_header(&self) -> bool {
method set_is_header (line 2555) | pub fn set_is_header(&mut self, value: bool) {
method from (line 5446) | fn from(val: html_to_markdown_rs::GridCell) -> Self {
type WasmTableData (line 2563) | pub struct WasmTableData {
method new (line 2571) | pub fn new(grid: WasmTableGrid, markdown: String) -> WasmTableData {
method grid (line 2576) | pub fn grid(&self) -> WasmTableGrid {
method set_grid (line 2581) | pub fn set_grid(&mut self, value: WasmTableGrid) {
method markdown (line 2586) | pub fn markdown(&self) -> String {
method set_markdown (line 2591) | pub fn set_markdown(&mut self, value: String) {
method from (line 5470) | fn from(val: html_to_markdown_rs::TableData) -> Self {
type WasmProcessingWarning (line 2599) | pub struct WasmProcessingWarning {
method new (line 2607) | pub fn new(message: String, kind: WasmWarningKind) -> WasmProcessingWa...
method message (line 2612) | pub fn message(&self) -> String {
method set_message (line 2617) | pub fn set_message(&mut self, value: String) {
method kind (line 2622) | pub fn kind(&self) -> WasmWarningKind {
method set_kind (line 2627) | pub fn set_kind(&mut self, value: WasmWarningKind) {
method from (line 5490) | fn from(val: html_to_markdown_rs::ProcessingWarning) -> Self {
type WasmVisitorHandle (line 2637) | pub struct WasmVisitorHandle {
type WasmNodeContext (line 2650) | pub struct WasmNodeContext {
method new (line 2663) | pub fn new(
method node_type (line 2684) | pub fn node_type(&self) -> WasmNodeType {
method set_node_type (line 2689) | pub fn set_node_type(&mut self, value: WasmNodeType) {
method tag_name (line 2694) | pub fn tag_name(&self) -> String {
method set_tag_name (line 2699) | pub fn set_tag_name(&mut self, value: String) {
method attributes (line 2704) | pub fn attributes(&self) -> JsValue {
method set_attributes (line 2709) | pub fn set_attributes(&mut self, value: JsValue) {
method depth (line 2714) | pub fn depth(&self) -> usize {
method set_depth (line 2719) | pub fn set_depth(&mut self, value: usize) {
method index_in_parent (line 2724) | pub fn index_in_parent(&self) -> usize {
method set_index_in_parent (line 2729) | pub fn set_index_in_parent(&mut self, value: usize) {
method parent_tag (line 2734) | pub fn parent_tag(&self) -> Option<String> {
method set_parent_tag (line 2739) | pub fn set_parent_tag(&mut self, value: Option<String>) {
method is_inline (line 2744) | pub fn is_inline(&self) -> bool {
method set_is_inline (line 2749) | pub fn set_is_inline(&mut self, value: bool) {
method from (line 5500) | fn from(val: html_to_markdown_rs::NodeContext) -> Self {
type WasmTextDirection (line 2759) | pub enum WasmTextDirection {
method from (line 5524) | fn from(val: html_to_markdown_rs::metadata::TextDirection) -> Self {
method default (line 2767) | fn default() -> Self {
type WasmLinkType (line 2777) | pub enum WasmLinkType {
method from (line 5547) | fn from(val: html_to_markdown_rs::metadata::LinkType) -> Self {
method default (line 2788) | fn default() -> Self {
type WasmImageType (line 2798) | pub enum WasmImageType {
method from (line 5571) | fn from(val: html_to_markdown_rs::metadata::ImageType) -> Self {
method default (line 2807) | fn default() -> Self {
type WasmStructuredDataType (line 2817) | pub enum WasmStructuredDataType {
method from (line 5592) | fn from(val: html_to_markdown_rs::metadata::StructuredDataType) -> Self {
method default (line 2825) | fn default() -> Self {
type WasmPreprocessingPreset (line 2835) | pub enum WasmPreprocessingPreset {
method from (line 5612) | fn from(val: html_to_markdown_rs::options::PreprocessingPreset) -> Self {
method default (line 2843) | fn default() -> Self {
type WasmHeadingStyle (line 2853) | pub enum WasmHeadingStyle {
method from (line 5632) | fn from(val: html_to_markdown_rs::options::HeadingStyle) -> Self {
method default (line 2861) | fn default() -> Self {
type WasmListIndentType (line 2871) | pub enum WasmListIndentType {
method from (line 5651) | fn from(val: html_to_markdown_rs::options::ListIndentType) -> Self {
method default (line 2878) | fn default() -> Self {
type WasmWhitespaceMode (line 2888) | pub enum WasmWhitespaceMode {
method from (line 5669) | fn from(val: html_to_markdown_rs::options::WhitespaceMode) -> Self {
method default (line 2895) | fn default() -> Self {
type WasmNewlineStyle (line 2905) | pub enum WasmNewlineStyle {
method from (line 5687) | fn from(val: html_to_markdown_rs::options::NewlineStyle) -> Self {
method default (line 2912) | fn default() -> Self {
type WasmCodeBlockStyle (line 2922) | pub enum WasmCodeBlockStyle {
method from (line 5706) | fn from(val: html_to_markdown_rs::options::CodeBlockStyle) -> Self {
method default (line 2930) | fn default() -> Self {
type WasmHighlightStyle (line 2940) | pub enum WasmHighlightStyle {
method from (line 5727) | fn from(val: html_to_markdown_rs::options::HighlightStyle) -> Self {
method default (line 2949) | fn default() -> Self {
type WasmLinkStyle (line 2960) | pub enum WasmLinkStyle {
method from (line 5747) | fn from(val: html_to_markdown_rs::options::LinkStyle) -> Self {
method default (line 2967) | fn default() -> Self {
type WasmOutputFormat (line 2977) | pub enum WasmOutputFormat {
method from (line 5766) | fn from(val: html_to_markdown_rs::options::OutputFormat) -> Self {
method default (line 2985) | fn default() -> Self {
type WasmNodeContent (line 2995) | pub enum WasmNodeContent {
method from (line 5826) | fn from(val: html_to_markdown_rs::NodeContent) -> Self {
method default (line 3013) | fn default() -> Self {
type WasmAnnotationKind (line 3023) | pub enum WasmAnnotationKind {
method from (line 5865) | fn from(val: html_to_markdown_rs::AnnotationKind) -> Self {
method default (line 3037) | fn default() -> Self {
type WasmWarningKind (line 3045) | pub enum WasmWarningKind {
method from (line 5894) | fn from(val: html_to_markdown_rs::WarningKind) -> Self {
method default (line 3056) | fn default() -> Self {
type WasmNodeType (line 3067) | pub enum WasmNodeType {
method from (line 5907) | fn from(val: html_to_markdown_rs::NodeType) -> Self {
method default (line 3160) | fn default() -> Self {
type WasmVisitResult (line 3172) | pub enum WasmVisitResult {
method from (line 6002) | fn from(val: html_to_markdown_rs::VisitResult) -> Self {
method default (line 3182) | fn default() -> Self {
function convert (line 3190) | pub fn convert(html: String, options: Option<WasmConversionOptions>) -> ...
function nodecontext_to_js_value (line 3211) | fn nodecontext_to_js_value(ctx: &html_to_markdown_rs::visitor::NodeConte...
type WasmHtmlVisitorBridge (line 3261) | pub struct WasmHtmlVisitorBridge {
method fmt (line 3266) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method new (line 3272) | pub fn new(js_obj: wasm_bindgen::JsValue) -> Self {
method visit_element_start (line 3278) | fn visit_element_start(&mut self, _ctx: &html_to_markdown_rs::NodeCont...
method visit_element_end (line 3312) | fn visit_element_end(
method visit_text (line 3351) | fn visit_text(
method visit_link (line 3390) | fn visit_link(
method visit_image (line 3436) | fn visit_image(
method visit_heading (line 3482) | fn visit_heading(
method visit_code_block (line 3528) | fn visit_code_block(
method visit_code_inline (line 3572) | fn visit_code_inline(
method visit_list_item (line 3611) | fn visit_list_item(
method visit_list_start (line 3654) | fn visit_list_start(
method visit_list_end (line 3693) | fn visit_list_end(
method visit_table_start (line 3734) | fn visit_table_start(&mut self, _ctx: &html_to_markdown_rs::NodeContex...
method visit_table_row (line 3768) | fn visit_table_row(
method visit_table_end (line 3809) | fn visit_table_end(
method visit_blockquote (line 3848) | fn visit_blockquote(
method visit_strong (line 3889) | fn visit_strong(
method visit_emphasis (line 3928) | fn visit_emphasis(
method visit_strikethrough (line 3967) | fn visit_strikethrough(
method visit_underline (line 4006) | fn visit_underline(
method visit_subscript (line 4045) | fn visit_subscript(
method visit_superscript (line 4084) | fn visit_superscript(
method visit_mark (line 4123) | fn visit_mark(
method visit_line_break (line 4162) | fn visit_line_break(&mut self, _ctx: &html_to_markdown_rs::NodeContext...
method visit_horizontal_rule (line 4196) | fn visit_horizontal_rule(
method visit_custom_element (line 4233) | fn visit_custom_element(
method visit_definition_list_start (line 4274) | fn visit_definition_list_start(
method visit_definition_term (line 4311) | fn visit_definition_term(
method visit_definition_description (line 4350) | fn visit_definition_description(
method visit_definition_list_end (line 4389) | fn visit_definition_list_end(
method visit_form (line 4428) | fn visit_form(
method visit_input (line 4475) | fn visit_input(
method visit_button (line 4524) | fn visit_button(
method visit_audio (line 4563) | fn visit_audio(
method visit_video (line 4605) | fn visit_video(
method visit_iframe (line 4647) | fn visit_iframe(
method visit_details (line 4689) | fn visit_details(
method visit_summary (line 4728) | fn visit_summary(
method visit_figure_start (line 4767) | fn visit_figure_start(&mut self, _ctx: &html_to_markdown_rs::NodeConte...
method visit_figcaption (line 4801) | fn visit_figcaption(
method visit_figure_end (line 4840) | fn visit_figure_end(
function from (line 4885) | fn from(val: WasmDocumentMetadata) -> Self {
function from (line 4923) | fn from(val: WasmHeaderMetadata) -> Self {
function from (line 4949) | fn from(val: WasmLinkMetadata) -> Self {
function from (line 4977) | fn from(val: WasmImageMetadata) -> Self {
function from (line 5008) | fn from(val: WasmStructuredData) -> Self {
function from (line 5030) | fn from(val: WasmHtmlMetadata) -> Self {
function from (line 5057) | fn from(val: WasmConversionOptions) -> Self {
function from (line 5157) | fn from(val: WasmConversionOptionsUpdate) -> Self {
function from (line 5256) | fn from(val: WasmPreprocessingOptions) -> Self {
function from (line 5280) | fn from(val: WasmPreprocessingOptionsUpdate) -> Self {
function from (line 5304) | fn from(val: WasmDocumentStructure) -> Self {
function from (line 5324) | fn from(val: WasmDocumentNode) -> Self {
function from (line 5358) | fn from(val: WasmTextAnnotation) -> Self {
function from (line 5381) | fn from(val: WasmConversionResult) -> Self {
function from (line 5410) | fn from(val: WasmTableGrid) -> Self {
function from (line 5432) | fn from(val: WasmGridCell) -> Self {
function from (line 5460) | fn from(val: WasmTableData) -> Self {
function from (line 5480) | fn from(val: WasmProcessingWarning) -> Self {
function from (line 5514) | fn from(val: WasmTextDirection) -> Self {
function from (line 5534) | fn from(val: WasmLinkType) -> Self {
function from (line 5560) | fn from(val: WasmImageType) -> Self {
function from (line 5582) | fn from(val: WasmStructuredDataType) -> Self {
function from (line 5602) | fn from(val: WasmPreprocessingPreset) -> Self {
function from (line 5622) | fn from(val: WasmHeadingStyle) -> Self {
function from (line 5642) | fn from(val: WasmListIndentType) -> Self {
function from (line 5660) | fn from(val: WasmWhitespaceMode) -> Self {
function from (line 5678) | fn from(val: WasmNewlineStyle) -> Self {
function from (line 5696) | fn from(val: WasmCodeBlockStyle) -> Self {
function from (line 5716) | fn from(val: WasmHighlightStyle) -> Self {
function from (line 5738) | fn from(val: WasmLinkStyle) -> Self {
function from (line 5756) | fn from(val: WasmOutputFormat) -> Self {
function from (line 5776) | fn from(val: WasmNodeContent) -> Self {
function from (line 5846) | fn from(val: WasmAnnotationKind) -> Self {
function from (line 5881) | fn from(val: WasmWarningKind) -> Self {
function conversion_error_error_code (line 6015) | fn conversion_error_error_code(e: &html_to_markdown_rs::error::Conversio...
function conversion_error_to_js_value (line 6031) | fn conversion_error_to_js_value(e: html_to_markdown_rs::error::Conversio...
FILE: crates/html-to-markdown/examples/basic.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/table.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_deser.rs
function main (line 4) | fn main() {
FILE: crates/html-to-markdown/examples/test_escape.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_inline_formatting.rs
function convert (line 2) | fn convert(
function main (line 11) | fn main() {
FILE: crates/html-to-markdown/examples/test_lists.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_semantic_tags.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_tables.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_task_lists.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/examples/test_whitespace.rs
function convert (line 3) | fn convert(
function main (line 10) | fn main() {
FILE: crates/html-to-markdown/src/convert_api.rs
function convert (line 41) | pub fn convert(html: &str, options: Option<ConversionOptions>) -> Result...
function normalize_input (line 214) | fn normalize_input(html: &str) -> Result<Cow<'_, str>> {
function decode_utf16_if_needed (line 243) | fn decode_utf16_if_needed(html: &str) -> Cow<'_, str> {
function decode_utf16_bytes (line 261) | fn decode_utf16_bytes(bytes: &[u8], encoding: Utf16Encoding) -> String {
function strip_nul_bytes (line 287) | fn strip_nul_bytes(html: &str) -> Cow<'_, str> {
function normalize_line_endings (line 298) | fn normalize_line_endings(html: &str) -> Cow<'_, str> {
function fast_text_only (line 309) | fn fast_text_only(html: &str, options: &ConversionOptions) -> Option<Str...
FILE: crates/html-to-markdown/src/converter/block/blockquote.rs
type Context (line 18) | type Context = crate::converter::Context;
type DomContext (line 19) | type DomContext = crate::converter::DomContext;
function handle (line 25) | pub fn handle(
function serialize_node_to_html (line 175) | fn serialize_node_to_html(node_handle: &NodeHandle, parser: &Parser, out...
FILE: crates/html-to-markdown/src/converter/block/container.rs
type Context (line 15) | type Context = crate::converter::Context;
type DomContext (line 16) | type DomContext = crate::converter::DomContext;
function handle_structural_container (line 31) | pub fn handle_structural_container(
function handle_passthrough (line 67) | pub fn handle_passthrough(
function handle_noop (line 104) | pub fn handle_noop(
FILE: crates/html-to-markdown/src/converter/block/div.rs
type Context (line 14) | type Context = crate::converter::Context;
type DomContext (line 15) | type DomContext = crate::converter::DomContext;
function handle (line 28) | pub fn handle(
function add_list_continuation_indent (line 136) | fn add_list_continuation_indent(
FILE: crates/html-to-markdown/src/converter/block/heading.rs
type Context (line 19) | type Context = crate::converter::Context;
type DomContext (line 20) | type DomContext = crate::converter::DomContext;
function handle (line 30) | pub fn handle(
function heading_allows_inline_images (line 152) | pub fn heading_allows_inline_images(
function normalize_heading_text (line 160) | fn normalize_heading_text(text: &str) -> Cow<'_, str> {
function push_heading (line 192) | pub fn push_heading(output: &mut String, ctx: &Context, options: &Conver...
function continuation_indent_string (line 280) | fn continuation_indent_string(list_depth: usize, _options: &ConversionOp...
function visitor_heading_output (line 293) | fn visitor_heading_output(
function find_single_heading_child (line 377) | pub fn find_single_heading_child(node_handle: NodeHandle, parser: &Parse...
function heading_level_from_name (line 418) | fn heading_level_from_name(name: &str) -> Option<usize> {
FILE: crates/html-to-markdown/src/converter/block/horizontal_rule.rs
type Context (line 11) | type Context = crate::converter::Context;
type DomContext (line 12) | type DomContext = crate::converter::DomContext;
function handle (line 18) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/block/line_break.rs
type Context (line 11) | type Context = crate::converter::Context;
type DomContext (line 12) | type DomContext = crate::converter::DomContext;
function handle (line 18) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/block/paragraph.rs
type Context (line 14) | type Context = crate::converter::Context;
type DomContext (line 15) | type DomContext = crate::converter::DomContext;
function handle (line 21) | pub fn handle(
function add_list_continuation_indent (line 112) | fn add_list_continuation_indent(
function is_empty_inline_element (line 127) | fn is_empty_inline_element(node_handle: &NodeHandle, parser: &Parser, _d...
FILE: crates/html-to-markdown/src/converter/block/preformatted.rs
type Context (line 18) | type Context = crate::converter::Context;
type DomContext (line 19) | type DomContext = crate::converter::DomContext;
function handle_pre (line 22) | pub fn handle_pre(
function extract_language_from_pre (line 152) | fn extract_language_from_pre(node_handle: &NodeHandle, parser: &Parser) ...
function format_code_block (line 196) | fn format_code_block(
function dedent_code_block (line 259) | fn dedent_code_block(content: &str) -> String {
FILE: crates/html-to-markdown/src/converter/block/table/builder.rs
constant MAX_TABLE_COLS (line 16) | const MAX_TABLE_COLS: usize = 1000;
function table_total_columns (line 31) | pub fn table_total_columns(
function handle_table (line 90) | pub fn handle_table(
function single_nested_table_stays_as_table (line 401) | fn single_nested_table_stays_as_table() {
FILE: crates/html-to-markdown/src/converter/block/table/caption.rs
function handle_caption (line 19) | pub fn handle_caption(
FILE: crates/html-to-markdown/src/converter/block/table/cell.rs
constant MAX_TABLE_COLS (line 9) | const MAX_TABLE_COLS: usize = 1000;
function get_colspan (line 23) | pub fn get_colspan(node_handle: &tl::NodeHandle, parser: &tl::Parser) ->...
function get_colspan_rowspan (line 45) | pub fn get_colspan_rowspan(node_handle: &tl::NodeHandle, parser: &tl::Pa...
function clamp_table_span (line 67) | fn clamp_table_span(value: usize) -> usize {
function collect_table_cells (line 81) | pub fn collect_table_cells(
function convert_table_cell (line 114) | pub fn convert_table_cell(
function rich_formatting_preserved_in_cells (line 186) | fn rich_formatting_preserved_in_cells() {
FILE: crates/html-to-markdown/src/converter/block/table/cells.rs
constant MAX_TABLE_COLS (line 16) | const MAX_TABLE_COLS: usize = 1000;
function append_layout_row (line 31) | pub fn append_layout_row(
function convert_table_row (line 113) | pub fn convert_table_row(
FILE: crates/html-to-markdown/src/converter/block/table/layout.rs
function indent_table_for_list (line 20) | pub fn indent_table_for_list(
function continuation_indent_string (line 55) | fn continuation_indent_string(list_depth: usize, options: &crate::option...
FILE: crates/html-to-markdown/src/converter/block/table/mod.rs
function dispatch_table_handler (line 44) | pub fn dispatch_table_handler(
function handle_table_with_context (line 85) | pub fn handle_table_with_context(
function collect_table_grid (line 139) | fn collect_table_grid(
function collect_grid_row (line 214) | fn collect_grid_row(
FILE: crates/html-to-markdown/src/converter/block/table/scanner.rs
type TableScan (line 17) | pub struct TableScan {
function scan_table (line 46) | pub fn scan_table(
function scan_table_node (line 67) | fn scan_table_node(
FILE: crates/html-to-markdown/src/converter/block/table/utils.rs
function is_tag_name (line 21) | pub(super) fn is_tag_name(
FILE: crates/html-to-markdown/src/converter/block/unknown.rs
type Context (line 14) | type Context = crate::converter::Context;
type DomContext (line 15) | type DomContext = crate::converter::DomContext;
function handle (line 35) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/context.rs
type InlineCollectorHandle (line 22) | pub type InlineCollectorHandle = Rc<RefCell<InlineImageCollector>>;
type InlineCollectorHandle (line 25) | pub type InlineCollectorHandle = ();
type ImageMetadataPayload (line 29) | pub type ImageMetadataPayload = (BTreeMap<String, String>, Option<u32>, ...
type Context (line 36) | pub struct Context {
method set_excluded_node_ids (line 121) | pub(crate) fn set_excluded_node_ids(&mut self, ids: HashSet<u32>) {
method new (line 131) | pub fn new(
FILE: crates/html-to-markdown/src/converter/dom_context.rs
type TagInfo (line 18) | pub struct TagInfo {
type DomContext (line 31) | pub struct DomContext {
method ensure_capacity (line 46) | pub(crate) fn ensure_capacity(&mut self, id: u32) {
method parent_of (line 62) | pub(crate) fn parent_of(&self, id: u32) -> Option<u32> {
method node_handle (line 66) | pub(crate) fn node_handle(&self, id: u32) -> Option<&tl::NodeHandle> {
method children_of (line 70) | pub(crate) fn children_of(&self, id: u32) -> Option<&Vec<tl::NodeHandl...
method sibling_index (line 76) | pub(crate) fn sibling_index(&self, id: u32) -> Option<usize> {
method tag_info (line 80) | pub(crate) fn tag_info(&self, id: u32, parser: &tl::Parser) -> Option<...
method tag_name_for (line 86) | pub(crate) fn tag_name_for<'a>(
method next_tag_name (line 100) | pub(crate) fn next_tag_name<'a>(&'a self, node_handle: tl::NodeHandle,...
method previous_inline_like (line 105) | pub(crate) fn previous_inline_like(&self, node_handle: tl::NodeHandle,...
method next_inline_like (line 144) | pub(crate) fn next_inline_like(&self, node_handle: tl::NodeHandle, par...
method next_whitespace_text (line 183) | pub(crate) fn next_whitespace_text(&self, node_handle: tl::NodeHandle,...
method next_tag_id (line 220) | pub(crate) fn next_tag_id(&self, id: u32, parser: &tl::Parser) -> Opti...
method build_tag_info (line 254) | pub(crate) fn build_tag_info(&self, id: u32, parser: &tl::Parser) -> O...
method text_content (line 272) | pub(crate) fn text_content(&self, node_handle: tl::NodeHandle, parser:...
method text_content_uncached (line 287) | pub(crate) fn text_content_uncached(&self, node_handle: tl::NodeHandle...
method parent_tag_name (line 313) | pub(crate) fn parent_tag_name(&self, node_id: u32, parser: &tl::Parser...
method get_sibling_index (line 334) | pub(crate) fn get_sibling_index(&self, node_id: u32) -> Option<usize> {
FILE: crates/html-to-markdown/src/converter/form/elements.rs
function handle_form (line 30) | pub fn handle_form(
function handle_fieldset (line 85) | pub fn handle_fieldset(
function handle_legend (line 140) | pub fn handle_legend(
function handle_label (line 201) | pub fn handle_label(
function handle_input (line 234) | pub fn handle_input(
function handle_textarea (line 256) | pub fn handle_textarea(
function handle_select (line 290) | pub fn handle_select(
function handle_option (line 325) | pub fn handle_option(
function handle_optgroup (line 368) | pub fn handle_optgroup(
function handle_button (line 413) | pub fn handle_button(
function handle_progress (line 447) | pub fn handle_progress(
function handle_meter (line 481) | pub fn handle_meter(
function handle_output (line 515) | pub fn handle_output(
function handle_datalist (line 549) | pub fn handle_datalist(
function handle (line 577) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/form/mod.rs
function dispatch_form_handler (line 63) | pub fn dispatch_form_handler(
FILE: crates/html-to-markdown/src/converter/format/djot.rs
type DjotRenderer (line 7) | pub struct DjotRenderer;
method emphasis (line 10) | fn emphasis(&self, content: &str) -> String {
method strong (line 14) | fn strong(&self, content: &str, _symbol: char) -> String {
method strikethrough (line 19) | fn strikethrough(&self, content: &str) -> String {
method highlight (line 23) | fn highlight(&self, content: &str) -> String {
method inserted (line 27) | fn inserted(&self, content: &str) -> String {
method subscript (line 31) | fn subscript(&self, content: &str, _custom_symbol: &str) -> String {
method superscript (line 36) | fn superscript(&self, content: &str, _custom_symbol: &str) -> String {
method span_with_attributes (line 41) | fn span_with_attributes(&self, content: &str, classes: &[&str], id: Opti...
method div_with_attributes (line 53) | fn div_with_attributes(&self, content: &str, classes: &[&str]) -> String {
method is_djot (line 61) | fn is_djot(&self) -> bool {
FILE: crates/html-to-markdown/src/converter/format/markdown.rs
type MarkdownRenderer (line 7) | pub struct MarkdownRenderer;
method emphasis (line 10) | fn emphasis(&self, content: &str) -> String {
method strong (line 14) | fn strong(&self, content: &str, symbol: char) -> String {
method strikethrough (line 18) | fn strikethrough(&self, content: &str) -> String {
method highlight (line 22) | fn highlight(&self, content: &str) -> String {
method inserted (line 26) | fn inserted(&self, content: &str) -> String {
method subscript (line 30) | fn subscript(&self, content: &str, custom_symbol: &str) -> String {
method superscript (line 38) | fn superscript(&self, content: &str, custom_symbol: &str) -> String {
method span_with_attributes (line 46) | fn span_with_attributes(&self, content: &str, _classes: &[&str], _id: Op...
method div_with_attributes (line 51) | fn div_with_attributes(&self, content: &str, _classes: &[&str]) -> String {
method is_djot (line 56) | fn is_djot(&self) -> bool {
FILE: crates/html-to-markdown/src/converter/format/mod.rs
type FormatRenderer (line 13) | pub trait FormatRenderer: Send + Sync {
method emphasis (line 15) | fn emphasis(&self, content: &str) -> String;
method strong (line 18) | fn strong(&self, content: &str, symbol: char) -> String;
method strikethrough (line 21) | fn strikethrough(&self, content: &str) -> String;
method highlight (line 24) | fn highlight(&self, content: &str) -> String;
method inserted (line 27) | fn inserted(&self, content: &str) -> String;
method subscript (line 30) | fn subscript(&self, content: &str, custom_symbol: &str) -> String;
method superscript (line 33) | fn superscript(&self, content: &str, custom_symbol: &str) -> String;
method span_with_attributes (line 36) | fn span_with_attributes(&self, content: &str, classes: &[&str], id: Op...
method div_with_attributes (line 39) | fn div_with_attributes(&self, content: &str, classes: &[&str]) -> String;
method is_djot (line 42) | fn is_djot(&self) -> bool;
FILE: crates/html-to-markdown/src/converter/handlers/blockquote.rs
function handle_blockquote (line 34) | pub fn handle_blockquote(
FILE: crates/html-to-markdown/src/converter/handlers/code_block.rs
function handle_code (line 35) | pub fn handle_code(
function handle_pre (line 143) | pub fn handle_pre(
function format_inline_code (line 317) | fn format_inline_code(content: &str, output: &mut String) {
function format_code_block (line 374) | fn format_code_block(
FILE: crates/html-to-markdown/src/converter/handlers/graphic.rs
type GraphicMetadataPayload (line 23) | type GraphicMetadataPayload = (BTreeMap<String, String>, Option<u32>, Op...
function handle_graphic (line 36) | pub fn handle_graphic(
function format_graphic_markdown (line 199) | fn format_graphic_markdown(
FILE: crates/html-to-markdown/src/converter/handlers/image.rs
type ImageMetadataPayload (line 27) | type ImageMetadataPayload = (BTreeMap<String, String>, Option<u32>, Opti...
function handle_img (line 40) | pub fn handle_img(
function format_image_markdown (line 229) | fn format_image_markdown(
FILE: crates/html-to-markdown/src/converter/handlers/link.rs
function handle_link (line 43) | pub fn handle_link(
FILE: crates/html-to-markdown/src/converter/inline/code.rs
type Context (line 23) | type Context = crate::converter::Context;
type DomContext (line 24) | type DomContext = crate::converter::DomContext;
function handle (line 38) | pub fn handle(
function handle_code (line 71) | fn handle_code(
function handle_kbd_samp (line 182) | fn handle_kbd_samp(
function render_code_with_escaping (line 257) | fn render_code_with_escaping(trimmed: &str, output: &mut String) {
FILE: crates/html-to-markdown/src/converter/inline/emphasis.rs
type Context (line 20) | type Context = crate::converter::Context;
type DomContext (line 21) | type DomContext = crate::converter::DomContext;
function handle (line 35) | pub fn handle(
function handle_strong (line 59) | fn handle_strong(
function handle_emphasis (line 204) | fn handle_emphasis(
FILE: crates/html-to-markdown/src/converter/inline/link.rs
type Context (line 23) | type Context = crate::converter::Context;
type DomContext (line 24) | type DomContext = crate::converter::DomContext;
function handle (line 48) | pub fn handle(
function append_markdown_link (line 364) | pub fn append_markdown_link(
FILE: crates/html-to-markdown/src/converter/inline/mod.rs
function dispatch_inline_handler (line 105) | pub fn dispatch_inline_handler(
function test_dispatcher_routes_emphasis_tags (line 150) | fn test_dispatcher_routes_emphasis_tags() {
function test_dispatcher_routes_code_tags (line 162) | fn test_dispatcher_routes_code_tags() {
function test_dispatcher_routes_semantic_tags (line 174) | fn test_dispatcher_routes_semantic_tags() {
function test_dispatcher_recognizes_link_tag (line 202) | fn test_dispatcher_recognizes_link_tag() {
function test_dispatcher_routes_ruby_tags (line 210) | fn test_dispatcher_routes_ruby_tags() {
function test_unknown_tags_not_routed (line 222) | fn test_unknown_tags_not_routed() {
FILE: crates/html-to-markdown/src/converter/inline/ruby.rs
type Context (line 18) | type Context = crate::converter::Context;
type DomContext (line 19) | type DomContext = crate::converter::DomContext;
function handle (line 48) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/inline/semantic/marks.rs
type Context (line 15) | type Context = crate::converter::Context;
type DomContext (line 16) | type DomContext = crate::converter::DomContext;
function handle_mark (line 25) | pub fn handle_mark(
function handle_strikethrough (line 154) | pub fn handle_strikethrough(
function handle_inserted (line 283) | pub fn handle_inserted(
function handle_underline (line 399) | pub fn handle_underline(
type MarkSkipVisitor (line 493) | struct MarkSkipVisitor;
method visit_mark (line 496) | fn visit_mark(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
type MarkCustomVisitor (line 502) | struct MarkCustomVisitor;
method visit_mark (line 505) | fn visit_mark(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
type MarkPreserveVisitor (line 511) | struct MarkPreserveVisitor;
method visit_mark (line 514) | fn visit_mark(&mut self, _ctx: &NodeContext, _text: &str) -> VisitResult {
function make_visitor (line 519) | fn make_visitor<V: HtmlVisitor + 'static>(v: V) -> ConversionOptions {
function test_visitor_mark_skip (line 527) | fn test_visitor_mark_skip() {
function test_visitor_mark_custom (line 544) | fn test_visitor_mark_custom() {
function test_visitor_mark_preserve_html (line 556) | fn test_visitor_mark_preserve_html() {
FILE: crates/html-to-markdown/src/converter/inline/semantic/mod.rs
type Context (line 20) | type Context = crate::converter::Context;
type DomContext (line 21) | type DomContext = crate::converter::DomContext;
function handle (line 40) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/inline/semantic/typography.rs
type Context (line 17) | type Context = crate::converter::Context;
type DomContext (line 18) | type DomContext = crate::converter::DomContext;
function handle_small (line 23) | pub fn handle_small(
function handle_subscript (line 50) | pub fn handle_subscript(
function handle_superscript (line 152) | pub fn handle_superscript(
function handle_variable (line 254) | pub fn handle_variable(
function handle_definition (line 291) | pub fn handle_definition(
function handle_abbreviation (line 328) | pub fn handle_abbreviation(
function handle_span (line 374) | pub fn handle_span(
type SubSkipVisitor (line 434) | struct SubSkipVisitor;
method visit_subscript (line 437) | fn visit_subscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitR...
type SubCustomVisitor (line 443) | struct SubCustomVisitor;
method visit_subscript (line 446) | fn visit_subscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitR...
type SubPreserveVisitor (line 452) | struct SubPreserveVisitor;
method visit_subscript (line 455) | fn visit_subscript(&mut self, _ctx: &NodeContext, _text: &str) -> VisitR...
type SupSkipVisitor (line 461) | struct SupSkipVisitor;
method visit_superscript (line 464) | fn visit_superscript(&mut self, _ctx: &NodeContext, _text: &str) -> Visi...
type SupCustomVisitor (line 470) | struct SupCustomVisitor;
method visit_superscript (line 473) | fn visit_superscript(&mut self, _ctx: &NodeContext, _text: &str) -> Visi...
type SupPreserveVisitor (line 479) | struct SupPreserveVisitor;
method visit_superscript (line 482) | fn visit_superscript(&mut self, _ctx: &NodeContext, _text: &str) -> Visi...
function make_visitor (line 487) | fn make_visitor<V: HtmlVisitor + 'static>(v: V) -> ConversionOptions {
function test_visitor_subscript_skip (line 495) | fn test_visitor_subscript_skip() {
function test_visitor_subscript_custom (line 504) | fn test_visitor_subscript_custom() {
function test_visitor_subscript_preserve_html (line 516) | fn test_visitor_subscript_preserve_html() {
function test_visitor_superscript_skip (line 528) | fn test_visitor_superscript_skip() {
function test_visitor_superscript_custom (line 536) | fn test_visitor_superscript_custom() {
function test_visitor_superscript_preserve_html (line 548) | fn test_visitor_superscript_preserve_html() {
FILE: crates/html-to-markdown/src/converter/list/definition.rs
type Context (line 12) | type Context = crate::converter::Context;
type DomContext (line 13) | type DomContext = crate::converter::DomContext;
function handle_dl (line 18) | pub fn handle_dl(
function handle_dt (line 64) | pub fn handle_dt(
function handle_dd (line 99) | pub fn handle_dd(
FILE: crates/html-to-markdown/src/converter/list/item.rs
type Context (line 19) | type Context = crate::converter::Context;
type DomContext (line 20) | type DomContext = crate::converter::DomContext;
function handle_li (line 27) | pub fn handle_li(
FILE: crates/html-to-markdown/src/converter/list/mod.rs
function dispatch_list_handler (line 32) | pub fn dispatch_list_handler(
FILE: crates/html-to-markdown/src/converter/list/ordered.rs
type Context (line 21) | type Context = crate::converter::Context;
type DomContext (line 22) | type DomContext = crate::converter::DomContext;
function handle_ol (line 29) | pub fn handle_ol(
FILE: crates/html-to-markdown/src/converter/list/unordered.rs
type Context (line 21) | type Context = crate::converter::Context;
type DomContext (line 22) | type DomContext = crate::converter::DomContext;
function handle_ul (line 29) | pub fn handle_ul(
FILE: crates/html-to-markdown/src/converter/list/utils.rs
type Context (line 12) | type Context = crate::converter::Context;
type DomContext (line 13) | type DomContext = crate::converter::DomContext;
function calculate_list_continuation_indent (line 35) | pub const fn calculate_list_continuation_indent(depth: usize) -> usize {
function is_loose_list (line 60) | pub fn is_loose_list(node_handle: tl::NodeHandle, parser: &tl::Parser, d...
function add_list_continuation_indent (line 126) | pub fn add_list_continuation_indent(
function continuation_indent_string (line 162) | pub fn continuation_indent_string(list_depth: usize, options: &Conversio...
function add_list_leading_separator (line 190) | pub fn add_list_leading_separator(output: &mut String, ctx: &Context) {
function add_nested_list_trailing_separator (line 223) | pub fn add_nested_list_trailing_separator(output: &mut String, ctx: &Con...
function calculate_list_nesting_depth (line 245) | pub const fn calculate_list_nesting_depth(ctx: &Context) -> usize {
function is_list_item (line 254) | pub fn is_list_item(node_handle: tl::NodeHandle, parser: &tl::Parser, do...
function process_list_children (line 269) | pub fn process_list_children(
FILE: crates/html-to-markdown/src/converter/main.rs
function convert_html_impl (line 45) | pub fn convert_html_impl(
function finish_structure_collector (line 298) | fn finish_structure_collector(
function walk_node (line 317) | pub fn walk_node(
FILE: crates/html-to-markdown/src/converter/main_helpers.rs
function tag_name_eq (line 11) | pub fn tag_name_eq(a: impl AsRef<str>, b: &str) -> bool {
function trim_trailing_whitespace (line 16) | pub fn trim_trailing_whitespace(output: &mut String) {
function trim_line_end_whitespace (line 23) | pub fn trim_line_end_whitespace(output: &mut String) {
function has_custom_element_tags (line 51) | pub fn has_custom_element_tags(html: &str) -> bool {
constant HTML5_VOID_ELEMENTS (line 105) | const HTML5_VOID_ELEMENTS: &[&str] = &[
function expand_xml_self_closing_tags (line 129) | pub fn expand_xml_self_closing_tags(input: &str) -> String {
function repair_with_html5ever (line 265) | pub fn repair_with_html5ever(input: &str) -> Option<String> {
function format_metadata_frontmatter (line 286) | pub fn format_metadata_frontmatter(metadata: &BTreeMap<String, String>) ...
function extract_head_metadata (line 299) | pub fn extract_head_metadata(
function has_more_than_one_char (line 393) | pub fn has_more_than_one_char(text: &str) -> bool {
function is_inline_element (line 399) | pub fn is_inline_element(tag_name: &str) -> bool {
FILE: crates/html-to-markdown/src/converter/media/embedded.rs
function extract_media_src (line 19) | pub fn extract_media_src<'a>(tag: &'a HTMLTag<'a>) -> Cow<'a, str> {
function find_source_src (line 31) | pub fn find_source_src<'a, T>(children: T, parser: &'a Parser) -> Option...
function is_source_element (line 46) | pub fn is_source_element(tag: &HTMLTag) -> bool {
function should_output_media_link (line 53) | pub fn should_output_media_link(src: &str) -> bool {
function handle_audio (line 62) | pub fn handle_audio(
function handle_video (line 174) | pub fn handle_video(
function handle_picture (line 284) | pub fn handle_picture(
function handle_iframe (line 311) | pub fn handle_iframe(
FILE: crates/html-to-markdown/src/converter/media/image.rs
type InlineCollectorHandle (line 10) | type InlineCollectorHandle = std::rc::Rc<std::cell::RefCell<InlineImageC...
function handle_inline_data_image (line 22) | pub fn handle_inline_data_image(
function non_empty_trimmed (line 176) | fn non_empty_trimmed(value: &str) -> Option<String> {
FILE: crates/html-to-markdown/src/converter/media/mod.rs
function dispatch_media_handler (line 42) | pub fn dispatch_media_handler(
FILE: crates/html-to-markdown/src/converter/media/svg.rs
type InlineCollectorHandle (line 13) | type InlineCollectorHandle = std::rc::Rc<std::cell::RefCell<InlineImageC...
function handle_inline_svg (line 26) | pub fn handle_inline_svg(
function serialize_element (line 96) | pub fn serialize_element(node_handle: &NodeHandle, parser: &Parser) -> S...
function serialize_node (line 135) | pub fn serialize_node(node_handle: &NodeHandle, parser: &Parser) -> Stri...
function non_empty_trimmed (line 149) | fn non_empty_trimmed(value: &str) -> Option<String> {
function handle_svg (line 163) | pub fn handle_svg(
function handle_math (line 233) | pub fn handle_math(
FILE: crates/html-to-markdown/src/converter/metadata.rs
type Context (line 17) | type Context = crate::converter::Context;
type DomContext (line 18) | type DomContext = crate::converter::DomContext;
function handle (line 27) | pub fn handle(
function handle_head (line 58) | fn handle_head(
function handle_script (line 131) | fn handle_script(
function handle_math (line 169) | fn handle_math(
FILE: crates/html-to-markdown/src/converter/plain_text.rs
type ListContext (line 16) | enum ListContext {
constant SKIP_TAGS (line 27) | const SKIP_TAGS: &[&str] = &["script", "style", "head", "template", "nos...
constant BLOCK_TAGS (line 30) | const BLOCK_TAGS: &[&str] = &[
function extract_plain_text (line 67) | pub fn extract_plain_text(dom: &tl::VDom, parser: &tl::Parser, options: ...
function walk_plain (line 103) | fn walk_plain(
function walk_children (line 232) | fn walk_children(
function walk_table (line 249) | fn walk_table(
function collect_descendant_handles (line 304) | fn collect_descendant_handles(
function ensure_blank_line (line 324) | fn ensure_blank_line(buf: &mut String) {
function ensure_newline (line 339) | fn ensure_newline(buf: &mut String) {
function collapse_triple_newlines (line 349) | fn collapse_triple_newlines(buf: &mut String) {
function trim_line_ends (line 371) | fn trim_line_ends(buf: &mut String) {
function post_process (line 383) | fn post_process(buf: &mut String) {
FILE: crates/html-to-markdown/src/converter/preprocessing_helpers.rs
function inline_ancestor_allows_block (line 13) | pub fn inline_ancestor_allows_block(tag_name: &str) -> bool {
function has_inline_block_misnest (line 21) | pub fn has_inline_block_misnest(dom_ctx: &DomContext, parser: &tl::Parse...
function should_drop_for_preprocessing (line 82) | pub fn should_drop_for_preprocessing(tag_name: &str, tag: &tl::HTMLTag, ...
function element_has_noise_hint (line 151) | fn element_has_noise_hint(tag: &tl::HTMLTag) -> bool {
FILE: crates/html-to-markdown/src/converter/reference_collector.rs
type ReferenceCollectorHandle (line 8) | pub type ReferenceCollectorHandle = Rc<RefCell<ReferenceCollector>>;
type ReferenceKey (line 11) | struct ReferenceKey {
type ReferenceCollector (line 19) | pub struct ReferenceCollector {
method new (line 26) | pub fn new() -> Self {
method get_or_insert (line 33) | pub fn get_or_insert(&mut self, url: &str, title: Option<&str>) -> usi...
method finish (line 50) | pub fn finish(&self) -> String {
FILE: crates/html-to-markdown/src/converter/semantic/attributes.rs
function handle_dfn (line 31) | pub fn handle_dfn(
function handle_abbr (line 80) | pub fn handle_abbr(
function handle_time_data (line 127) | pub fn handle_time_data(
function handle_cite (line 156) | pub fn handle_cite(
function handle_q (line 204) | pub fn handle_q(
function handle (line 236) | pub fn handle(
function append_inline_suffix (line 260) | fn append_inline_suffix(
FILE: crates/html-to-markdown/src/converter/semantic/definition_list.rs
function handle_hgroup (line 26) | pub fn handle_hgroup(
function handle_dl (line 56) | pub fn handle_dl(
function handle_dt (line 108) | pub fn handle_dt(
function handle_dd (line 148) | pub fn handle_dd(
function handle_menu (line 191) | pub fn handle_menu(
function handle (line 248) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/semantic/figure.rs
function handle_figure (line 30) | pub fn handle_figure(
function handle_figcaption (line 102) | pub fn handle_figcaption(
function handle (line 151) | pub fn handle(
function figure_caption_separated_from_image (line 171) | fn figure_caption_separated_from_image() {
FILE: crates/html-to-markdown/src/converter/semantic/mod.rs
function dispatch_semantic_handler (line 74) | pub fn dispatch_semantic_handler(
FILE: crates/html-to-markdown/src/converter/semantic/sectioning.rs
function handle (line 34) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/semantic/summary.rs
function handle_details (line 26) | pub fn handle_details(
function handle_summary (line 88) | pub fn handle_summary(
function handle_dialog (line 159) | pub fn handle_dialog(
function handle (line 206) | pub fn handle(
FILE: crates/html-to-markdown/src/converter/text/processing.rs
function dedent_code_block (line 18) | pub fn dedent_code_block(content: &str) -> String {
FILE: crates/html-to-markdown/src/converter/text_node.rs
type Context (line 21) | type Context = crate::converter::Context;
function process_text_node (line 33) | pub fn process_text_node(
FILE: crates/html-to-markdown/src/converter/utility/attributes.rs
function tag_has_main_semantics (line 9) | pub fn tag_has_main_semantics(tag: &tl::HTMLTag) -> bool {
function element_has_navigation_hint (line 41) | pub fn element_has_navigation_hint(tag: &tl::HTMLTag) -> bool {
function attribute_matches_any (line 91) | pub fn attribute_matches_any(tag: &tl::HTMLTag, attr: &str, keywords: &[...
function attribute_contains_any (line 116) | pub fn attribute_contains_any(tag: &tl::HTMLTag, attr: &str, keywords: &...
function has_semantic_content_ancestor (line 129) | pub fn has_semantic_content_ancestor(node_handle: &tl::NodeHandle, parse...
FILE: crates/html-to-markdown/src/converter/utility/caching.rs
function build_dom_context (line 13) | pub fn build_dom_context(dom: &tl::VDom, parser: &tl::Parser, input_len:...
function text_cache_capacity_for_input (line 43) | pub fn text_cache_capacity_for_input(input_len: usize) -> NonZeroUsize {
function record_node_hierarchy (line 53) | pub fn record_node_hierarchy(
FILE: crates/html-to-markdown/src/converter/utility/content.rs
function collect_tag_attributes (line 19) | pub fn collect_tag_attributes(tag: &tl::HTMLTag) -> BTreeMap<String, Str...
function chomp_inline (line 31) | pub fn chomp_inline(text: &str) -> (&str, &str, &str) {
function get_text_content (line 62) | pub fn get_text_content(node_handle: &tl::NodeHandle, parser: &tl::Parse...
function collect_link_label_text (line 68) | pub fn collect_link_label_text(
function normalize_link_label (line 121) | pub fn normalize_link_label(label: &str) -> String {
function normalized_tag_name (line 149) | pub fn normalized_tag_name(raw: Cow<'_, str>) -> Cow<'_, str> {
function is_block_level_element (line 160) | pub fn is_block_level_element(tag_name: &str) -> bool {
function floor_char_boundary (line 169) | pub fn floor_char_boundary(s: &str, index: usize) -> usize {
function escape_link_label (line 194) | pub fn escape_link_label(text: &str) -> String {
function is_block_level_name (line 234) | pub fn is_block_level_name(tag_name: &str, is_inline: bool) -> bool {
FILE: crates/html-to-markdown/src/converter/utility/preprocessing.rs
function strip_script_and_style_tags (line 10) | pub fn strip_script_and_style_tags(input: &str) -> Cow<'_, str> {
function find_closing_tag_bytes (line 166) | pub fn find_closing_tag_bytes(bytes: &[u8], start: usize, tag: &[u8]) ->...
function eq_ascii_insensitive (line 215) | pub fn eq_ascii_insensitive(a: &[u8], b: &[u8]) -> bool {
function preprocess_html (line 223) | pub fn preprocess_html(input: &str) -> Cow<'_, str> {
function is_json_ld_script_open_tag (line 382) | pub fn is_json_ld_script_open_tag(tag: &str) -> bool {
function eq_ascii_case_insensitive (line 446) | pub fn eq_ascii_case_insensitive(haystack: &[u8], needle: &[u8]) -> bool {
function matches_tag_start (line 457) | pub fn matches_tag_start(bytes: &[u8], mut start: usize, tag: &[u8]) -> ...
function find_tag_end (line 480) | pub fn find_tag_end(bytes: &[u8], mut idx: usize) -> Option<usize> {
function find_closing_tag (line 505) | pub fn find_closing_tag(bytes: &[u8], mut idx: usize, tag: &[u8]) -> Opt...
function matches_end_tag_start (line 536) | pub fn matches_end_tag_start(bytes: &[u8], start: usize, tag: &[u8]) -> ...
function sanitize_markdown_url (line 556) | pub fn sanitize_markdown_url(url: &str) -> Cow<'_, str> {
function strip_hidden_elements (line 588) | pub fn strip_hidden_elements(input: &str) -> Cow<'_, str> {
function tag_has_hidden_attribute (line 657) | fn tag_has_hidden_attribute(tag: &str) -> bool {
function sanitize_markdown_url_extracts_scheme_relative_markdown_like_url (line 690) | fn sanitize_markdown_url_extracts_scheme_relative_markdown_like_url() {
function sanitize_markdown_url_extracts_standard_markdown_like_url (line 697) | fn sanitize_markdown_url_extracts_standard_markdown_like_url() {
function sanitize_markdown_url_leaves_normal_urls_unchanged (line 704) | fn sanitize_markdown_url_leaves_normal_urls_unchanged() {
FILE: crates/html-to-markdown/src/converter/utility/serialization.rs
function serialize_element (line 11) | pub fn serialize_element(node_handle: &tl::NodeHandle, parser: &tl::Pars...
function serialize_node (line 51) | pub fn serialize_node(node_handle: &tl::NodeHandle, parser: &tl::Parser)...
function serialize_tag_to_html (line 64) | pub fn serialize_tag_to_html(handle: &tl::NodeHandle, parser: &tl::Parse...
function serialize_node_to_html (line 73) | pub fn serialize_node_to_html(handle: &tl::NodeHandle, parser: &tl::Pars...
FILE: crates/html-to-markdown/src/converter/utility/siblings.rs
function get_next_sibling_tag (line 10) | pub fn get_next_sibling_tag<'a>(
function get_previous_sibling_tag (line 20) | pub fn get_previous_sibling_tag<'a>(
function previous_sibling_is_inline_tag (line 56) | pub fn previous_sibling_is_inline_tag(node_handle: &tl::NodeHandle, pars...
function next_sibling_is_whitespace_text (line 62) | pub fn next_sibling_is_whitespace_text(
function next_sibling_is_inline_tag (line 72) | pub fn next_sibling_is_inline_tag(node_handle: &tl::NodeHandle, parser: ...
function append_inline_suffix (line 80) | pub fn append_inline_suffix(
FILE: crates/html-to-markdown/src/converter/visitor_hooks.rs
function handle_visitor_element_start (line 42) | pub fn handle_visitor_element_start(
function handle_visitor_element_end (line 115) | pub fn handle_visitor_element_end(
type VisitAction (line 180) | pub enum VisitAction {
FILE: crates/html-to-markdown/src/error.rs
type Result (line 6) | pub type Result<T> = std::result::Result<T, ConversionError>;
type ConversionError (line 10) | pub enum ConversionError {
FILE: crates/html-to-markdown/src/inline_images.rs
type InlineImageConfig (line 8) | pub struct InlineImageConfig {
method new (line 44) | pub const fn new(max_decoded_size_bytes: u64) -> Self {
method apply_update (line 61) | pub fn apply_update(&mut self, update: InlineImageConfigUpdate) {
method from_update (line 89) | pub fn from_update(update: InlineImageConfigUpdate) -> Self {
constant DEFAULT_INLINE_IMAGE_LIMIT (line 20) | pub const DEFAULT_INLINE_IMAGE_LIMIT: u64 = 5 * 1024 * 1024;
type InlineImageConfigUpdate (line 30) | pub struct InlineImageConfigUpdate {
type InlineImageFormat (line 98) | pub enum InlineImageFormat {
method fmt (line 116) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type InlineImageSource (line 131) | pub enum InlineImageSource {
method fmt (line 139) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type InlineImage (line 149) | pub struct InlineImage {
method fmt (line 167) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type InlineImageWarning (line 174) | pub struct InlineImageWarning {
type HtmlExtraction (line 183) | pub struct HtmlExtraction {
type InlineImageCollector (line 194) | pub struct InlineImageCollector {
method new (line 203) | pub(crate) fn new(config: InlineImageConfig) -> Result<Self, Conversio...
method capture_svg (line 226) | pub(crate) const fn capture_svg(&self) -> bool {
method should_infer_dimensions (line 230) | pub(crate) const fn should_infer_dimensions(&self) -> bool {
method max_decoded_size (line 234) | pub(crate) const fn max_decoded_size(&self) -> u64 {
method next_index (line 238) | pub(crate) const fn next_index(&mut self) -> usize {
method finalize_filename (line 243) | pub(crate) fn finalize_filename(&self, provided: Option<&str>, index: ...
method warn_skip (line 266) | pub(crate) fn warn_skip(&mut self, index: usize, reason: impl Into<Str...
method warn_info (line 271) | pub(crate) fn warn_info(&mut self, index: usize, reason: impl Into<Str...
method push_image (line 276) | pub(crate) fn push_image(&mut self, index: usize, mut image: InlineIma...
method build_image (line 285) | pub(crate) const fn build_image(
method infer_dimensions (line 306) | pub(crate) fn infer_dimensions(
method finish (line 333) | pub(crate) fn finish(self) -> (Vec<InlineImage>, Vec<InlineImageWarnin...
FILE: crates/html-to-markdown/src/lib.rs
function test_binary_input_rejected (line 100) | fn test_binary_input_rejected() {
function test_binary_magic_rejected (line 107) | fn test_binary_magic_rejected() {
function test_utf16_hint_recovered (line 114) | fn test_utf16_hint_recovered() {
function test_plain_text_allowed (line 121) | fn test_plain_text_allowed() {
function test_plain_text_escaped_when_enabled (line 128) | fn test_plain_text_escaped_when_enabled() {
FILE: crates/html-to-markdown/src/metadata/collector.rs
type MetadataCollector (line 27) | pub struct MetadataCollector {
method new (line 54) | pub(crate) fn new(config: MetadataConfig) -> Self {
method add_header (line 80) | pub(crate) fn add_header(&mut self, level: u8, text: String, id: Optio...
method add_link (line 103) | pub(crate) fn add_link(
method add_image (line 145) | pub(crate) fn add_image(
method add_json_ld (line 182) | pub(crate) fn add_json_ld(&mut self, json_content: String) {
method set_head_metadata (line 203) | pub(crate) fn set_head_metadata(&mut self, metadata: BTreeMap<String, ...
method set_language (line 214) | pub(crate) fn set_language(&mut self, lang: String) {
method set_text_direction (line 227) | pub(crat
Copy disabled (too large)
Download .json
Condensed preview — 1171 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (11,280K chars).
[
{
"path": ".ai-rulez/config.toml",
"chars": 1655,
"preview": "# AI-Rulez Configuration (migrated to V4 TOML format)\n# Documentation: https://github.com/Goldziher/ai-rulez\n\nversion = "
},
{
"path": ".ai-rulez/context/crate-structure.md",
"chars": 1171,
"preview": "---\npriority: high\n---\n\n# Crate & Package Structure\n\n## Workspace crates (`crates/`)\n\n- `html-to-markdown` — core librar"
},
{
"path": ".ai-rulez/domains/conversion-algorithms/DOMAIN.md",
"chars": 1172,
"preview": "# Conversion Algorithms Domain\n\n## Purpose\n\nCore HTML-to-Markdown transformation logic. Converts parsed DOM trees into w"
},
{
"path": ".ai-rulez/domains/html-parsing/DOMAIN.md",
"chars": 1235,
"preview": "# HTML Parsing Domain\n\n## Purpose\n\nFoundation of the conversion pipeline: HTML parser selection, DOM tree construction, "
},
{
"path": ".ai-rulez/domains/safety-sanitization/DOMAIN.md",
"chars": 1468,
"preview": "# Safety & Sanitization Domain\n\n## Purpose\n\nProtects the conversion pipeline from malicious or malformed input. Ensures "
},
{
"path": ".ai-rulez/rules/alef-generated-bindings.md",
"chars": 510,
"preview": "---\npriority: critical\n---\n\n- Files in `packages/*/` and binding crates are generated or managed by Alef — check `alef.t"
},
{
"path": ".cargo/config.toml",
"chars": 628,
"preview": "\n[build]\nincremental = true\n\n[target.wasm32-unknown-unknown]\nrustflags = [\"-C\", \"target-feature=+bulk-memory\", \"--cfg\", "
},
{
"path": ".clang-format",
"chars": 183,
"preview": "---\nBasedOnStyle: LLVM\nIndentWidth: 4\nColumnLimit: 100\nBreakBeforeBraces: Attach\nAllowShortFunctionsOnASingleLine: Empty"
},
{
"path": ".editorconfig",
"chars": 6574,
"preview": "# EditorConfig is awesome: https://EditorConfig.org\n\n# top-most EditorConfig file\nroot = true\n\n# All files\n[*]\ncharset ="
},
{
"path": ".github/CODEOWNERS",
"chars": 251,
"preview": "# Default owner — everything\n* @Goldziher\n\n# Zensical config and documentation\n/zensical.toml @Goldziher @pratik-mahalle"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.yml",
"chars": 756,
"preview": "name: Bug Report\ndescription: Report a bug or unexpected behavior\ntitle: \"bug: \"\nlabels: [\"bug\"]\nprojects: [\"kreuzberg-d"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 27,
"preview": "blank_issues_enabled: true\n"
},
{
"path": ".github/ISSUE_TEMPLATE/documentation.yml",
"chars": 497,
"preview": "name: Documentation Issue\ndescription: Report missing, unclear, or incorrect documentation\ntitle: \"docs: \"\nlabels: [\"doc"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.yml",
"chars": 410,
"preview": "name: Feature Request\ndescription: Suggest a new feature or improvement\ntitle: \"feat: \"\nlabels: [\"enhancement\"]\nprojects"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 177,
"preview": "## Related\n\n<!-- Link issues or discussions if applicable -->\n\n## Description\n\n<!-- What does this PR do? -->\n\n## Checkl"
},
{
"path": ".github/actions/build-typescript/action.yml",
"chars": 274,
"preview": "name: Build TypeScript package\ndescription: Builds TypeScript package (requires Node bindings to be built first)\n\nruns:\n"
},
{
"path": ".github/actions/smoke-pie/action.yml",
"chars": 1604,
"preview": "name: Smoke test PIE install\ndescription: Tests PHP extension installation via PIE\n\ninputs:\n pie-artifacts-dir:\n des"
},
{
"path": ".github/dependabot.yaml",
"chars": 1782,
"preview": "version: 2\nupdates:\n - package-ecosystem: \"github-actions\"\n directory: \"/\"\n schedule:\n interval: \"daily\"\n "
},
{
"path": ".github/workflows/ci.yaml",
"chars": 41686,
"preview": "name: CI\n\non:\n push:\n branches: [main]\n paths:\n - \"crates/**\"\n - \"packages/**\"\n - \"e2e/**\"\n -"
},
{
"path": ".github/workflows/deploy-docs.yaml",
"chars": 1371,
"preview": "name: Deploy Documentation\n\non:\n push:\n branches: [main]\n paths:\n - 'docs/**'\n - 'zensical.toml'\n "
},
{
"path": ".github/workflows/publish.yaml",
"chars": 75942,
"preview": "name: Publish Release\n\non:\n workflow_dispatch:\n inputs:\n tag:\n description: \"Release tag to build (e.g.,"
},
{
"path": ".github/workflows/validate-issues.yml",
"chars": 187,
"preview": "name: Validate Issues\n\non:\n issues:\n types: [opened, edited]\n\njobs:\n validate:\n uses: kreuzberg-dev/actions/.git"
},
{
"path": ".github/workflows/validate-pr.yml",
"chars": 198,
"preview": "name: Validate PR\n\non:\n pull_request:\n types: [opened, edited, synchronize]\n\njobs:\n validate:\n uses: kreuzberg-d"
},
{
"path": ".gitignore",
"chars": 2481,
"preview": "# Python\n__pycache__/\n*.py[cod]\n*$py.class\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\np"
},
{
"path": ".gitmodules",
"chars": 101,
"preview": "[submodule \"homebrew-tap\"]\n\tpath = homebrew-tap\n\turl = https://github.com/Goldziher/homebrew-tap.git\n"
},
{
"path": ".golangci.yml",
"chars": 3838,
"preview": "version: \"2\"\n\nrun:\n timeout: 5m\n issues-exit-code: 1\n tests: true\n concurrency: 4\n modules-download-mode: readonly\n"
},
{
"path": ".mailmap",
"chars": 162,
"preview": "Na'aman Hirschfeld <nhirschfeld@gmail.com> Na'aman Hischfeld <nhirschfeld@gmail.com>\nNa'aman Hirschfeld <nhirschfeld@gma"
},
{
"path": ".markdownlint.yaml",
"chars": 127,
"preview": "default: true\nMD007:\n indent: 4\nMD033: false\nMD041: false\nMD013: false\nMD014: false\nMD024:\n siblings_only: true\nMD046:"
},
{
"path": ".mvn/wrapper/MavenWrapperDownloader.java",
"chars": 4880,
"preview": "/*\n * Copyright 2007-present the original author or authors.\n *\n * Licensed under the Apache License, Version 2.0 (the \""
},
{
"path": ".mvn/wrapper/maven-wrapper.properties",
"chars": 279,
"preview": "distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip\nwrap"
},
{
"path": ".php-cs-fixer.dist.php",
"chars": 849,
"preview": "<?php\n\ndeclare(strict_types=1);\n\nuse PhpCsFixer\\Config;\nuse PhpCsFixer\\Finder;\n\nreturn (new Config())\n ->setRiskyAllo"
},
{
"path": ".pre-commit-config.yaml",
"chars": 5999,
"preview": "default_install_hook_types:\n - pre-commit\n - commit-msg\nexclude: ^docs/snippets/|vendor/|node_modules/|target/|dist/|a"
},
{
"path": ".ruby-version",
"chars": 6,
"preview": "3.4.8\n"
},
{
"path": ".rumdl.toml",
"chars": 1186,
"preview": "# rumdl — Rust-based markdown linter\n# https://github.com/rvben/rumdl\n\nrespect-gitignore = true\nexclude = [\"node_modules"
},
{
"path": ".sdkmanrc",
"chars": 28,
"preview": "java=25.0.2-tem\nmaven=3.9.9\n"
},
{
"path": ".task/README.md",
"chars": 15077,
"preview": "# .task/ Directory - Modular Task Organization\n\nThis directory contains the modular Task configuration for the html-to-m"
},
{
"path": ".task/checksum/_lint-typescript-lint",
"chars": 32,
"preview": "5185d264d62b8f691570c5e0c226b22\n"
},
{
"path": ".task/checksum/_test-typescript-test",
"chars": 33,
"preview": "b93fe0d03a54250e90b23f1a50fb35ec\n"
},
{
"path": ".task/checksum/typescript-typecheck",
"chars": 33,
"preview": "99aa06d3014798d86001c324468d497f\n"
},
{
"path": ".task/config/platforms.yml",
"chars": 2601,
"preview": "version: \"3\"\ninternal: true\n\nincludes:\n vars: ./vars.yml\n\nvars:\n # Executable extension - empty for Unix, .exe for Win"
},
{
"path": ".task/config/vars.yml",
"chars": 3138,
"preview": "version: \"3\"\ninternal: true\n\nvars:\n # Version extraction from Cargo.toml (workspace.package.version)\n VERSION:\n sh:"
},
{
"path": ".task/languages/python.yml",
"chars": 1650,
"preview": "version: \"3\"\ninternal: true\n\nvars:\n BUILD_PROFILE: \"{{.BUILD_PROFILE | default \\\"release\\\"}}\"\n PYTHON_PKG: \"packages/p"
},
{
"path": ".task/languages/rust.yml",
"chars": 6580,
"preview": "version: \"3\"\ninternal: true\n\nincludes:\n platforms: ../config/platforms.yml\n\nvars:\n RUST_LOG: \"{{.RUST_LOG | default \\\""
},
{
"path": ".task/tools/docs.yml",
"chars": 569,
"preview": "version: '3'\n\ntasks:\n generate-readme:\n desc: Generate package READMEs using alef\n cmds:\n - alef readme\n\n g"
},
{
"path": ".task/tools/general.yml",
"chars": 1331,
"preview": "version: \"3\"\ninternal: true\n\nincludes:\n platforms: ../config/platforms.yml\n\nvars:\n SCRIPTS_DIR: \"{{.TASKFILE_DIR}}/../"
},
{
"path": ".task/tools/version-sync.yml",
"chars": 1312,
"preview": "version: \"3\"\ninternal: true\n\nincludes:\n platforms: ../config/platforms.yml\n\nvars:\n # Use installed alef binary. Instal"
},
{
"path": ".task/workflows/e2e.yml",
"chars": 785,
"preview": "version: \"3\"\n\ntasks:\n generate:all:\n desc: Generate all E2E tests from fixtures across all supported languages\n c"
},
{
"path": ".typos.toml",
"chars": 155,
"preview": "[files]\nextend-exclude = [\"target/\", \".alef/\", \"*.lock\", \"*.min.js\"]\n\n[default.extend-words]\n# Add project-specific word"
},
{
"path": "ATTRIBUTIONS.md",
"chars": 11294,
"preview": "# Attributions\n\nThis project includes vendored code from third-party libraries. This file\nprovides the required attribut"
},
{
"path": "CHANGELOG.md",
"chars": 131696,
"preview": "# Changelog\n\nAll notable changes to html-to-markdown will be documented in this file.\n\nThe format is based on [Keep a Ch"
},
{
"path": "CONTRIBUTING.md",
"chars": 10363,
"preview": "# Contributing to html-to-markdown\n\n## Prerequisites\n\n### Core Development\n\n- **Python** 3.10+\n- **Rust** 1.80+ (stable)"
},
{
"path": "Cargo.toml",
"chars": 2899,
"preview": "[workspace]\nresolver = \"2\"\nmembers = [\n \"crates/html-to-markdown\",\n \"crates/html-to-markdown-cli\",\n \"crates/htm"
},
{
"path": "LICENSE",
"chars": 1086,
"preview": "The MIT License (MIT)\n\nCopyright 2024-2025 Na'aman Hirschfeld\n\nPermission is hereby granted, free of charge, to any pers"
},
{
"path": "README.md",
"chars": 7751,
"preview": "# html-to-markdown\n\n<div align=\"center\" style=\"display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin"
},
{
"path": "Taskfile.yaml",
"chars": 11097,
"preview": "version: \"3\"\n\n# ============================================================================\n# html-to-markdown — Root T"
},
{
"path": "_typos.toml",
"chars": 516,
"preview": "[files]\nextend-exclude = [\n # Test fixtures and real-world HTML samples contain intentional\n # misspellings, non-E"
},
{
"path": "alef.toml",
"chars": 29407,
"preview": "version = \"0.13.9\"\nlanguages = [\"python\", \"node\", \"ruby\", \"php\", \"ffi\", \"go\", \"java\", \"csharp\", \"elixir\", \"wasm\", \"r\"]\n\n"
},
{
"path": "composer.json",
"chars": 2969,
"preview": "{\n \"name\": \"kreuzberg-dev/html-to-markdown\",\n \"description\": \"Modern PHP API for the html_to_markdown native extension"
},
{
"path": "crates/html-to-markdown/Cargo.toml",
"chars": 1464,
"preview": "[package]\nname = \"html-to-markdown-rs\"\nversion.workspace = true\nedition.workspace = true\nauthors.workspace = true\nlicens"
},
{
"path": "crates/html-to-markdown/README.md",
"chars": 8702,
"preview": "# html-to-markdown-rs\n\nHigh-performance HTML to Markdown converter built with Rust.\n\nThis crate is the core engine compi"
},
{
"path": "crates/html-to-markdown/examples/basic.rs",
"chars": 641,
"preview": "//! Example: Basic HTML to Markdown conversion\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::Conver"
},
{
"path": "crates/html-to-markdown/examples/table.rs",
"chars": 642,
"preview": "//! Example: Converting HTML tables to Markdown\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::Conve"
},
{
"path": "crates/html-to-markdown/examples/test_deser.rs",
"chars": 1228,
"preview": "#![allow(missing_docs)]\nuse html_to_markdown_rs::ConversionOptions;\n\nfn main() {\n let json = r#\"{\"headingStyle\":\"\",\"l"
},
{
"path": "crates/html-to-markdown/examples/test_escape.rs",
"chars": 1866,
"preview": "//! Example: Testing HTML escape sequences and special characters\n\nfn convert(\n html: &str,\n opts: Option<html_to_"
},
{
"path": "crates/html-to-markdown/examples/test_inline_formatting.rs",
"chars": 3725,
"preview": "#![allow(missing_docs)]\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html_"
},
{
"path": "crates/html-to-markdown/examples/test_lists.rs",
"chars": 1278,
"preview": "//! Example: Testing HTML list conversion (ordered and unordered lists)\n\nfn convert(\n html: &str,\n opts: Option<ht"
},
{
"path": "crates/html-to-markdown/examples/test_semantic_tags.rs",
"chars": 2744,
"preview": "//! Example: Testing HTML5 semantic tags (article, section, nav, etc.)\n\nfn convert(\n html: &str,\n opts: Option<htm"
},
{
"path": "crates/html-to-markdown/examples/test_tables.rs",
"chars": 2628,
"preview": "//! Example: Converting HTML tables to Markdown\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::Conve"
},
{
"path": "crates/html-to-markdown/examples/test_task_lists.rs",
"chars": 1782,
"preview": "//! Example: Testing task list conversion (checkboxes)\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs"
},
{
"path": "crates/html-to-markdown/examples/test_whitespace.rs",
"chars": 1063,
"preview": "//! Example: Testing whitespace handling and normalization\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdow"
},
{
"path": "crates/html-to-markdown/src/convert_api.rs",
"chars": 11513,
"preview": "//! Main HTML to Markdown conversion API.\n//!\n//! This module provides the primary `convert()` function for converting H"
},
{
"path": "crates/html-to-markdown/src/converter/block/blockquote.rs",
"chars": 6110,
"preview": "//! Handler for blockquote elements.\n//!\n//! Converts HTML blockquote tags to Markdown blockquotes with support for:\n//!"
},
{
"path": "crates/html-to-markdown/src/converter/block/container.rs",
"chars": 3762,
"preview": "//! Handler for structural container elements.\n//!\n//! This module provides handlers for structural containers that proc"
},
{
"path": "crates/html-to-markdown/src/converter/block/div.rs",
"chars": 4799,
"preview": "//! Handler for div element.\n//!\n//! Converts HTML div elements to Markdown by processing children while maintaining\n//!"
},
{
"path": "crates/html-to-markdown/src/converter/block/heading.rs",
"chars": 13771,
"preview": "//! Handler for heading elements (h1-h6).\n//!\n//! Converts HTML heading tags to Markdown heading syntax with support for"
},
{
"path": "crates/html-to-markdown/src/converter/block/horizontal_rule.rs",
"chars": 1986,
"preview": "//! Handler for horizontal rule elements (hr).\n//!\n//! Converts HTML horizontal rule tags to Markdown horizontal rules ("
},
{
"path": "crates/html-to-markdown/src/converter/block/line_break.rs",
"chars": 1287,
"preview": "//! Handler for line break elements (br).\n//!\n//! Converts HTML line break tags to Markdown line breaks using the config"
},
{
"path": "crates/html-to-markdown/src/converter/block/mod.rs",
"chars": 187,
"preview": "pub mod blockquote;\npub mod container;\npub mod div;\npub mod heading;\npub mod horizontal_rule;\npub mod line_break;\npub mo"
},
{
"path": "crates/html-to-markdown/src/converter/block/paragraph.rs",
"chars": 4782,
"preview": "//! Handler for paragraph elements (p, div).\n//!\n//! Converts HTML paragraph tags to Markdown paragraphs with proper spa"
},
{
"path": "crates/html-to-markdown/src/converter/block/preformatted.rs",
"chars": 10993,
"preview": "//! Handler for preformatted code elements (pre, code).\n//!\n//! Converts HTML preformatted and code tags to Markdown cod"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/builder.rs",
"chars": 18032,
"preview": "//! Core table building and structure calculation.\n//!\n//! Handles main table element conversion, column calculation, an"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/caption.rs",
"chars": 1599,
"preview": "//! Caption element handler for table captions.\n//!\n//! Handles HTML `<caption>` elements within tables, converting them"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/cell.rs",
"chars": 6851,
"preview": "//! Table cell conversion utilities.\n//!\n//! Handles conversion of table cell (td/th) elements to Markdown format,\n//! i"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/cells.rs",
"chars": 9371,
"preview": "//! Cell and row handling for Markdown conversion.\n//!\n//! Provides functionality for processing table cells and rows, i"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/layout.rs",
"chars": 1869,
"preview": "//! Table layout and indentation utilities.\n//!\n//! Handles table indentation for list context and table content\n//! for"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/mod.rs",
"chars": 8957,
"preview": "//! Table element handler for HTML to Markdown conversion.\n//!\n//! This module provides specialized handling for table e"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/scanner.rs",
"chars": 6141,
"preview": "//! Table scanning and analysis utilities.\n//!\n//! Provides the TableScan struct and scanning functions for analyzing ta"
},
{
"path": "crates/html-to-markdown/src/converter/block/table/utils.rs",
"chars": 1084,
"preview": "//! Utility functions for table processing.\n//!\n//! Provides helper functions for tag name normalization and comparison."
},
{
"path": "crates/html-to-markdown/src/converter/block/unknown.rs",
"chars": 3249,
"preview": "//! Handler for unknown/unspecified HTML elements.\n//!\n//! Processes HTML elements that don't have specific handlers by "
},
{
"path": "crates/html-to-markdown/src/converter/context.rs",
"chars": 8925,
"preview": "//! Conversion context for HTML to Markdown conversion.\n//!\n//! The `Context` struct maintains state during the recursiv"
},
{
"path": "crates/html-to-markdown/src/converter/dom_context.rs",
"chars": 13240,
"preview": "//! DOM context providing efficient access to parent/child relationships and text content.\n//!\n//! This module defines t"
},
{
"path": "crates/html-to-markdown/src/converter/form/elements.rs",
"chars": 19643,
"preview": "//! Handlers for HTML form elements.\n//!\n//! This module provides comprehensive handling for all HTML form-related eleme"
},
{
"path": "crates/html-to-markdown/src/converter/form/mod.rs",
"chars": 3039,
"preview": "//! Form element handlers for HTML to Markdown conversion.\n//!\n//! This module provides specialized handlers for HTML fo"
},
{
"path": "crates/html-to-markdown/src/converter/format/djot.rs",
"chars": 1799,
"preview": "//! Djot format renderer.\n\nuse super::FormatRenderer;\n\n/// Renderer for Djot lightweight markup output.\n#[derive(Debug, "
},
{
"path": "crates/html-to-markdown/src/converter/format/markdown.rs",
"chars": 1643,
"preview": "//! Markdown format renderer.\n\nuse super::FormatRenderer;\n\n/// Renderer for standard Markdown output.\n#[derive(Debug, Cl"
},
{
"path": "crates/html-to-markdown/src/converter/format/mod.rs",
"chars": 1563,
"preview": "//! Output format renderers for HTML to Markdown/Djot conversion.\n//!\n//! This module provides format-specific rendering"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/blockquote.rs",
"chars": 5712,
"preview": "//! Blockquote element handler for HTML to Markdown conversion.\n//!\n//! Handles `<blockquote>` elements including:\n//! -"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/code_block.rs",
"chars": 15111,
"preview": "//! Code and pre element handlers for HTML to Markdown conversion.\n//!\n//! Handles `<code>` and `<pre>` elements includi"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/graphic.rs",
"chars": 8268,
"preview": "//! Graphic element handler for HTML to Markdown conversion.\n//!\n//! Handles `<graphic>` elements including:\n//! - Alter"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/image.rs",
"chars": 9297,
"preview": "//! Image element handler for HTML to Markdown conversion.\n//!\n//! Handles `<img>` elements including:\n//! - Basic image"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/link.rs",
"chars": 11755,
"preview": "//! Link element handler for HTML to Markdown conversion.\n//!\n//! Handles `<a>` elements including:\n//! - Basic link mar"
},
{
"path": "crates/html-to-markdown/src/converter/handlers/mod.rs",
"chars": 837,
"preview": "//! Element handlers extracted from the main conversion pipeline.\n//!\n//! This module contains handler functions for spe"
},
{
"path": "crates/html-to-markdown/src/converter/inline/code.rs",
"chars": 10280,
"preview": "//! Handler for code-related inline elements (code, kbd, samp).\n//!\n//! Converts HTML code elements to Markdown inline c"
},
{
"path": "crates/html-to-markdown/src/converter/inline/emphasis.rs",
"chars": 13172,
"preview": "//! Handler for emphasis elements (strong, b, em, i).\n//!\n//! Converts HTML emphasis tags to Markdown formatting with su"
},
{
"path": "crates/html-to-markdown/src/converter/inline/link.rs",
"chars": 16033,
"preview": "//! Handler for link elements (a, anchor).\n//!\n//! Converts HTML anchor tags to Markdown links with support for:\n//! - S"
},
{
"path": "crates/html-to-markdown/src/converter/inline/mod.rs",
"chars": 8599,
"preview": "//! Inline element handlers for HTML to Markdown conversion.\n//!\n//! This module provides specialized handlers for inlin"
},
{
"path": "crates/html-to-markdown/src/converter/inline/ruby.rs",
"chars": 15484,
"preview": "//! Handler for ruby annotation inline elements (ruby, rb, rt, rp, rtc).\n//!\n//! Converts HTML ruby annotation elements "
},
{
"path": "crates/html-to-markdown/src/converter/inline/semantic/marks.rs",
"chars": 19724,
"preview": "//! Handlers for mark/highlight and strikethrough/underline elements.\n//!\n//! Contains:\n//! - Mark (highlight) element w"
},
{
"path": "crates/html-to-markdown/src/converter/inline/semantic/mod.rs",
"chars": 3541,
"preview": "//! Handler for semantic inline elements (mark, del, s, ins, u, small, sub, sup, var, dfn, abbr, span).\n//!\n//! Converts"
},
{
"path": "crates/html-to-markdown/src/converter/inline/semantic/typography.rs",
"chars": 17413,
"preview": "//! Handlers for typography and text semantic elements.\n//!\n//! Contains:\n//! - Small text (pass through)\n//! - Subscrip"
},
{
"path": "crates/html-to-markdown/src/converter/list/definition.rs",
"chars": 3536,
"preview": "//! Definition list handling (dl, dt, dd elements).\n//!\n//! Processes definition lists with:\n//! - Definition terms (dt)"
},
{
"path": "crates/html-to-markdown/src/converter/list/item.rs",
"chars": 12255,
"preview": "//! List item handling (li element).\n//!\n//! Processes list items with support for:\n//! - Task list detection and render"
},
{
"path": "crates/html-to-markdown/src/converter/list/mod.rs",
"chars": 2414,
"preview": "//! List element handlers for HTML to Markdown conversion.\n//!\n//! This module provides specialized handling for various"
},
{
"path": "crates/html-to-markdown/src/converter/list/ordered.rs",
"chars": 6354,
"preview": "//! Ordered list handling (ol, li elements).\n//!\n//! Processes ordered lists with support for:\n//! - Custom start counte"
},
{
"path": "crates/html-to-markdown/src/converter/list/unordered.rs",
"chars": 6215,
"preview": "//! Unordered list handling (ul, li elements).\n//!\n//! Processes unordered lists with support for:\n//! - Bullet cycling "
},
{
"path": "crates/html-to-markdown/src/converter/list/utils.rs",
"chars": 10530,
"preview": "//! Utility functions for list processing.\n//!\n//! Contains helper functions for loose list detection, indentation calcu"
},
{
"path": "crates/html-to-markdown/src/converter/main.rs",
"chars": 26310,
"preview": "//! Main conversion pipeline for HTML to Markdown.\n//!\n//! This module implements the core conversion functions and the "
},
{
"path": "crates/html-to-markdown/src/converter/main_helpers.rs",
"chars": 16913,
"preview": "//! Helper functions for HTML to Markdown conversion.\n//!\n//! This module contains utility functions used by the main co"
},
{
"path": "crates/html-to-markdown/src/converter/media/embedded.rs",
"chars": 13573,
"preview": "//! Embedded media element handling (iframe, video, audio, source).\n//!\n//! Converts various embedded media elements:\n//"
},
{
"path": "crates/html-to-markdown/src/converter/media/graphic.rs",
"chars": 254,
"preview": "//! Graphic element handling (custom graphic elements with alternative source attributes).\n//!\n//! The `<graphic>` eleme"
},
{
"path": "crates/html-to-markdown/src/converter/media/image.rs",
"chars": 5531,
"preview": "//! Image element handling (img, data URIs, inline image collection).\n\n#[allow(unused_imports)]\nuse std::collections::BT"
},
{
"path": "crates/html-to-markdown/src/converter/media/mod.rs",
"chars": 2833,
"preview": "//! Media element handlers for HTML-to-Markdown conversion.\n//!\n//! This module provides specialized handling for variou"
},
{
"path": "crates/html-to-markdown/src/converter/media/svg.rs",
"chars": 8589,
"preview": "//! SVG and MathML element handling with serialization and base64 encoding.\n\nuse crate::converter::main_helpers::tag_nam"
},
{
"path": "crates/html-to-markdown/src/converter/metadata.rs",
"chars": 7489,
"preview": "//! Handler for metadata and script elements (head, script, style, math).\n//!\n//! Converts various metadata-related elem"
},
{
"path": "crates/html-to-markdown/src/converter/mod.rs",
"chars": 5947,
"preview": "//! HTML to Markdown conversion engine with modular architecture.\n//!\n//! This module provides the complete conversion p"
},
{
"path": "crates/html-to-markdown/src/converter/plain_text.rs",
"chars": 13097,
"preview": "//! Plain text extraction from parsed HTML DOM.\n//!\n//! Provides a fast-path text extractor that walks the DOM tree coll"
},
{
"path": "crates/html-to-markdown/src/converter/preprocessing_helpers.rs",
"chars": 6601,
"preview": "//! HTML preprocessing and validation helpers.\n//!\n//! This module contains helper functions for preprocessing HTML befo"
},
{
"path": "crates/html-to-markdown/src/converter/reference_collector.rs",
"chars": 2138,
"preview": "//! Collector for reference-style link definitions.\n\nuse std::cell::RefCell;\nuse std::collections::HashMap;\nuse std::rc:"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/attributes.rs",
"chars": 8991,
"preview": "//! Handlers for semantic inline elements with attributes.\n//!\n//! Processes semantic inline elements that often carry s"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/definition_list.rs",
"chars": 8672,
"preview": "//! Handlers for HTML5 definition list and heading group elements.\n//!\n//! Processes list and heading semantic elements:"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/figure.rs",
"chars": 6937,
"preview": "//! Handlers for HTML5 figure elements.\n//!\n//! Processes figure-related semantic elements:\n//! - `<figure>` - Self-cont"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/mod.rs",
"chars": 4230,
"preview": "//! Semantic HTML5 element handlers for HTML to Markdown conversion.\n//!\n//! This module provides specialized handlers f"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/sectioning.rs",
"chars": 3098,
"preview": "//! Handlers for HTML5 sectioning elements.\n//!\n//! Processes semantic sectioning elements:\n//! - `<article>` - Independ"
},
{
"path": "crates/html-to-markdown/src/converter/semantic/summary.rs",
"chars": 7471,
"preview": "//! Handlers for HTML5 interactive elements.\n//!\n//! Processes interactive disclosure and dialog semantic elements:\n//! "
},
{
"path": "crates/html-to-markdown/src/converter/text/mod.rs",
"chars": 287,
"preview": "//! Text processing module for HTML to Markdown conversion.\n//!\n//! This module provides utilities for normalizing, esca"
},
{
"path": "crates/html-to-markdown/src/converter/text/processing.rs",
"chars": 1718,
"preview": "//! Text processing utilities for HTML to Markdown conversion.\n//!\n//! This module provides functions for processing tex"
},
{
"path": "crates/html-to-markdown/src/converter/text_node.rs",
"chars": 9781,
"preview": "//! Text node processing for HTML to Markdown conversion.\n//!\n//! Handles raw text nodes with:\n//! - HTML entity decodin"
},
{
"path": "crates/html-to-markdown/src/converter/utility/attributes.rs",
"chars": 4869,
"preview": "//! Attribute handling and extraction utilities.\n//!\n//! Functions for working with element attributes, semantic detecti"
},
{
"path": "crates/html-to-markdown/src/converter/utility/caching.rs",
"chars": 2874,
"preview": "//! Performance caching utilities.\n//!\n//! Caching mechanisms for expensive operations during conversion, including\n//! "
},
{
"path": "crates/html-to-markdown/src/converter/utility/content.rs",
"chars": 8392,
"preview": "//! Content extraction and manipulation utilities.\n//!\n//! Functions for extracting and processing element content, incl"
},
{
"path": "crates/html-to-markdown/src/converter/utility/mod.rs",
"chars": 567,
"preview": "//! Utility module: helper functions for common operations.\n//!\n//! This module contains utility functions used across c"
},
{
"path": "crates/html-to-markdown/src/converter/utility/preprocessing.rs",
"chars": 25223,
"preview": "//! HTML preprocessing and normalization.\n//!\n//! Functions for preprocessing HTML before conversion, including script/s"
},
{
"path": "crates/html-to-markdown/src/converter/utility/serialization.rs",
"chars": 4157,
"preview": "//! Output serialization and formatting.\n//!\n//! Utilities for serializing HTML elements back to string format, used for"
},
{
"path": "crates/html-to-markdown/src/converter/utility/siblings.rs",
"chars": 2987,
"preview": "//! Sibling node navigation and handling.\n//!\n//! Utilities for working with sibling nodes in the DOM tree, including na"
},
{
"path": "crates/html-to-markdown/src/converter/visitor_hooks.rs",
"chars": 7227,
"preview": "//! Visitor callback hooks for custom HTML traversal during conversion.\n//!\n//! This module contains the visitor pattern"
},
{
"path": "crates/html-to-markdown/src/error.rs",
"chars": 1113,
"preview": "//! Error types for HTML to Markdown conversion.\n\nuse thiserror::Error;\n\n/// Result type for conversion operations.\npub "
},
{
"path": "crates/html-to-markdown/src/exports.rs",
"chars": 1050,
"preview": "//! Public API re-exports from submodules.\n//!\n//! This module centralizes all public type and function exports,\n//! mak"
},
{
"path": "crates/html-to-markdown/src/inline_images.rs",
"chars": 11470,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\nuse std::collections::BTreeMap;\n\nuse"
},
{
"path": "crates/html-to-markdown/src/lib.rs",
"chars": 4380,
"preview": "#![allow(\n clippy::too_many_lines,\n clippy::option_if_let_else,\n clippy::match_wildcard_for_single_variants,\n "
},
{
"path": "crates/html-to-markdown/src/metadata/collector.rs",
"chars": 14268,
"preview": "//! Metadata collector for single-pass extraction.\n\nuse super::config::MetadataConfig;\nuse super::extraction::{extract_d"
},
{
"path": "crates/html-to-markdown/src/metadata/config.rs",
"chars": 15097,
"preview": "//! Metadata extraction configuration.\n\n/// Default maximum size for structured data extraction (1 MB)\npub const DEFAULT"
},
{
"path": "crates/html-to-markdown/src/metadata/extraction.rs",
"chars": 14904,
"preview": "//! Metadata extraction utilities and helpers.\n\nuse super::types::{DocumentMetadata, StructuredData, StructuredDataType,"
},
{
"path": "crates/html-to-markdown/src/metadata/mod.rs",
"chars": 10612,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\n//! Metadata extraction for HTML to "
},
{
"path": "crates/html-to-markdown/src/metadata/types.rs",
"chars": 15262,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\n//! Type definitions for metadata ex"
},
{
"path": "crates/html-to-markdown/src/options/conversion.rs",
"chars": 21880,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\n\n//! Main conversion options with bu"
},
{
"path": "crates/html-to-markdown/src/options/inline_image.rs",
"chars": 3425,
"preview": "//! Inline image configuration.\n//!\n//! This module provides configuration for controlling how images are rendered\n//! w"
},
{
"path": "crates/html-to-markdown/src/options/mod.rs",
"chars": 870,
"preview": "//! Configuration options for HTML to Markdown conversion.\n//!\n//! This module provides comprehensive configuration opti"
},
{
"path": "crates/html-to-markdown/src/options/preprocessing.rs",
"chars": 6727,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\n\n//! HTML preprocessing configuratio"
},
{
"path": "crates/html-to-markdown/src/options/validation.rs",
"chars": 13114,
"preview": "//! Validation and parsing utilities for option enums.\n//!\n//! This module provides parsing and serialization logic for "
},
{
"path": "crates/html-to-markdown/src/prelude.rs",
"chars": 52,
"preview": "//! Prelude module for convenient internal imports.\n"
},
{
"path": "crates/html-to-markdown/src/rcdom.rs",
"chars": 15082,
"preview": "// Vendored from markup5ever_rcdom v0.36.0+unofficial\n// Original source: https://github.com/servo/html5ever (rcdom/)\n//"
},
{
"path": "crates/html-to-markdown/src/text.rs",
"chars": 10628,
"preview": "#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]\n//! Text processing utilities for Ma"
},
{
"path": "crates/html-to-markdown/src/types/document.rs",
"chars": 6752,
"preview": "//! Structured document tree types aligned with kreuzberg's `DocumentStructure`.\n\nuse std::collections::HashMap;\n\n#[cfg("
},
{
"path": "crates/html-to-markdown/src/types/mod.rs",
"chars": 630,
"preview": "//! Core types for structured HTML extraction results.\n//!\n//! These types are aligned with kreuzberg's `DocumentStructu"
},
{
"path": "crates/html-to-markdown/src/types/result.rs",
"chars": 1838,
"preview": "//! The primary result type for HTML conversion and extraction.\n\n#[cfg(feature = \"serde\")]\nuse serde::{Deserialize, Seri"
},
{
"path": "crates/html-to-markdown/src/types/structure_builder.rs",
"chars": 30517,
"preview": "//! Builds a [`DocumentStructure`] from a parsed `tl::VDom`.\n//!\n//! Walk the DOM once, mapping each HTML element to the"
},
{
"path": "crates/html-to-markdown/src/types/structure_collector.rs",
"chars": 17031,
"preview": "//! Collector that builds a [`DocumentStructure`] during the converter's HTML DOM walk.\n//!\n//! Follows the same single-"
},
{
"path": "crates/html-to-markdown/src/types/tables.rs",
"chars": 1661,
"preview": "//! Structured table types aligned with kreuzberg's `TableGrid`.\n\n#[cfg(feature = \"serde\")]\nuse serde::{Deserialize, Ser"
},
{
"path": "crates/html-to-markdown/src/types/warnings.rs",
"chars": 1229,
"preview": "//! Processing warning types for non-fatal issues during conversion.\n\n#[cfg(feature = \"serde\")]\nuse serde::{Deserialize,"
},
{
"path": "crates/html-to-markdown/src/validation.rs",
"chars": 4496,
"preview": "//! Input validation module for HTML to Markdown conversion.\n//!\n//! Provides validation functions to detect and reject "
},
{
"path": "crates/html-to-markdown/src/visitor/default_impl.rs",
"chars": 1833,
"preview": "//! Default visitor implementations and utilities.\n//!\n//! This module provides standard visitor patterns and helpers fo"
},
{
"path": "crates/html-to-markdown/src/visitor/mod.rs",
"chars": 1533,
"preview": "//! Visitor pattern for HTML to Markdown conversion.\n//!\n//! This module provides a comprehensive visitor trait that all"
},
{
"path": "crates/html-to-markdown/src/visitor/traits.rs",
"chars": 13053,
"preview": "//! Visitor traits for HTML to Markdown conversion.\n//!\n//! This module contains the synchronous visitor trait.\n\nuse sup"
},
{
"path": "crates/html-to-markdown/src/visitor/types.rs",
"chars": 7510,
"preview": "//! Type definitions for the visitor pattern.\n//!\n//! This module contains the core data types used in the visitor patte"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers/helpers/callbacks/mod.rs",
"chars": 45,
"preview": "//! Callback management for visitor pattern.\n"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers/helpers/content.rs",
"chars": 3589,
"preview": "//! Content extraction and result handling.\n//!\n//! This module provides the `VisitorDispatch` enum and helper methods f"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers/helpers/mod.rs",
"chars": 954,
"preview": "//! Helper functions for visitor pattern integration.\n//!\n//! This module provides efficient utilities for building visi"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers/helpers/state.rs",
"chars": 3362,
"preview": "//! Visitor state management and context building.\n//!\n//! This module handles construction of `NodeContext` objects tha"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers/helpers/traversal.rs",
"chars": 7693,
"preview": "//! Visitor callback dispatch and result handling.\n//!\n//! This module provides the core dispatching logic for synchrono"
},
{
"path": "crates/html-to-markdown/src/visitor_helpers.rs",
"chars": 18662,
"preview": "//! Helper functions for visitor pattern integration.\n//!\n//! This module provides efficient utilities for building visi"
},
{
"path": "crates/html-to-markdown/src/wrapper/sync.rs",
"chars": 14756,
"preview": "//! Synchronous text wrapping for Markdown output.\n\nuse super::utils::{\n is_heading, is_list_like, is_numbered_list, "
},
{
"path": "crates/html-to-markdown/src/wrapper/utils.rs",
"chars": 8912,
"preview": "//! Utility functions for text wrapping.\n//!\n//! This module contains helper functions for parsing and wrapping Markdown"
},
{
"path": "crates/html-to-markdown/src/wrapper.rs",
"chars": 285,
"preview": "//! Text wrapping functionality for Markdown output.\n//!\n//! This module provides text wrapping capabilities similar to "
},
{
"path": "crates/html-to-markdown/tests/br_in_inline_test.rs",
"chars": 2555,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/commonmark_compliance_test.rs",
"chars": 8918,
"preview": "#![allow(missing_docs)]\n\nuse html_to_markdown_rs::ConversionOptions;\nuse serde::Deserialize;\n\n#[derive(Debug, Deserializ"
},
{
"path": "crates/html-to-markdown/tests/djot_output_test.rs",
"chars": 4771,
"preview": "#![allow(missing_docs)]\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html_"
},
{
"path": "crates/html-to-markdown/tests/exclude_selectors_test.rs",
"chars": 3913,
"preview": "#![allow(missing_docs)]\n\nuse html_to_markdown_rs::ConversionOptions;\n\nfn convert(html: &str, opts: Option<ConversionOpti"
},
{
"path": "crates/html-to-markdown/tests/integration_test.rs",
"chars": 18271,
"preview": "#![allow(missing_docs)]\n\nuse html_to_markdown_rs::ConversionOptions;\n\n#[test]\nfn test_basic_paragraph() {\n let html ="
},
{
"path": "crates/html-to-markdown/tests/issue_121_regressions.rs",
"chars": 1567,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_127_regressions.rs",
"chars": 1886,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_128_regressions.rs",
"chars": 570,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_131_regressions.rs",
"chars": 2257,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_134_regressions.rs",
"chars": 1105,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_139_regressions.rs",
"chars": 807,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_140_regressions.rs",
"chars": 5945,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_143_regressions.rs",
"chars": 1309,
"preview": "#![allow(missing_docs)]\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOptions>,\n) -> html"
},
{
"path": "crates/html-to-markdown/tests/issue_145_regressions.rs",
"chars": 3752,
"preview": "#![allow(missing_docs)]\n\nuse html_to_markdown_rs::ConversionOptions;\n\n#[test]\nfn test_strip_newlines_preserves_block_spa"
},
{
"path": "crates/html-to-markdown/tests/issue_146_regressions.rs",
"chars": 3841,
"preview": "#![allow(missing_docs)]\n\nuse html_to_markdown_rs::ConversionOptions;\n\n#[test]\nfn test_strip_tags_prevents_metadata_extra"
},
{
"path": "crates/html-to-markdown/tests/issue_176_regressions.rs",
"chars": 2073,
"preview": "#![allow(missing_docs)]\n\n//! Regression tests for issue #176: Newlines not preserved with adjacent blockquotes\n\n#[test]\n"
},
{
"path": "crates/html-to-markdown/tests/issue_190_regressions.rs",
"chars": 3813,
"preview": "//! Regression coverage for issue #190.\n\nfn convert(\n html: &str,\n opts: Option<html_to_markdown_rs::ConversionOpt"
}
]
// ... and 971 more files (download for full content)
About this extraction
This page contains the full source code of the Goldziher/html-to-markdown GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 1171 files (10.2 MB), approximately 2.7M tokens, and a symbol index with 8340 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.