Showing preview only (357K chars total). Download the full file or copy to clipboard to get everything.
Repository: michal-h21/make4ht
Branch: master
Commit: e1aa8a497128
Files: 83
Total size: 333.9 KB
Directory structure:
gitextract_7n7efxfs/
├── .github/
│ ├── actions/
│ │ └── make4ht-build/
│ │ ├── Dockerfile
│ │ ├── action.yml
│ │ └── entrypoint.sh
│ └── workflows/
│ └── main.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── INSTALL.md
├── Makefile
├── README.md
├── config.cfg
├── domfilters/
│ ├── make4ht-aeneas.lua
│ ├── make4ht-booktabs.lua
│ ├── make4ht-collapsetoc.lua
│ ├── make4ht-fixinlines.lua
│ ├── make4ht-idcolons.lua
│ ├── make4ht-inlinecss.lua
│ ├── make4ht-itemparagraphs.lua
│ ├── make4ht-joincharacters.lua
│ ├── make4ht-joincolors.lua
│ ├── make4ht-mathmlfixes.lua
│ ├── make4ht-odtfonts.lua
│ ├── make4ht-odtimagesize.lua
│ ├── make4ht-odtpartable.lua
│ ├── make4ht-odtsvg.lua
│ ├── make4ht-sectionid.lua
│ ├── make4ht-t4htlinks.lua
│ ├── make4ht-tablecaption.lua
│ └── make4ht-tablerows.lua
├── extensions/
│ ├── make4ht-ext-common_domfilters.lua
│ ├── make4ht-ext-common_filters.lua
│ ├── make4ht-ext-copy_images.lua
│ ├── make4ht-ext-detect_engine.lua
│ ├── make4ht-ext-dvisvgm_hashes.lua
│ ├── make4ht-ext-inlinecss.lua
│ ├── make4ht-ext-join_colors.lua
│ ├── make4ht-ext-latexmk_build.lua
│ ├── make4ht-ext-mathjaxnode.lua
│ ├── make4ht-ext-mjcli.lua
│ ├── make4ht-ext-nodynamicodt.lua
│ ├── make4ht-ext-odttemplate.lua
│ ├── make4ht-ext-preprocess_input.lua
│ ├── make4ht-ext-staticsite.lua
│ └── make4ht-ext-tidy.lua
├── filters/
│ ├── make4ht-cleanspan-nat.lua
│ ├── make4ht-cleanspan.lua
│ ├── make4ht-domfilter.lua
│ ├── make4ht-entities-to-unicode.lua
│ ├── make4ht-entities.lua
│ ├── make4ht-filter.lua
│ ├── make4ht-fix-links.lua
│ ├── make4ht-fixligatures.lua
│ ├── make4ht-hruletohr.lua
│ ├── make4ht-mathjaxnode.lua
│ ├── make4ht-mjcli.lua
│ ├── make4ht-odttemplate.lua
│ ├── make4ht-staticsite.lua
│ └── make4ht-svg-height.lua
├── formats/
│ ├── make4ht-docbook.lua
│ ├── make4ht-html5.lua
│ ├── make4ht-jats.lua
│ ├── make4ht-odt.lua
│ ├── make4ht-tei.lua
│ └── make4ht-xhtml.lua
├── lapp-mk4.lua
├── make4ht
├── make4ht-aeneas-config.lua
├── make4ht-config.lua
├── make4ht-doc.tex
├── make4ht-dvireader.lua
├── make4ht-errorlogparser.lua
├── make4ht-filterlib.lua
├── make4ht-htlatex.lua
├── make4ht-indexing.lua
├── make4ht-lib.lua
├── make4ht-logging.lua
├── make4ht-xtpipes.lua
├── mkparams.lua
├── mkutils.lua
├── test/
│ ├── dom-test.lua
│ └── test-mkparams.lua
└── tools/
├── make_chardata.lua
└── make_mathmlchardata.lua
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/actions/make4ht-build/Dockerfile
================================================
# Image used by the make4ht-build action: a Debian system with the TeX
# packages listed below, which runs /entrypoint.sh when the container starts.
FROM debian:unstable-slim

# Image and GitHub Actions metadata.
LABEL "maintainer"="Michal Hoftich <michal.h21@gmail.com>"
LABEL "repository"="https://github.com/michal-h21/make4ht"
LABEL "homepage"="https://github.com/michal-h21/make4ht"
LABEL "com.github.actions.name"="LaTeX to XML"
LABEL "com.github.actions.description"="Convert LaTeX documents to XML with make4ht."
LABEL "com.github.actions.icon"="code"
LABEL "com.github.actions.color"="blue"

# Use the key=value form; the space-separated ENV syntax is a legacy form.
ENV DEBIAN_FRONTEND=noninteractive

# Install all TeX and LaTeX dependencies, then drop the apt caches and
# temporary files in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends \
    make luatex texlive-base texlive-luatex texlive-latex-extra context \
    tidy \
    # texlive-fonts-recommended \
    fonts-noto-mono \
    texlive-plain-generic \
    texlive-latex-recommended \
    pandoc latexmk texlive lmodern fonts-lmodern tex-gyre fonts-texgyre \
    texlive-lang-english && \
    apt-get autoclean && apt-get --purge --yes autoremove && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# COPY is sufficient for a plain local file; ADD's archive extraction and URL
# handling are not needed here.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
================================================
FILE: .github/actions/make4ht-build/action.yml
================================================
# Metadata of the local make4ht-build action.
name: "LaTeX to XML"
description: "Convert LaTeX documents to XML with make4ht"
# `using` and `image` are settings of `runs`, so they must be nested under it.
runs:
  using: "docker"
  image: "Dockerfile"
================================================
FILE: .github/actions/make4ht-build/entrypoint.sh
================================================
#!/bin/bash
# Container entry point: install make4ht, build the HTML documentation and
# print the generated page to standard output.

# Stop at the first failing command, so that a broken install or build makes
# the script fail instead of being hidden by the exit status of a later command.
set -e

# make4ht -um draft
make install SUDO=""
make htmldoc
cat htmldoc/make4ht-doc.html
================================================
FILE: .github/workflows/main.yml
================================================
# Builds the HTML documentation and publishes it to the gh-pages branch.
name: Build documentation

# Run only when one of the documentation sources changes.
on:
  push:
    paths:
      - README.md
      - CHANGELOG.md
      - make4ht-doc.tex

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Run a one-line script
        run: echo Hello, world!
      - name: Run a multi-line script
        run: |
          echo Add other actions to build,
          echo test, and deploy your project.
      # Local Docker action stored in this repository.
      - name: Generate HTML docs
        uses: ./.github/actions/make4ht-build
      # Pushes the contents of PUBLISH_DIR to PUBLISH_BRANCH.
      - name: Publish the web pages
        uses: peaceiris/actions-gh-pages@v2.5.0
        env:
          ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEPLOY_KEY }}
          PUBLISH_BRANCH: gh-pages
          PUBLISH_DIR: ./htmldoc
================================================
FILE: .gitignore
================================================
# Files that are not kept under version control: by their names and
# extensions these look like TeX/tex4ht build products, editor swap files and
# generated documentation -- NOTE(review): confirm against the Makefile.
*.aux
*.lg
*.log
*.tmp
*.xref
*.4tc
*.4ct
*.swp
*.idv
*.dvi
*.fls
*.html
*.out
*.pdf
backup
build
*.toc
*.fdb_latexmk
doc
tags
*.css
# presumably generated from README.md and CHANGELOG.md when the documentation
# is built -- TODO confirm against the Makefile
readme.tex
changelog.tex
# generated library; the changelog states it is created automatically by
# tools/make_chardata.lua
make4ht-char-def.lua
================================================
FILE: .travis.yml
================================================
dist: bionic

install:
  # Dependencies for the local run
  - sudo apt-get install -qq luatex texlive-base texlive-luatex luarocks
  # Dependencies for the global run
  - sudo apt-get install -qq pandoc latexmk texlive texlive-xetex texlive-fonts-recommended fonts-lmodern tex-gyre fonts-texgyre fonts-noto

script:
  # The script from the checkout can be executed right away
  - ./make4ht -v
  # - luarocks --local install busted
  - git fetch --tags
  - make
  - make justinstall
  # - sudo ln -s /home/travis/texmf/scripts/lua/make4ht/make4ht /usr/local/bin/make4ht
  # - make test
  # After the installation the command is available globally
  - make4ht -v
================================================
FILE: CHANGELOG.md
================================================
# Changelog
- 2026/05/11
- added new `mkutils` function, `escape_pattern`
- use `escape_pattern` to sanitize output file names
https://tex.stackexchange.com/q/762690/2891
- look for images outside of the build dir in the ODT output.
https://tex.stackexchange.com/q/762690/2891
- 2026/05/10
- set anchor id in lists to the `<li>` element in the `itemparagraphs` DOM filter.
- 2026/02/24
- version `0.4e` released.
- 2026/02/18
- consecutive `<mn>` elements in MathML are now merged only within parent elements whose meaning does not depend on the order of their child elements.
- 2026/01/31
- remove `#` character from internal links in the JATS output (it needs to be here to distinguish internal and external links).
- 2026/01/29
- removed the `<script>` element from list of inline elements in the `fixinlines` DOM filter.
- use `article` instead of `ltxdoc` class, because the latter results in infinite loop with some verbatims.
- 2025/10/01
- added the `tablecaption` DOM filter. It moves `<caption>` elements as the first child of `<table>`, to create a valid HTML code.
https://puszcza.gnu.org.ua/bugs/index.php?315
- 2025/08/16
- added support for the MathML `intent` and `arg` attributes in the `mathml-fixes` DOM filter.
- 2025/08/14
- produce fatal error if `Make:autohtlatex` fails.
- 2025/06/27
- fixed `latexmk_build` extension to work with the current build process.
- 2025/06/02
- use `--embed-bitmaps` option for `dvisvgm` in the `dvisvgm_hashes` extension.
- 2025/04/16
- added support for numbers in `id` attributes created by the `sectionid` DOM filter.
- 2025/04/14
- version `0.4d` released.
- 2025/04/11
- support `--build-dir` in `autohtlatex`
- 2025/03/12
- added `Make:autohtlatex` function to automate \LaTeX\ compilation, ensuring
repeated runs until temporary file checksums stabilize or a max limit is reached.
This function is used by default instead of `Make:htlatex`.
- 2025/03/02
- fixed bug in the `make4ht-indexing` library. If subentries generated by
Xindex keywords were only numbers, they were mistaken for the page numbers.
https://tug.org/pipermail/tex4ht/2025q1/003699.html
- 2025/02/19
- version `0.4c` released.
- 2025/02/03
- added the `nodynamicodt` extension. It removes dynamic content from ODT files.
https://puszcza.gnu.org.ua/bugs/?505#discussion
- 2025/01/15
- fixed handling of commands with number as optional arguments in the `make4ht-indexing` library.
https://fosstodon.org/@juergen_hubert@thefolklore.cafe/113809011903088411
- 2025/01/09
- convert horizontal rules in MathML arrays to the `rowlines` attribute.
https://tex.stackexchange.com/a/734616/2891
- 2025/01/06
- use Unicode characters for MathML font styles.
https://tex.stackexchange.com/a/734331/2891
- 2024/12/23
- fixed bug in the `make4ht-indexing` library. It didn't handle index entries
with the duplicate locator number that were broken over several lines correctly.
https://tex.stackexchange.com/a/733106/2891
- 2024/12/20
- remove duplicate index locators in the `make4ht-indexing` library.
https://tex.stackexchange.com/a/733106/2891
- 2024/12/19
- fixed fatal error caused by handling comments in the `fixinlines` DOM filter.
- 2024/12/17
- keep track of braces around index entries page numbers.
https://tex.stackexchange.com/a/733106/2891
- 2024/12/08
- support other index locators than index command counters.
https://tex.stackexchange.com/a/732446/2891
- 2024/12/05
- recognize inline math and comments as inline content in the `fixinlines` DOM filter.
- 2024/11/09
- fixed `tablerows` for longtables longer than 200 rows.
https://tex.stackexchange.com/a/730466/2891
- 2024/10/22
- version `0.4b` released.
- test for the existence of the class attribute of tables before performing string matches in the `tablerows` DOM filter.
- 2024/10/21
- version `0.4a` released.
- 2024/09/30
- remove last empty row in `longtable` environments.
- 2024/09/25
- added `make4ht-char-def` library to remove dependency on `char-def` from
ConTeXt. It is used by the `sectionid` DOM filter. The library is
automatically created from UnicodeData.txt by `tools/make_chardata.lua`.
https://tex.stackexchange.com/q/727202/2891
- 2024/09/18
- print error messages from the `log` file.
- 2024/08/22
- print debug message with XML parsing error context
- 2024/08/21
- try the HTML parser for DOM filters if XML parser fails.
- 2024/06/18
- changed default scaling in `dvisvgm_hashes` to 1.4.
- 2024/06/17
- fixed support for index entries that are broken over several lines.
- 2024/05/29
- create temporary Lg file for Xtpipes.
https://github.com/michal-h21/make4ht/issues/148#issuecomment-2136160818
- fixed handling of the generated ODT file, don't copy it unnecessarily and
handle build and output directories.
https://github.com/michal-h21/make4ht/issues/148#issuecomment-2136160818
- 2024/04/23
- test if referenced files are contained in the `build-dir` directory, use the current dir otherwise.
https://github.com/michal-h21/tex4ebook/issues/126
- 2024/04/16
- fixed bug in `dvisvgm_hashes` log parsing. On Windows, it didn't expect `\r` characters at the line ends, they then ended in new SVG filenames.
- 2024/04/15
- made the `make` command used in the `dvisvgm_hashes` extension configurable.
https://tex.stackexchange.com/q/715633/2891
- 2024/03/29
- ignore numbers in braces in `make4ht-indexing` library.
https://tex.stackexchange.com/q/714247/2891
- 2024/02/22
- version `0.4` released
- fixed name of the `<title-group>` element in JATS output.
- 2024/02/21
- add standalone minus sign to the `<mn>` element in MathML.
- 2024/01/26
- fixed passing of the params table in the `staticsite` extension.
- 2024/01/22
- don't enable parsing of void elements in DOM filter with XML output formats.
- 2023/12/15
- call `fix_rel_mo` MathML fix only in the ODT output.
- 2023/12/11
- fixed handling of the `--build-dir` argument in `dvisvgm_hashes` extension.
- 2023/11/03
- remove leading dashes in ids created by the `sectionid` DOM filter.
- 2023/10/26
- check that removed elements in the `sectionid` DOM filter are `<a>` elements.
- 2023/10/25
- fixed addition of named IDs to figures.
- 2023/10/19
- print info about packages with no corresponding `.4ht` file.
- 2023/10/05
- added fix for LibreOffice's bug regarding relation type math operators.
- 2023/10/03
- fixed most features that didn't work with the `--build-dir` argument.
- 2023/10/02
- working on new command line argument `--build-dir`. It should allow moving of temporary files to a temporary directory. The idea and most of the code comes from Robin Seth Ekman.
- 2023/09/07
- fix spacing for the `dcases*` environment in the `mathml_fixes` DOM filter.
- 2023/08/24
- support non-numerical values in index entry destinations (for example roman numerals).
- 2023/08/22
- updated list of DOM filters used by the `common_domfilters` extension, and documented that it is used automatically in the HTML output.
- 2023/08/12
- remove unnecessary `<a>` elements with `id` attributes and set the `id` on the parent.
- 2023/07/06
- add prefix to section IDs if the section name is empty.
- 2023/06/20
- added the `copy_images` extension.
- 2023/06/14
- fixed bug in the `mathmlfixes` DOM filter -- non-empty last rows were
removed if they contained only one element.
- 2023/05/26
- load `tex4ht.sty` before input file processing starts.
- 2023/04/19
- handle additional characters in the `idcolons` DOM filter.
- 2023/04/06
- fixed handling of ID attributes in the `idcolons` DOM filter.
- 2023/03/07
- remove empty rows in `longtable`.
- 2023/02/24
- version `0.3k` released.
- 2023/01/09
- fixed detection of image file names in `mkutils.parse_lg()`
- 2022/11/25
- reverted change of index page numbers, it was buggy
- test if the `.idx` file exists.
- 2022/11/24
- `make4ht-indexing`: fixed handling of numbers in index entries text.
- 2022/11/01
- remove empty last rows in MathML tables.
- 2022/10/21
- added the `inlinecss` DOM filter and extension with the same name.
- 2022/09/29
- the `join_characters` DOM filter now shouldn't produce extra `<span>` elements after white space.
- 2022/09/16
- use the `no^` option to compile the `make4ht` HTML docs, to prevent clash with the Doc package.
- 2022/07/22
- `mathmlfixes` DOM filter:
- don't change `<mo>` to `<mtext>` if the element contains the `stretchy` attribute.
- add `<mtext>` to `<mstyle>` if it contains only plain text
- 2022/07/08
- configure elements used in `join_characters` DOM filter.
- added support for the `mml:` prefix in `mathml_fixes` DOM filter.
- 2022/06/28
- handle `\maketitle` in JATS.
- 2022/06/24
- handle internal and external links in the JATS output.
- better detection of empty paragraphs.
- 2022/06/16
- use DOM filters to fix JATS output.
- 2022/04/22
- use more explicit options for `latexmk`.
- 2022/04/19
- remove all `htlatex` calls from the build sequence when the `latexmk_build` extension is used.
- fixed other issues that caused spurious executions of `latexmk`.
- 2022/04/01
- don't copy files to the output dir if it wasn't requested
- fixed copying of the ODT file to the output dir.
- 2022/03/29
- check if tidy returns a non-empty string in the `tidy` extension.
- 2022/03/24
- don't use totally random names in the `preprocess_input` extension, in order to support images correctly.
- 2022/03/22
- version `0.3l` released.
- fixed issues with filenames on Windows.
- 2022/03/01
- use `rmarkdown` package to process `.rmd` files in the `preprocess_input`
extension (thanks to James Clawson).
- 2022/02/18
- version `0.3k` released.
- 2022/02/07
- fixed support for some fonts in the ODT format.
- added `odtfonts` DOM filter.
- 2022/01/30
- fix `mathvariant` attribute of `<mi>` elements if they are children of `<mstyle>`.
- 2021/12/17
- quote jobname in order to support filenames like `(xxx).tex`.
- 2021/12/13
- fixed setting of properties in the `staticsite` filter.
- 2021/12/06
- in the end, use `<mtext>` even for one `<mo>` in the `fix_operators` function. LO
had issues with `<mi>`.
- 2021/12/03
- don't add additional `<mrow>` elements in the `mathmlfixes` DOM filter. It caused
various issues.
- 2021/12/01
- transform `<mn>x</mn><mo>.</mo><mn>x</mn>` to `<mn>x.x</mn>` in MathML.
- transform `<mo>` elements that are single children to `<mi>` in MathML, and
list of consecutive `<mo>` elements to `<mtext>`. This should fix rendering
issues of superscripts in LibreOffice.
- added filter names in extensions to prevent multiple execution of filters.
- 2021/11/29
- make current logging level available outside of the Logging module.
- print Xtpipes and Tidy output if these commands fail in the Xtpipes module.
- 2021/11/18
- don't put `<mrow>` as children of `<mrow>` in the `mathmlfixes` DOM filter.
- 2021/11/04
- more intelligent handling of text and inline elements outside of paragraphs
in the `fixinlines` DOM filter.
- 2021/10/11
- version `0.3j` released.
- 2021/10/09
- fixed wrong DOM object name in the ODT format.
- add additional `<mrow>` elements when necessary.
- 2021/09/30
- version `0.3i` released.
- 2021/09/21
- run DOM parse in sandbox in the ODT format picture size function.
- 2021/09/20
- remove LaTeX commands from TOC entries in `sectionid` DOM filter.
- 2021/09/09
- corrected SVG dimension setting in the ODT output. Dimensions are set also for PNG and JPG pictures.
- 2021/09/05
- corrected detection of closing brace in CSS style in `mjcli` filter.
- 2021/08/13
- use LaTeX new hook mechanism to load `tex4ht.sty` before document class.
It fixes some issues with packages required in classes.
- 2021/08/12
- correctly set dimensions for `SVG` images in the `ODT` format.
- 2021/07/29
- sort YAML header in the `staticsite` filter.
- 2021/07/25
- version `0.3h` released.
- 2021/07/25
- use current directory as default output dir in `staticsite` extension.
- 2021/07/23
- fixed detection of single paragraphs inside `<li>` in the `itemparagraphs` DOM filter.
- 2021/07/18
- remove elements produced by `\maketitle` in the `staticsite` extension.
- 2021/07/05
- sort colors alphabetically in the `joincolors` DOM filter to enable reproducible builds.
- 2021/06/26
- rewrote the `collapsetoc` DOM filter.
- 2021/06/20
- test for the `svg` picture mode in the `tex4ht` command. Use the `-g.svg`
option if it is detected. This is necessary for correct support of
pictorial characters.
- 2021/06/16
- better handling of duplicate ID attributes in `sectionid` DOM filter.
- support `notoc` option in `sectionid`.
- 2021/06/13
- added `itemparagraphs` DOM filter. It removes unnecessary paragraphs from `<li>` elements.
- 2021/05/06
- remove `<hr>` elements in `.hline` rows in `tablerows` DOM filter.
- 2021/05/01
- added function `mkutils.isModuleAvailable`. It checks if Lua library is available.
- check for `char-def` library in `sectionid` DOM filter.
- 2021/04/08
- removed `build_changed`. New script, [siterebuild](https://github.com/michal-h21/siterebuild), should be used instead.
- new DOM filter, `sectionid`. It uses sanitized titles instead of automatically generated numbers as section IDs.
- added `sectionid` to `common_domfilters`.
- use `context` in the Docker file, because it contains the `char-def.lua` file.
- 2021/03/20
- use `kpse` library when files are copied to the output directory.
- added `clean` mode. It removes all generated, temporary and auxiliary files.
- 2021/03/19
- version `0.3g` released.
- 2021/02/08
- remove `<?xtpipes ?>` processing instructions from the generated ODT file.
- 2021/02/01
- better error messages when extension cannot be loaded.
- added `mjcli` extension.
- `mjcli` filter supports \LaTeX\ syntax.
- updated documentation.
- 2021/01/31
- added new MathJax Node filter, `mjcli`.
- 2020/12/19
- build web documentation only when documentation sources change.
- 2020/11/22
- set exit status for the `make4ht` command.
- 2020/11/22
- new extension, `build_changed`.
- 2020/11/01
- fix deprecated `<mfenced>` element in MathML
- convert `<mo fence>` elements to `<mfenced>` in `ODT` format.
- 2020/10/28
- fixed handling of nested `<span>` elements in `joincharacters` DOM filter.
- 2020/10/25
- fixed command name for `Make:httex`, it was `Make:htttex`.
- 2020/10/17
- generate YAML header for all generated files with the `staticsite` extension.
- 2020/09/17
- require `mathml` option when `mathjaxnode` extension is used.
- 2020/09/07
- version `0.3f` released.
- 2020/08/26
- `fixinlines` DOM filter: added `<a>` element into list of inline elements.
- 2020/08/24
- initialize attributes in new element in `mathmlfixes` DOM extension.
- 2020/07/18
- changed CSS for the HTML documentation.
- 2020/07/17
- fixed bug in index parsing.
- 2020/07/10
- use the `joincharacters` DOM filter for TEI output.
- 2020/07/08
- don't fail when filename cannot be detected in `make4ht-errorlogparser.lua`.
- 2020/05/27
- test if copied file exists in `mkutils.cp`.
- 2020/05/19
- fixed image filename replace in `dvisvgm_hashes` extension.
- 2020/05/16
- fixed HTML filename matching in extensions.
- 2020/05/08
- use global environment in the build files.
- 2020/03/03
- added `jats` format.
- 2020/02/28
- version `0.3e` released.
- 2020/02/24
- `t4htlinks` DOM filter: cleanup file names from internal links.
- `make4ht-indexing`: added support for splitindex.
- 2020/02/19
- use `UTF-8` output by default. `8-bit` output is broken and non fixable.
- 2020/02/07
- use `lualatex-dev` instead of `harflatex`
- 2020/02/06
- added support for `harflatex` and `harftex` in the `detect_engine` extension.
- 2020/01/22
- version `0.3d` released.
- added `Make:httex` command for Plain TeX support.
- added `detect_engine` extension. It supports detection of the used engine
and format from TeX Shop or TeXWorks magic comments. These comments can
look like: `%!TEX TS-program = xelatex`.
- 2020/01/22
- fixed support for multiple indices in `make4ht-indexing.lua`.
- 2019/12/29
- use the `mathvariant="italic"` attribute for joined `<mi>` elements.
- fixed comparison of element attributes in `joincharacters` DOM filter.
- 2019/12/28
- print warning if the input file doesn't exist.
- 2019/12/17
- added `booktabs` DOM filter.
- load the `booktabs` in `common_domfilters` by default.
- 2019/12/14
- fixed bug in the `tablerows` DOM filter -- it could remove table rows if
they contained only one column with elements that contained no text
content.
- 2019/11/28
- version `0.3c` released.
- updated `mathmlfixes` DOM filter. It handles `<mstyle>` element inside token elements now.
- use `mathmlfixes` and `joincharacters` DOM filters for math XML files in the ODT output.
- 2019/11/25
- added `pythontex` command.
- added `mathmlfixes` DOM filter.
- use the `mathmlfixes` DOM filter in `common_domfilters` extension.
- 2019/11/22
- `make4ht-joincharacters` dom filter: added support for the `<mi>`
element. Test all attributes for match when joining characters.
- `html5` format: use the `common_domfilters` by default.
- 2019/11/03
- version `0.3b`
- use `make4ht-ext-` prefix for extensions to prevent filename clashes with corresponding filters.
- 2019/11/01
- version `0.3a` released.
- added `make4ht-` prefix to all extensions and formats
- removed the unused `mathjaxnode.lua` file.
- 2019/11/01
- version `0.3` released.
- added `Make:makeindex`, `Make:xindex` and `Make:bibtex` commands.
- 2019/10/25
- modified the `Make:xindy` command to use the indexing mechanism.
- 2019/10/24
- added functions for preparing and cleaning of the index files in `make4ht-indexing.lua`.
- 2019/10/23
- replaced `os.execute` function with `mkutils.execute`. It uses the logging mechanism for the output.
- finished transforming of filters, extensions and formats to the logging system.
- 2019/10/22
- added `tablerows` domfilter.
- added the `tablerows` domfilter to the `common_domfilters` extension.
- converted most of the filters to use the logging mechanism.
- 2019/10/20
- added `status` log level.
- 2019/10/18
- converted most print commands to use the logging mechanism.
- added `output` log level used for printing of the commands output.
- 2019/10/17
- added `--loglevel` CLI parameter.
- added logging mechanism.
- moved `htlatex` related code to `make4ht-htlatex.lua` from `mkutils.lua`
- 2019/10/11
- added `xindy` settings.
- added simple regular expression to detect errors in the log file, because log parsing can be slow.
- 2019/10/09
- added the `interaction` parameter for the `htlatex` command. The default
value is `batchmode` to suppress the user input on errors, and to
suppress full log output to the terminal.
- added the `make4ht-errorlogparser` module. It is used to parse errors in
the `htlatex` run unless `interaction` is set to `errorstopmode`.
- 2019/10/08
- set up Github Actions pipeline to compile the documentation to HTML and publish it at https://www.kodymirus.cz/make4ht/make4ht-doc.html.
- 2019/10/07
- don't move the `common_domfilters` extension to the first place in the
file matches pipeline. We may want to run `tidy` or regex filters first,
to fix XML validation errors.
- 2019/10/04
- added HTML documentation.
- 2019/09/27
- don't convert Latin 1 entities to Unicode in the `entities_to_unicode` extension.
- 2019/09/20
- fixed bugs in the temporary directory handling for the ODT output.
- 2019/09/13
- added `preprocess_input` extension. It enables compilation of formats
supported by [Knitr](https://yihui.name/knitr/) (`.Rnw`, `.Rtex`, `.Rmd`, `.Rrst`)
and also Markdown and reStructuredText formats.
- 2019/09/12
- added support for the ODT files in `common_domfilters` extension.
- renamed `charclases` option for the `joincharacters` DOM filter to `charclasses`.
- don't execute the `fixentities` filter before Xtpipes, it makes no sense.
- 2019/09/11
- added support for Biber in the build files.
- 2019/08/28
- added support for input from `stdin`.
- 2019/08/27
- fixed `-jobname` detection regex.
- added function `handle_jobname`.
- added the `--jobname` command line option.
- 2019/08/26
- quote file names and paths in `xtpipes` and `tidy` invocation.
- 2019/08/25
- the issue tracker link in the help message is now configurable.
- fixed bug in the XeTeX handling: the `.xdv` argument for `tex4ht` wasn't
used if command line arguments for `tex4ht` were present.
- 2019/07/03
- new DOM filter: `odtpartable`. It fixes tables nested in paragraphs in the ODT format.
- 2019/06/13
- new DOM extension: `collapsetoc`.
- 2019/05/29
- new module: `make4ht-indexing` for working with index files.
- 2019/05/24
- version 0.2g released
- fixed failing `dvisvgm_hashes` extension on Windows.
- 2019/05/02
- fixed infinite loop bug in the `dvisvgm_hashes` extension.
- 2019/04/09
- `make4ht-joincolors` fix: remove the hash character from the color name.
This caused issues with colors specified in the hexadecimal format.
- 2019/04/02
- `dvisvgm_hashes` fix: update also the lgfile.images table with generated filenames, in order to support tex4ebook
- 2019/04/01
- fixed bug in `dvisvgm_hashes` extension: didn't check for table index existence in string concatenation
- 2019/03/21
- version 0.2f released
- 2019/03/15
- check for the image dimensions existence in the `odtimagesize` domfilter.
- 2019/03/13
- don't use `odtimagesize` domfilter in the `ODT` format, the issue it fixes had been resolved in `tex4ht`.
- 2019/03/08
- use `%USERPROFILE` for home dir search on Windows.
- 2019/01/28
- added `joincolors` domfilter and `join_colors` extension. It can join CSS rules created for the LaTeX colors and update the HTML file.
- 2019/01/22
- version 0.2e released
- updated the `odttemplate` filter. It will use styles from the generated ODT file that haven't been present in the template file.
- 2019/01/10
- version 0.2d released
- 2019/01/05
- added `docbook` and `tei` output formats.
- 2018/12/19
- new library: `make4ht-xtpipes.lua`. It contains code for xtpipes handling.
- moved Xtpipes handling code from `formats/odt.lua`.
- 2018/12/18
- new filter: `odttemplate`. It can be used for replacing style in a generated `ODT` file by a style from another existing `ODT` file.
- new extension: `odttemplate`. Companion extension for the filter with the same name.
- fixed bug in `make4ht-filters.lua`: the parameters table hasn't been passed to filters.
- 2018/12/17
- fixed extension handling. The disabling from the command line didn't take
precedence over extensions enabled in the config file. Extensions also
could be executed multiple times.
- 2018/11/08
- removed replacing newlines by blank strings in the `joincharacters` domfilter. The issue it fixed doesn't seem to exist anymore, and it ate spaces sometimes.
- 2018/11/01
- added `t4htlinks` domfilter
- fixed the `xtpipes` and `filters` execution order in the `ODT` format
- 2018/10/26
- fixed ODT generation for files that contain special characters for Lua string patterns
- replace non-breaking spaces with entities. It caused issues in LO
- 2018/10/18
- fixed the executable installation
- 2018/09/16
- added the `scale` option for `dvisvgm_hashes` extension
- 2018/09/14
- require the `-dvi` option with `latexmk_build` extension
- 2018/09/12
- added `xindy` command for the build file
- 2018/09/03
- expanded the `--help` option
- 2018/08/27
- added `odtimagesize` domfilter
- load `odtimagesize` by default in the ODT format
- 2018/08/23
- released version 0.2c
- 2018/08/21
- added processor core detection on Windows
- make processor number configurable
- updated the documentation.
- 2018/08/20
- added `dvisvgm_hashes` extension
- 2018/07/03
- create the `mimetype` file to achieve the ODT file validity
- 2018/07/02
- disabled conversion of XML entities for &, < and > characters back to Unicode, because it breaks XML validity
- 2018/06/27
- fixed root dir detection
- 2018/06/26
- added code for detection of TeX distribution root for Miktex and TL
- 2018/06/25
- moved call to `xtpipes` from `t4ht` to the `ODT` format drives. This should fix issues with path expansion in `tex4ht.env` in TeX distributions.
- 2018/06/22
- added `mkutils.find_zip` function. It detects `zip` or `miktex-zip` executables
- 2018/06/19
- added new filter: `entities-to-unicode`. It converts XML entities for Unicode characters back to Unicode.
- execute `entities-to-unicode` filter on text and math files in the ODT output.
- 2018/06/12
- added support for direct `ODT` file packing
- 2018/06/11
- new function available for formats, `format.modify_build`
- function `mkutils.delete_dir` for directory removal
- function `mkutils.mv` for file moving
- started on packing of the `ODT` files directly by the format, instead of `t4ht`
- 2018/06/08
- added support for filenames containing spaces
- added support for filenames containing non-ascii characters
- don't require sudo for the installation, let the user to install symbolic links to `$PATH`
- 2018/05/03
- released version `0.2b`
- bug fix: use only `load` function in `Make:run`, in order to support a local environment.
- 2018/05/03
- released version `0.2a`
- renamed `latexmk` extension to `latexmk_build`, due to clash in TL
- 2018/04/18
- `staticsite` extension:
- make YAML header configurable
- set the `time` and `updated` headers
- don't override existing tables in `filter_settings`
- 2018/04/17
- done first version of `staticsite` extension
- 2018/04/16
- check for Git repo in the Makefile, don't run Git commands outside of repo
- 2018/04/15
- added `staticsite` filter
- working on `staticsite` extension
- 2018/04/13
- use `ipairs` instead of `pairs` to traverse lists of images and image match functions
- load extensions in the correct order
- 2018/04/09
- released version `0.2`
- disabled default loading of `common_domfilters` extension
- 2018/04/06
- added `Make:enable_extension` and `Make:disable_extension` functions
- documented the configuration file
- 2018/03/09
- load the configuration file before extensions
- 2018/03/02
- Aeneas execution works
- Aeneas documentation
- added support for `.make4ht` configuration file
- 2018/02/28
- Aeneas configuration file creation works
- 2018/02/22
- fixed bug in `fixinlines` DOM filter
- 2018/02/21
- added Aeneas domfilter
- fixed bugs in `joincharacters` DOM filter
- 2018/02/20
- fixed bug in `joincharacters` DOM filter
- make `woff` default font format for `mathjaxnode`
- added documentation for `mathjaxnode` settings
- 2018/02/19
- fixed bug in filter loading
- added `mathjaxnode` extension
- 2018/02/15
- use HTML5 as a default format
- use `common_domfilters` implicitly for the XHTML and HTML5 formats
- 2018/02/12
- added `common_domfilters` extension
- documented DOM filters
- 2018/02/12
- handle XML parsing errors in the DOM handler
- enable extension loading in Formatters
- 2018/02/11
- fixed Tidy extension output to support LuaXML
- fixed white space issues with `joincharacters` DOM filter
- 2018/02/09
- fixed issues with the Mathjax filter
- documented basic info about the DOM filters
- DOM filter optimizations
- 2018/02/08
- make Tidy extension configurable
- documented filter settings
- 2018/02/07
- added filter for Mathjax-node
- 2018/02/06
- created DOM filter function
- added DOM filter for spurious inline elements
- 2018/02/03
- added settings handling functions
- settings made available for extensions and filters
- 2017/12/08
- fixed the `mk4` build file loading when it is placed in the current working dir and another one with same filename somewhere in the TEXMF tree.
- 2017/11/10
- Added new filter: `svg-height`. It tries to fix height of some of the images produced by `dvisvgm`
- 2017/10/06
- Added support for output format selection. Supported formats are `xhtml`, `html5` and `odt`
- Added support for extensions
- 2017/09/10
- Added support for Latexmk
- Added support of `math` library and `tonumber` function in the build files
- 2017/09/04
- fixed bug caused by the previous change -- the --help and --version didn't work
- 2017/08/22
- fixed the command line option parsing for `tex4ht`, `t4ht` and `latex` commands
- various grammar and factual fixes in the documentation
- 2017/04/26
- Released version `v0.1c`
- 2017/03/16
- check for `TeX capacity exceeded` error in the \LaTeX\ run.
- 2016/12/19
- use full input name in `tex_file` variable. This should enable use of files without `.tex` extension.
- 2016/10/22
- new command available in the build file: `Make:add_file(filename)`. This enables filters and commands to register files to the output.
- use ipairs instead of pairs for traversing files and executing filters. This should ensure correct order of executions.
- 2016/10/18
- new filter: replace colons in `id` and `href` attributes with underscores
- 2016/01/11
- fixed bug in loading documents with full path specified
- 2015/12/06 version 0.1b
- modified lapp library to recognize `--version` and
- added `--help` and `--version` command line options
- 2015/11/30
- use `kpse` library for build file locating
- 2015/11/17
- better `-jobname` handling
- 2015/09/23 version 0.1a
- various documentation updates
- `mozhtf` profile for unicode output is used, this should prevent ligatures in the output files
- 2015/06/29 version 0.1
- major README file update
- 2015/06/26
- added Makefile
- moved INSTALL instructions from README to INSTALL
================================================
FILE: INSTALL.md
================================================
Installation
------------
If you use TeX Live 2015 or an up-to-date Miktex distribution, `make4ht` should be installed already on your system.
You need to install it only if you use an older distribution or want to try new features which aren't accessible in the version
included in the distributions.
## Prerequisites
You need a TeX distribution such as TeX Live or Miktex. It must include the `tex4ht` system, Noto fonts and the `texlua` script. All modern
distributions include it. You also need [Pandoc](http://pandoc.org/) in order to make the documentation and
`latexmk`, which should be included in your TeX distro.
## Unix systems
Run these commands:
make
make install
`make4ht` is installed to `/usr/local/bin` directory by default. The
directory can be changed by passing its location to the `BIN_DIR` variable:
make install BIN_DIR=~/.local/bin/
## Windows
See a [guide by Volker Gottwald](https://d800fotos.wordpress.com/2015/01/19/create-e-books-from-latex-tex-files-ebook-aus-latex-tex-dateien-erstellen/) on how
to install `make4ht` and `tex4ebook`.
Create a batch file for `make4ht` somewhere in the `path`:
texlua "C:\full\path\to\make4ht" %*
you can find directories in the path with
path
command, or you can create new directory and [add it to the path](http://stackoverflow.com/questions/9546324/adding-directory-to-path-environment-variable-in-windows).
Note for `Miktex` users: you may need to create `texmf` directory first. See
[this answer on TeX.sx](http://tex.stackexchange.com/questions/69483/create-a-local-texmf-tree-in-miktex).
## Troubleshooting
### Missing support for LuaLaTeX in `latexmk`
If you get the following error message:
latexmk -lualatex make4ht-doc.tex
Latexmk: -lualatex bad option
Latexmk: Bad options specified
Use
latexmk -help
to get usage information
make: *** [make4ht-doc.pdf] Error 10
then you have an old version of `latexmk`. Try to replace the line:
latexmk -lualatex make4ht-doc.tex
in the `Makefile` with
lualatex make4ht-doc.tex
lualatex make4ht-doc.tex
`latexmk` takes care of correct number of compilations needed to produce the correct document.
### Need to update TeX database
If you get the following error message:
/usr/local/bin/make4ht:5: module 'make4ht-lib' not found:
no field package.preload['make4ht-lib']
[kpse lua searcher] file not found: 'make4ht-lib'
[kpse C searcher] file not found: 'make4ht-lib'
then try to run command
texhash
this will update the TeX file database and newly installed files should be usable.
================================================
FILE: Makefile
================================================
# Targets declared phony: they are always considered out of date.
.PHONY: build tags
# Source file lists, collected with wildcards relative to the repository root.
lua_content = make4ht $(wildcard *.lua)
filters = $(wildcard filters/*.lua)
domfilters = $(wildcard domfilters/*.lua)
extensions = $(wildcard extensions/*.lua)
formats = $(wildcard formats/*.lua)
tex_content = $(wildcard *.tex)
# Documentation files: root name, LaTeX source, PDF output and HTML output.
doc_root = make4ht-doc
doc_tex = $(doc_root).tex
doc_file = $(doc_root).pdf
# HTML_DOC_DIR is defined further below; this works because `=` variables
# are expanded lazily, at the point of use.
htmldoc = $(HTML_DOC_DIR)/$(doc_root).html
# Prerequisites of the PDF and HTML documentation. The phony `tags` target is
# included, so the documentation rules are re-run on every invocation.
doc_sources = $(doc_tex) readme.tex changelog.tex tags
# Installation directories inside the TEXMFHOME tree reported by kpsewhich.
TEXMFHOME = $(shell kpsewhich -var-value=TEXMFHOME)
INSTALL_DIR = $(TEXMFHOME)/scripts/lua/make4ht
MANUAL_DIR = $(TEXMFHOME)/doc/latex/make4ht
FILTERS_DIR = $(INSTALL_DIR)/filters
DOMFILTERS_DIR = $(INSTALL_DIR)/domfilters
FORMATS_DIR = $(INSTALL_DIR)/formats
EXTENSION_DIR = $(INSTALL_DIR)/extensions
# Directory that receives the `make4ht` symlink; override on the command line
# with `make install BIN_DIR=...`.
BIN_DIR = /usr/local/bin
# expand the bin directory
SYSTEM_DIR = $(realpath $(BIN_DIR))
EXECUTABLE = $(SYSTEM_DIR)/make4ht
# Staging directories used by the `build` and `htmldoc` targets.
BUILD_DIR = build
BUILD_MAKE4HT = $(BUILD_DIR)/make4ht
HTML_DOC_DIR = htmldoc
# Fallback values, used when the sources are not inside a Git work tree.
VERSION:= undefined
DATE:= undefined
# Inside a Git work tree, take VERSION from `git describe` (falling back to a
# commit id via --always) and DATE from the first word printed by `git show`
# with the short date format.
ifeq ($(strip $(shell git rev-parse --is-inside-work-tree 2>/dev/null)),true)
VERSION:= $(shell git --no-pager describe --abbrev=0 --tags --always )
DATE:= $(firstword $(shell git --no-pager show --date=short --format="%ad" --name-only))
endif
# use sudo for install to destination directory outside home
# (the test only checks whether the resolved path contains the substring "home")
ifeq ($(findstring home,$(SYSTEM_DIR)),home)
SUDO:=
else
SUDO:=sudo
endif
# install the executable only if the symlink doesn't exist yet
# (this is evaluated when the Makefile is parsed, not when the recipe runs)
ifeq ("$(wildcard $(EXECUTABLE))","")
INSTALL_COMMAND:=$(SUDO) ln -s $(INSTALL_DIR)/make4ht $(EXECUTABLE)
else
INSTALL_COMMAND:=
endif
# Default target: build the documentation.
all: doc
# Fetch Git tags, but only when running inside a Git work tree.
tags:
ifeq ($(strip $(shell git rev-parse --is-inside-work-tree 2>/dev/null)),true)
git fetch --tags
endif
# PDF documentation; the character definition tables are regenerated first.
doc: chardef $(doc_file) readme.tex
# HTML documentation, written to $(HTML_DOC_DIR).
htmldoc: chardef ${htmldoc}
# Compile the PDF manual with latexmk running lualatex; \version and \gitdate
# are defined on the command line from VERSION and DATE.
make4ht-doc.pdf: $(doc_sources)
latexmk -pdf -pdflatex='lualatex "\def\version{${VERSION}}\def\gitdate{${DATE}}\input{%S}"' make4ht-doc.tex
# Compile the HTML manual with make4ht itself, passing the same \version and
# \gitdate definitions as the last (LaTeX options) argument.
$(htmldoc): $(doc_sources)
make4ht -ulm draft -c config.cfg -f html5+tidy+common_domfilters+latexmk_build -d ${HTML_DOC_DIR} ${doc_tex} "no^" "" "" "\"\def\version{${VERSION}}\def\gitdate{${DATE}}\""
# Convert the Markdown README to LaTeX so the manual can include it.
readme.tex: README.md
pandoc -f markdown+definition_lists -t LaTeX README.md > readme.tex
# Convert the Markdown changelog to LaTeX so the manual can include it.
changelog.tex: CHANGELOG.md
pandoc -f markdown+definition_lists -t LaTeX CHANGELOG.md > changelog.tex
# Assemble a distribution tree in $(BUILD_MAKE4HT) and pack it into
# $(BUILD_DIR)/make4ht.zip. The build directory is recreated from scratch.
# The `make4ht` script is copied twice: first verbatim together with the other
# Lua files, then overwritten by a copy in which sed replaces the first
# {{version}} placeholder on each line with $(VERSION).
# README.md is shipped under the name README.
build: chardef doc $(lua_content) $(filters) $(domfilters)
@rm -rf build
@mkdir -p $(BUILD_MAKE4HT)
@mkdir -p $(BUILD_MAKE4HT)/filters
@mkdir -p $(BUILD_MAKE4HT)/domfilters
@mkdir -p $(BUILD_MAKE4HT)/extensions
@mkdir -p $(BUILD_MAKE4HT)/formats
@cp $(lua_content) $(tex_content) make4ht-doc.pdf $(BUILD_MAKE4HT)
@cat make4ht | sed -e "s/{{version}}/${VERSION}/" > $(BUILD_MAKE4HT)/make4ht
@cp $(filters) $(BUILD_MAKE4HT)/filters
@cp $(domfilters) $(BUILD_MAKE4HT)/domfilters
@cp $(formats) $(BUILD_MAKE4HT)/formats
@cp $(extensions) $(BUILD_MAKE4HT)/extensions
@cp README.md $(BUILD_MAKE4HT)/README
@cd $(BUILD_DIR) && zip -r make4ht.zip make4ht
# Full installation: build the documentation, install the scripts through
# `justinstall`, then copy the PDF manual to $(MANUAL_DIR).
install: chardef doc $(lua_content) $(filters) $(domfilters) justinstall
cp $(doc_file) $(MANUAL_DIR)
# Install the scripts only, without building the documentation.
# Creates the target directories, copies the Lua sources, writes the `make4ht`
# script with {{version}} replaced by $(VERSION), and makes it executable.
# The `echo` prints the path of an already existing executable (empty if there
# is none); $(INSTALL_COMMAND) creates the symlink in $(SYSTEM_DIR), or is
# empty when the executable was already present at parse time.
justinstall: chardef
mkdir -p $(INSTALL_DIR)
mkdir -p $(MANUAL_DIR)
mkdir -p $(FILTERS_DIR)
mkdir -p $(DOMFILTERS_DIR)
mkdir -p $(FORMATS_DIR)
mkdir -p $(EXTENSION_DIR)
cp $(lua_content) $(INSTALL_DIR)
@cat make4ht | sed -e "s/{{version}}/${VERSION}/" > $(INSTALL_DIR)/make4ht
cp $(filters) $(FILTERS_DIR)
cp $(domfilters) $(DOMFILTERS_DIR)
cp $(extensions) $(EXTENSION_DIR)
cp $(formats) $(FORMATS_DIR)
chmod +x $(INSTALL_DIR)/make4ht
echo $(wildcard $(EXECUTABLE))
$(INSTALL_COMMAND)
# Regenerate the two character definition Lua files from the output of the
# generator scripts in tools/.
chardef:
texlua tools/make_chardata.lua > make4ht-char-def.lua
texlua tools/make_mathmlchardata.lua > make4ht-mathml-char-def.lua
# Print the version and date detected at the top of this Makefile.
version:
echo $(VERSION), $(DATE)
# Run the test script with texlua.
.PHONY: test
test:
texlua test/test-mkparams.lua
================================================
FILE: README.md
================================================
% [](https://travis-ci.org/michal-h21/make4ht)
% HTML version of the documentation can be found [here](https://www.kodymirus.cz/make4ht/make4ht-doc.html)
# Introduction
`make4ht` is a build system for [\TeX4ht](https://tug.org/tex4ht/), \TeX\ to XML converter. It provides a command line tool
that drives the conversion process. It also provides a library that can be used to create
customized conversion tools. An example of such a tool is
[tex4ebook](https://github.com/michal-h21/tex4ebook), a tool for conversion from \TeX\ to
ePub and other e-book formats.
See section \ref{sec:htlatex} for some reasons why you should consider using `make4ht` instead
of `htlatex`, section \ref{sec:output} talks about supported output formats and extensions and section \ref{sec:buildfiles}
describes build files, which can be used to execute additional commands or post-process the generated files.
# Usage
The basic conversion from \LaTeX\ to `HTML` using `make4ht` can be executed using the following command:
$ make4ht filename.tex
It will produce a file named `filename.html` if the compilation goes without fatal errors.
## Command line options {#clioptions}
\label{sec:clioptions}
make4ht - build system for TeX4ht
Usage:
make4ht [options] filename ["tex4ht.sty op." "tex4ht op."
"t4ht op" "latex op"]
-a,--loglevel (default status) Set log level.
possible values: debug, info, status, warning, error, fatal
-b,--backend (default tex4ht) Backend used for xml generation.
possible values: tex4ht or lua4ht
-c,--config (default xhtml) Custom config file
-d,--output-dir (default "") Output directory
-B,--build-dir (default nil) Build directory
-e,--build-file (default nil) If the build filename is different
than `filename`.mk4
-f,--format (default nil) Output file format
-j,--jobname (default nil) Set the jobname
-l,--lua Use lualatex for document compilation
-m,--mode (default default) Switch which can be used in the makefile
-n,--no-tex4ht Disable DVI file processing with tex4ht command
-s,--shell-escape Enables running external programs from LaTeX
-u,--utf8 For output documents in utf8 encoding
-x,--xetex Use xelatex for document compilation
-v,--version Print version number
<filename> (string) Input filename
## Option handling
It is possible to invoke `make4ht` in the same way as `htlatex`:
$ make4ht filename "customcfg, charset=utf-8" "-cunihtf -utf8" "-dfoo"
Note that this will not use `make4ht` routines for the output directory handling.
See section \ref{sec:output-dir} for more information about this issue.
To use these routines, change the previous listing to:
$ make4ht -d foo filename "customcfg, charset=utf-8" "-cunihtf -utf8"
This call has the same effect as the following:
$ make4ht -u -c customcfg -d foo filename
The output directory does not have to exist; `make4ht` creates it automatically.
Specified path can be relative to the current directory, or absolute:
$ make4ht -d use/current/dir/ filename
$ make4ht -d ../gotoparrentdir filename
$ make4ht -d ~/gotohomedir filename
$ make4ht -d c:\documents\windowspathsareworkingtoo filename
The short options that do not take parameters can be collapsed:
$ make4ht -ulc customcfg -d foo filename
## Input from the standard input
To pass the output from other commands to `make4ht`, use the `-` character as a
filename. It is best to use this feature together with the `--jobname` or `-j`
option.
$ cat hello.tex | make4ht -j world -
## Change amount of information printed on the command line
By default, `make4ht` tries to be quiet, so it hides most of the command line
messages and output from the executed commands. It displays status
messages, warnings, and errors. The logging level can be selected using the
`--loglevel` or `-a` options. If the compilation fails, it may be useful to display more
information using the `info` or `debug` levels.
$ make4ht -a debug faulty.tex
# Difference of `make4ht` from `htlatex`
\label{sec:htlatex}
\TeX4ht\ system supports several output formats, most notably `XHTML`, `HTML 5`
and `ODT`, but it also supports `TEI` or `Docbook`.
The conversion can be invoked using several scripts, which are distributed with \TeX4ht.
They differ in parameters passed to the underlying commands.
These scripts invoke \LaTeX\ or Plain \TeX\ with special instructions to load
the `tex4ht.sty` package. The \TeX\ run produces a special `DVI` file
that contains the code for the desired output format. The produced `DVI` file
is then processed using the `tex4ht` command, which in conjunction with the
`t4ht` command produces the desired output files.
## Passing of command line arguments to low-level commands used in the conversion
The basic conversion script provided by \TeX4ht\ system is named `htlatex`. It compiles \LaTeX\
files to `HTML` with this command sequence:
$ latex $latex_options 'code for loading tex4ht.sty \input{filename}'
$ latex $latex_options 'code for loading tex4ht.sty \input{filename}'
$ latex $latex_options 'code for loading tex4ht.sty \input{filename}'
$ tex4ht $tex4ht_options filename
$ t4ht $t4ht_options filename
The options for various parts of the system can be passed on the command line:
$ htlatex filename "tex4ht.sty options" "tex4ht_options" "t4ht_options" "latex_options"
For basic `HTML` conversion it is possible to use the most basic invocation:
$ htlatex filename.tex
It can be much more involved for the `HTML 5` output in `UTF-8` encoding:
$ htlatex filename.tex "xhtml,html5,charset=utf-8" " -cmozhtf -utf8"
`make4ht` can simplify it:
$ make4ht -u filename.tex
The `-u` option requires the `UTF-8` encoding. `HTML 5` is used as the default
output format by `make4ht`.
More information about the command line arguments can be found in section
\ref{sec:clioptions}.
## Compilation sequence
`htlatex` has a fixed compilation order and a hard-coded number of \LaTeX\ invocations.
It is not possible to execute additional commands during the compilation.
When we want to run a program that interacts with \LaTeX, such as `Makeindex`
or `Bibtex`, we have two options. The first option is to create a new script based on
`htlatex` and add the wanted commands to the modified script. The second option
is to execute `htlatex`, then the additional commands, and then `htlatex` again. The
second option means that \LaTeX\ will be invoked six times, as each call to
`htlatex` executes three calls to \LaTeX. This can lead to significantly long
compilation times.
`make4ht` provides a solution for this issue using a build file, or extensions.
These can be used for interaction with external tools.
`make4ht` also provides compilation modes, which make it possible to select commands that
should be executed using a command line option.
There is a built-in `draft` mode, which invokes \LaTeX\ only once, instead of
the default three invocations. It is useful for the compilations of the
document before its final stage, when it is not important that all
cross-references work. It can save quite a lot of the compilation time:
$ make4ht -um draft filename.tex
Another built-in mode is `clean`. It executes the `Make:clean()` command to
remove all generated and temporary files from the current directory.
No \LaTeX\ compilation happens in this mode.
It should be used in this way:
# copy generated files to a directory
$ make4ht -d outdir filename.tex
# remove all generated files in the current dir
# the -a info option will print files that are removed
$ make4ht -m clean -a info filename.tex
More information about the build files can be found in section \ref{sec:buildfiles}.
## Handling of the generated files
\label{sec:output-dir}
There are also issues with the behavior of the `t4ht` application. It reads the
`.lg` file generated by the `tex4ht` command. This file contains
information about the generated files, `CSS` instructions, calls to the external
applications, instructions for image conversions, etc.
`t4ht` can be instructed to copy the generated files to an output directory, but
it doesn't preserve the directory structure. When the images are placed in a
subdirectory, they will be copied to the output directory, losing the directory structure.
Links will be pointing to a non-existing subdirectory. The following command
should copy all output files to the correct destinations.
$ make4ht -d outputdir filename.tex
`make4ht` can also output temporary files to a build directory, thanks to the `--build-dir` (or `-B`)
option. The following command will put `.aux`, `.4tc` and other auxiliary files to the
`build` dir, and the generated `.html` and `.css` files to the `outputdir` directory.
$ make4ht -B build -d outputdir filename.tex
## Image conversion and postprocessing of the generated files
\TeX4ht\ can convert parts of the document to images. This is useful
for diagrams or complicated math, for example.
By default, the image conversion is configured in a
[`.env` file](https://www.tug.org/applications/tex4ht/mn34.html#mn35.html).
It has a bit of strange syntax, with
operating system dependent rules.
`make4ht` provides simpler means for the image conversion in the build files.
It is possible to change the image conversion parameters without a need to modify the `.env` file.
The process is described in section \ref{sec:imageconversion}.
It is also possible to post-process the generated output files. The post-processing can be done
either using external programs such as `XSLT` processors and `HTML Tidy` or
using `Lua` functions. More information can be found in section \ref{sec:postprocessing}.
# Output file formats and extensions
\label{sec:output}
The default output format used by `make4ht` is `html5`. A different
format can be requested using the `--format` option. Supported formats are:
- `xhtml`
- `html5`
- `odt`
- `tei`
- `docbook`
The `--format` option can be also used for extension loading.
## Extensions
Extensions can be used to modify the build process without the need to use a build file. They
may post-process the output files or request additional commands for the compilation.
The extensions can be enabled or disabled by appending `+EXTENSION` or `-EXTENSION` after
the output format name:
$ make4ht -f html5+tidy filename.tex
In `xhtml` and `html5` output formats, the `common_domfilters` extension is triggered automatically, but
it can still be disabled using:
$ make4ht -f html5-common_domfilters filename.tex
Available extensions:
common\_filters
: clean the output HTML files using filters.
common\_domfilters
: clean the HTML file using DOM filters. It is more powerful than
`common_filters`. It uses the following DOM filters: `fixinlines`, `idcolons`,
`joincharacters`, `mathmlfixes`, `tablerows`, `booktabs`, `sectionid`
and `itemparagraphs`.
copy\_images
: Copies the images to the output directory. This is useful if the original
images are stored in directories above the document directory.
detect\_engine
: detect engine and format necessary for the document compilation from the
magic comments supported by \LaTeX\ editors such as TeXShop or TeXWorks.
Add something like the following line at the beginning of the main \TeX\ file:
`%!TEX TS-program = xelatex`
It supports also Plain \TeX, use for example `tex` or `luatex` as the program name.
dvisvgm\_hashes
: efficient generation of SVG pictures using Dvisvgm. It can utilize
multiple processor cores and generates only changed images.
inlinecss
: load the `inlinecss` DOM filter.
join\_colors
: load the `joincolors` DOM filter for all HTML files.
latexmk\_build
: use [Latexmk](https://ctan.org/pkg/latexmk?lang=en) for the \LaTeX\ compilation.
mathjaxnode
: (**deprecated**, use `mjcli` extension instead) Old information: use [mathjax-node-page](https://github.com/pkra/mathjax-node-page/) to
convert from MathML code to HTML + CSS or SVG. See [the available
settings](#mathjaxsettings).
mjcli
: use [mjcli](https://github.com/michal-h21/mjcli) to convert math in MathML or \LaTeX\
format to plain HTML + CSS. MathML is used by default. If you want to use \LaTeX\ math,
add "mathjax" option on the command line (like `make4ht -f html5+mjcli filename.tex "mathjax"`).
See [the available settings](#mathjaxsettings).
nodynamicodt
: change dynamic content in ODT files (such as tables of contents or bibliographies) to text.
odttemplate
: it automatically loads the `odttemplate` filter (page \pageref{sec:odttemplate}).
preprocess\_input
: compilation of the formats
supported by [Knitr](https://yihui.name/knitr/) (`.Rnw`, `.Rtex`, `.Rmd`, `.Rrst`)
and also Markdown and reStructuredText formats. It requires
[R](https://www.r-project.org/) + [Knitr](https://yihui.name/knitr/)
installation, it requires also [Pandoc](https://pandoc.org/) for formats based on Markdown or
reStructuredText.
staticsite
: build the document in a form suitable for static site generators like [Jekyll](https://jekyllrb.com/).
tidy
: clean the `HTML` files using the `tidy` command.
# Build files
\label{sec:buildfiles}
`make4ht` supports build files. These are `Lua` scripts that can adjust
the build process. They can request external applications like `BibTeX` or `Makeindex`,
pass options to the commands, modify the image conversion process, or post-process the
generated files.
`make4ht` tries to load a default build file named as `filename + .mk4 extension`.
It is possible to select a different build file with `-e` or `--build-file` command line
option.
Sample build file:
Make:htlatex()
Make:match("html$", "tidy -m -xml -utf8 -q -i ${filename}")
`Make:htlatex()` is a preconfigured command for calling \LaTeX\ with the `tex4ht.sty` package
loaded. In this example, it will be executed only once. After the
compilation, the `tidy` command is executed on the output `HTML` files.
Note that it is not necessary to call `tex4ht` and `t4ht` commands explicitly in the
build file, they are called automatically.
## User commands
It is possible to add more commands like `Make:htlatex` using the `Make:add` command:
Make:add("name", "command", {settings table}, repetition)
This defines the `name` command, which can be then executed using `Make:name()`
command in the build file.
The `name` and `command` parameters are required, the rest of the parameters are optional.
The defined command receives a table with settings as a parameter at the call time.
The default settings are provided by `make4ht`. Additional settings can be
declared in the `Make:add` commands, user can also override the default settings
when the command is executed in the build file:
Make:name({hello="world"})
More information about settings, including the default settings provided by
`make4ht`, can be found in section \ref{sec:settings} on page
\pageref{sec:settings}.
### The `command` function
\label{sec:commandfunction}
The `command` parameter can be either a string template or function:
Make:add("text", "echo hello, input file: ${input}")
The template can get a variable value from the parameters table using a
`${var_name}` placeholder. Templates are executed using the operating system, so
they should invoke existing OS commands.
### The `settings table` table
The `settings table` parameter is optional. If it is present, it should be
a table with new settings available in the command. It can also override the default
`make4ht` settings for the defined command.
-- Define the `sample_function` command. The function receives the settings
-- table (default settings merged with the ones declared below) and prints
-- every key/value pair.
Make:add("sample_function", function(params)
  for k, v in pairs(params) do
    -- tostring keeps the concatenation valid for non-string values
    print(k..": "..tostring(v))
  end
end, {custom="Hello world"})
### Repetition
The `repetition` parameter specifies the maximum number of executions of the
particular command. This is used for instance for `tex4ht` and `t4ht`
commands, as they should be executed only once in the compilation. They would
be executed multiple times when they are included in the build file, as they
are called by `make4ht` by default. Because these commands allow only one
`repetition`, the second execution is blocked.
### Expected exit code
You can set the expected exit code from a command with a `correct_exit` key in the
settings table. The compilation will be terminated when the command returns a
different exit code.
Make:add("biber", "biber ${input}", {correct_exit=0})
Commands that execute lua functions can return the numerical values using the `return` statement.
This mechanism isn't used for \TeX, because it doesn't differentiate between fatal and non-fatal errors.
It returns the same exit code in all cases. Because of this, log parsing is used for a fatal error detection instead.
Error code value `1` is returned in the case of a fatal error, `0` is used
otherwise. The `Make.testlogfile` function can be used in the build file to
detect compilation errors in the TeX log file.
## Provided commands
`Make:htlatex`
: One call to the TeX engine with special configuration for loading of the `tex4ht.sty` package.
`Make:autohtlatex`
: Variant of `Make:htlatex` that automates the compilation of \LaTeX\ documents,
ensuring that the process is repeated until the output stabilizes or an error occurs.
`Make:clean`
: This command removes all generated files, including images, HTML files and
various auxiliary files, from the current directory. It keeps files whose
file names don't match the input file name. It is preferable to use `make4ht -m clean filename.tex`
to clean output files.
`Make:httex`
: Variant of `Make:htlatex` suitable for Plain \TeX.
`Make:latexmk`
: Use `Latexmk` for the document compilation. `tex4ht.sty` will be loaded automatically.
`Make:tex4ht`
: Process the `DVI` file and create output files.
`Make:t4ht`
: Create the CSS file and generate images.
`Make:biber`
: Process bibliography using the `biber` command.
`Make:pythontex`
: Process the input file using `pythontex`.
`Make:bibtex`
: Process bibliography using the `bibtex` command.
`Make:xindy`
: Generate index using Xindy index processor.
`Make:makeindex`
: Generate index using the Makeindex command.
`Make:xindex`
: Generate index using the Xindex command.
## File matches
\label{sec:postprocessing}
Another type of action that can be specified in the build file is
`Make:match`. It can be used to post-process the generated files:
Make:match("html$", "tidy -m -xml -utf8 -q -i ${filename}")
The above example will clean all output `HTML` files using the `tidy` command.
The `Make:match` action tests output filenames using a `Lua` pattern matching function.
It executes a command or a function, specified in the second argument, on files
whose filenames match the pattern.
The commands to be executed can be specified as strings. They can contain
`${var_name}` placeholders, which are replaced with corresponding variables
from the `settings` table. The templating system was described in
subsection \ref{sec:commandfunction}. There is an additional variable
available in this table, called `filename`. It contains the name of the current
output file.
If a function is used instead, it will get two parameters. The first one is the
current filename, the second one is the `settings` table.
Make:match("html$", function(filename, settings)
print("Post-processing file: ".. filename)
print("Available settings")
for k,v in pairs(settings) do
print(k,v)
end
return true
end)
Multiple post-processing actions can be executed on each filename. The Lua
action functions can return an exit code. If the exit code is false, the execution
of the post-processing chain for the current file will be terminated.
### Filters
\label{sec:filters}
To make it easier to post-process the generated files using the `match`
actions, `make4ht` provides a filtering mechanism thanks to the
`make4ht-filter` module.
The `make4ht-filter` module returns a function that can be used for the filter
chain building. Multiple filters can be chained into a pipeline. Each filter
can modify the string that is passed to it from the previous filters. The
changes are then saved to the processed file.
Several built-in filters are available, it is also possible to create new ones.
Example that uses only the built-in filters:
local filter = require "make4ht-filter"
local process = filter{"cleanspan", "fixligatures", "hruletohr"}
Make:htlatex()
Make:match("html$",process)
The `filter` function also accepts functions as arguments. Such a function
takes the file contents as a parameter and returns the modified contents.
Example with custom filter:
local filter = require "make4ht-filter"
local changea = function(s) return s:gsub("a","z") end
local process = filter{"cleanspan", "fixligatures", changea}
Make:htlatex()
Make:match("html$",process)
In this example, spurious span elements are joined, ligatures are decomposed,
and then all letters "a" are replaced with "z" letters.
Built-in filters are the following:
cleanspan
: clean spurious span elements when accented characters are used
cleanspan-nat
: alternative clean span filter, provided by Nat Kuhn
fixligatures
: decompose ligatures to base characters
hruletohr
: `\hrule` commands are translated to series of underscore characters
by \TeX4ht, this filter translates these underscores to `<hr>` elements
entities
: convert prohibited named entities to numeric entities (only
`&nbsp;` currently).
fix-links
: replace colons in local links and `id` attributes with underscores. Some
cross-reference commands may produce colons in internal links, which results in
a validation error.
mathjaxnode
: (**deprecated**, use `mjcli` extension instead) Old information: use [mathjax-node-page](https://github.com/pkra/mathjax-node-page/) to
convert from MathML code to HTML + CSS or SVG. See [the available
settings](#mathjaxsettings).
mjcli
: use [mjcli](https://github.com/michal-h21/mjcli) to convert math in MathML or \LaTeX\
format to plain HTML + CSS. See [the available settings](#mathjaxsettings).
odttemplate
: use styles from another `ODT` file serving as a template in the current
document. It works for the `styles.xml` file in the `ODT` file. During
the compilation, this file is named as `\jobname.4oy`.
\label{sec:odttemplate}
staticsite
: create HTML files in a format suitable for static site generators such as [Jekyll](https://jekyllrb.com/)
svg-height
: some SVG images produced by `dvisvgm` seem to have wrong dimensions. This filter
tries to set the correct image size.
### DOM filters
DOM filters are variants of filters that use the
[`LuaXML`](https://ctan.org/pkg/luaxml) library to modify
directly the XML object. This enables more powerful
operations than the regex-based filters from the previous section.
Example:
local domfilter = require "make4ht-domfilter"
local process = domfilter {"joincharacters"}
Make:match("html$", process)
Available DOM filters:
aeneas
: [Aeneas](https://www.readbeyond.it/aeneas/) is a tool for automagical synchronization of text and audio.
This filter modifies the HTML code to support synchronization.
booktabs
: fix lines produced by the `\cmidrule` command provided by the Booktabs package.
collapsetoc
: collapse table of contents to contain only top-level sectioning level and sections on the current page.
fixinlines
: put all inline elements which are direct children of the `<body>` elements to a paragraph.
idcolons
: replace the colon (`:`) character in internal links and `id` attributes. They cause validation issues.
inlinecss
: remove CSS rules that target elements with unique attributes, such as color boxes, table rules, or inline math pictures,
and insert their properties as an inline `style` attribute in the HTML document.
joincharacters
: join consecutive `<span>` or `<mn>` elements. This DOM filter supersedes the `cleanspan` filter.
joincolors
: many `<span>` elements with unique `id` attributes are created when \LaTeX\ colors are being used in the document.
A CSS rule is added for each of these elements, which may result in
substantial growth of the CSS file. This filter replaces these rules with a
common one for elements with the same color value. See also the `inlinecss` DOM filter and extension, which provides an
alternative using inline styles.
odtfonts
: fix styles for fonts that were wrongly converted by `Xtpipes` in the ODT format.
odtimagesize
: set correct dimensions for images in the ODT format. It is no longer used, as the dimensions are set by TeX4ht itself.
odtpartable
: resolve tables nested inside paragraphs, which is invalid in the ODT format.
tablerows
: remove spurious rows from HTML tables.
mathmlfixes
: fix common issues for MathML.
sectionid
: create `id` attribute for HTML sectioning elements derived from the section
title. It also updates links to these sections. Use the `notoc` command line
option to prevent that.
t4htlinks
: fix hyperlinks in the ODT format.
## Image conversion
\label{sec:imageconversion}
It is possible to convert parts of the \LaTeX\ input as pictures. It can be used
for preserving the appearance of math or diagrams, for example.
These pictures are stored in a special `DVI` file, which can be processed by
a `DVI` to image command, such as `dvipng` or `dvisvgm`.
This conversion is normally configured in the `tex4ht.env` file. This file
is system dependent and it has quite an unintuitive syntax.
The configuration is processed by the `t4ht` application and the conversion
command is called for all pictures.
It is possible to disable `t4ht` image processing and configure image
conversion in the build file using the `image` action:
Make:image("png$",
"dvipng -bg Transparent -T tight -o ${output} -pp ${page} ${source}")
`Make:image` takes two parameters, a `Lua` pattern to match the image name, and
the action.
Action can be either a string template with the conversion command
or a function that takes a table with parameters as an argument.
There are three parameters:
- `output` - output image filename
- `source` - `DVI` file with the pictures
- `page` - page number of the converted image
## The `mode` variable
The `mode` variable available in the build process contains
contents of the `--mode` command line option. It can be used to run some commands
conditionally. For example:
if mode == "draft" then
Make:htlatex{}
else
Make:htlatex{}
Make:htlatex{}
Make:htlatex{}
end
In this example (which is the default configuration used by `make4ht`),
\LaTeX\ is called only once when `make4ht` is called with the `draft` mode:
make4ht -m draft filename
## The `settings` table
\label{sec:settings}
It is possible to access the parameters outside commands, file matches
and image conversion functions. For example, to convert the document to
the `OpenDocument Format (ODT)`, the following settings can be used. They are
based on the `oolatex` command:
settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",ooffice"
settings.tex4ht_par = settings.tex4ht_par .. " ooffice/! -cmozhtf"
settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes -coo "
(Note that it is possible to use the `--format odt` option
which is superior to the previous code. This example is intended just as an
illustration)
There are some functions to simplify access to the settings:
`set_settings{parameters}`
: overwrite settings with values from a passed table
`settings_add{parameters}`
: add values to the current settings
`filter_settings "filter name" {parameters}`
: set settings for a filter
`get_filter_settings(name)`
: get settings for a filter
For example, it is possible to simplify the sample from the previous code listings:
settings_add {
tex4ht_sty_par =",ooffice",
tex4ht_par = " ooffice/! -cmozhtf",
t4ht_par = " -cooxtpipes -coo "
}
Settings for filters and extensions can be set using `filter_settings`:
filter_settings "test" {
hello = "world"
}
These settings can be retrieved in the extensions and filters using the `get_filter_settings` function:
function test(input)
local options = get_filter_settings("test")
print(options.hello)
return input
end
### Default settings
The default parameters are the following:
`htlatex`
: used \TeX\ engine
`input`
: content of `\jobname`, see also the `tex_file` parameter.
`interaction`
: interaction mode for the \TeX\ engine. The default value is `batchmode` to
suppress user input on compilation errors. It also suppresses most of the \TeX\
compilation log output. Use the `errorstopmode` for the default behavior.
`tex_file`
: input \TeX\ filename
`latex_par`
: command line parameters to the \TeX\ engine
`packages`
: additional \LaTeX\ code inserted before `\documentclass`.
Useful for passing options to packages used in the document or to load additional packages.
`tex4ht_sty_par`
: options for `tex4ht.sty`
`tex4ht_par`
: command line options for the `tex4ht` command
`t4ht_par`
: command line options for the `t4ht` command
`outdir`
: the output directory
`correct_exit`
: expected `exit code` from the command. The compilation will be terminated
if the exit code of the executed command has a different value.
`auto_extensions`
: table with extensions of auxiliary files that should be watched by the `Make:autohtlatex` command.
`max_compilations`
: maximum number of \LaTeX\ runs by the `Make:autohtlatex` command.
# `make4ht` configuration file {#configfile}
It is possible to globally modify the build settings using the configuration
file. It is a special version of a build file where the global settings can be set.
Common tasks for the configuration file can be a declaration of the new commands,
loading of the default filters or specification of a default build sequence.
One additional functionality not available in the build files are commands for
enabling and disabling of extensions.
## Location
The configuration file can be saved either in the
`$HOME/.config/make4ht/config.lua` file, or in the `.make4ht` file placed in
the current directory or its parent directories (up to the `$HOME` directory).
## Additional commands
There are two additional commands:
`Make:enable_extension(name)`
: require extension
`Make:disable_extension(name)`
: disable extension
## Example
The following example of the configuration file adds support for the `biber` command, requires
`common_domfilters` extension and requires MathML
output for math.
Make:add("biber", "biber ${input}")
Make:enable_extension "common_domfilters"
settings_add {
tex4ht_sty_par =",mathml"
}
<!--
# Development
## Custom filters
## New extensions
## How to add a new output format
-->
# List of available settings for filters and extensions.
These settings may be set using `filter_settings` function in a build file or in the `make4ht` configuration file.
## Compilation commands
## The `autohtlatex` command
auto_extensions
: table with extensions of auxiliary files that should be watched by the `Make:autohtlatex` command.
max_compilations
: maximum number of \LaTeX\ runs by the `Make:autohtlatex` command.
## Indexing commands
The indexing commands (like `xindy` or `makeindex`) use some common settings.
idxfile
: name of the `.idx` file. Default value is `\jobname.idx`.
indfile
: name of the `.ind` file. Default value is the same as `idxfile` with the file extension changed to `.ind`.
Each indexing command can have some additional settings.
### The `xindy` command
encoding
: text encoding of the `.idx` file. Default value is `utf8`.
language
: index language. Default language is English.
modules
: table with names of additional `Xindy` modules to be used.
### The `makeindex` command
options
: additional command line options for the Makeindex command.
### The `xindex` command
options
: additional command line options for the Xindex command.
language
: document language
## The `tidy` extension
options
: command line options for the `tidy` command. Default value is `-m -utf8 -w 512 -q`.
## The `collapsetoc` dom filter
`toc_query`
: CSS selector for selection of element that contains the table of contents.
`title_query`
: CSS selector for selecting all elements that contain the section ID attribute.
`toc_levels`
: table containing a hierarchy of classes used in TOC
`max_depth`
: set depth of displayed children TOC levels
Default values:
filter_settings "collapsetoc" {
toc_query = ".tableofcontents",
title_query = "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a",
max_depth = 1,
toc_levels = {
tocpart = 1,
toclikepart = 1,
tocappendix = 2,
toclikechapter = 2,
tocchapter = 2,
tocsection = 3,
toclikesection = 3,
tocsubsection = 4,
toclikesubsection = 4,
tocsubsubsection = 5,
toclikesubsubsection = 5,
tocparagraph = 6,
toclikeparagraph = 6,
tocsubparagraph = 7,
toclikesubparagraph = 7,
}
}
## The `copy_images` extension
extensions
: table with list of image extensions that should be processed.
img\_dir
: name of the output directory where images should be stored
Default values:
filter_settings "copy_images" {
extensions = {"png", "jpg", "jpeg", "svg"},
img_dir = ""
}
## The `fixinlines` dom filter
inline\_elements
: table of inline elements that shouldn't be direct descendants of the `body` element. The element names should be table keys, the values should be true.
Example
filter_settings "fixinlines" {inline_elements = {a = true, b = true}}
## The `joincharacters` dom filter
charclasses
: table of elements that should be concatenated when two or more of such elements with the same value of the `class` attribute are placed one after another.
Example
filter_settings "joincharacters" { charclasses = { span=true, mn = true}}
## The `mjcli` filter and extension {#mathjaxsettings}
`mjcli` detects whether to use MathML or \LaTeX\ input by use of the `mathjax` option for `make4ht`. By default, it uses MathML. \LaTeX\ input can be required using:
make4ht -f html5+mjcli filename.tex "mathjax"
### Available settings
options
: command line options for the `mjcli` command.
Example
filter_settings "mjcli" {
options="--svg"
}
cssfilename
: the `mjcli` command puts some CSS code into the HTML pages. The `mjcli` filter extracts this information and saves it to a standalone CSS file. Default name of this CSS file is `${input}-mathjax.css`
fontdir
: directory with MathJax font files. This option enables the use of local fonts, which
is useful in the conversion to ePub, for example. The font directory should be
sub-directory of the current directory. Only \TeX\ font is supported at the moment.
Example
filter_settings "mjcli" {
fontdir="fonts/TeX/woff/"
}
## The `staticsite` filter and extension
site\_root
: directory where generated files should be copied.
map
: a hash table where keys contain patterns that match filenames and values contain
destination directory for the matched files. The destination directories are
relative to the `site_root` (it is possible to use `..` to switch to a parent
directory).
file\_pattern
: a pattern used for filename generation. It is possible to use string templates
and format strings for `os.date` function. The default pattern `%Y-%m-%d-${input}`
creates names in the form of `YYYY-MM-DD-file_name`.
header
: table with variables to be set in the YAML header in HTML files. If the
table value is a function, it is executed with current parameters and HTML page
DOM object as arguments.
remove\_maketitle
: the `staticsite` extension removes text produced by the `\maketitle` command by default. Set this
option to `false` to disable the removal.
Example:
-- set the environmental variable 'blog_root' with path to
-- the directory that should hold the generated HTML files
local outdir = os.getenv "blog_root"
filter_settings "staticsite" {
site_root = outdir,
map = {
[".css$"] = "/css/"
},
header = {
layout="post",
date = function(parameters, dom)
return os.date("!%Y-%m-%d %T", parameters.time)
end
}
}
## The `dvisvgm_hashes` extension
options
: command line options for Dvisvgm. The default value is `-n --exact -c ${scale},${scale}`.
cpu_cnt
: the number of processor cores used for the conversion. The extension tries to detect the available cores automatically by default.
make_command
: variant of the `make` command used for the parallel conversion of large
number of pages. It receives two variables, `process_count` and `make_file`.
Default value is "make -j ${process_count} -f ${make_file}".
test_make_command
: command that tests if the selected variant of the `make` command exists. Default value is `make -v`.
parallel_size
: the number of pages used in each Dvisvgm call. The extension detects changed
pages in the DVI file and constructs multiple calls to Dvisvgm with only changed
pages.
scale
: amount of SVG scaling. The default value is 1.4.
## The `odttemplate` filter and extension
template
: filename of the template `ODT` file
`odttemplate` can also get the template filename from the `odttemplate` option from `tex4ht_sty_par` parameter. It can be set using the following command line call:
make4ht -f odt+odttemplate filename.tex "odttemplate=template.odt"
## The `aeneas` filter
skip\_elements
: List of CSS selectors that match elements that shouldn't be processed. Default value: `{ "math", "svg"}`.
id\_prefix
: prefix used in the ID attribute forming.
sentence\_match
: Lua pattern used to match a sentence. Default value: `"([^%.^%?^!]*)([%.%?!]?)"`.
## The `make4ht-aeneas-config` package
Companion for the `aeneas` DOM filter is the `make4ht-aeneas-config` plugin. It
can be used to write the Aeneas configuration file or execute Aeneas on the
generated HTML files.
Available functions:
write\_job(parameters)
: write Aeneas job configuration to the `config.xml` file. See the [Aeneas
documentation](https://www.readbeyond.it/aeneas/docs/clitutorial.html#processing-jobs)
for more information about jobs.
execute(parameters)
: execute Aeneas.
process\_files(parameters)
: process the audio and generated subtitle files.
By default, a `SMIL` file is created. It is assumed that there is an audio file
in the `mp3` format, named as the \TeX\ file. It is possible to use different formats
and filenames using mapping.
The configuration options can be passed directly to the functions or set using
`filter_settings "aeneas-config" {parameters}` function.
### Available parameters
lang
: document language. It is inferred from the HTML file, so it is not necessary to set it.
map
: mapping between HTML, audio and subtitle files. More info below.
text\_type
: type of input. The `aeneas` DOM filter produces an `unparsed` text type.
id\_sort
: sorting of id attributes. The default value is `numeric`.
id\_regex
: regular expression to parse the id attributes.
sub\_format
: generated subtitle format. The default value is `smil`.
### Additional parameters for the job configuration file
- description
- prefix
- config\_name
- keep\_config
It is possible to generate multiple HTML files from the \LaTeX\ source. For
example, `tex4ebook` generates a separate file for each chapter or section. It is
possible to set options for each HTML file, in particular names of the
corresponding audio files. This mapping is done using the `map` parameter.
Example:
filter_settings "aeneas-config" {
map = {
["sampleli1.html"] = {audio_file="sample.mp3"},
["sample.html"] = false
}
}
Table keys are the configured filenames. It is necessary to insert them as
`["filename.html"]`, because of Lua syntax rules.
This example maps audio file `sample.mp3` to a section subpage. The main HTML
file, which may contain title and table of contents doesn't have a
corresponding audio file.
Filenames of the subfiles correspond to the chapter numbers, so they are not
stable when a new chapter is added. It is possible to request filenames
derived from the chapter titles using the `sec-filename` option for `tex4ht.sty`.
### Available `map` options
audio\_file
: the corresponding audio file
sub\_file
: name of the generated subtitle file
The following options are the same as their counterparts from the main parameters table and generally, don't need to be set:
- prefix
- file\_desc
- file\_id
- text\_type
- id\_sort
- id\_prefix
- sub\_format
### Full example
local domfilter = require "make4ht-domfilter"
local aeneas_config = require "make4ht-aeneas-config"
filter_settings "aeneas-config" {
map = {
["krecekli1.xhtml"] = {audio_file="krecek.mp3"},
["krecek.xhtml"] = false
}
}
local process = domfilter {"aeneas"}
Make:match("html$", process)
if mode == "draft" then
aeneas_config.process_files {}
else
aeneas_config.execute {}
end
# Troubleshooting
## Incorrect handling of command line arguments for `tex4ht`, `t4ht` or `latex`
Sometimes, you may get a similar error:
make4ht:unrecognized parameter: i
It may be caused by the following `make4ht` invocation:
$ make4ht hello.tex "customcfg,charset=utf-8" "-cunihtf -utf8" -d foo
The command line option parser is confused by mixing options for `make4ht` and
\TeX4ht\ in this case. It tries to interpret the `-cunihtf -utf8`, which are
options for the `tex4ht` command, as `make4ht` options. To fix that, try to
move the `-d foo` directly after the `make4ht` command:
$ make4ht -d foo hello.tex "customcfg,charset=utf-8" "-cunihtf -utf8"
Another option is to add a space before the `tex4ht` options:
$ make4ht hello.tex "customcfg,charset=utf-8" " -cunihtf -utf8" -d foo
The former way is preferable, though.
## Table of Contents points to a wrong destination
The `sectionid` DOM filter creates better link destinations for sectioning commands.
In some cases, for example if you use Pandoc, the document may already contain the
link destination with the same name. In such cases the original destination is preserved
in the file. In this case links to the section will point to that place, instead of
correct destination in the section. This may happen for example if you use Pandoc for
the Markdown to \LaTeX\ conversion. It creates `\hypertarget` commands that are placed
just before the section. The links point to that place, instead of the actual section.
In this case you don't want to update links. Use the `notoc` option to prevent that.
## Filenames containing spaces
The `tex4ht` command cannot handle filenames containing spaces. To fix this issue, `make4ht`
replaces spaces in the input filenames with underscores. The generated
XML filenames use underscores instead of spaces as well.
## Filenames containing non-ASCII characters
The `odt` output doesn't support accented filenames, it is best to stick to ASCII characters in filenames.
# License
Permission is granted to copy, distribute and/or modify this software
under the terms of the LaTeX Project Public License, version 1.3.
================================================
FILE: config.cfg
================================================
% TeX4ht configuration used when building the make4ht documentation as HTML.
% Everything between \Preamble and \EndPreamble configures the conversion.
\Preamble{xhtml}
% this was fixed in the upstream, but Debian used in the Docker container
% doesn't contain it yet
% Replacement \hypertarget: emits an empty \Link/\EndLink pair that receives #1
% (presumably the destination name -- confirm against the TeX4ht \Link
% documentation) and then typesets the text #2.
\def\hypertarget#1#2{\Link{}{#1}\EndLink#2}
% Extra markup for the HTML head: the Hypothes.is embed script, Open Graph
% metadata and the canonical URL of the published documentation.
\Configure{@HEAD}{\HCode{\Hnewline<script src="https://hypothes.is/embed.js" async></script>}}
\Configure{@HEAD}{\HCode{\Hnewline<meta property="og:title" content="\LikeRef{TITLE+}" />}}
\Configure{@HEAD}{\HCode{\Hnewline<meta property="og:author" content="Michal Hoftich" />}}
\Configure{@HEAD}{\HCode{\Hnewline<meta property="og:type" content="website" />}}
\Configure{@HEAD}{\HCode{\Hnewline<meta property="og:url" content="https://www.kodymirus.cz/make4ht/make4ht-doc.html" />}}
\Configure{@HEAD}{\HCode{\Hnewline<link rel="canonical" href="https://www.kodymirus.cz/make4ht/make4ht-doc.html" /> }}
% Page layout: a centered text column of at most 65 characters.
\Css{body{
font-size:18px;
font-size-adjust: 0.5;
width:65ch;
max-width:100\%;
margin: 1em auto;
}}
% Running text: line height derived from the x-height, justified and hyphenated.
\Css{p,li,dt{
line-height: calc(1ex / 0.32);
text-align: justify;
hyphens: auto;
}}
\Css{div.center p {text-align:center;}}
% Define the TITLE+ tag that the og:title meta element above reads through
% \LikeRef{TITLE+}. The title text is hard-coded here, it is not taken from \@title.
\begin{document}
\makeatletter
\Tag{TITLE+}{The make4ht build system}
\makeatother
% Typeset the TeX and LaTeX logos as plain text.
\def\TeX{TeX}
\def\LaTeX{LaTeX}
\EndPreamble
================================================
FILE: domfilters/make4ht-aeneas.lua
================================================
-- DOM filter for Aeneas, a tool for automatic text and audio synchronization
-- https://github.com/readbeyond/aeneas
-- It adds elements with id attributes for text chunks, in sentence length.
--
--
local cssquery = require "luaxml-cssquery"
local mkutils = require "mkutils"
local log = logging.new "aeneas"
-- Default list of CSS selectors. Elements matching any of them are skipped
-- by the filter.
local skip_elements = { "math", "svg"}
-- Default prefix of the generated id attributes; a running number is appended
-- to it. The id attribute format is configurable.
-- Aeneas must be told to search for the ID pattern using the
-- is_text_unparsed_id_regex option in the Aeneas configuration file.
local id_prefix = "ast"
-- Lua pattern to match a sentence. It should capture two groups: the first is
-- the actual sentence, the second is an optional punctuation mark.
-- NOTE(review): inside the first set only the leading `^` negates; the later
-- `^` characters are literal, so a caret in the text also ends a chunk.
-- Confirm whether that is intended.
local sentence_match = "([^%.^%?^!]*)([%.%?!]?)"
-- Build a CSS query object from a list of selectors.
-- skips: array of CSS selector strings
-- Returns the query object with every selector registered, in list order.
local function prepare_selectors(skips)
  local query = cssquery()
  for _, css_selector in ipairs(skips) do
    query:add_selector(css_selector)
  end
  return query
end
-- Store values needed by the Aeneas configuration in the "aeneas-config"
-- filter settings.
-- dom:   parsed document; its root html element is asked for the `lang` attribute
-- saves: optional table of values to store (the caller passes `id_prefix`);
--        the detected language is added to it as `lang`
--
-- The language is only detected when it has not been configured already.
-- Both the documented `lang` key and the older `language` key are honored.
-- The values from `saves` are stored in every case.
local function save_config(dom, saves)
  local saves = saves or {}
  -- read from the same settings table this function writes to
  local config = get_filter_settings "aeneas-config"
  if not (config.lang or config.language) then
    -- a document without the html element simply has no detectable language
    local html = dom:query_selector("html")[1]
    saves.lang = html and html:get_attribute("lang")
  end
  filter_settings "aeneas-config" (saves)
end
-- Create a span element that wraps one sentence.
-- id:     value of the id attribute of the new element
-- parent: node used to create the new element
-- text:   sentence text stored in the span
-- Returns the new element.
local function make_span(id, parent, text)
  local span = parent:create_element("span", {id = id})
  -- the flag prevents the filter from processing this node again
  span.processed = true
  span:add_child_node(span:create_text_node(text))
  return span
end
-- Compose an id from the prefix and the running counter.
-- Returns the id string and the counter value to use for the next id.
local function make_id(lastid, id_prefix)
  return id_prefix .. lastid, lastid + 1
end
-- Split `text` into sentences and wrap each of them in a span with its own id.
-- parent:    node used to create the span elements
-- text:      text to split using the `sentence_match` pattern
-- lastid:    running id counter
-- id_prefix: prefix of the generated ids
-- Returns the list of created spans and the updated counter.
local function make_ids(parent, text, lastid, id_prefix)
  local spans = {}
  for sentence, mark in text:gmatch(sentence_match) do
    -- an id is taken for every match, including the empty ones skipped below
    local span_id
    span_id, lastid = make_id(lastid, id_prefix)
    local content = sentence .. mark
    -- the pattern can match an empty string; no span is made for it
    if content ~= "" then
      spans[#spans + 1] = make_span(span_id, parent, content)
    end
  end
  return spans, lastid
end
-- Tell whether the element matches at least one selector of the query object.
-- el:  DOM element to test
-- css: query object holding the skipped selectors
local function is_skipped(el, css)
  return #css:match_querylist(el) > 0
end
-- DOM filter entry point: give text chunks in the document body id attributes.
-- dom: document to process
-- par: optional table with `skip_elements`, `id_prefix` and `sentence_match`
-- Each option is taken from the "aeneas" filter settings first, then from
-- `par`, then from the module defaults.
-- Returns the `dom` object that was passed in.
local function aeneas(dom, par)
local par = par or {}
-- running counter used to number the generated ids
local id = 1
local options = get_filter_settings "aeneas"
local skip_elements = options.skip_elements or par.skip_elements or skip_elements
local id_prefix = options.id_prefix or par.id_prefix or id_prefix
local skip_object = prepare_selectors(skip_elements)
-- not declared local here: this updates the module-level pattern that make_ids reads
sentence_match = options.sentence_match or par.sentence_match or sentence_match
local body = dom:query_selector("body")[1]
-- process only the document body
if not body then return dom end
-- save information for aeneas_config
save_config(dom, {id_prefix = id_prefix})
-- NOTE(review): the meaning of the callback return values (false / el / nothing)
-- is defined by traverse_elements in LuaXML -- confirm against its documentation
body:traverse_elements(function(el)
-- skip disabled elements
if(is_skipped(el, skip_object)) then return false end
-- skip already processed elements
if el.processed then return false end
local newchildren = {} -- this will contain the new elements
local children = el:get_children()
local first_child = children[1]
-- if the element contains only text, doesn't already have an id attribute and the text is short
-- (less than 20 bytes), the id is set directly on that element.
if #children == 1
and first_child:is_text()
and not el:get_attribute("id")
and string.len(first_child._text) < 20
and el._attr
then
local idtitle
idtitle, id = make_id(id, id_prefix)
log:debug(el._name, first_child._text)
el:set_attribute("id", idtitle)
return el
end
for _, child in ipairs(children) do
-- process only text that contains at least one letter
if child:is_text() and child._text:match("%a+") then
local newnodes
-- the text node is replaced by one span per sentence
newnodes, id = make_ids(child, child._text, id, id_prefix)
for _, node in ipairs(newnodes) do
table.insert(newchildren, node or {})
end
else
-- insert the current processing element to the new element list
-- if it isn't only text
table.insert(newchildren, child or {})
end
end
-- replace element children with the new ones
if #newchildren > 0 then
el._children = {}
for _, c in ipairs(newchildren) do
el:add_child_node(c)
end
end
end)
return dom
end
return aeneas
================================================
FILE: domfilters/make4ht-booktabs.lua
================================================
-- Collect information about table rows that contain a cmidrule.
-- current_rows: list of table row elements
-- Returns a table indexed by row number. Each entry holds the attributes of
-- the rule cell, its column position, its column span and `continue`: the
-- number of the row that started the current run of consecutive rule rows,
-- or false for the first row of a run. The `length` field holds the row count.
local function find_cmidrules(current_rows)
  local rules = {}
  -- row number where the current run of rule rows started, false outside a run
  local chain_start = false
  for row_index, tr in ipairs(current_rows) do
    local position = 1
    local has_rule = false
    for _, cell in ipairs(tr:query_selector("td")) do
      -- keep track of the columns, cells can span several of them
      local width = tonumber(cell:get_attribute("colspan")) or 1
      if #cell:query_selector(".cmidrule") > 0 then
        -- the contents of the rule cell are not needed anymore
        cell._children = {}
        -- only one cmidrule is kept per row
        rules[row_index] = {
          attributes = cell._attr,
          column = position,
          span = width,
          continue = chain_start,
        }
        has_rule = true
      end
      position = position + width
    end
    -- a row with a rule starts or extends the run, any other row ends it
    chain_start = has_rule and (chain_start or row_index)
  end
  -- save the row count, so the rows can be looped over sequentially later
  rules.length = #current_rows
  return rules
end
-- Move a cmidrule from row `i` to the row that started the current run of rules.
-- current_rows: list of table row elements
-- match:        rule information; `continue` is the number of the target row,
--               `attributes` are the attributes of the rule cell
-- newspan:      number of columns between the previous rule and this one
-- i:            number of the row that held the rule; it is removed
local function update_row(current_rows, match, newspan, i)
  local target = current_rows[match.continue]
  -- fill the gap between the previous rule and this one with a spanning cell;
  -- it carries only `colspan`, the former debugging attribute `span` is not
  -- a valid attribute of a table cell
  if newspan > 0 then
    local spacer = target:create_element("td", {colspan = tostring(newspan)})
    target:add_child_node(spacer)
  end
  -- insert the rule cell itself
  local rule_cell = target:create_element("td", match.attributes)
  target:add_child_node(rule_cell)
  -- the original row is no longer necessary
  current_rows[i]:remove_node()
end
-- Merge rules placed on consecutive rows into the first row of their run.
-- matched_rows: result of find_cmidrules
-- current_rows: list of table row elements
local function join_rows(matched_rows, current_rows)
  for row_no = 1, matched_rows.length do
    local rule = matched_rows[row_no]
    -- only rules that continue a run started on an earlier row are processed
    local anchor_no = rule and rule.continue
    if anchor_no then
      local anchor = matched_rows[anchor_no]
      -- number of columns between the end of the previous rule and this one
      local gap = rule.column - (anchor.column + anchor.span)
      update_row(current_rows, rule, gap, row_no)
      -- the first row of the run now ends with the current rule
      anchor.column = rule.column
      anchor.span = rule.span
    end
  end
end
-- DOM filter entry point: join cmidrules of every table in the document.
-- Returns the `dom` object that was passed in.
local function process_booktabs(dom)
  for _, tbl in ipairs(dom:query_selector("table")) do
    local rows = tbl:query_selector("tr")
    join_rows(find_cmidrules(rows), rows)
  end
  return dom
end
return process_booktabs
================================================
FILE: domfilters/make4ht-collapsetoc.lua
================================================
-- mini TOC support for make4ht
local domobject = require "luaxml-domobject"
local filter = require "make4ht-filter"
local log = logging.new "collapsetoc"
-- the local was misspelled as `mktuils`, so the code in this file that calls
-- `mkutils.*` silently depended on a global of that name being available
local mkutils = require "mkutils"

-- assign levels to entries in the .4tc file
-- (lower number = higher position in the sectioning hierarchy)
local toc_levels = {
  tocpart = 1,
  toclikepart = 1,
  tocappendix = 2,
  toclikechapter = 2,
  tocchapter = 2,
  tocsection = 3,
  toclikesection = 3,
  tocsubsection = 4,
  toclikesubsection = 4,
  tocsubsubsection = 5,
  toclikesubsubsection = 5,
  tocparagraph = 6,
  toclikeparagraph = 6,
  tocsubparagraph = 7,
  toclikesubparagraph = 7,
}

-- number of child levels to be kept
-- the depth of 1 ensures that only direct children of the current sectioning command
-- will be kept in TOC
local max_depth = 1
-- debugging helper: write the TOC tree to the debug log, one node per call,
-- prefixed by a number of spaces that grows by two with each nesting level
local function print_tree(tree, level)
  local indent = level or 0
  local label = string.rep(" ", indent) .. (tree.type or "root")
  log:debug(label, tree.id)
  for _, child in pairs(tree.children) do
    print_tree(child, indent + 2)
  end
end
-- Convert the flat list of parsed TOC entries to a tree structure.
--   tocentries:  list of tables with at least a numeric `level` field; each
--                entry gets `children` and `parent` fields assigned here
--   lowestlevel: smallest level used in tocentries; the root node gets
--                the level lowestlevel - 1
--   position:    optional, defaults to 1; only used for the early exit below,
--                the main loop always starts at the first entry
--   tree:        optional existing root node
-- Returns the root node (and `position` on the early exit).
local function make_toc_tree(tocentries, lowestlevel, position, tree)
local position = position or 1
local tree = tree or {
level = lowestlevel - 1,
children = {}
}
-- the top of the stack is the most recently inserted node
local stack = {tree}
if position > #tocentries then return tree, position end
-- loop over TOC entries and make a tree
for i = 1, #tocentries do
-- initialize new child
local element = tocentries[i]
element.children = element.children or {}
local parent = stack[#stack]
local level_diff = element.level - parent.level
if level_diff == 0 then -- entry is sibling of parent
-- current parent is sibling of the current element, true parent is
-- sibling's parent
parent = parent.parent
-- we must replace sibling element with the current element in stack
-- so the child elements get correct parent
table.remove(stack)
table.insert(stack, element)
elseif level_diff > 0 then -- entry is child of parent
-- the element is pushed once for every level it is deeper than the parent,
-- so skipped levels still occupy a slot in the stack
for x = 1, level_diff do
table.insert(stack, element)
end
else
-- we must remove levels from the stack to get the correct parent
-- level_diff is negative here, so this loop runs 2 - level_diff times;
-- the last node popped becomes the parent
for x =1 , level_diff, -1 do
if #stack > 0 then
parent = table.remove(stack)
end
end
-- we must reinsert parent back to stack, place the current element to stack too
table.insert(stack, parent)
table.insert(stack, element)
end
table.insert(parent.children, element)
element.parent = parent
end
-- dump the resulting tree to the debug log
print_tree(tree)
return tree
end
-- Collect the `id` attributes of all elements matched by the CSS selector
-- `header_levels` in the current page. Elements without id are skipped.
-- Returns a list of ids in document order.
local function find_headers(dom, header_levels)
  local ids = {}
  local headers = dom:query_selector(header_levels)
  for _, header in ipairs(headers) do
    local id = header:get_attribute("id")
    if id then
      table.insert(ids, id)
    end
  end
  return ids
end
-- Process the list of ids found on the current page and find TOC entries
-- that should be kept: the ids themselves, their siblings, children (up to
-- max_depth levels), all parents with their siblings, and the top level.
--   ids:  list of section ids found in the current HTML file
--   tree: root of the TOC tree; may be nil or an empty table when the TOC
--         could not be parsed
-- Returns a set: table indexed by id with `true` values.
local function find_toc_entries_to_keep(ids, tree)
  local tree = tree or {}
  -- all ids in the TOC tree that we want to keep are saved in this table
  local ids_to_keep = {}

  -- mark an entry as kept; entries without id (like the root) are ignored
  local function keep(el)
    if el.id then ids_to_keep[el.id] = true end
  end

  -- find the node with the given id in the TOC tree, false if it isn't there
  local function find_id(id, node)
    if node.id == id then return node end
    for _, child in pairs(node.children or {}) do
      local found = find_id(id, child)
      if found then return found end
    end
    return false
  end

  -- always keep top level of the hierarchy
  -- the tree has no children at all when the .4tc file wasn't found
  local function keep_toplevel(root)
    for _, el in ipairs(root.children or {}) do
      keep(el)
    end
  end

  -- we want to keep children in the TOC hierarchy
  local function keep_children(element, depth)
    local depth = depth or 1
    -- max_depth can be overridden by the filter settings
    local depth_limit = tonumber(max_depth) or 1
    for _, el in pairs(element.children or {}) do
      keep(el)
      -- by default, we keep just direct children of the current sectioning element
      if depth < depth_limit then
        keep_children(el, depth + 1)
      end
    end
  end

  -- also keep all siblings
  local function keep_siblings(element)
    local parent = element.parent
    -- the root node has no parent, so it has no siblings either
    if not parent then return end
    for _, sibling in pairs(parent.children or {}) do
      keep(sibling)
    end
  end

  -- and of course, keep all parents
  local function keep_parents(element)
    local parent = element.parent
    if parent and parent.id then
      ids_to_keep[parent.id] = true
      -- we should keep siblings of all parents as well
      keep_siblings(parent)
      keep_parents(parent)
    end
  end

  -- always keep the top-level TOC hierarchy, even if we cannot find any
  -- sectioning element on the page
  keep_toplevel(tree)
  for _, id in ipairs(ids) do
    -- keep the current id
    ids_to_keep[id] = true
    local found_element = find_id(id, tree)
    if found_element then
      keep_children(found_element)
      keep_siblings(found_element)
      keep_parents(found_element)
    end
  end
  return ids_to_keep
end
-- Read the .4tc file of the current document and convert its entries to a
-- tree based on the sectioning level.
--   parameters: filter parameters; `input` is used to build the file name
--   toc_levels: table that maps TOC entry types to numeric levels
-- Returns the TOC tree, or an empty table when the file cannot be found.
local function parse_4tc(parameters, toc_levels)
  local tcfilename = mkutils.file_in_builddir(parameters.input .. ".4tc", parameters)
  if not mkutils.file_exists(tcfilename) then
    log:warning("Cannot find TOC: " .. tcfilename)
    return {}
  end
  local entries = {}
  -- we need to find the lowest level used in the TOC
  local lowestlevel = 999
  local f = io.open(tcfilename, "r")
  for line in f:lines() do
    -- entries look like: \doTocEntry\tocsubsection{1.2.2}{\csname a:TocLink\endcsname{5}{x5-60001.2.2}{QQ2-5-6}{aaaa}}{7}\relax
    -- we want to extract tocsubsection and x5-60001.2.2
    local toctype, id = line:match("\\doTocEntry\\(.-){.-}{.-{.-}{(.-)}")
    local level = toctype and toc_levels[toctype]
    if level then
      if level < lowestlevel then lowestlevel = level end
      entries[#entries + 1] = {type = toctype, id = id, level = level}
    elseif toctype then
      log:warning("Cannot find TOC level for: " .. toctype)
    end
  end
  f:close()
  local toc = make_toc_tree(entries, lowestlevel)
  return toc
end
-- Remove links from the TOC element that don't point to entries that should
-- be kept.
--   toc:         DOM element that contains the table of contents
--   matched_ids: set of ids to keep (table indexed by id)
local function remove_levels(toc, matched_ids)
  for _, link in ipairs(toc:query_selector("a")) do
    local href = link:get_attribute("href")
    -- find id in the href; <a> elements without href (plain anchors) have
    -- no target, so they are left untouched instead of raising an error
    local id = href and href:match("#(.+)")
    if id and not matched_ids[id] then
      -- toc links are in <span> elements that can contain the section number
      -- we must remove them too
      local parent = link:get_parent()
      if parent:get_element_name() == "span" then
        parent:remove_node()
      else
        -- if the parent node isn't <span>, remove at least the link itself
        link:remove_node()
      end
    end
  end
end
-- parsed TOC tree; it is parsed on the first call of the filter and reused
-- for all following HTML files. It used to be stored in an accidental
-- global variable named `toc`.
local toc

-- DOM filter entry point: keep only the part of the TOC that is relevant
-- for the sections contained in the current HTML file.
--   dom:        DOM object of the current HTML file
--   parameters: filter parameters; toc_query, title_query, max_depth and
--               toc_levels can override the filter settings
-- Returns the (modified) DOM object.
local function collapsetoc(dom, parameters)
  -- set options
  local par = parameters
  local options = get_filter_settings "collapsetoc"
  -- query to find the TOC element in DOM
  local toc_query = par.toc_query or options.toc_query or ".tableofcontents"
  -- query to select sectioning elements with id's
  local title_query = par.title_query or options.title_query or "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a"
  -- level of child levels to be kept in TOC
  max_depth = par.max_depth or options.max_depth or max_depth
  -- set level numbers for particular TOC entry types
  local user_toc_levels = par.toc_levels or options.toc_levels or {}
  -- join user's levels with default
  for k,v in pairs(user_toc_levels) do toc_levels[k] = v end
  -- parse the .4tc file to get TOC tree
  toc = toc or parse_4tc(parameters, toc_levels)
  -- find sections in the current html file
  local ids = find_headers(dom, title_query)
  log:debug("Ids", table.concat(ids, ","))
  local ids_to_keep = find_toc_entries_to_keep(ids, toc)
  local toc_dom = dom:query_selector(toc_query)[1]
  if toc_dom then
    remove_levels(toc_dom, ids_to_keep)
  else
    log:warning("Cannot find TOC element using query: " .. toc_query)
  end
  return dom
end

return collapsetoc
================================================
FILE: domfilters/make4ht-fixinlines.lua
================================================
-- default set of HTML inline elements (indexed by lowercase element name)
-- the key `a` was listed twice in this table, the duplicate has been removed
local inline_elements = {
  a=true,
  b=true,
  big=true,
  i=true,
  small=true,
  tt=true,
  abbr=true,
  acronym=true,
  cite=true,
  code=true,
  dfn=true,
  em=true,
  kbd=true,
  strong=true,
  samp=true,
  time=true,
  var=true,
  bdo=true,
  br=true,
  img=true,
  map=true,
  object=true,
  q=true,
  span=true,
  sub=true,
  sup=true,
  button=true,
  input=true,
  label=true,
  select=true,
  textarea=true,
}
-- DOM filter: wrap runs of inline elements, text and comments that are
-- direct children of <body> in <p> elements.
-- The set of inline elements can be overridden by the `inline_elements`
-- setting of the "fixinlines" filter. Returns the (modified) DOM object.
local function fix_inlines(obj)
  local settings = get_filter_settings "fixinlines"
  local inline_elements = settings.inline_elements or inline_elements
  local nodes = obj:get_path("html body")
  -- paragraph that collects the current run of inline content,
  -- nil when no paragraph is open
  local paragraph = nil

  -- element whose name contains "math" and that has display="inline"
  local function is_inline_math(node, name)
    return name:match(":?math") and node:get_attribute("display") == "inline"
  end

  obj:traverse_node_list(nodes, function(node)
    local node_type = node._type
    local is_text = node_type == "TEXT"
    local is_comment = node_type == "COMMENT"
    if not (node_type == "ELEMENT" or is_text or is_comment) then
      paragraph = nil
      return
    end
    local name = string.lower(node._name or "")
    if not (inline_elements[name] or is_text or is_comment or is_inline_math(node, name)) then
      -- close the current paragraph before new block element
      paragraph = nil
      return
    end
    if paragraph then
      -- paragraph already exists, move the node to it
      paragraph:add_child_node(obj:copy_node(node))
      node:remove_node()
    elseif not ((is_text and node._text:match("^%s+$")) or is_comment) then
      -- start new paragraph; nodes that contain only whitespace or comments
      -- and are placed before paragraph start are ignored
      paragraph = obj:create_element("p" )
      paragraph:add_child_node(obj:copy_node(node))
      node:replace_node(paragraph)
    end
  end)
  return obj
end

return fix_inlines
================================================
FILE: domfilters/make4ht-idcolons.lua
================================================
-- non-alphanumeric characters that can stay in ids
local allowed_chars = {
  ["-"] = true,
  ["."] = true
}

-- Replace every non-alphanumeric character in `id` with underscore, with the
-- exception of characters listed in allowed_chars.
-- Returns only the new string: string.gsub returns also the number of
-- matches, which used to leak to callers as a second return value.
local function fix_colons(id)
  -- match every non alphanum character
  local fixed = id:gsub("[%W]", function(s)
    -- some characters are allowed, we don't need to replace them
    if allowed_chars[s] then return s end
    -- in other cases, replace with underscore
    return "_"
  end)
  return fixed
end
-- DOM filter: replace non-valid characters in ids and in the fragment part
-- of local links with underscores. Returns the (modified) DOM object.
local function id_colons(obj)
  obj:traverse_elements(function(el)
    local is_link = string.lower(obj:get_element_name(el)) == "a"
    local href = is_link and el:get_attribute("href")
    -- don't replace colons in external links
    if href and not href:match("[a-z]%://") then
      local base, fragment = href:match("(.*)%#(.*)")
      if base and fragment then
        fragment = fix_colons(fragment)
        el:set_attribute("href", base .. "#" .. fragment)
      end
    end
    -- every element can have the id attribute
    local id = el:get_attribute("id")
    if id then
      el:set_attribute("id", fix_colons(id))
    end
  end)
  return obj
end

return id_colons
================================================
FILE: domfilters/make4ht-inlinecss.lua
================================================
local cssquery = require "luaxml-cssquery"
local log = logging.new("inlinecss")
-- CSS declarations indexed by selector, filled by parse_css()
local cssrules = {}
-- CSS query object used to match elements against the selectors saved in cssrules
local cssobj = cssquery()
-- Parse a CSS selector and its declarations from one line of a CSS file.
-- Rules are always on one line in the CSS file produced by TeX4ht.
-- Returns selector and declarations without the surrounding braces,
-- or nil, nil when the line doesn't contain a rule.
local function parse_rule(line)
  local selector, block = line:match("%s*(.-)%s*(%b{})")
  if not block then
    return selector, block
  end
  -- strip the braces
  return selector, block:sub(2,-2)
end
-- Join two lists of CSS declarations. A semicolon is inserted between
-- them, unless the old list already ends with one. When there is no old
-- list, the new one is returned unchanged.
local function join_values(old, new)
  if not old then return new end
  -- if old already ends with ;, then don't use semicolon as a separator
  if old:match(";%s*$") then
    return old .. "" .. new
  end
  return old .. ";" .. new
end
-- Load the CSS file, move all rules with id or attribute selectors to the
-- CSS query object, and write the file back without these rules.
-- Returns true on success, or nil and an error message.
local function parse_css(filename)
  local css_file = io.open(filename, "r")
  if not css_file then return nil, "cannot load css file: " .. (filename or "") end
  local newlines = {}
  for line in css_file:lines() do
    local selector, value
    -- match lines that contain # or =, as these can be id or attribute selectors
    if line:match("[%#%=].-{") then
      selector, value = parse_rule(line)
    end
    if selector and value then
      -- update attributes for the current selector
      cssrules[selector] = join_values(cssrules[selector], value)
    else
      -- keep all other lines. this includes lines that look like rules, but
      -- that cannot be parsed (for example when the closing brace is on
      -- another line): they used to raise "table index is nil"
      newlines[#newlines+1] = line
    end
  end
  css_file:close()
  -- we need to add css rules
  for selector, value in pairs(cssrules) do
    cssobj:add_selector(selector, function(dom) end, {value=value})
  end
  -- write new version of the CSS file, without rules for ids and attributes
  local out_file = io.open(filename, "w")
  if not out_file then return nil, "cannot write css file: " .. filename end
  out_file:write(table.concat(newlines, "\n"))
  out_file:close()
  return true
end
-- set to true once the CSS file has been parsed
local processed = false

-- DOM filter: insert inline CSS (the style attribute) to elements matched
-- by id and attribute selectors from the CSS file.
-- Returns the (modified) DOM object.
return function(dom, par)
  -- process the CSS file before everything else, but only once
  if not processed then
    processed = true
    local css_file = mkutils.file_in_builddir(par.input .. ".css", par)
    local status, msg = parse_css(css_file)
    if not status then log:warning(msg) end
  end
  -- loop over all elements in the current page
  dom:traverse_elements(function(curr)
    -- rules from the CSS file that match the current element
    local matched = cssobj:match_querylist(curr)
    if #matched == 0 then return end
    -- start with the style attribute that the element possibly already has
    -- and append values of all matched rules
    local style = curr:get_attribute("style")
    for _, rule in ipairs(matched) do
      style = join_values(style, rule.params.value)
    end
    curr:set_attribute("style", style)
  end)
  return dom
end
================================================
FILE: domfilters/make4ht-itemparagraphs.lua
================================================
-- TeX4ht puts contents of all \item commands into paragraphs. We are not
-- able to detect if it contains only one paragraph, or more. If there is just
-- one, we can remove the paragraph and put its contents directly to the
-- <li> element.
return function(dom)
  for _, li in ipairs(dom:query_selector("li")) do
    -- count elements that are direct children of <li> and collect paragraphs
    -- the paragraph is removed only if it is the only child element
    local element_count = 0
    local paragraphs = {}
    for _, child in ipairs(li._children) do
      if child:is_element() then
        element_count = element_count + 1
        local name = child:get_element_name()
        if name == "p" then
          table.insert(paragraphs, child)
        elseif name == "a" and element_count == 1 and child:get_attribute("id") then
          -- if the first element is <a> with id, we don't count it. its id
          -- is moved to <li> if <li> has no id yet, this is needed for
          -- nested lists
          element_count = element_count - 1
          local id = child:get_attribute("id")
          if not li:get_attribute("id") then
            li:set_attribute("id", id)
            child:remove_node()
          end
        end
      end
    end
    if #paragraphs == 1 and element_count == 1 then
      -- place paragraph children as direct children of <li>, this
      -- effectively removes <p>
      li._children = paragraphs[1]._children
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-joincharacters.lua
================================================
local log = logging.new("joincharacters")
-- default set of element names whose adjacent occurrences are joined
local charclasses = {
span=true,
mn = true,
}
-- <mn> elements are joined only when their parent is one of these elements
local safe_mathml_elements = {
math = true,
mrow = true,
mstyle = true,
mtext = true,
mtd = true,
}
-- When we join several <mi> elements, they will be rendered incorrectly,
-- so we must set the mathvariant attribute to "italic".
-- It is set only if it hasn't been set by the parent element.
local function update_mathvariant(curr)
  local parent = curr:get_parent()
  if parent:get_attribute("mathvariant") then return end
  -- the joined elements don't have attributes, so the table may be missing
  curr._attr = curr._attr or {}
  curr:set_attribute("mathvariant", "italic")
end
-- count all key-value pairs in a table; nil counts as an empty table
local table_count = function(tbl)
  local size = 0
  local key = next(tbl or {})
  -- (the table is never modified here, so next() can be called directly)
  local source = tbl or {}
  while key ~= nil do
    size = size + 1
    key = next(source, key)
  end
  return size
end
-- Test if two elements have exactly the same attributes with the same values.
-- A missing attribute table is handled as an empty one.
local has_matching_attributes = function (el, next_el)
  local left = el._attr or {}
  local right = next_el._attr or {}
  -- number of key-value pairs in a table
  local function size(t)
    local n = 0
    for _ in pairs(t) do n = n + 1 end
    return n
  end
  -- if the number of attributes doesn't match, elements don't match
  if size(right) ~= size(left) then return false end
  -- if any attribute doesn't match, elements don't match
  for name, value in pairs(left) do
    if right[name] ~= value then return false end
  end
  return true
end
-- DOM filter: join adjacent elements that TeX4ht inserted for individual
-- characters into one element. It makes three passes over the DOM:
--   1. join adjacent elements listed in charclasses (<span>, <mn> by default)
--      that have the same name and attributes
--   2. join adjacent <mi> elements without attributes
--   3. set the _text field of elements that contain only text nodes
-- obj: DOM object
-- par: optional filter parameters; par.charclasses can replace the default
--      set of processed elements (the filter settings take precedence)
-- Returns the (modified) DOM object.
local function join_characters(obj,par)
-- join adjacent span and similar elements inserted by
-- tex4ht to just one object.
local par = par or {}
local options = get_filter_settings "joincharacters"
local charclasses = options.charclasses or par.charclasses or charclasses
-- lowercase element name
local get_name = function(curr)
return string.lower(curr:get_element_name())
end
-- value of the class attribute, or mathvariant if there is no class
local get_class = function(next_el)
return next_el:get_attribute("class") or next_el:get_attribute("mathvariant")
end
-- test if the element is one of the processed elements
local is_span = function(next_el)
return charclasses[get_name(next_el)]
end
local is_safe_mathml = function(el)
-- we want to join the <mn> element only when it is safe. for example <mfrac><mn>1</mn><mn>2</mn></mfrac> should be left
local current_name = get_name(el)
if current_name == "mn" then
local parent_name = get_name(el:get_parent())
return safe_mathml_elements[parent_name]
end
return true
end
local has_children = function(curr)
-- don't process spans that have child elements
local children = curr:get_children() or {}
-- if there is more than one child, we can be sure that it has child elements
if #children > 1 then
return true
elseif #children == 1 then
-- test if the child is an element
return children[1]:is_element()
end
return false
end
local join_elements = function(el, next_el)
-- if the following element matches, copy its children to the current element
for _, child in ipairs(next_el:get_children()) do
el:add_child_node(child)
end
-- remove the next element
next_el:remove_node()
end
-- return the node that follows curr if it is a processed element, or
-- a whitespace-only text node that is followed by such a node
-- (the class parameter is only passed along in the recursive call)
local function get_next(curr, class)
local next_el = curr:get_next_node()
if next_el and next_el:is_element() and is_span(next_el) then
return next_el
-- if the next node is space followed by a matching element, we should add this space
elseif next_el and next_el:is_text() and get_next(next_el, class) then
local text = next_el._text
-- match only text containing just whitespace
if text:match("^%s+$") then return next_el end
end
end
obj:traverse_elements(function(el)
-- loop over all elements and test if the current element is in a list of
-- processed elements (charclasses) and if it doesn't contain children
if is_span(el) and not has_children(el) and is_safe_mathml(el) then
local next_el = get_next(el)
-- loop over the following elements and test whether they are of the same type
-- as the current one
while next_el do
-- save the next element because we will remove it later
local real_next = get_next(next_el)
-- elements with the id attribute are never joined with the following ones
if get_name(el) == get_name(next_el) and has_matching_attributes(el,next_el) and not el:get_attribute("id") then
join_elements(el, next_el)
-- add the whitespace
elseif next_el:is_text() then
local s = next_el._text
-- we must create a new node
el:add_child_node(el:create_text_node(s))
next_el:remove_node()
-- real_next = nil
else
-- the following element doesn't match, stop the loop
real_next = nil
end
-- use the saved element as a next object
next_el = real_next
end
end
end)
-- process <mi> elements
obj:traverse_elements(function(el)
-- return the following node only if it is an element
local function get_next_mi(curr)
local next_el = curr:get_next_node()
if next_el and next_el:is_element() then
return next_el
end
end
local function has_no_attributes(x)
return table_count(x._attr) == 0
end
-- join only subsequent <mi> elements with no attributes
if get_name(el) == "mi" and has_no_attributes(el) then
local next_el = get_next_mi(el)
while next_el do
local real_next = get_next_mi(next_el)
if get_name(next_el) == "mi" and has_no_attributes(next_el) then
join_elements(el, next_el)
-- set math variant to italic
-- (if the parent <mstyle> element doesn't set it to something else)
update_mathvariant(el)
else
-- break the loop otherwise
real_next = nil
end
next_el = real_next
end
end
end)
-- join text nodes in an element into one
obj:traverse_elements(function(el)
-- save the text
local t = {}
local children = el:get_children()
for _, x in ipairs(children) do
if x:is_text() then
t[#t+1] = x._text
else
-- the element contains something else than text, leave it untouched
return nil
end
end
-- the concatenated text is stored in the _text field of the element
el._text = table.concat(t)
return el
end)
return obj
end
return join_characters
================================================
FILE: domfilters/make4ht-joincolors.lua
================================================
-- CSS files that have been processed already, indexed by their href
local cssfiles = {}
local log = logging.new "joincolors"
-- keep mapping between span ids and the names of color classes
local colors = {}

-- Remove the `#textcolorN{color:...}` rules from the CSS content and append
-- one class rule for each distinct color instead. The mapping from the
-- removed ids to class names is saved in the `colors` table.
-- Returns the updated CSS content.
local function extract_colors(csscontent)
  -- used colors indexed by the class name
  local used_colors = {}

  -- convert components of the rgb() function to a hex value
  local function rgb_to_hex(r, g, b)
    return string.format("%02x%02x%02x", tonumber(r), tonumber(g), tonumber(b))
  end

  -- save the id and its color, delete the rule from the CSS
  local function register(id, color)
    -- generate the class name from the color value
    local class = "textcolor-" .. color:gsub("rgb%((.-),(.-),(.-)%)", rgb_to_hex)
    -- remove the # characters from the converted color name
    class = class:gsub("%#", "")
    colors[id] = class
    used_colors[class] = color
    return ""
  end

  csscontent = csscontent:gsub("[%a]*%#(textcolor.-)%s*{%s*color%s*%:%s*(.-)%s*%}%s", register)

  -- add the used colors to css, sorted to get stable output
  local rules = {}
  for class, color in pairs(used_colors) do
    rules[#rules+1] = string.format(".%s{color:%s;}", class, color)
  end
  table.sort(rules)
  return csscontent .. table.concat(rules, "\n")
end
-- Replace color ids in the CSS file with color classes (see extract_colors).
-- The file is updated in place.
-- Returns true on success, or nil and an error message when the file
-- cannot be opened for reading or writing.
local function process_css(cssfile)
  local input = io.open(cssfile,"r")
  if not input then return nil, "Cannot open the CSS file: ".. cssfile end
  local content = input:read("*all")
  input:close()
  -- delete color ids and replace them with joined spans
  local newcontent = extract_colors(content)
  -- save the updated css file; the file may be readable but not writable,
  -- in that case io.open returns nil, which used to cause a runtime error
  local output = io.open(cssfile, "w")
  if not output then return nil, "Cannot write the CSS file: " .. cssfile end
  output:write(newcontent)
  output:close()
  return true
end
-- Find CSS files linked from the current HTML file and process each of them
-- once. Only <link> elements whose href ends with "css" are used.
local function process_css_files(dom)
  local links = dom:query_selector("link")
  for _, link in ipairs(links) do
    local href = link:get_attribute("href") or ""
    local is_new_css = not cssfiles[href] and href:match("css$")
    if is_new_css then
      log:debug("Load CSS file ", href)
      -- mark the file as processed before the processing itself
      cssfiles[href] = true
      process_css(href)
    end
  end
end
-- DOM filter: replace ids of colored <span> elements with classes shared by
-- all spans of the same color. Returns the (modified) DOM object.
local function join_colors(dom)
  -- find css files in the current HTML file and join the colors
  process_css_files(dom)
  for _, span in ipairs(dom:query_selector("span")) do
    local id = span:get_attribute("id")
    -- test if the id is in the saved colors
    local class = id and colors[id]
    if class then
      -- remove the id and use the class instead
      span:set_attribute("id", nil)
      span:set_attribute("class", class)
    end
  end
  return dom
end

return join_colors
================================================
FILE: domfilters/make4ht-mathmlfixes.lua
================================================
local log = logging.new("mathmlfixes")
local mathml_chardata = require "make4ht-mathml-char-def"
-- <mglyph> should be inside <mi>, so we don't process it
-- even though it is a token element
local token = {"mi", "mn", "mo", "mtext", "mspace", "ms"}
-- set of token element names, built from the list above for fast lookup
local token_elements = {}
for _, tok in ipairs(token) do token_elements[tok] = true end
-- helper functions to support MathML elements with prefixes (<mml:mi> etc).
--
-- Return the element name without the namespace prefix. The prefix is
-- returned as a second value, but only if the name contains a colon.
local function get_element_name(el)
  local name = el:get_element_name()
  if not name:match(":") then
    return name
  end
  local prefix, real_name = name:match("([^%:]+):?(.+)")
  return real_name, prefix
end
-- Get the attribute value. Attributes can have the same prefix as the
-- element, but sometimes they don't have it, so we try both variants.
local function get_attribute(el, attr_name)
  local plain = el:get_attribute(attr_name)
  if plain then return plain end
  -- try the attribute name with the prefix of the element
  local _, prefix = get_element_name(el)
  return (el:get_attribute((prefix or "") .. ":" .. attr_name))
end
-- build the element name, with the namespace prefix if there is one
local function get_new_element_name(name, prefix)
  if prefix then
    return prefix .. ":" .. name
  end
  return name
end
-- rename the element in place, the prefix is optional
local function update_element_name(el, name, prefix)
  el._name = get_new_element_name(name, prefix)
end
-- create a new element using the DOM node `el`; the prefix and attributes
-- are optional
local function create_element(el, name, prefix, attributes)
  local full_name = get_new_element_name(name, prefix)
  return el:create_element(full_name, attributes or {})
end
-- Find the position of the element among its sibling elements (text and
-- other non-element nodes are not counted).
-- Returns the position (0 if the element wasn't found) and the number of
-- sibling elements.
local function element_pos(el)
  local position, total = 0, 0
  for _, sibling in ipairs(el:get_siblings()) do
    if sibling:is_element() then
      total = total + 1
      if sibling == el then position = total end
    end
  end
  return position, total
end
-- test if the element is the first one among its sibling elements
local function is_first_element(el)
  local position = element_pos(el)
  return position == 1
end
-- test if the element is the last one among its sibling elements
local function is_last_element(el)
  local position, total = element_pos(el)
  return position == total
end
-- test if the element is a MathML token element
-- the namespace prefix of the element is returned as a second value
local function is_token_element(el)
  local local_name, ns_prefix = get_element_name(el)
  local is_token = token_elements[local_name]
  return is_token, ns_prefix
end
-- Find token elements that are children of other token elements and
-- change the parent element to <mstyle>.
local function fix_token_elements(el)
  if not is_token_element(el) then return end
  local parent = el:get_parent()
  local parent_is_token, prefix = is_token_element(parent)
  if parent_is_token then
    -- change top element in nested token elements to mstyle
    update_element_name(parent, "mstyle", prefix)
  end
end
-- The <mstyle> element can be a child of a token element. In that case it
-- is removed: its children replace the children of the parent element.
local function fix_nested_mstyle(el)
  if get_element_name(el) ~= "mstyle" then return end
  local parent = el:get_parent()
  if not is_token_element(parent) then return end
  -- if parent doesn't have the mathvariant attribute copy it from <mstyle>
  if not parent:get_attribute("mathvariant") then
    local mathvariant = el:get_attribute("mathvariant")
    parent._attr = parent._attr or {}
    parent:set_attribute("mathvariant", mathvariant)
  end
  -- copy the contents of <mstyle> to the parent element
  parent._children = el._children
end
-- Set mathvariant of <mi> that is a descendant of <mstyle> to the value
-- used by the closest <mstyle>. Only <mi> elements that already have the
-- mathvariant attribute are processed.
local function fix_mathvariant(el)
  -- walk up the tree to the closest <mstyle> and return its mathvariant;
  -- returns nil when a non-element node is reached first
  local function find_mstyle(x)
    while x:is_element() do
      if get_element_name(x) == "mstyle" then
        return x:get_attribute("mathvariant")
      end
      x = x:get_parent()
    end
    return nil
  end

  if get_element_name(el) ~= "mi" then return end
  -- process only <mi> that have mathvariant set
  if not el:get_attribute("mathvariant") then return end
  local inherited = find_mstyle(el:get_parent())
  if inherited then
    el:set_attribute("mathvariant", inherited)
  end
end
-- detect if the element contains at least one text node and no child elements
local function contains_only_text(el)
  local has_text = false
  for _, child in ipairs(el:get_children() or {}) do
    if child:is_text() then
      has_text = true
    elseif child:is_element() then
      -- one child element is enough to know the answer
      return false
    end
  end
  return has_text
end
-- check if the <mstyle> element contains direct text. in that case, wrap
-- the text in a child <mtext> element
local function fix_missing_mtext(el)
  local is_text_only_mstyle = el:get_element_name() == "mstyle" and contains_only_text(el)
  if not is_text_only_mstyle then return end
  log:debug("mstyle contains only text: " .. el:get_text())
  -- copy the current node, change its element name to mtext and make it
  -- the only child of <mstyle>
  local mtext = el:copy_node()
  mtext._name = "mtext"
  mtext._parent = el
  el._children = {mtext}
end
-- test if the element is <msup>, <msub> or <msubsup>
-- returns true or nil
local function is_radical(el)
  local name = el:get_element_name()
  local script_elements = {msup=true, msub=true, msubsup=true}
  return script_elements[name]
end
-- Return the first child of the element. If the first child is <mrow>,
-- return the first child of this <mrow> instead, and the <mrow> itself as
-- a second value. Nothing is returned if the first child isn't an element.
local function get_mrow_child(el)
  local first = el:get_children()[1]
  if not (first and first:is_element()) then return end
  if first:get_element_name() ~= "mrow" then
    return first
  end
  local inner = first:get_children()[1]
  return inner, first
end
-- Inspect <msup>, <msub> and <msubsup> elements whose base is only one
-- character long. The loop over the preceding siblings has an empty body,
-- so this function doesn't modify the DOM.
local function fix_radicals(el)
  if not is_radical(el) then return end
  local first_child, mrow = get_mrow_child(el)
  -- if the first child is only one character long, it is possible that there is a problem
  if not (first_child and string.len(first_child:get_text()) == 1) then return end
  local name = first_child:get_element_name()
  local siblings = el:get_siblings()
  local pos = el:find_element_pos()
  -- it doesn't make sense to do any further processing if the element is at the beginning
  if pos == 1 then return end
  if name == "mo" then
    -- walk back over the preceding siblings; nothing is done with them
    for i = pos, 1,-1 do
    end
  end
end
-- put <mrow> as child of <math> if it already isn't here
-- names of elements that can get the top level <mrow>
local allowed_top_mrow = {
  math=true
}

-- Wrap all children of the element in one <mrow>, if the element has
-- a token element among its direct children, or if it is <math> without
-- any <mrow> child.
local function top_mrow(math)
  local children = math:get_children()
  local parent = math:get_parent()
  local parent_name
  if parent then parent_name = get_element_name(parent) end
  local current_name, prefix = get_element_name(math)
  -- don't process elements with one or zero children
  -- don't process elements that already are mrow or that are in mrow
  if #children < 2 or not allowed_top_mrow[current_name] or current_name == "mrow" or parent_name == "mrow" then return nil end

  local needs_mrow = false
  local mrow_count = 0
  for _, child in ipairs(children) do
    if child:is_element() then
      if is_token_element(child) then
        needs_mrow = true
      elseif get_element_name(child) == "mrow" then
        mrow_count = mrow_count + 1
      end
    end
  end
  -- put at least one <mrow> to each <math>
  if not needs_mrow and current_name == "math" and mrow_count == 0 then
    needs_mrow = true
  end
  if not needs_mrow then return end

  local mrow = math:create_element(get_new_element_name("mrow", prefix))
  for _, child in ipairs(children) do
    mrow:add_child_node(child)
  end
  math._children = {mrow}
end
-- Convert a fence attribute of the element to an <mo> element.
--   attr: name of the attribute, open | close
--   form: value of the form attribute of <mo>, prefix | postfix
-- Returns the new <mo fence="true"> element, or nil if the element
-- doesn't have the attribute.
local function get_fence(el, attr, form)
  local char = el:get_attribute(attr)
  if not char then return nil end
  -- the new element uses the same namespace prefix as the original one
  local _, prefix = get_element_name(el)
  local mo = el:create_element(get_new_element_name("mo", prefix), {fence="true", form = form})
  mo:add_child_node(mo:create_text_node(char))
  return mo
end
-- TeX4ht uses in some cases the <mfenced> element, which is deprecated in
-- MathML, and Firefox doesn't support it anymore. We replace it with
-- <mrow><mo>open</mo>...children...<mo>close</mo></mrow>
local function fix_mfenced(el)
  local name, prefix = get_element_name(el)
  if name ~= "mfenced" then return end
  -- the fences must be created before the attributes are removed
  -- there can be also separator attribute, but it is not used in TeX4ht
  local open = get_fence(el, "open", "prefix")
  local close = get_fence(el, "close", "postfix")
  -- change <mfenced> to <mrow> and remove all attributes
  el._name = get_new_element_name("mrow", prefix)
  el._attr = {}
  -- open must be first child, close needs to be last
  if open then el:add_child_node(open, 1) end
  if close then el:add_child_node(close) end
end
-- test if the element is <mo fence="true">
local function is_fence(el)
  if get_element_name(el) ~= "mo" then
    return false
  end
  return el:get_attribute("fence") == "true"
end
-- LibreOffice NEEDS the <mfenced> element, so we need to convert
-- <mrow><mo fence="true">...</mo>...</mrow> to <mfenced>.
-- The opening <mo> is removed and its text is used as the `open` attribute.
-- If the last sibling element is a fence too, it is removed and its text
-- is used as the `close` attribute. The parent element is renamed to
-- <mfenced> and all its original attributes are replaced.
local function fix_mo_to_mfenced(el)
  if not is_fence(el) then return end
  local parent = el:get_parent()
  -- convert mo content to text, so it can be used in the attribute
  local open = el:get_text():gsub("%s*", "")
  -- close needs to be the last element in the sibling list of the current element
  local siblings = el:get_siblings()
  el:remove_node() -- we don't need this element anymore
  local close
  for i = #siblings, 1, -1 do
    -- this variable was assigned without `local`, which created a global
    local last = siblings[i]
    if last:is_element() then
      if is_fence(last) then -- set close attribute only if the last element is fence
        close = last:get_text():gsub("%s*", "")
        last:remove_node() -- remove <mo>
      end
      break -- break looping over elements once we find last element
    end
  end
  -- convert parent <mrow> to <mfenced>
  local _, prefix = get_element_name(parent)
  parent._name = get_new_element_name("mfenced", prefix)
  parent._attr = {open = open, close = close}
end
-- Join numbers that TeX4ht split to several elements:
-- convert <mn>1</mn><mo>.</mo><mn>3</mn> to <mn>1.3</mn>
-- A minus sign in a text node placed right before <mn> is moved into it.
local function fix_numbers(el)
  if get_element_name(el) ~= "mn" then return end
  -- sometimes minus sign can be outside <mn>
  local previous = el:get_sibling_node(-1)
  if previous and previous:is_text() and previous:get_text() == "−" then
    el:add_child_node(previous:copy_node(), 1)
    previous:remove_node()
  end
  -- test if next element is <mo class="MathClass-punc">.</mo>
  local dot = el:get_sibling_node(1)
  local is_decimal_point = dot and dot:is_element()
    and get_element_name(dot) == "mo"
    and get_attribute(dot, "class") == "MathClass-punc"
    and dot:get_text() == "."
  if not is_decimal_point then return end
  -- get the element after the dot and test if it is <mn>
  local fraction = el:get_sibling_node(2)
  if not (fraction and fraction:is_element() and get_element_name(fraction) == "mn") then return end
  -- join numbers and set it as text content of the current element
  local newnumber = el:get_text() .. "." .. fraction:get_text()
  log:debug("Joining numbers: " .. newnumber)
  el._children = {}
  el:add_child_node(el:create_text_node(newnumber))
  -- remove elements that hold dot and decimal part
  dot:remove_node()
  fraction:remove_node()
end
-- return the number of <mo> elements in the list
-- callers compare it with the length of the list to find whether
-- the list contains just operators
local function just_operators(list)
  local operators = 0
  for _, node in ipairs(list) do
    operators = operators + (get_element_name(node) == "mo" and 1 or 0)
  end
  return operators
end
-- Change <mo> elements that are the only children of an element to <mtext>.
-- This fixes issues in LibreOffice with a^{*}
-- I hope it doesn't introduce different issues
local function fix_operators(x)
  -- process only <mo>
  local el_name, prefix = get_element_name(x)
  if el_name ~= "mo" then return nil end
  local siblings = x:get_siblings()
  -- test if current element list contains only <mo>
  if just_operators(siblings) ~= #siblings then return end

  if #siblings == 1 then
    -- one <mo> translates to <mtext>, unless it is stretchy
    if not x:get_attribute("stretchy") then
      x._name = get_new_element_name("mtext", prefix)
      log:debug("changing one <mo> to <mtext>: " .. x:get_text())
      -- I think we should use <mi>, but LO incorrectly renders it in <msubsup>,
      -- even if we use the mathvariant="normal" attribute. <mtext> works, so
      -- we use that instead.
    end
    return
  end

  -- multiple <mo> translate to one <mtext>
  -- collect text content of all <mo> elements
  local parts = {}
  for _, el in ipairs(siblings) do
    parts[#parts+1] = el:get_text()
  end
  -- replace contents of the current <mo> with the concatenated text
  x._children = {}
  local newtext = table.concat(parts)
  local text_el = x:create_text_node(newtext)
  log:debug("changing <mo> to <mtext>: " .. newtext)
  x:add_child_node(text_el)
  -- change <mo> to <mtext>
  x._name = get_new_element_name("mtext", prefix)
  -- remove subsequent <mo>
  for i = 2, #siblings do
    siblings[i]:remove_node()
  end
end
-- Return the great-grandparent of el (parent of parent of parent),
-- or nil when the chain of ancestors ends earlier.
local function get_third_parent(el)
  local ancestor = el
  -- walk two levels up, giving up as soon as an ancestor is missing
  for _ = 1, 2 do
    ancestor = ancestor:get_parent()
    if not ancestor then return nil end
  end
  return ancestor:get_parent()
end
-- Insert <mspace width="0.3em"> into the parent of el.
-- el:  element whose parent receives the space; its namespace prefix is reused
-- pos: position among the children of the parent where the space is inserted
local function add_space(el, pos)
  local container = el:get_parent()
  local _, prefix = get_element_name(el)
  local mspace = create_element(container, "mspace", prefix)
  mspace:set_attribute("width", "0.3em")
  container:add_child_node(mspace, pos)
end
-- Fix spacing in the dcases* environment. For input like
--   \begin{dcases*}
--   1 & if $a=b$ then
--   \end{dcases*}
-- the spaces around $a=b$ are missing, because whitespace at the edges of
-- <mtext> is collapsed by the browser. An explicit <mspace> is added for
-- <mtext> elements that end (first child) or start with whitespace.
-- el: any DOM element; only <mtext> whose third parent is
--     <mtable class="dcases-star"> is processed.
local function fix_dcases(el)
  if el:get_element_name() ~= "mtext" then return end
  local mtable = get_third_parent(el)
  local in_dcases = mtable
    and mtable:get_element_name() == "mtable"
    and mtable:get_attribute("class") == "dcases-star"
  if not in_dcases then return end
  local content = el:get_text()
  local position = el:find_element_pos()
  if position == 1 and content:match("%s$") then
    -- trailing space in the first child: put the space right after it
    add_space(el, 2)
  elseif content:match("^%s") and not el._used then
    add_space(el, position)
    -- mark the element as processed, this is necessary to avoid an infinite loop
    el._used = true
  end
end
-- Test whether a table row is empty.
-- A row is empty when its text consists only of whitespace and it has
-- at most one child element (a single <mtd>).
local function is_empty_row(el)
  -- any non-whitespace text means that the row is not empty
  if not el:get_text():match("^%s*$") then
    return false
  end
  local elements = 0
  for _, node in ipairs(el:get_children()) do
    if node:is_element() then
      elements = elements + 1
    end
  end
  -- zero or one child element
  return elements < 2
end
-- Arrays sometimes end with an empty row, which causes rendering issues.
-- Remove el if it is an empty <mtr class="array-row"> that is the last element.
local function delete_last_empty_mtr(el)
  local el_name = get_element_name(el)
  local removable = el_name == "mtr"
    and get_attribute(el, "class") == "array-row"
    and is_last_element(el)
    and is_empty_row(el)
  if removable then
    el:remove_node()
  end
end
-- TeX4ht adds <mtr class="array-hline"> rows for \hline. These rows are
-- removed and replaced by the "rowlines" attribute of the table; lines at
-- the very top and bottom cannot be expressed by rowlines, so they are
-- appended to the "style" attribute as CSS borders.
-- mtable: any DOM element; only <mtable> without a rowlines attribute is processed.
local function fix_mtable_hlines(mtable)
  local el_name = get_element_name(mtable)
  -- nothing to do for other elements or when rowlines is already set
  if el_name ~= "mtable" or mtable:get_attribute("rowlines") then
    return
  end
  -- one entry for every kept row ("") and every removed hline row ("hline")
  local marks = {}
  local rows = mtable:query_selector("mtr")
  local row_count = #rows
  for index, row in ipairs(rows) do
    local class = row:get_attribute("class")
    if class == "array-hline" then
      marks[#marks + 1] = "hline"
      -- the line will be displayed using the rowlines attribute instead
      row:remove_node()
    elseif index == row_count and class == "array-row" and is_empty_row(row) then
      -- ignore the empty row that is inserted if \hline is at the end of the array
      row:remove_node()
    else
      marks[#marks + 1] = ""
    end
  end
  -- construct the rowlines value and the CSS for outer borders
  local rowlines, styles = {}, {}
  local last = #marks
  for i, mark in ipairs(marks) do
    if mark == "hline" then
      if i == 1 then
        table.insert(styles, "border-top: 1px solid black;")
      elseif i == last then
        table.insert(styles, "border-bottom: 1px solid black;")
      else
        table.insert(rowlines, "solid")
      end
    elseif i > 1 and i ~= last and marks[i - 1] ~= "hline" then
      -- two rows that are not separated by hline need an explicit "none"
      table.insert(rowlines, "none")
    end
  end
  mtable:set_attribute("rowlines", table.concat(rowlines, " "))
  local existing_style = mtable:get_attribute("style") or ""
  mtable:set_attribute("style", existing_style .. table.concat(styles, " "))
end
-- Workaround for LibreOffice, which has a problem with relation <mo> that
-- is the first child in an element list. This often happens in equations,
-- where the first element of a table column is an operator like
-- non-equal, less-than, etc.
-- An empty <mrow> is inserted at the start of the parent when the <mo> is
-- its first element, or appended at the end when it is its last element.
-- <mo> with the fence, form or accent attribute is ignored.
local function fix_rel_mo(el)
  local el_name, prefix = get_element_name(el)
  if el_name ~= "mo" then return end
  if get_attribute(el, "fence")    -- ignore fences
    or get_attribute(el, "form")   -- these should be also ignored
    or get_attribute(el, "accent") -- and accents too
  then
    return
  end
  local parent = el:get_parent()
  if is_first_element(el) then
    parent:add_child_node(create_element(parent, "mrow", prefix), 1)
  elseif is_last_element(el) then
    parent:add_child_node(create_element(parent, "mrow", prefix))
  end
end
local uchar = utf8.char
local ucodes = utf8.codes
-- current version of MathML doesn't support the mathvariant attribute, so we need to replace unicode characters with the corresponding base code for the current font style
-- Recursively rewrite the text nodes below `math`: each character that has
-- an entry in mathml_chardata is replaced by the code point stored there
-- for the current font style; other characters are kept.
-- math:          element whose children are processed
-- current_style: font style name used as a key in the character data;
--                the mathvariant attribute of a child element overrides it
--                for that subtree
local function replace_characters(math, current_style)
  for _, child in ipairs(math:get_children()) do
    if child:is_text() then
      local converted = {}
      for _, codepoint in ucodes(child:get_text()) do
        local variants = mathml_chardata[codepoint]
        -- fall back to the original character when there is no variant
        local target = variants and variants[current_style] or codepoint
        converted[#converted + 1] = uchar(target)
      end
      child._text = table.concat(converted)
    elseif child:is_element() then
      replace_characters(child, child:get_attribute("mathvariant") or current_style)
    end
  end
end
-- Start the character replacement at each <math> element,
-- with "normal" as the initial font style.
local function fix_mathml_chars(el)
  local el_name = get_element_name(el)
  if el_name ~= "math" then return end
  replace_characters(el, "normal")
end
-- When <mrow> with the intent or arg attribute contains just one child
-- node that is an element, replace the <mrow> with this child and copy
-- both attributes to it.
-- NOTE(review): the child is written directly to parent._children; the
-- parent reference stored in the child is not touched here -- confirm
-- that later filters do not depend on it.
local function fix_intent(mrow)
  if get_element_name(mrow) ~= "mrow" then
    return nil
  end
  local intent = get_attribute(mrow, "intent")
  local arg = get_attribute(mrow, "arg")
  if not (intent or arg) then return end
  local children = mrow:get_children()
  local only_child = children[1]
  if #children ~= 1 or not only_child:is_element() then return end
  -- put the child to the place of mrow
  local parent = mrow:get_parent()
  parent._children[mrow:find_element_pos()] = only_child
  -- and move the attributes to it
  only_child:set_attribute("arg", arg)
  only_child:set_attribute("intent", intent)
end
-- DOM filter entry point: run all MathML fixes on every element.
-- The order of the calls is significant and must be kept.
return function(dom)
  local function fix_element(el)
    if settings.output_format == "odt" then
      -- LibreOffice needs <mfenced>
      fix_mo_to_mfenced(el)
      fix_rel_mo(el)
    else
      -- other output formats get fix_mfenced instead
      fix_mfenced(el)
    end
    fix_mtable_hlines(el)
    fix_radicals(el)
    fix_token_elements(el)
    fix_nested_mstyle(el)
    fix_missing_mtext(el)
    fix_numbers(el)
    fix_operators(el)
    fix_mathvariant(el)
    -- ODT needs older MathML version, so characters are replaced only elsewhere
    if settings.output_format ~= "odt" then
      fix_mathml_chars(el)
    end
    fix_dcases(el)
    fix_intent(el)
    top_mrow(el)
    delete_last_empty_mtr(el)
  end
  dom:traverse_elements(fix_element)
  return dom
end
================================================
FILE: domfilters/make4ht-odtfonts.lua
================================================
-- Fix ODT styles for fonts. Fonts sometimes have a missing size, so for
-- every text style "<name>-<size>x-<size2>x-<size3>" whose "<name>-<size>"
-- is listed in patched_lg_fonts, a copy named "<name>-x-<size2>x-<size3>"
-- is added next to the original.
-- params.patched_lg_fonts takes precedence over the "odtfonts" filter settings.
return function(dom, params)
  local properties = get_filter_settings "odtfonts" or {}
  local fix_lgfile_fonts = params.patched_lg_fonts or properties.patched_lg_fonts or {}
  for _, style in ipairs(dom:query_selector "style|style") do
    if style:get_attribute("style:family") == "text" then
      -- detect if the style is for a font
      local style_name = style:get_attribute("style:name")
      local name, size, size2, size3 = style_name:match("(.-)%-(%d*)x%-(%d*)x%-(%d+)")
      -- problematic fonts are set in formats/make4ht-odt.lua
      if name and fix_lgfile_fonts[name .. "-" .. size] then
        -- copy the current style and fix the name
        local patched = style:copy_node()
        patched:set_attribute("style:name", string.format("%s-x-%sx-%s", name, size2, size3))
        style:get_parent():add_child_node(patched)
      end
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-odtimagesize.lua
================================================
local log = logging.new "odtimagesize"
-- Set correct dimensions to frames around images: the svg:width and
-- svg:height attributes of the first <draw:image> in each <draw:frame>
-- are copied to the frame. Attributes missing on the image are left
-- untouched on the frame.
return function(dom)
  local frames = dom:query_selector("draw|frame")
  for _, frame in ipairs(frames) do
    local images = frame:query_selector("draw|image")
    if #images > 0 then
      local image = images[1]
      local width = image:get_attribute("svg:width")
      local height = image:get_attribute("svg:height")
      -- the test used a misspelled variable name before, so the width was never copied
      if width then frame:set_attribute("svg:width", width) end
      if height then frame:set_attribute("svg:height", height) end
      log:debug("image dimensions", width, height)
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-odtpartable.lua
================================================
-- find all tables inside paragraphs, replace the found paragraphs with the child table
-- For every table matched inside a paragraph, replace the parent node of
-- the table with the table itself.
return function(dom)
  local nested_tables = dom:query_selector("text|p table|table")
  for _, tbl in ipairs(nested_tables) do
    tbl:get_parent():replace_node(tbl)
  end
  return dom
end
================================================
FILE: domfilters/make4ht-odtsvg.lua
================================================
-- we need to set dimensions for SVG images produced by \Picture commands
local log = logging.new "odtsvg"
-- Read the width and height attribute values from an SVG file.
-- The file is scanned line by line; the scan stops as soon as both
-- values are known.
-- Returns width, height (strings); each is nil when it was not found or
-- when the file doesn't exist.
local function get_svg_dimensions(filename)
  local width, height
  log:debug("file exists", filename, mkutils.file_exists(filename))
  if not mkutils.file_exists(filename) then
    return width, height
  end
  for line in io.lines(filename) do
    -- keep the previously found value when the line has no match
    width = line:match("width%s*=%s*[\"'](.-)[\"']") or width
    height = line:match("height%s*=%s*[\"'](.-)[\"']") or height
    if width and height then break end
  end
  return width, height
end
-- process
-- Set dimensions on the parents of <draw:image> elements. When the parent
-- lacks svg:width or svg:height and the image is an SVG file, the
-- dimensions are read from that file.
return function(dom)
  for _, pic in ipairs(dom:query_selector("draw|image")) do
    local imagename = pic:get_attribute("xlink:href")
    log:debug("image", imagename)
    local parent = pic:get_parent()
    local width = parent:get_attribute("svg:width")
    local height = parent:get_attribute("svg:height")
    -- images without xlink:href are skipped instead of raising an error
    if (not width or not height) and imagename and imagename:match("svg$") then
      local svg_width, svg_height = get_svg_dimensions(imagename)
      -- keep the dimension that is already known if the SVG file
      -- cannot be read or doesn't declare it
      width = svg_width or width
      height = svg_height or height
    end
    log:debug("dimensions", width, height)
    parent:set_attribute("svg:width", width)
    parent:set_attribute("svg:height", height)
  end
  return dom
end
================================================
FILE: domfilters/make4ht-sectionid.lua
================================================
local mkutils = require "mkutils"
-- logger of this filter, messages are reported under the "tocid" name
local log = logging.new("tocid")
-- Unicode data distributed with ConTeXt
-- defines "characters" table
-- Without this module the section ids cannot be normalized, so a filter
-- that returns the DOM unchanged is exported instead.
if not mkutils.isModuleAvailable("make4ht-char-def") then
log:warning("char-def module not found")
log:warning("cannot fix section id's")
return function(dom) return dom end
end
-- character data; this file reads the category, lccode and shcode fields
local chardata = require "make4ht-char-def"
-- table that maps original ids to new ids; it is filled by the filter
-- function on the first use and reused afterwards
local toc = nil
-- Test if the character is a letter, i.e. its category starts with "l".
-- info: character data entry; a missing category is treated as "".
-- Returns the matched "l" (truthy) or nil.
local function is_letter(info)
  return (info.category or ""):match("^l")
end
-- Test if the character data entry has the "zs" category.
-- A missing category is treated as "".
local function is_space(info)
  return (info.category or "") == "zs"
end
-- Test if the code point is an ASCII digit (48 = "0" ... 57 = "9").
local function is_number(char)
  return 48 <= char and char <= 57
end
local uchar = utf8.char
-- Append the normalized form of a letter to the result table.
-- The letter is lowercased using the lccode field and then replaced by
-- the value of the shcode field of the lowercase letter (the base letter
-- without accents). The character itself is used when a field is missing.
-- char:   code point of the letter
-- result: array of strings, the characters are appended to it
local function normalize_letter(char, result)
  local lowercase = (chardata[char] or {}).lccode or char
  local base = (chardata[lowercase] or {}).shcode or lowercase
  -- shcode is a table if it holds multiple characters,
  -- so a single code point is wrapped to a table too
  if type(base) ~= "table" then
    base = {base}
  end
  for _, codepoint in ipairs(base) do
    result[#result + 1] = uchar(codepoint)
  end
end
-- Convert a section title to a string usable as an id:
-- LaTeX commands are removed, letters are normalized, digits are kept,
-- runs of spaces are replaced by "-" and all other characters are dropped.
local escape_name = function(name)
  -- remove LaTeX commands and the surrounding whitespace
  local title = name:gsub("\\[%a]+", "")
  title = title:gsub("^%s+", ""):gsub("%s+$", "")
  local normalized = {}
  for _, codepoint in utf8.codes(title) do
    local info = chardata[codepoint] or {}
    if is_space(info) then
      normalized[#normalized + 1] = " "
    elseif is_letter(info) then
      normalize_letter(codepoint, normalized)
    elseif is_number(codepoint) then
      normalized[#normalized + 1] = uchar(codepoint)
    end
  end
  local id = table.concat(normalized)
  -- spaces are not allowed in ids
  id = id:gsub("%s+", "-")
  id = id:gsub("^%-", "")
  -- ids cannot start with a number in HTML 4, so "x" is added
  id = id:gsub("^(%d)", "x%1")
  return id
end
-- Parse one line of the TOC file. The section ids and titles are saved
-- in the following format:
-- \csname a:TocLink\endcsname{1}{x1-20001}{QQ2-1-2}{Nazdar světe}
-- ............................... id ................. title ...
-- Returns the id and the escaped title, or nothing if the line doesn't
-- contain a TOC entry.
local function parse_toc_line(line)
  local id, title = line:match("a:TocLink.-{.-}{(.-)}{.-}(%b{})")
  if not id then return end
  return id, escape_name(title)
end
-- number of uses of each escaped section name
local used = {}
-- Parse the TOC file and return a table that maps the original ids to
-- the new ids derived from section titles.
-- A name that was already used gets the number of its previous uses
-- appended. Returns nil and an error message if the file doesn't exist.
local function parse_toc(filename)
  if not mkutils.file_exists(filename) then
    return nil, "Cannot open TOC file " .. filename
  end
  local toc = {}
  for line in io.lines(filename) do
    local id, name = parse_toc_line(line)
    -- not all lines in the .4tc file contain TOC entries
    if id then
      -- a section name without any text would lead to an id that
      -- contains only a number, which is invalid in HTML
      if name == "" then name = "_" end
      local seen = used[name]
      local unique = name
      if seen then
        unique = name .. seen
        log:debug("Duplicate id found: ".. name .. ". New id: " .. unique)
      end
      used[name] = (seen or 0) + 1
      toc[id] = unique
    end
  end
  return toc
end
-- we don't want to change the original id, as there may be links to it from the outside
-- so we will set it to the parent element (which should be h[1-6])
-- Assign id to the parent of el. If the parent already has a different
-- id, it is not overridden; an empty <span> with the id is inserted as
-- the first child of the parent instead.
local function set_id(el, id)
  local section = el:get_parent()
  local existing = section:get_attribute("id")
  if not existing or existing == id then
    section:set_attribute("id", id)
    return
  end
  local holder = section:create_element("span", {id=id})
  section:add_child_node(holder, 1)
end
-- we want to remove <a id="xxx"> elements from some elements, most notably <figure>
-- Set of element names from which the <a id="xxx"> child is removed.
-- Every name must be written as a key with a true value: a bare name is
-- a positional value read from an undefined variable, so it adds nothing.
local elements_to_remove = {
  figure = true,
  figcaption = true
}
-- Remove the el node and set id as the id attribute of parent.
local function remove_a(el, parent, id)
  el:remove_node()
  parent:set_attribute("id", id)
end
-- DOM filter entry point: replace the ids of sections with ids derived
-- from the section titles and update the links that point to them.
-- dom: the DOM object, it is modified in place and returned
-- par: filter parameters; par.input (base name of the .4tc file) and
--      par.tex4ht_sty_par are read
return function(dom, par)
  -- The TOC is parsed only once. The call cannot be written as
  -- `toc or parse_toc(...)`, because such an expression keeps only the
  -- first returned value and the error message would be lost.
  local msg
  if not toc then
    toc, msg = parse_toc(mkutils.file_in_builddir(par.input .. ".4tc", par))
  end
  -- don't do anything if the TOC cannot be found
  if not toc then
    log:warning(msg or "Cannot load TOC")
    return dom
  end
  -- if the user selects the "notoc" option on the command line,
  -- href links are not updated
  local notoc = false
  if par["tex4ht_sty_par"]:match("notoc") then notoc = true end
  -- the HTML file can already contain an id that we want to assign,
  -- a duplicate id from the TOC is not set in that case
  local toc_ids = {}
  for _, el in ipairs(dom:query_selector("[id]")) do
    local id = el:get_attribute("id")
    toc_ids[id] = true
  end
  -- process all elements with the id attribute or <a href>
  for _, el in ipairs(dom:query_selector "[id],a[href]") do
    local id, href = el:get_attribute("id"), el:get_attribute("href")
    if id then
      local name = toc[id]
      local parent = el:get_parent()
      local assignable = name and not toc_ids[name]
      -- remove unnecessary <a> elements if the parent doesn't have id yet
      if elements_to_remove[parent:get_element_name()]
        and not parent:get_attribute("id")
        and el:get_element_name() == "a"
      then
        remove_a(el, parent, id)
        -- ids without a TOC entry have no new name; calling set_id with
        -- nil would insert a <span> without id
        if assignable then
          set_id(el, name)
        elseif name then
          log:debug("Document already contains id: " .. name)
        end
      -- replace id with the new section id
      elseif assignable then
        set_id(el, name)
      elseif name then
        log:debug("Document already contains id: " .. name)
      end
    end
    if href and notoc == false then
      -- replace links to sections with the new id
      local base, anchor = href:match("^(.*)%#(.+)")
      local name = anchor and toc[anchor]
      if name then
        el:set_attribute("href", base .. "#" .. name)
      end
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-t4htlinks.lua
================================================
-- This filter is used by the ODT output format to fix links
-- Convert <t4htlink> elements:
-- * with child nodes and href -> <text:a>, plus a <text:bookmark> child
--   when the link is also named
-- * with name only            -> <text:bookmark>
-- * anything else is removed
return function(dom)
  for _, link in ipairs(dom:query_selector("t4htlink")) do
    local name = link:get_attribute("name")
    local href = link:get_attribute("href")
    local has_content = #link:get_children() > 0
    if has_content and href then
      link._name = "text:a"
      -- keep only the anchor of links that contain "4oo#"
      local target = href:gsub("^.+4oo%#", "#")
      link._attr = {["xlink:type"]="simple", ["xlink:href"]=target}
      -- if the link is named, add a bookmark
      if name then
        link:add_child_node(link:create_element("text:bookmark", {["text:name"] = name}))
      end
    elseif name then
      link._name = "text:bookmark"
      link._attr = {["text:name"] = name}
    else
      link:remove_node()
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-tablecaption.lua
================================================
-- Find the closest <table> ancestor of the caption element, as the
-- caption can be placed inside <tr> and <td>.
-- Returns nil when there is no such ancestor.
local function get_parent_table(caption)
  local ancestor = caption:get_parent()
  while ancestor do
    if ancestor:get_element_name() == "table" then
      return ancestor
    end
    ancestor = ancestor:get_parent()
  end
  return nil
end
-- The caption element must be the first element in a table, it cannot be
-- contained inside <tr>. Each caption found inside a table is copied to
-- the first position of its table and the original is removed.
return function(dom)
  for _, caption in ipairs(dom:query_selector("table caption")) do
    local owner = get_parent_table(caption)
    if owner then
      owner:add_child_node(caption:copy_node(), 1)
      caption:remove_node()
    end
  end
  return dom
end
================================================
FILE: domfilters/make4ht-tablerows.lua
================================================
local log = logging.new ("tablerows")
return function(dom)
-- Detect if the element contains child elements.
-- longtable has <td><p></p></td> inside empty rows, so a single <p>
-- child without any non-whitespace text is regarded as no content.
-- The name is tested on the child element itself; testing the last child
-- node is wrong when the <p> is followed by a text node.
local has_child_elements = function(child)
  local element_count = 0
  local last_element
  for _, node in ipairs(child:get_children()) do
    if node:is_element() then
      element_count = element_count + 1
      last_element = node
    end
  end
  if element_count == 1
    and last_element:get_element_name() == "p"
    and child:get_text():gsub("%s", "") == ""
  then
    return false
  end
  return element_count > 0
end
-- Detect rows that contain exactly one child element, which has no child
-- elements of its own and no text other than whitespace.
-- Rows with the "hline" or "cline" class are never regarded as empty.
local is_empty_row = function(row)
  local class = row:get_attribute("class")
  if class == "hline" or class == "cline" then return false end
  local cells = 0
  local has_text = false
  for _, node in ipairs(row:get_children() or {}) do
    if node:is_element() then
      cells = cells + 1
      -- empty rows contain only one element without child elements
      if cells > 1 or has_child_elements(node) then return false end
      if node:get_text():gsub("%s","") ~= "" then
        has_text = true
      end
    end
  end
  return cells == 1 and not has_text
end
-- Test that the id of the row is not used in the CSS.
-- Rows without id cannot be styled, so true is returned for them.
-- The id of a <td> element can match the searched pattern too; the row
-- is regarded as styled in such case as well.
local is_not_styled = function(row, css)
  local id = row:get_attribute("id")
  if not id then return true end
  -- escape hyphens, so the id can be used in a Lua pattern
  local escaped = id:gsub("%-", "%%-")
  return css:match("%#" .. escaped) == nil
end
-- Remove the <hr> elements that the ".hline hr" selector finds in the row.
local hline_hr = function(row)
  local rules = row:query_selector(".hline hr")
  for i = 1, #rules do
    rules[i]:remove_node()
  end
end
-- longtable contains a last row of empty cells. Remove the last <tr> of
-- the table if all its <td> cells are empty; rows with the "hline" class
-- are kept.
local longtable_last_row = function(tbl)
  local rows = tbl:query_selector("tr")
  local candidate = rows[#rows]
  if not candidate then return end
  if candidate:get_attribute("class") == "hline" then return end
  for _, cell in ipairs(candidate:query_selector("td")) do
    -- stop processing at the first cell with child elements or text
    local blank = not has_child_elements(cell) and cell:get_text():match("^%s*$")
    if not blank then return end
  end
  candidate:remove_node()
end
-- Empty rows can be styled using CSS, for example the configuration for
-- Booktabs does that, and such rows shouldn't be removed.
-- Return the contents of all files referenced by the href attribute of
-- <link> elements in <head>, joined by newlines. Files that cannot be
-- opened are skipped.
local load_css_files = function()
  local sheets = {}
  for _, link in ipairs(dom:query_selector("head link")) do
    local href = link:get_attribute("href")
    local file = href and io.open(href, "r")
    if file then
      sheets[#sheets + 1] = file:read("*all")
      file:close()
    end
  end
  return table.concat(sheets, "\n")
end
-- contents of the linked CSS files, used to detect styled rows
local css = load_css_files()
for _, tbl in ipairs(dom:query_selector("table")) do
  for _, row in ipairs(tbl:query_selector("tr")) do
    -- remove empty rows that are not styled in the CSS
    if is_empty_row(row) and is_not_styled(row, css) then
      row:remove_node()
    end
    hline_hr(row)
  end
  local class = tbl:get_attribute("class")
  if class and class:match("longtable") then
    longtable_last_row(tbl)
  end
end
return dom
end
================================================
FILE: extensions/make4ht-ext-common_domfilters.lua
================================================
local M = {}
-- this variable will hold the output format name
local current_format
local filter = require "make4ht-domfilter"
-- local process = filter {"fixinlines", "idcolons", "joincharacters" }
-- filters support only html formats
-- Remember the output format for M.modify_build.
-- The extension is enabled for every format, so true is always returned.
function M.test(format)
  current_format = format
  return true
end
-- Register the common DOM filters for the output format saved by M.test.
-- * odt:           joincharacters and mathmlfixes are run on the .4oo and
--                  .4om (math) files
-- * other formats: the full set of HTML filters is run on HTML files
-- Returns the modified make object.
function M.modify_build(make)
  if current_format == "odt" then
    -- some filters don't make sense in the ODT format
    local process = filter({"joincharacters", "mathmlfixes"}, "commondomfilters")
    local charclasses = {mn = true, ["text:span"] = true, mi = true}
    make:match("4oo$", process, {charclasses = charclasses})
    -- match math documents
    make:match("4om$", process, {charclasses = charclasses})
  else
    local process = filter({"fixinlines", "idcolons", "joincharacters", "tablecaption", "mathmlfixes", "tablerows","booktabs", "sectionid", "itemparagraphs"}, "commondomfilters")
    make:match("html?$", process)
  end
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-common_filters.lua
================================================
local M = {}
local filter = require "make4ht-filter"
local process = filter({"cleanspan-nat", "fixligatures", "hruletohr", "entities", "fix-links"}, "commonfilters")
-- filters support only html formats
-- The filters support only HTML formats: false is returned for "odt",
-- true for any other format.
function M.test(format)
  return format ~= "odt"
end
-- Register the common filters for HTML files and move the newly added
-- match to the first position of make.matches. The filters should be
-- the first match that is executed, especially if tidy should be
-- executed as well.
function M.modify_build(make)
  make:match("html?$", process)
  local matches = make.matches
  if #matches > 1 then
    -- take the last match and put it to the beginning
    table.insert(matches, 1, table.remove(matches))
  end
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-copy_images.lua
================================================
local M = {}
local mkutils = require "mkutils"
local domfilter = require "make4ht-domfilter"
local copied_images = {}
-- Copy an image to the output directory.
-- path:       path of the image; URLs are not copied
-- parameters: parameters.outdir is the output directory, "" when not set
-- img_dir:    subdirectory for images in the output directory, "" for none
-- Returns nil and a message for URLs, nothing otherwise.
local function image_copy(path, parameters, img_dir)
  if mkutils.is_url(path) then
    return nil, "External image"
  end
  -- get image basename
  local basename = path:match("([^/]+)$")
  -- an empty outdir is kept empty, the "/" separator is added otherwise
  local outdir = ""
  if parameters.outdir ~= "" then
    outdir = parameters.outdir .. "/"
  end
  if img_dir ~= "" then
    outdir = outdir .. img_dir .. "/"
  end
  -- handle repeated slashes
  outdir = outdir:gsub("%/+","/")
  local output_file = outdir .. basename
  if outdir ~= "" then
    mkutils.copy(path, output_file)
  else
    mkutils.cp(path, output_file)
  end
end
-- filters support only html formats
-- The extension supports only HTML formats: false is returned for "odt",
-- true for any other format.
-- The format is not saved anywhere: this file doesn't declare a variable
-- for it, so an assignment here would create a global variable.
function M.test(format)
  return format ~= "odt"
end
-- Register the image copying in the build:
-- * files with the configured extensions are copied by image_copy and
--   excluded from further processing
-- * the src attributes of <img> elements in HTML files are rewritten, so
--   they point to the copied files
-- Settings of the "copy_images" filter: img_dir (default "") and
-- extensions (default jpg, png, jpeg and svg).
function M.modify_build(make)
  local ext_settings = get_filter_settings "copy_images" or {}
  local img_dir = ext_settings.img_dir or ""
  local img_extensions = ext_settings.extensions or {"jpg", "png", "jpeg", "svg"}
  -- DOM filter that fixes paths of local images
  local function rewrite_sources(dom, par)
    for _, img in ipairs(dom:query_selector("img")) do
      local src = img:get_attribute("src")
      if src and not mkutils.is_url(src) then
        -- remove path specification
        src = src:match("([^/]+)$")
        if img_dir ~= "" then
          src = (img_dir .. "/" .. src):gsub("%/+", "/")
        end
        img:set_attribute("src", src)
      end
    end
    return dom
  end
  local process = domfilter({ rewrite_sources }, "copy_images")
  -- add a matcher for each image extension
  for _, ext in ipairs(img_extensions) do
    local pattern = ext .. "$"
    make:match(pattern, function(path, parameters)
      image_copy(path, parameters, img_dir)
      -- prevent further processing of the image
      return false
    end)
  end
  make:match("html$", process, {img_dir = img_dir})
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-detect_engine.lua
================================================
-- support magic comments used by TeXShop and TeXWorks to detect used engine and format
--
local M = {}
local log = logging.new("detect engine")
local htlatex = require "make4ht-htlatex"
-- we must change build sequence when Plain TeX is requested
-- The table is indexed by the lowercased program name from the magic comment.
-- Fields of each entry (both are optional):
--   htlatex: value that replaces params.htlatex of the "htlatex" build step
--   command: function that replaces the command of the "htlatex" build step;
--            it is set to htlatex.httex for the non-LaTeX programs below
local change_table = {
tex = {
htlatex = "etex",
command = htlatex.httex
},
pdftex = {
htlatex = "etex",
command = htlatex.httex
},
etex = {
htlatex = "etex",
command = htlatex.httex
},
luatex = {
htlatex = "dviluatex",
command = htlatex.httex
},
xetex = {
htlatex = "xetex -no-pdf",
command = htlatex.httex
},
-- entries without the command field keep the command of the build step
xelatex = {
htlatex = "xelatex -no-pdf",
},
lualatex = {
htlatex = "dvilualatex",
},
pdflatex = {
htlatex = "latex"
},
harflatex = {
htlatex = "lualatex-dev --output-format=dvi"
},
harftex= {
htlatex = "harftex --output-format=dvi",
command = htlatex.httex
}
}
-- Find the magic line with the program name, like
--   % !TeX program = lualatex
-- The file is read line by line; the search stops at the first line that
-- is neither empty nor contains a comment.
-- Returns the lowercased program name. When the search is stopped by
-- such a line, nil and an error message are returned; nothing is
-- returned when the end of the file is reached.
local function find_magic_program(filename)
  for line in io.lines(filename) do
    local comment = line:match("%s*%%%s*(.+)")
    if comment then
      local program = comment:match("!%s*[Tt][Ee][Xx].-program%s*=%s*([^%s]+)")
      if program then
        return program:lower()
      end
    elseif not line:match("^%s*$") then
      return nil, "Cannot find program name"
    end
  end
end
-- update htlatex entries with detected program
-- Update the build sequence for the detected program:
-- * "htlatex" entries get the command and the htlatex parameter from
--   change_table, if the program has an entry with these fields
-- * if the program name contains "xe", " -.xdv" is appended to the
--   tex4ht_par parameter of "tex4ht" entries, because tex4ht must process
--   the .xdv file if the TeX file was compiled by XeTeX
-- program:   lowercased program name
-- build_seq: array of build sequence entries, modified in place
local function update_build_sequence(program, build_seq)
  local replaces = change_table[program] or {}
  local is_xetex = program:match("xe")
  for _, entry in ipairs(build_seq) do
    local step = entry.name
    if step == "htlatex" then
      -- handle httex
      entry.command = replaces.command or entry.command
      local params = entry.params or {}
      params.htlatex = replaces.htlatex or params.htlatex
      entry.params = params
    elseif is_xetex and step == "tex4ht" then
      local params = entry.params
      params.tex4ht_par = params.tex4ht_par .. " -.xdv"
    end
  end
end
-- Select the TeX engine according to the magic comment in the TeX file.
-- The build sequence is left untouched when no program name is found.
function M.modify_build(make)
  local sequence = make.build_seq
  local program = find_magic_program(make.params.tex_file)
  if not program then
    log:warning("Cannot find magic line with the program name")
    return make
  end
  log:info("Found program name", program)
  update_build_sequence(program, sequence)
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-dvisvgm_hashes.lua
================================================
local dvireader = require "make4ht-dvireader"
local mkutils = require "mkutils"
local filter = require "make4ht-filter"
local log = logging.new "dvisvgm_hashes"
-- values substituted into the ${...} placeholders of dvisvgm_options
local dvisvgm_par = {}
local M = {}
-- mapping between tex4ht image names and hashed image names
local output_map = {}
-- default dvisvgm options, ${scale} is filled in from dvisvgm_par
local dvisvgm_options = "-n --exact --embed-bitmaps -c ${scale},${scale}"
-- a makefile is used when more than this number of pages must be converted,
-- it is also the number of pages handled by one makefile target
local parallel_size = 64
-- template of the command that executes the generated makefile
local make_command = "make -j ${process_count} -f ${make_file}"
-- command used to test whether Make is available
local test_make_command = "make -v"
-- local parallel_size = 3
-- Build the name of the SVG file for an image with the given hash.
local function make_hashed_name(base, hash)
  local suffix = "-" .. hash .. ".svg"
  return base .. suffix
end
-- detect the number of available processors
-- number of processes used by Make; 3 is a reasonable default for systems
-- where the number of processors cannot be detected
local cpu_cnt = 3
if os.name == 'linux' then
  -- count the "processor" entries; the default is kept when the file
  -- cannot be read instead of aborting the module loading
  local cpuinfo = io.open('/proc/cpuinfo', 'r')
  if cpuinfo then
    local found = 0
    for line in cpuinfo:lines() do
      if line:match('^processor') then
        found = found + 1
      end
    end
    cpuinfo:close()
    -- use one process if no CPU core has been found
    cpu_cnt = found > 0 and found or 1
  end
elseif os.name == 'cygwin' or os.type == 'windows' then
  -- windows has NUMBER_OF_PROCESSORS environmental value;
  -- store it as a number, like in the other branches
  local nop = tonumber(os.getenv('NUMBER_OF_PROCESSORS'))
  if nop and nop >= 1 then
    cpu_cnt = nop
  end
end
-- process output of dvisvgm and find output page numbers and corresponding files
-- Parse the dvisvgm log and collect the files written for each page.
-- @param output contents of the dvisvgm log (stderr of dvisvgm)
-- @param pages optional table that is extended with the new records
-- @return table mapping page numbers to names of the generated files
local function get_generated_pages(output, pages)
  local pages = pages or {}
  -- the log may be missing when dvisvgm failed
  if type(output) ~= "string" then return pages end
  local _, finish, page = string.find(output, "processing page (%d+)", 1)
  while finish do
    local file
    -- the file name runs to the end of the line
    _, finish, file = string.find(output, "output written to ([^\r\n]+)", finish)
    if not finish then break end
    pages[tonumber(page)] = file
    _, finish, page = string.find(output, "processing page (%d+)", finish)
  end
  return pages
end
-- Compress a list of page numbers: runs of consecutive pages are replaced
-- by "first-last" strings, the remaining pages are kept as they are.
local function make_ranges(pages)
  local ranges = {}
  -- first page of the run that is being collected, nil outside of a run
  local first
  local count = #pages
  for idx = 1, count do
    local page = pages[idx]
    local following = pages[idx + 1]
    local continues = following ~= nil and (following - page) == 1
    if continues then
      first = first or page
    else
      if first then
        ranges[#ranges + 1] = first .. "-" .. page
      else
        ranges[#ranges + 1] = page
      end
      first = nil
    end
  end
  return ranges
end
-- Return the contents of the dvisvgm log, or nil and an error message
-- when the log cannot be opened.
local function read_log(dvisvgmlog)
  local handle = io.open(dvisvgmlog, "rb")
  if handle == nil then
    return nil, "Cannot read dvisvgm log"
  end
  local content = handle:read("*all")
  handle:close()
  return content
end
-- test the existence of GNU Make, which can execute tasks in parallel
-- Test that the Make command is available.
-- @return true if test_make_command printed some output
local function test_make()
  -- io.popen raises an error on platforms without popen support and may
  -- return nil when the process cannot be started
  local ok, make = pcall(io.popen, test_make_command, "r")
  if not ok or not make then return false end
  local content = make:read("*all")
  make:close()
  -- a handle is returned also for commands that don't exist, in that case
  -- the content is empty
  return content ~= nil and content ~= ""
end
-- Write text to a file.
-- @return true on success, nil and an error message when the file cannot
-- be opened for writing
local function save_file(filename, text)
  local f, err = io.open(filename, "w")
  if not f then
    local msg = "Cannot write file " .. tostring(filename) .. ": " .. tostring(err)
    log:warning(msg)
    return nil, msg
  end
  f:write(text)
  f:close()
  return true
end
-- Save a makefile with one dvisvgm target for each page sequence.
-- @param idvfile name of the .idv file
-- @param page_sequences list of page range strings for the -p option
-- @return the Make command, and the list of log files written by the targets
local function make_makefile_command(idvfile, page_sequences)
  local logs = {}
  local all = {} -- list of targets in the "all:" makefile target
  local targets = {}
  -- the dot must be escaped, otherwise it matches any character
  local basename = idvfile:gsub("%.idv$", "")
  local makefilename = basename .. "-images" .. ".mk"
  -- build make targets
  for i, ranges in ipairs(page_sequences) do
    local target = basename .. "-" .. i
    -- stderr of each dvisvgm call is saved to its own log file
    local logfile = target .. ".dlog"
    logs[#logs + 1] = logfile
    all[#all + 1] = target
    targets[#targets + 1] = target .. ":\n\tdvisvgm -v4 " .. dvisvgm_options .. " -p " .. ranges .. " " .. idvfile .. " 2> " .. logfile .. "\n"
  end
  -- construct makefile and save it
  local makefile = "all: " .. table.concat(all, " ") .. "\n\n" .. table.concat(targets, "\n")
  save_file(makefilename, makefile)
  local command = make_command % {process_count = cpu_cnt, make_file = makefilename}
  log:debug("Makefile command: " .. command)
  return command, logs
end
-- Prepare the command that converts the pages to SVG.
-- A single dvisvgm call is used by default. When there are more pages than
-- parallel_size and Make is available, the pages are split to chunks that
-- are processed using a generated makefile.
-- @return the command, and the list of log files it will write
local function prepare_command(idvfile, pages)
  local use_make = #pages > parallel_size and test_make()
  if not use_make then
    local pagesequence = table.concat(make_ranges(pages), ",")
    -- the stderr from dvisvgm must be redirected and postprocessed
    local dvisvgmlog = idvfile:gsub("idv$", "dlog")
    local command = "dvisvgm -v4 " .. dvisvgm_options .. " -p " .. pagesequence .. " " .. idvfile .. " 2> " .. dvisvgmlog
    return command, {dvisvgmlog}
  end
  local page_sequences = {}
  for first = 1, #pages, parallel_size do
    -- pages handled by one makefile target
    local chunk = {}
    for idx = first, first + parallel_size - 1 do
      chunk[#chunk + 1] = pages[idx]
    end
    page_sequences[#page_sequences + 1] = table.concat(make_ranges(chunk), ",")
  end
  return make_makefile_command(idvfile, page_sequences)
end
-- Convert the requested pages and find the names of the generated files.
-- @param idvfile name of the .idv file
-- @param pages list of page numbers to convert
-- @return table mapping page numbers to generated files, or nil and
-- an error message when there are no pages to convert
local function execute_dvisvgm(idvfile, pages)
  if #pages < 1 then return nil, "No pages to convert" end
  local command, logs = prepare_command(idvfile, pages)
  log:info(command)
  os.execute(command)
  local generated_pages = {}
  for _, dvisvgmlog in ipairs(logs) do
    local output, msg = read_log(dvisvgmlog)
    if output then
      generated_pages = get_generated_pages(output, generated_pages)
    else
      -- the log is missing when the command failed, skip it
      log:warning(tostring(msg) .. ": " .. tostring(dvisvgmlog))
    end
  end
  return generated_pages
end
-- Convert images that don't have an up-to-date SVG file yet.
-- Output names are based on the page hashes returned by the DVI reader,
-- existing files are not converted again.
-- @param arg command parameters; `input` is required, the `options`,
-- `parallel_size`, `cpu_cnt`, `scale`, `make_command` and
-- `test_make_command` fields override the extension settings
-- @return nil and an error message on failure
local function get_dvi_pages(arg)
  -- list of pages to convert in this run
  local to_convert = {}
  local idv_file = arg.input .. ".idv"
  -- set extension options
  local extoptions = mkutils.get_filter_settings "dvisvgm_hashes" or {}
  dvisvgm_options = arg.options or extoptions.options or dvisvgm_options
  parallel_size = arg.parallel_size or extoptions.parallel_size or parallel_size
  cpu_cnt = arg.cpu_cnt or extoptions.cpu_cnt or cpu_cnt
  dvisvgm_par.scale = arg.scale or extoptions.scale or 1.4
  dvisvgm_options = dvisvgm_options % dvisvgm_par
  make_command = arg.make_command or extoptions.make_command or make_command
  test_make_command = arg.test_make_command or extoptions.test_make_command or test_make_command
  local f = io.open(idv_file, "rb")
  if not f then return nil, "Cannot open idv file: " .. idv_file end
  local content = f:read("*all")
  f:close()
  local dvi_pages = dvireader.get_pages(content)
  -- we must find page numbers and output names for the generated images
  local lg = mkutils.parse_lg(arg.input ..".lg", arg.builddir)
  for _, name in ipairs(lg.images) do
    local page = tonumber(name.page)
    local hash = page and dvi_pages[page]
    if hash then
      local tex4ht_name = name.output
      local output_name = make_hashed_name(arg.input, hash)
      output_map[tex4ht_name] = output_name
      if not mkutils.file_exists(output_name) then
        log:debug("output file: ".. output_name)
        to_convert[#to_convert+1] = page
      end
    else
      -- the hashed name cannot be constructed without the hash
      log:warning("Cannot find page hash for image: " .. tostring(name.output))
    end
  end
  local generated_files, msg = execute_dvisvgm(idv_file, to_convert)
  if not generated_files then
    return nil, msg
  end
  -- rename the generated files to the hashed filenames
  for page, file in pairs(generated_files) do
    local hash = dvi_pages[page]
    if hash then
      os.rename(file, make_hashed_name(arg.input, hash))
    end
  end
end
-- The extension is enabled for all formats except ODT,
-- which doesn't support SVG.
function M.test(format)
  return format ~= "odt"
end
-- Register the dvisvgm_hashes command, replace the image names with the
-- hashed names in the processed files and disable the default image
-- conversion.
function M.modify_build(make)
  -- the command can be used in the .mk4 file as
  -- Make:dvisvgm_hashes {}
  local function convert_images(arg)
    get_dvi_pages(arg)
  end
  make:add("dvisvgm_hashes", convert_images, {})
  -- insert dvisvgm_hashes command at the end of the build sequence -- it needs to be called after t4ht
  make:dvisvgm_hashes {}

  -- replace original image names with hashed names, only for the first
  -- matched file
  local executed = false
  local function rename_lg_records(arg)
    if executed then return end
    executed = true
    local lgfiles = make.lgfile.files
    for i, filename in ipairs(lgfiles) do
      local replace = output_map[filename]
      if replace then lgfiles[i] = replace end
    end
    -- tex4ebook process also the images table, so we need to replace generated filenames here as well
    for _, image in ipairs(make.lgfile.images) do
      local replace = output_map[image.output]
      if replace then image.output = replace end
    end
  end
  make:match(".*", rename_lg_records)

  -- fix src attributes
  local function fix_src(str, filename)
    return str:gsub('src=["\'](.-)(["\'])', function(image, endquote)
      local newname = output_map[image] or image
      log:debug("newname", newname)
      return 'src=' .. endquote .. newname .. endquote
    end)
  end
  local process = filter({fix_src}, "dvisvgmhashes")
  make:match("htm.?$", process)

  -- disable the image processing
  for _, step in ipairs(make.build_seq) do
    if step.name == "t4ht" then
      local t4ht_par = step.params.t4ht_par or make.params.t4ht_par or ""
      step.params.t4ht_par = t4ht_par .. " -p"
    end
  end
  make:image(".", function() return "" end)
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-inlinecss.lua
================================================
local M = {}
local filter = require "make4ht-domfilter"

-- the extension is enabled only for formats with "html" in their name
function M.test(format)
  return format:match("html") ~= nil
end

-- run the inlinecss DOM filter on the generated HTML files
function M.modify_build(make)
  local inlinecss = filter({"inlinecss"}, "inlinecss")
  make:match("html?$", inlinecss)
  return make
end

return M
================================================
FILE: extensions/make4ht-ext-join_colors.lua
================================================
local M = {}
local filter = require "make4ht-domfilter"

-- the extension is enabled for all formats except ODT
function M.test(format)
  return format ~= "odt"
end

-- run the joincolors DOM filter on the generated HTML files
function M.modify_build(make)
  local joincolors = filter({"joincolors"}, "joincolors")
  make:match("html?$", joincolors)
  return make
end

return M
================================================
FILE: extensions/make4ht-ext-latexmk_build.lua
================================================
-- use Latexmk in first LaTeX call
-- only in the first call, because we don't need to execute biber, etc. in the subsequent
-- LaTeX calls, these are only for resolving the cross-references
local M = {}
-- names of the build sequence commands that are treated as LaTeX calls
local htlatex_names = {
  htlatex = true,
  autohtlatex = true,
}

-- Replace the first LaTeX call in the build sequence with Latexmk and
-- remove the remaining `htlatex` calls.
-- @param make the build object; its `build_seq` field is replaced
-- @return make
function M.modify_build(make)
  -- NOTE(review): mkutils isn't required anywhere in this file, so it is
  -- presumably available as a global; the module is loaded as a fallback
  local mkutils = mkutils or require "mkutils"
  local first
  local build_seq = make.build_seq
  -- find first htlatex call in the build sequence
  for pos, v in ipairs(build_seq) do
    if htlatex_names[v.name] and not first then
      first = pos
    end
  end
  -- we need to save contents of the .tmp file, to prevent extra executions from latexmk
  -- tex4ht command overwrites content that was set by LaTeX with its own stuff
  local tmp_file
  make:add("save_tmp", function(par)
    local f = io.open(mkutils.file_in_builddir(par.input .. ".tmp", par), "r")
    if f then
      tmp_file = f:read("*all")
      f:close()
    end
    return 0
  end)
  make:add("load_tmp", function(par)
    if tmp_file then
      local f = io.open(mkutils.file_in_builddir(par.input .. ".tmp", par), "w")
      if f then
        f:write(tmp_file)
        -- the file must be closed, otherwise the contents may not be
        -- written to the disk before the file is used again
        f:close()
      end
    end
    return 0
  end)
  -- if htlatex was found
  if first then
    -- handle tmp file
    make:load_tmp {}
    make:save_tmp {}
    -- add dummy latexmk call to the build sequence
    make:latexmk {}
    -- replace name, command and type in the first htlatex
    -- call with values from the dummy latexmk call
    local replaced = build_seq[first]
    local latexmk = build_seq[#build_seq]
    replaced.name = latexmk.name
    replaced.command = latexmk.command
    replaced.type = latexmk.type
    -- remove the dummy latexmk
    table.remove(build_seq)
  end
  -- remove htlatex calls from the build sequence, they are unnecessary
  local new_build_seq = {}
  for _, v in ipairs(build_seq) do
    if v.name ~= "htlatex" and v.name ~= "tex4ht" then
      table.insert(new_build_seq, v)
    elseif v.name == "tex4ht" then
      -- insert save_tmp before tex4ht
      table.insert(new_build_seq, build_seq[#build_seq])
      -- remove save_tmp from the end
      table.remove(build_seq)
      -- and now insert tex4ht
      table.insert(new_build_seq, v)
    end
  end
  make.build_seq = new_build_seq
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-mathjaxnode.lua
================================================
local M = {}
local filter = require "make4ht-filter"

-- the extension is enabled for all formats except ODT
function M.test(format)
  return format ~= "odt"
end

-- this extension needs mathml enabled
function M.prepare_parameters(params)
  local sty_par = params.tex4ht_sty_par
  params.tex4ht_sty_par = sty_par .. ",mathml"
  return params
end

-- run the mathjaxnode filter on the generated HTML files
function M.modify_build(make)
  local mathjax = filter({ "mathjaxnode"}, "mathjaxnode")
  make:match("html?$", mathjax)
  return make
end

return M
================================================
FILE: extensions/make4ht-ext-mjcli.lua
================================================
local M = {}
local filter = require "make4ht-filter"

-- this extension works only for formats based on HTML, as it produces
-- custom HTML tags that would be illegal in XML
function M.test(format)
  return format:match("html5?$") ~= nil
end

-- set to true when the "mathjax" option was passed to make4ht
local detected_latex = false

-- mjcli supports both MathML and LaTeX math input.
-- LaTeX math is kept if the user uses the "mathjax" option for make4ht,
-- the "mathjax" option used in \Preamble in the .cfg file doesn't work.
-- MathML is requested in all other cases.
function M.prepare_parameters(params)
  local sty_par = params.tex4ht_sty_par
  if sty_par:match("mathjax") then
    detected_latex = true
    return params
  end
  params.tex4ht_sty_par = sty_par .. ",mathml"
  return params
end

-- run the mjcli filter on the generated HTML files, the `latex` parameter
-- is set only when LaTeX math input was detected
function M.modify_build(make)
  local mathjax = filter({ "mjcli"}, "mjcli")
  local params = { latex = detected_latex or nil }
  make:match("html?$", mathjax, params)
  return make
end

return M
================================================
FILE: extensions/make4ht-ext-nodynamicodt.lua
================================================
local M = {}
-- this extension converts links, tables of contents and other dynamic content in the ODT format to plain text
local filter = require "make4ht-domfilter"
-- this extension only works for the ODT format
-- the extension is enabled only for the "odt" format
function M.test(format)
  local is_odt = (format == "odt")
  return is_odt
end
-- DOM filter that replaces links, bibliographies and tables of contents
-- with static elements.
-- @param dom DOM object of the processed file
-- @return the modified DOM object
local function nodynamiccontent(dom)
  for _, link in ipairs(dom:query_selector("text|a")) do
    -- change links to spans
    link._name = "text:span"
    -- remove attributes
    link._attr = {}
  end
  for _, bibliography in ipairs(dom:query_selector("text|bibliography")) do
    -- remove links from bibliography
    -- use div instead of bibliography
    bibliography._name = "text:div"
    -- remove bibliography-source elements; the namespace is separated by
    -- "|" in selectors, like in the other queries in this function
    for _, source in ipairs(bibliography:query_selector("text|bibliography-source")) do
      source:remove_node()
    end
    for _, index in ipairs(bibliography:query_selector("text|index-body")) do
      -- use div instead of index-body
      index._name = "text:div"
    end
  end
  for _, toc in ipairs(dom:query_selector("text|table-of-content")) do
    -- remove links from toc
    -- use div instead of table-of-content
    toc._name = "text:div"
    for _, entry in ipairs(toc:query_selector("text|index-body, text|index-title")) do
      -- use div instead of index-body and index-title
      entry._name = "text:div"
    end
  end
  return dom
end
-- Run the nodynamiccontent DOM filter on files that match "4oo$".
-- @param make the build object
-- @return make
M.modify_build = function(make)
  local process = filter({nodynamiccontent}, "nodynamiccontent")
  -- register the match on the object passed to this function,
  -- not on the global Make object
  make:match("4oo$", process)
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-odttemplate.lua
================================================
local M = {}
local filter = require "make4ht-filter"

-- this extension only works for the ODT format
function M.test(format)
  return format == "odt"
end

-- run the odttemplate filter on files that match "4oy$"
function M.modify_build(make)
  local odttemplate = filter({"odttemplate"}, "odttemplate")
  make:match("4oy$", odttemplate)
  return make
end

return M
================================================
FILE: extensions/make4ht-ext-preprocess_input.lua
================================================
-- preprocess R literate sources or Markdown files to LaTeX
local M = {}
local log = logging.new "preprocess_input"
local mkutils = require "mkutils"
-- Templates of the preprocessing commands. The placeholders are filled in
-- before the execution:
--   ${tex_file}     - input file of the current step
--   ${tmp_file}     - output file of the current step
--   ${input_format} - set by the options of the matched file type
local commands = {
knitr = { command = 'Rscript -e "library(knitr); knit(\'${tex_file}\', output=\'${tmp_file}\')"'},
pandoc = { command = 'pandoc -f ${input_format} -s -o \'${tmp_file}\' -t latex \'${tex_file}\''},
render = { command = 'Rscript -e "library(rmarkdown); render(\'${tex_file}\', output_file=\'${tmp_file}\',output_format = \'latex_document\')"'}
}
-- Supported file types, keyed by the lowercase file extension:
--   sequence - names of the commands to execute, in this order
--   options  - extra values for the command templates
local filetypes = {
rnw = {sequence = {"knitr"} },
rtex = {sequence = {"knitr"}},
rmd = {sequence = {"render"}},
rrst = {sequence = {"knitr", "pandoc"}, options = {input_format = "rst"}},
md = {sequence = {"pandoc"}, options = {input_format = "markdown"}},
rst = {sequence = {"pandoc"}, options = {input_format = "rst"}},
}
-- Get the name of the output file for one step of the sequence.
-- We don't want to use the temp dir, because graphics would be then
-- generated outside of the directory of the source document, so the
-- name never contains a directory.
-- @param arg table with the `input` field
-- @param curr position of the current step in the sequence
-- @param length number of steps in the sequence
-- @return file name without directory
local function get_temp_name(arg, curr, length)
  if curr == length then
    -- base tmp_name on the input name in the last step of sequence
    -- so the generated images won't have random names
    return arg.input .. "-preprocess_input"
  end
  local tmp_name = os.tmpname()
  -- os.tmpname can create the file, but only its base name is used,
  -- so the created file would be never removed
  os.remove(tmp_name)
  return tmp_name:match("([^/\\]+)$")
end
-- Execute the preprocessing commands one after another.
-- The output file of each step becomes the `tex_file` of the next step,
-- the first step processes the original `tex_file` from `arg`.
-- @param sequence list of names of commands to execute
-- @param arg values for the command templates; its `tmp_file` and
-- `tex_file` fields are updated in each step
-- @return list of output files of all steps
local function execute_sequence(sequence, arg, make)
  local temp_files = {}
  -- output of the previous step, nil in the first step
  local previous_temp
  local steps = #sequence
  for pos, cmd_name in ipairs(sequence) do
    local tmp_name = get_temp_name(arg, pos, steps)
    temp_files[#temp_files + 1] = tmp_name
    -- file names used by the command template; it doesn't matter that
    -- tex_file isn't a TeX file in some cases
    arg.tmp_file = tmp_name
    if previous_temp then
      arg.tex_file = previous_temp
    end
    previous_temp = tmp_name
    -- fill the command template with make4ht arguments and execute
    local command = commands[cmd_name].command % arg
    log:info(command)
    mkutils.execute(command)
  end
  return temp_files
end
-- Select the preprocessing pipeline according to the file extension.
-- @param input_file name of the input file
-- @return record from the filetypes table, or nil and an error message
local function get_preprocessing_pipeline(input_file)
  -- the extension is the text after the last dot in the file name,
  -- dots in directory names or earlier in the file name are ignored
  local extension = input_file:match("%.([^%./\\]+)$")
  if not extension then return nil, "Cannot get extension: " .. input_file end
  -- the table with file actions is case insensitive
  -- the extension is converted to lowercase in order
  -- to support both .rnw and .Rnw
  extension = string.lower(extension)
  local matched = filetypes[extension]
  if not matched then return nil, "Unsupported extension: " .. extension end
  return matched
end
-- join the make4ht params and command options tables
-- Return a new table with the make4ht params, overridden by the command
-- options. The input tables are not modified.
local function make_options(arg, command_options)
  local merged = {}
  local function copy_into_merged(source)
    for key, value in pairs(source) do
      merged[key] = value
    end
  end
  copy_into_merged(arg)
  copy_into_merged(command_options or {})
  return merged
end
-- Convert the input file to LaTeX and use the result as the TeX file in
-- all commands of the build sequence.
-- @param make the build object
-- @return make, or nothing when the input file type isn't supported
M.modify_build = function(make)
  -- get access to the main arguments
  local arg = make.params
  -- get the execution sequence for the input format
  local matched, msg = get_preprocessing_pipeline(arg.tex_file)
  if not matched then
    log:error("preprocess_input error: ".. msg)
    return
  end
  -- prepare options
  local options = make_options(arg, matched.options)
  -- run the execution sequence
  local temp_files = execute_sequence(matched.sequence or {}, options, make)
  -- the last temporary file contains the actual TeX file
  local last_temp_file = temp_files[#temp_files]
  -- remove the intermediate temp files, it means all files except the last
  -- one; the loop is empty for sequences with only one step
  for i = 1, #temp_files - 1 do
    -- never remove the file that will be compiled
    if temp_files[i] ~= last_temp_file then
      log:debug("Removing temporary file", temp_files[i])
      os.remove(temp_files[i])
    end
  end
  if last_temp_file then
    -- update all commands in the .mk4 file with the temp file as tex_file
    local update_params = function(cmd)
      local params = cmd.params
      params.tex_file = last_temp_file
      params.is_tmp_file = true
    end
    for _, cmd in ipairs(make.build_seq) do
      update_params(cmd)
    end
    -- also update the main params
    update_params(make)
  end
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-staticsite.lua
================================================
local M = {}
local filter = require "make4ht-filter"
local mkutils = require "mkutils"
local log = logging.new "staticsite"
-- get the published file name
-- Get the name of the published file.
-- The publishing time is saved in the `.published` file next to the TeX
-- file, so the subsequent compilations at different days use the same name.
-- The publishing and update times are also saved in the "staticsite"
-- filter settings.
-- @param settings make4ht parameters, `tex_file` is required; the table is
-- also used to fill the placeholders in the file name pattern
-- @return file name without extension
local function get_slug(settings)
  local published_name = mkutils.remove_extension(settings.tex_file) .. ".published"
  -- the extension may be used without any settings
  local config = get_filter_settings "staticsite" or {}
  local file_pattern = config.file_pattern or "%Y-%m-%d-${input}"
  local time = os.time()
  -- read the saved publishing time; it is ignored if the file cannot be
  -- read or if it doesn't contain a number
  local published
  if mkutils.file_exists(published_name) then
    local f = io.open(published_name, "r")
    if f then
      published = tonumber(f:read("*line"))
      f:close()
    end
  end
  if published then
    time = published
    log:info("Already pubslished", os.date("%Y-%m-%d %H:%M", time))
  else
    local f = io.open(published_name, "w")
    log:info("Publishing article", os.date("%Y-%m-%d %H:%M", time))
    if f then
      f:write(time)
      f:close()
    else
      log:warning("Cannot save the publishing time to " .. published_name)
    end
  end
  -- set the updated and publishing times
  local updated
  -- the updated time will be set only when it is more than one day from the published time
  local newtime = os.time()
  if (newtime - time) > (24 * 3600) then updated = newtime end
  filter_settings "staticsite" {
    header = {
      time = time,
      updated = updated
    }
  }
  -- make the output file name, by default in the format
  -- YYYY-MM-DD-old-filename
  local slug = os.date(file_pattern,time) % settings
  return slug
end
-- it is necessary to set correct -jobname in latex_par parameters field
-- in order to the get correct HTML file name
-- Set the -jobname option in the LaTeX parameters to the slug.
-- An existing -jobname=... option is replaced, including a value in single
-- or double quotes; otherwise the option is added to the end.
-- @param slug the new job name
-- @param latex_par current LaTeX parameters, may be nil
-- @return updated LaTeX parameters
local function update_jobname(slug, latex_par)
  local latex_par = latex_par or ""
  local jobname = "-jobname=" .. slug
  -- function is used as replacement, so a percent sign in the slug isn't
  -- interpreted as a capture reference
  local function replacement() return jobname end
  -- quoted values must be tried first, they can contain spaces
  local patterns = {
    '%-jobname=%b""',
    "%-jobname=%b''",
    "%-jobname=[^%s]+",
  }
  for _, pattern in ipairs(patterns) do
    local updated, count = latex_par:gsub(pattern, replacement)
    if count > 0 then return updated end
  end
  -- the new option must be separated from the previous ones
  if latex_par == "" or latex_par:match("%s$") then
    return latex_par .. jobname
  end
  return latex_par .. " " .. jobname
end
-- execute the function passed as parameter only once, when the file matching
-- starts
-- Insert a match for the pattern to the first place in the list of
-- matches. The passed function is called only in the first execution of
-- the match.
local function insert_filter(make, pattern, fn)
  local already_run = false
  local function run_once()
    if not already_run then
      fn()
    end
    already_run = true
  end
  local entry = {
    pattern = pattern,
    params = make.params or {},
    command = run_once,
  }
  table.insert(make.matches, 1, entry)
end
-- Register a DOM filter that removes all elements with the `maketitle`
-- class from files that match "html$".
local function remove_maketitle(make)
  local domfilter = require "make4ht-domfilter"
  local function drop_maketitle(dom)
    for _, el in ipairs(dom:query_selector(".maketitle")) do
      log:debug("removing maketitle")
      el:remove_node()
    end
    return dom
  end
  local process = domfilter({drop_maketitle}, "staticsite")
  make:match("html$", process)
end
-- Copy the file to the static site directory.
-- The subdirectory is selected by the first pattern from the `map` table in
-- the "staticsite" settings that matches the file name; the table is
-- traversed using pairs, so the order of the tests isn't defined. Files
-- without a match are copied directly to the site root.
-- @param filename name of the copied file
-- @param par parameters of the match; `builddir` and `outdir` are used
-- here, and the table fills the placeholders in the destination path
local function copy_files(filename, par)
  -- escape characters with special meaning in Lua patterns
  local function escape_pattern(s)
    return (s:gsub("%p", "%%%0"))
  end
  local function prepare_path(dir, subdir)
    local f = filename
    -- the build dir name must be matched literally; an empty build dir
    -- must not remove the leading slash of absolute paths
    if par.builddir and par.builddir ~= "" then
      f = f:gsub("^" .. escape_pattern(par.builddir) .. "/", "")
    end
    local path = dir .. "/" .. subdir .. "/" .. f
    -- collapse also sequences of more than two slashes
    return (path:gsub("//+", "/"))
  end
  -- get extension settings
  local site_settings = get_filter_settings "staticsite" or {}
  local site_root = site_settings.site_root or par.outdir or ""
  if site_root == "" then site_root = "./" end
  local map = site_settings.map or {}
  -- default path without subdir, will be used if the file is not matched
  -- by any pattern in the map
  local path = prepare_path(site_root, "")
  for pattern, destination in pairs(map) do
    if filename:match(pattern) then
      path = prepare_path(site_root, destination)
      break
    end
  end
  -- it is possible to use string extrapolation in path, for example for slug
  mkutils.copy(filename, path % par)
end
-- Configure the build for a static site: the output files are renamed
-- to the slug, \maketitle is optionally removed, and the generated files
-- are processed by the staticsite filter and copied to the site.
-- @param make the build object
-- @return make
function M.modify_build(make)
  local process = filter({
    "staticsite"
  }, "staticsite")
  -- the extension may be used without any settings
  local site_settings = get_filter_settings "staticsite" or {}
  -- \maketitle is removed by default, set `remove_maketitle=false` setting to disable that
  if site_settings.remove_maketitle ~= false then
    remove_maketitle(make)
  end
  local settings = make.params
  -- get the published file name
  local slug = get_slug(settings)
  for _, cmd in ipairs(make.build_seq) do
    -- all commands must use the published file name
    cmd.params.input = slug
    cmd.params.latex_par = update_jobname(slug, cmd.params.latex_par)
  end
  -- it is necessary to insert the filter for the generated files and the
  -- file copying as last matches; a match which is executed only once as
  -- the very first one is used to insert them
  insert_filter(make, ".*", function()
    local params = make.params
    params.slug = slug
    make:match("html?$", process, params)
    make:match(".*", copy_files, params)
  end)
  return make
end
return M
================================================
FILE: extensions/make4ht-ext-tidy.lua
================================================
local M = {}
local log = logging.new "tidy"
-- the extension is enabled for all formats except ODT
function M.test(format)
  return format ~= "odt"
end
-- HTML elements without content and end tags
local empty_elements = {
  area=true,
  base=true,
  br=true,
  col=true,
  embed=true,
  hr=true,
  img=true,
  input=true,
  keygen=true,
  link=true,
  meta=true,
  param=true,
  source=true,
  track=true,
  wbr=true,
}

-- LuaXML cannot read HTML with unclosed tags (like <meta name="hello" content="world">)
-- Tidy removes end slashes in the HTML output, so
-- this function will add them back
-- @param s HTML code
-- @return HTML code where tags of all empty elements end with a slash
local function close_tags(s)
  local closed = s:gsub("<(%w+)([^>]-)>", function(tag, rest)
    -- tags that already end with a slash must not get another one
    if empty_elements[tag:lower()] and not rest:match("/%s*$") then
      return string.format("<%s%s />", tag, rest)
    end
    -- other tags are kept as they are, because nothing is returned
  end)
  return closed
end
-- Process the generated HTML files with the `tidy` command.
-- Command options can be set using the `options` field, either in the
-- parameters of the match or in the "tidy" settings.
-- @param make the build object
-- @return make
function M.modify_build(make)
  make:match("html?$", function(filename, par)
    local settings = get_filter_settings "tidy" or {}
    par.options = par.options or settings.options or "-utf8 -w 512 -ashtml -q"
    local command = "tidy ${options} ${filename}" % par
    log:info("running tidy: ".. command)
    local run = io.popen(command, "r")
    -- the process may fail to start
    if not run then
      log:warning("Cannot execute Tidy command")
      return nil
    end
    local result = run:read("*all")
    run:close()
    -- the file is kept unchanged when Tidy didn't print anything
    if not result or result == "" then
      log:warning("Cannot execute Tidy command")
      return nil
    end
    result = close_tags(result)
    local f, err = io.open(filename, "w")
    if not f then
      log:warning("Cannot write file " .. filename .. ": " .. tostring(err))
      return nil
    end
    f:write(result)
    f:close()
  end)
  return make
end
return M
================================================
FILE: filters/make4ht-cleanspan-nat.lua
================================================
-- cleanspan function submitted by Nat Kuhn
-- http://www.natkuhn.com/
-- Join adjacent <span> elements that have identical attributes.
-- Only whitespace is allowed between the joined elements, and the first
-- element must not contain any child elements.
-- @param s HTML code
-- @return HTML code with joined spans
local function filter(s)
  local pattern = "(<span%s+([^>]+)>[^<]*)</span>(%s*)<span%s+%2>"
  -- the counter must be local, it would leak to the global scope otherwise
  local count
  -- one pass joins only pairs of spans, so repeat until nothing changes
  repeat
    s, count = s:gsub(pattern, "%1%3")
  until count == 0
  return s
end
return filter
================================================
FILE: filters/make4ht-cleanspan.lua
================================================
-- make4ht-cleanspan4ht.lua
-- fixes spurious <span> elements in tex4ht output
-- Remove the boundary between a closed <span> and an immediately following
-- <span> with the same class.
-- The function is local, so it doesn't overwrite the global `filter`.
-- @param input HTML code
-- @return processed HTML code, and the number of processed <span> tags
local function filter(input)
  -- parse attributes in the key="value" form to a table
  local parse_args = function(s)
    local at = {}
    s:gsub("(%w+)%s*=%s*\"([^\"]-)\"", function(k,w)
      at[k]=w
    end)
    return at
  end
  -- captures: the tag just before the <span> (may be empty), whitespace
  -- between them, and attributes of the <span>
  local pattern = "(<?/?[%w]*>?)([%s]*)<span[%s]*([^>]-)>"
  -- class of the previous processed <span>
  local last_class = ""
  return input:gsub(pattern, function(tag,space, args)
    local attr = parse_args(args) or {}
    local class = attr["class"] or ""
    if tag == "</span>" then
      if class == last_class and class~= "" then
        last_class = class
        -- drop the </span><span ...> pair, it joins the two elements
        return space .. ""
      end
    elseif tag == "" then
      -- no tag directly before this <span>, its class isn't remembered
      class=""
    end
    last_class = class
    return tag ..space .. '<span '..args ..'>'
  end)
end
return filter
================================================
FILE: filters/make4ht-domfilter.lua
================================================
local filter_lib = require "make4ht-filterlib"
local dom = require "luaxml-domobject"
local mkutils = require "mkutils"
local log = logging.new "domfilter"
-- load the DOM filter module with the given name
local function load_filter(filtername)
  local module_name = "domfilters.make4ht-" .. filtername
  return require(module_name)
end
-- get snippet of the position where XML parsing failed
-- Get snippet of the text around the position where the XML parsing failed.
-- @param str the parsed text
-- @param errmsg error raised by the parser; the position in bytes is
-- expected in the form used in messages like this one:
-- /home/mint/texmf/scripts/lua/LuaXML/luaxml-mod-xml.lua:175: Unbalanced Tag (/p) [char=1112]
-- @return the snippet, or a message that the position cannot be found
local function get_html_snippet(str, errmsg)
  -- errors don't need to be strings
  local position = tonumber(tostring(errmsg):match("char=(%d+)"))
  if not position then return "Cannot find error position" end
  -- number of bytes around the error position that should be printed
  local error_context = 100
  local start = math.max(position - error_context, 1)
  local stop = math.min(position + error_context, str:len())
  return str:sub(start, stop)
end
-- save processed names, in order to block multiple executions of the filter
-- sequence on a same file
-- names of the processed files for each sequence name
local processed = {}

-- Add filters to a DOM filter sequence and return the function that
-- executes the sequence on a file.
-- Because XML parsing to DOM is potentially expensive operation, all
-- requests with the same name add their filters to one sequence, which is
-- executed on one DOM object, once for each file. It is possible to
-- request a different sequence using unique name parameter.
-- @param filters list of filters, passed to filter_lib.load_filters
-- @param name name of the sequence, "domfilter" by default
-- @return function(filename, parameters) that processes the file; it
-- returns nil and an error message when the file cannot be loaded or parsed
local function filter(filters, name)
  local name = name or "domfilter"
  local settings = mkutils.get_filter_settings(name) or {}
  local sequence = settings.sequence or {}
  local local_sequence = filter_lib.load_filters(filters, load_filter)
  for _, filter in ipairs(local_sequence) do
    table.insert(sequence, filter)
  end
  settings.sequence = sequence
  mkutils.filter_settings (name) (settings)
  return function(filename, parameters)
    -- parameters are optional
    local parameters = parameters or {}
    -- load processed files for the current filter name
    local processed_files = processed[name] or {}
    -- don't process the file again
    if processed_files[filename] then
      return nil
    end
    local input = filter_lib.load_input_file(filename)
    if not input then return nil, "Cannot load the input file" end
    -- in pure XML, we need to ignore void_elements provided by LuaXML, because these can exist only in HTML
    local no_void_elements = {docbook = {}, jats = {}, odt = {}, tei = {} }
    local void_elements = no_void_elements[parameters.output_format]
    -- we need to use pcall, because XML error would break the whole build process
    -- domobject will be error object if DOM parsing failed
    local status, domobject = pcall(function()
      return dom.parse(input, void_elements)
    end)
    if not status then
      log:warning("XML DOM parsing of " .. filename .. " failed:")
      log:warning(domobject)
      log:debug("Error context:\n" .. (get_html_snippet(input, domobject) or ""))
      log:debug("Trying HTML DOM parsing")
      status, domobject = pcall(function()
        return dom.html_parse(input)
      end)
      if not status then
        log:warning("HTML DOM parsing failed as well")
        return nil, "DOM parsing failed"
      else
        log:warning("HTML DOM parsing OK, DOM filters will be executed")
      end
    end
    -- the filters must be executed in the order in which they were added,
    -- which is guaranteed only by ipairs
    for _,f in ipairs(sequence) do
      domobject = f(domobject,parameters)
    end
    local output = domobject:serialize()
    if output then
      filter_lib.save_input_file(filename, output)
    else
      log:warning("DOM filter failed on ".. filename)
    end
    -- mark the filename as processed
    processed_files[filename] = true
    processed[name] = processed_files
  end
end
return filter
================================================
FILE: filters/make4ht-entities-to-unicode.lua
================================================
-- convert Unicode characters encoded as XML entities back to Unicode
local utfchar = unicode.utf8.char
-- characters that must stay escaped in XML
local disabled = { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" }

-- Convert hexadecimal character entities to Unicode characters.
-- @param content XML code
-- @return converted XML code
return function(content)
  local content = content:gsub("%&%#x([A-Fa-f0-9]+);", function(entity)
    -- convert hexadecimal entity to Unicode
    local char_number = tonumber(entity, 16)
    -- fix for non-breaking spaces, LO cannot open file when they are present
    -- as Unicode, so they are kept as entities
    if char_number == 160 then return "&#xA0;" end
    local newchar = utfchar(char_number)
    -- we don't want to break XML validity with forbidden characters
    return disabled[newchar] or newchar
  end)
  return content
end
================================================
FILE: filters/make4ht-entities.lua
================================================
-- Fix bad entities
-- Sometimes, tex4ht produces named XML entities, which are prohibited in EPUB,
-- for example &nbsp;
-- Rewrite named entities in `s` to numeric character references where a
-- replacement is known (nbsp -> #160); every other entity is written back
-- unchanged. Returns the results of string.gsub (new string, match count).
function filter(s)
  -- entity name -> replacement name, both without the `&` and `;` delimiters
  local numeric_for = { nbsp = "#160" }
  local function rewrite(name)
    return "&" .. (numeric_for[name] or name) .. ";"
  end
  return s:gsub("&(%w+);", rewrite)
end
return filter
================================================
FILE: filters/make4ht-filter.lua
================================================
local filter_lib = require "make4ht-filterlib"
-- Load a filter by its short name: the name is appended to the
-- "filters.make4ht-" prefix and the resulting module is required.
local function load_filter(filtername)
  local module_name = "filters.make4ht-" .. filtername
  return require(module_name)
end
-- Build a file-processing function from `filters`.
-- `filters` is handed to filter_lib.load_filters together with load_filter;
-- the returned sequence is applied to the content of a file.
-- The produced function takes (filename, parameters) and returns
--   false, message  when no filename was given
--   nil, message    when the input file could not be loaded
--   nothing         after the filtered content has been saved
function filter(filters)
  local sequence = filter_lib.load_filters(filters, load_filter)
  local function run(filename, parameters)
    if not filename then
      return false, "filters: no filename"
    end
    local content = filter_lib.load_input_file(filename)
    if not content then
      return nil, "Cannot load the input file"
    end
    -- each filter receives the output of the previous one
    for _, apply in pairs(sequence) do
      content = apply(content, parameters)
    end
    filter_lib.save_input_file(filename, content)
  end
  return run
end
return filter
================================================
FILE: filters/make4ht-fix-links.lua
================================================
-- replace colons in `id` or `href` attributes for local links with underscores
--
-- Replace colons with underscores inside every `href="..."` attribute of `s`,
-- except for attributes that contain a `letter://` sequence.
-- Returns the results of string.gsub (new string, number of attributes seen).
local function fix_href_colons(s)
  local function underscore_colons(attribute)
    -- a lowercase letter followed by `://` marks a link with a scheme
    if attribute:match("[a-z]%://") then
      return attribute
    end
    local fixed = attribute:gsub(":", "_")
    return fixed
  end
  return s:gsub('(href=".-")', underscore_colons)
end
-- Replace colons with underscores inside every `id="..."` attribute of `s`.
-- Returns the results of string.gsub (new string, number of attributes seen).
local function fix_id_colons(s)
  local function underscore_colons(attribute)
    local fixed = attribute:gsub(":", "_")
    return fixed
  end
  return s:gsub('(id=".-")', underscore_colons)
end
-- Module entry point: fix the `href` attributes first, then the `id`
-- attributes, and return whatever the `id` pass returns.
return function(s)
  local hrefs_fixed = fix_href_colons(s)
  return fix_id_colons(hrefs_fixed)
end
================================================
FILE: filters/make4ht-fixligatures.lua
================================================
-- fix ligatures
-- replace ligatures produced by tex4ht with their components
-- this prevents problems with some readers
local gsub = unicode.utf8.gsub
-- Replace Latin ligature characters in `s` with their component letters.
-- The ligatures are written as UTF-8 byte escapes, so the source stays
-- correct even when the file passes through tools that normalise text
-- (a normalised table would map "fi" to "fi" and change nothing).
-- Returns the results of `gsub` (new string, number of matches).
function filter(s)
  local lig_ff  = "\239\172\128" -- U+FB00 LATIN SMALL LIGATURE FF
  local lig_fi  = "\239\172\129" -- U+FB01 LATIN SMALL LIGATURE FI
  local lig_fl  = "\239\172\130" -- U+FB02 LATIN SMALL LIGATURE FL
  local lig_ffi = "\239\172\131" -- U+FB03 LATIN SMALL LIGATURE FFI
  local lig_ffl = "\239\172\132" -- U+FB04 LATIN SMALL LIGATURE FFL
  -- ligature character -> plain letters
  local replaces = {
    [lig_fi] = "fi",
    [lig_ffi] = "ffi",
    [lig_fl] = "fl",
    [lig_ffl] = "ffl",
    [lig_ff] = "ff"
  }
  -- character class containing exactly the five ligatures; `gsub` is
  -- unicode.utf8.gsub, which is expected to match whole UTF-8 characters
  local pattern = "([" .. lig_fi .. lig_ffi .. lig_fl .. lig_ffl .. lig_ff .. "])"
  return gsub(s, pattern, function (x) return replaces[x] or x end)
end
return filter
================================================
FILE: filters/make4ht-hruletohr.lua
================================================
-- hruletohr
-- \hrule primitive is impossible to redefine catching all possible arguments
-- with tex4ht, it is converted as series of underscores
-- it seems that these underscores are always part of previous paragraph
-- this assumption may be wrong, needs more real world testing
-- Turn a run of three or more underscores inside a paragraph into a
-- horizontal rule placed after that paragraph. The text between the
-- underscores and the closing `</p>` is kept in the paragraph.
-- Returns the results of string.gsub (new string, number of replacements).
local function hruletohr(s)
  local rule_after_paragraph = "%1</p>\n<hr class=\"hrule\" />"
  return s:gsub("___+(.-)</p>", rule_after_paragraph)
end
return hruletohr
================================================
FILE: filters/make4ht-mathjaxnode.lua
======================
gitextract_7n7efxfs/
├── .github/
│ ├── actions/
│ │ └── make4ht-build/
│ │ ├── Dockerfile
│ │ ├── action.yml
│ │ └── entrypoint.sh
│ └── workflows/
│ └── main.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── INSTALL.md
├── Makefile
├── README.md
├── config.cfg
├── domfilters/
│ ├── make4ht-aeneas.lua
│ ├── make4ht-booktabs.lua
│ ├── make4ht-collapsetoc.lua
│ ├── make4ht-fixinlines.lua
│ ├── make4ht-idcolons.lua
│ ├── make4ht-inlinecss.lua
│ ├── make4ht-itemparagraphs.lua
│ ├── make4ht-joincharacters.lua
│ ├── make4ht-joincolors.lua
│ ├── make4ht-mathmlfixes.lua
│ ├── make4ht-odtfonts.lua
│ ├── make4ht-odtimagesize.lua
│ ├── make4ht-odtpartable.lua
│ ├── make4ht-odtsvg.lua
│ ├── make4ht-sectionid.lua
│ ├── make4ht-t4htlinks.lua
│ ├── make4ht-tablecaption.lua
│ └── make4ht-tablerows.lua
├── extensions/
│ ├── make4ht-ext-common_domfilters.lua
│ ├── make4ht-ext-common_filters.lua
│ ├── make4ht-ext-copy_images.lua
│ ├── make4ht-ext-detect_engine.lua
│ ├── make4ht-ext-dvisvgm_hashes.lua
│ ├── make4ht-ext-inlinecss.lua
│ ├── make4ht-ext-join_colors.lua
│ ├── make4ht-ext-latexmk_build.lua
│ ├── make4ht-ext-mathjaxnode.lua
│ ├── make4ht-ext-mjcli.lua
│ ├── make4ht-ext-nodynamicodt.lua
│ ├── make4ht-ext-odttemplate.lua
│ ├── make4ht-ext-preprocess_input.lua
│ ├── make4ht-ext-staticsite.lua
│ └── make4ht-ext-tidy.lua
├── filters/
│ ├── make4ht-cleanspan-nat.lua
│ ├── make4ht-cleanspan.lua
│ ├── make4ht-domfilter.lua
│ ├── make4ht-entities-to-unicode.lua
│ ├── make4ht-entities.lua
│ ├── make4ht-filter.lua
│ ├── make4ht-fix-links.lua
│ ├── make4ht-fixligatures.lua
│ ├── make4ht-hruletohr.lua
│ ├── make4ht-mathjaxnode.lua
│ ├── make4ht-mjcli.lua
│ ├── make4ht-odttemplate.lua
│ ├── make4ht-staticsite.lua
│ └── make4ht-svg-height.lua
├── formats/
│ ├── make4ht-docbook.lua
│ ├── make4ht-html5.lua
│ ├── make4ht-jats.lua
│ ├── make4ht-odt.lua
│ ├── make4ht-tei.lua
│ └── make4ht-xhtml.lua
├── lapp-mk4.lua
├── make4ht
├── make4ht-aeneas-config.lua
├── make4ht-config.lua
├── make4ht-doc.tex
├── make4ht-dvireader.lua
├── make4ht-errorlogparser.lua
├── make4ht-filterlib.lua
├── make4ht-htlatex.lua
├── make4ht-indexing.lua
├── make4ht-lib.lua
├── make4ht-logging.lua
├── make4ht-xtpipes.lua
├── mkparams.lua
├── mkutils.lua
├── test/
│ ├── dom-test.lua
│ └── test-mkparams.lua
└── tools/
├── make_chardata.lua
└── make_mathmlchardata.lua
Condensed preview — 83 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (363K chars).
[
{
"path": ".github/actions/make4ht-build/Dockerfile",
"chars": 1081,
"preview": "FROM debian:unstable-slim\n\nLABEL \"maintainer\"=\"Michal Hoftich <michal.h21@gmail.com>\"\nLABEL \"repository\"=\"https://github"
},
{
"path": ".github/actions/make4ht-build/action.yml",
"chars": 129,
"preview": "name: \"LaTeX to XML\"\ndescription: \"Convert LaTeX documents to XML with make4ht\"\nruns:\n using: \"docker\"\n image: \"Docker"
},
{
"path": ".github/actions/make4ht-build/entrypoint.sh",
"chars": 99,
"preview": "#!/bin/bash\n\n# make4ht -um draft \nmake install SUDO=\"\"\nmake htmldoc\ncat htmldoc/make4ht-doc.html\n\n\n"
},
{
"path": ".github/workflows/main.yml",
"chars": 691,
"preview": "name: Build documentation\n\non: \n push:\n paths: \n - README.md\n - CHANGELOG.md\n - make4ht-doc.tex\n\njobs:\n bu"
},
{
"path": ".gitignore",
"chars": 179,
"preview": "*.aux\n*.lg\n*.log\n*.tmp\n*.xref\n*.4tc\n*.4ct\n*.swp\n*.idv\n*.dvi\n*.fls\n*.html\n*.out\n*.pdf\nbackup\nbuild\n*.toc\n*.fdb_latexmk\ndo"
},
{
"path": ".travis.yml",
"chars": 539,
"preview": "dist: bionic\n\ninstall:\n # Local\n - sudo apt-get install -qq luatex texlive-base texlive-luatex luarocks\n # Global \n "
},
{
"path": "CHANGELOG.md",
"chars": 31185,
"preview": "# Changelog\n\n- 2026/05/11\n\n - added new `mkutils` function, `escape_pattern`\n - use `escape_pattern` to sanitize outpu"
},
{
"path": "INSTALL.md",
"chars": 2597,
"preview": "Installation\n------------\n\nIf you use TeX Live 2015 or up-to date Miktex distributions, `make4ht` should be installed al"
},
{
"path": "Makefile",
"chars": 3773,
"preview": ".PHONY: build tags\nlua_content = make4ht $(wildcard *.lua) \nfilters = $(wildcard filters/*.lua)\ndomfilters = $(wildcard "
},
{
"path": "README.md",
"chars": 44240,
"preview": "% [](https://travis-ci.org/michal-h21/make4ht"
},
{
"path": "config.cfg",
"chars": 1166,
"preview": "\\Preamble{xhtml}\n% this was fixed in the upstream, but Debian used in the Docker container\n% doesn't contain it yet\n\\def"
},
{
"path": "domfilters/make4ht-aeneas.lua",
"chars": 4641,
"preview": "-- DOM filter for Aeneas, tool for automatical text and audio synchronization\n-- https://github.com/readbeyond/aeneas\n--"
},
{
"path": "domfilters/make4ht-booktabs.lua",
"chars": 2741,
"preview": "\nlocal function find_cmidrules(current_rows)\n -- save rows with cmidrules here\n local matched_rows = {}\n local contin"
},
{
"path": "domfilters/make4ht-collapsetoc.lua",
"chars": 8295,
"preview": "-- mini TOC support for make4ht\nlocal domobject = require \"luaxml-domobject\"\n\nlocal filter = require \"make4ht-filter\"\nlo"
},
{
"path": "domfilters/make4ht-fixinlines.lua",
"chars": 1782,
"preview": "local inline_elements = {\n a=true,\n b=true,\n big=true,\n i=true,\n small=true,\n tt=true,\n abbr=true,\n acronym=true"
},
{
"path": "domfilters/make4ht-idcolons.lua",
"chars": 1046,
"preview": "local allowed_chars = {\n [\"-\"] = true,\n [\".\"] = true\n}\nlocal function fix_colons(id)\n -- match every non alphanum cha"
},
{
"path": "domfilters/make4ht-inlinecss.lua",
"chars": 2717,
"preview": "local cssquery = require \"luaxml-cssquery\"\n\nlocal log = logging.new(\"inlinecss\")\n\nlocal cssrules = {}\nlocal cssobj = "
},
{
"path": "domfilters/make4ht-itemparagraphs.lua",
"chars": 1391,
"preview": "-- TeX4ht puts contents of all \\item commands into paragraphs. We are not\n-- able to detect if it contain only one parag"
},
{
"path": "domfilters/make4ht-joincharacters.lua",
"chars": 5880,
"preview": "local log = logging.new(\"joincharacters\")\n\nlocal charclasses = {\n span=true,\n mn = true,\n}\n\nlocal safe_mathml_elements"
},
{
"path": "domfilters/make4ht-joincolors.lua",
"chars": 2232,
"preview": "local cssfiles = {}\nlocal log = logging.new \"joincolors\"\n\n\n-- keep mapping between span ids and colors\nlocal colors = {}"
},
{
"path": "domfilters/make4ht-mathmlfixes.lua",
"chars": 20767,
"preview": "local log = logging.new(\"mathmlfixes\")\n\nlocal mathml_chardata = require \"make4ht-mathml-char-def\"\n\n-- <mglyph> should be"
},
{
"path": "domfilters/make4ht-odtfonts.lua",
"chars": 1124,
"preview": "return function(dom, params)\n -- fix ODT style for fonts \n -- sometimes, fonts have missing size, we need to patch sty"
},
{
"path": "domfilters/make4ht-odtimagesize.lua",
"chars": 627,
"preview": "local log = logging.new \"odtimagesize\"\n-- set correct dimensions to frames around images\nreturn function(dom)\n local f"
},
{
"path": "domfilters/make4ht-odtpartable.lua",
"chars": 321,
"preview": "-- find all tables inside paragraphs, replace the found paragraphs with the child table\nreturn function(dom)\n for _,tab"
},
{
"path": "domfilters/make4ht-odtsvg.lua",
"chars": 1458,
"preview": "-- we need to set dimensions for SVG images produced by \\Picture commands\nlocal log = logging.new \"odtsvg\"\nlocal functio"
},
{
"path": "domfilters/make4ht-sectionid.lua",
"chars": 5979,
"preview": "local mkutils = require \"mkutils\"\nlocal log = logging.new(\"tocid\")\n-- Unicode data distributed with ConTeXt\n-- defines"
},
{
"path": "domfilters/make4ht-t4htlinks.lua",
"chars": 1026,
"preview": "-- This filter is used by the ODT output format to fix links\nreturn function(dom)\n for _, link in ipairs(dom:query_sel"
},
{
"path": "domfilters/make4ht-tablecaption.lua",
"chars": 780,
"preview": "local function get_parent_table(caption)\n -- recursively find the parent table of a caption element, as it can be insid"
},
{
"path": "domfilters/make4ht-tablerows.lua",
"chars": 3791,
"preview": "local log = logging.new (\"tablerows\")\nreturn function(dom)\n local has_child_elements = function(child)\n -- detect if"
},
{
"path": "extensions/make4ht-ext-common_domfilters.lua",
"chars": 1136,
"preview": "local M = {}\n\n\n-- this variable will hold the output format name\nlocal current_format \n\nlocal filter = require \"make4ht-"
},
{
"path": "extensions/make4ht-ext-common_filters.lua",
"chars": 646,
"preview": "local M = {}\n\n\nlocal filter = require \"make4ht-filter\"\nlocal process = filter({\"cleanspan-nat\", \"fixligatures\", \"hruleto"
},
{
"path": "extensions/make4ht-ext-copy_images.lua",
"chars": 1953,
"preview": "local M = {}\nlocal mkutils = require \"mkutils\"\nlocal domfilter = require \"make4ht-domfilter\"\n\nlocal copied_images = {}\n\n"
},
{
"path": "extensions/make4ht-ext-detect_engine.lua",
"chars": 2794,
"preview": "-- support magic comments used by TeXShop and TeXWorks to detect used engine and format\n--\nlocal M = {}\nlocal log = logg"
},
{
"path": "extensions/make4ht-ext-dvisvgm_hashes.lua",
"chars": 8916,
"preview": "local dvireader = require \"make4ht-dvireader\"\nlocal mkutils = require \"mkutils\"\nlocal filter = require \"make4ht-filter\"\n"
},
{
"path": "extensions/make4ht-ext-inlinecss.lua",
"chars": 328,
"preview": "local M = {}\n\nlocal filter = require \"make4ht-domfilter\"\n\n-- filters support only html formats\nfunction M.test(format)\n "
},
{
"path": "extensions/make4ht-ext-join_colors.lua",
"chars": 325,
"preview": "local M = {}\n\nlocal filter = require \"make4ht-domfilter\"\n\n-- filters support only html formats\nfunction M.test(format)\n "
},
{
"path": "extensions/make4ht-ext-latexmk_build.lua",
"chars": 2263,
"preview": "-- use Latexmk in first LaTeX call\n-- only in the first call, because we don't need to execute biber, etc. in the subse"
},
{
"path": "extensions/make4ht-ext-mathjaxnode.lua",
"chars": 450,
"preview": "local M = {}\n\n\nlocal filter = require \"make4ht-filter\"\nfunction M.test(format)\n if format == \"odt\" then return false en"
},
{
"path": "extensions/make4ht-ext-mjcli.lua",
"chars": 929,
"preview": "local M = {}\n\n\nlocal filter = require \"make4ht-filter\"\nfunction M.test(format)\n -- this extension works only for format"
},
{
"path": "extensions/make4ht-ext-nodynamicodt.lua",
"chars": 1528,
"preview": "local M = {}\n\n-- this extension covnerts links, tables of contents and other dynamic content in the ODT format to plain "
},
{
"path": "extensions/make4ht-ext-odttemplate.lua",
"chars": 304,
"preview": "local M = {}\n\nlocal filter = require \"make4ht-filter\"\n\n-- this extension only works for the ODT format\nM.test = function"
},
{
"path": "extensions/make4ht-ext-preprocess_input.lua",
"chars": 4494,
"preview": "-- preprocess R literate sources or Markdown files to LaTeX\nlocal M = {}\nlocal log = logging.new \"preprocess_input\"\nloca"
},
{
"path": "extensions/make4ht-ext-staticsite.lua",
"chars": 5289,
"preview": "local M = {}\nlocal filter = require \"make4ht-filter\"\nlocal mkutils = require \"mkutils\"\nlocal log = logging.new \"staticsi"
},
{
"path": "extensions/make4ht-ext-tidy.lua",
"chars": 1449,
"preview": "local M = {}\n\nlocal log = logging.new \"tidy\"\nfunction M.test(format)\n if format == \"odt\" then return false end\n return"
},
{
"path": "filters/make4ht-cleanspan-nat.lua",
"chars": 264,
"preview": "-- cleanspan function submitted by Nat Kuhn \n-- http://www.natkuhn.com/\n\nlocal function filter(s)\n local pattern = \"("
},
{
"path": "filters/make4ht-cleanspan.lua",
"chars": 798,
"preview": "-- make4ht-cleanspan4ht.lua \n-- fixes spurious <span> elements in tex4ht output\n\n\nfunction filter(input)\n\tlocal parse_ar"
},
{
"path": "filters/make4ht-domfilter.lua",
"chars": 3624,
"preview": "local filter_lib = require \"make4ht-filterlib\"\nlocal dom = require \"luaxml-domobject\"\nlocal mkutils = require \"mku"
},
{
"path": "filters/make4ht-entities-to-unicode.lua",
"chars": 713,
"preview": "-- convert Unicode characters encoded as XML entities back to Unicode\n\nlocal utfchar = unicode.utf8.char\n-- list of disa"
},
{
"path": "filters/make4ht-entities.lua",
"chars": 291,
"preview": "-- Fix bad entities\n-- Sometimes, tex4ht produce named xml entities, which are prohobited in epub\n-- , for example"
},
{
"path": "filters/make4ht-filter.lua",
"chars": 596,
"preview": "local filter_lib = require \"make4ht-filterlib\"\n\nlocal function load_filter(filtername)\n\treturn require(\"filters.make4ht-"
},
{
"path": "filters/make4ht-fix-links.lua",
"chars": 425,
"preview": "-- replace colons in `id` or `href` attributes for local links with underscores\n--\n\nlocal function fix_href_colons(s)\n "
},
{
"path": "filters/make4ht-fixligatures.lua",
"chars": 366,
"preview": "-- fix ligatures\n-- replace ligatures produced by tex4ht with their components\n-- this prevents problems with some reade"
},
{
"path": "filters/make4ht-hruletohr.lua",
"chars": 402,
"preview": "-- hruletohr\n-- \\hrule primitive is impossible to redefine catching all possible arguments\n-- with tex4ht, it is convert"
},
{
"path": "filters/make4ht-mathjaxnode.lua",
"chars": 3191,
"preview": "local mkutils = require \"mkutils\"\nlocal log = logging.new(\"mathjaxnode\")\n-- other possible value is page2svg\nlocal mathn"
},
{
"path": "filters/make4ht-mjcli.lua",
"chars": 6116,
"preview": "local mkutils = require \"mkutils\"\nlocal log = logging.new(\"mjcli\")\n-- other possible value is page2svg\nlocal mathnodepat"
},
{
"path": "filters/make4ht-odttemplate.lua",
"chars": 2324,
"preview": "local mkutils = require \"mkutils\"\nlocal zip = require \"zip\"\nlocal domobject = require \"luaxml-domobject\"\n\n\nlocal functio"
},
{
"path": "filters/make4ht-staticsite.lua",
"chars": 3709,
"preview": "local domobj = require \"luaxml-domobject\"\nlocal log = logging.new(\"staticsite\")\n-- save the header settings in YAML form"
},
{
"path": "filters/make4ht-svg-height.lua",
"chars": 1215,
"preview": "\nlocal log = logging.new(\"svg-height\")\n-- Make:image(\"svg$\", \"dvisvgm -n -a -p ${page} -b preview -c 1.4,1.4 -s ${source"
},
{
"path": "formats/make4ht-docbook.lua",
"chars": 1143,
"preview": "local M = {}\nlocal mkutils = require \"mkutils\"\nlocal lfs = require \"lfs\"\nlocal os = require \"os\"\nlocal kpse "
},
{
"path": "formats/make4ht-html5.lua",
"chars": 395,
"preview": "local M = {}\n\nlocal mkutils = require \"mkutils\"\n\nfunction M.prepare_extensions(extensions)\n return mkutils.add_extensio"
},
{
"path": "formats/make4ht-jats.lua",
"chars": 7510,
"preview": "local M = {}\nlocal xtpipeslib = require \"make4ht-xtpipes\"\nlocal domfilter = require \"make4ht-domfilter\"\n\n\n-- some elemen"
},
{
"path": "formats/make4ht-odt.lua",
"chars": 14246,
"preview": "local M = {}\nlocal mkutils = require \"mkutils\"\nlocal lfs = require \"lfs\"\nlocal os = require \"os\"\nlocal kpse "
},
{
"path": "formats/make4ht-tei.lua",
"chars": 733,
"preview": "local M = {}\nlocal xtpipeslib = require \"make4ht-xtpipes\"\n\nlocal domfilter = require \"make4ht-domfilter\"\n\nfunction M.pre"
},
{
"path": "formats/make4ht-xhtml.lua",
"chars": 326,
"preview": "local M = {}\n\nlocal mkutils = require \"mkutils\"\n\nfunction M.prepare_extensions(extensions)\n return mkutils.add_extensio"
},
{
"path": "lapp-mk4.lua",
"chars": 9796,
"preview": "-- lapp.lua\n-- Simple command-line parsing using human-readable specification\n-----------------------------\n--~ -- args."
},
{
"path": "make4ht",
"chars": 4585,
"preview": "#!/usr/bin/env texlua\n-- Package make4ht. Author Michal Hoftich <michal.h21@gmail.com>\n-- This package is subject of LPP"
},
{
"path": "make4ht-aeneas-config.lua",
"chars": 7915,
"preview": "local M = {}\n\nlocal mkutils = require \"mkutils\"\n\nlocal task_template = [[\n<task>\n <task_language>${lang}</task_langua"
},
{
"path": "make4ht-config.lua",
"chars": 2465,
"preview": "local m = {}\n\nlocal mkutils = require \"mkutils\"\n\nlocal file_exists = mkutils.file_exists\n-- function file_exists(name)\n-"
},
{
"path": "make4ht-doc.tex",
"chars": 824,
"preview": "% \\documentclass{ltxdoc}\n\\documentclass{article}\n\n\n\\usepackage[english]{babel}\n\\usepackage{hyperref}\n\\newcommand\\authorm"
},
{
"path": "make4ht-dvireader.lua",
"chars": 5355,
"preview": "-- This is not actually full DVI reader. It just calculates hash for each page,\n-- so it can be detected if it changed b"
},
{
"path": "make4ht-errorlogparser.lua",
"chars": 4959,
"preview": "local m = {}\n\nlocal function get_filename(chunk)\n local filename = chunk:match(\"([^\\n^%(]+)\") \n if not filename then \n"
},
{
"path": "make4ht-filterlib.lua",
"chars": 1039,
"preview": "local M = {}\n\n-- the filter module must implement the load_filter function\nfunction M.load_filters(filters, load_filter"
},
{
"path": "make4ht-htlatex.lua",
"chars": 6722,
"preview": "local log = logging.new \"htlatex\"\nlocal autolog = logging.new \"autohtlatex\"\n\nlocal error_logparser = require(\"make4ht-er"
},
{
"path": "make4ht-indexing.lua",
"chars": 10847,
"preview": "\nlocal M = {}\nlocal log = logging.new \"indexing\"\n\n-- Handle accented characters in files created with \\usepackage[utf]{i"
},
{
"path": "make4ht-lib.lua",
"chars": 6455,
"preview": "-- Simple make system for tex4ht\n--kpse.set_program_name(\"luatex\")\n-- module(...,package.seeall)\nlocal m = {}\nlocal log "
},
{
"path": "make4ht-logging.lua",
"chars": 3238,
"preview": "-- logging system for make4ht\n-- inspired by https://github.com/rxi/log.lua\nlocal logging = {}\n\nlocal levels = {}\n-- lev"
},
{
"path": "make4ht-xtpipes.lua",
"chars": 4450,
"preview": "local M = {}\n\nlocal mkutils = require \"mkutils\"\n\nlocal log = logging.new \"xtpipes\"\n-- find if tex4ht.jar exists in a pat"
},
{
"path": "mkparams.lua",
"chars": 11097,
"preview": "local lapp = require \"lapp-mk4\"\nlocal mkutils = require \"mkutils\"\nlocal m = {} -- use ugly module system for new lua ver"
},
{
"path": "mkutils.lua",
"chars": 21780,
"preview": "module(...,package.seeall)\n\nlocal log = logging.new(\"mkutils\")\n\nlocal make4ht = require(\"make4ht-lib\")\nlocal mkparams = "
},
{
"path": "test/dom-test.lua",
"chars": 2598,
"preview": "require \"busted.runner\" ()\nkpse.set_program_name \"luatex\"\n\nlocal dom = require \"luaxml-domobject\"\n\ndescribe(\"Basic DOM f"
},
{
"path": "test/test-mkparams.lua",
"chars": 530,
"preview": "require \"busted.runner\" ()\nkpse.set_program_name \"luatex\"\nlocal mkparams = require \"mkparams\"\n\ndescribe(\"Test output for"
},
{
"path": "tools/make_chardata.lua",
"chars": 1424,
"preview": "kpse.set_program_name \"luatex\"\n-- create Lua module from UnicodeData\n-- we need mapping to lower case letters and decomp"
},
{
"path": "tools/make_mathmlchardata.lua",
"chars": 2333,
"preview": "-- This file generates Lua table with mapping of Unicode charcodes for different math font styles (bold, italic, bold-it"
}
]
About this extraction
This page contains the full source code of the michal-h21/make4ht GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 83 files (333.9 KB), approximately 93.7k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.