Repository: michal-h21/make4ht Branch: master Commit: e1aa8a497128 Files: 83 Total size: 333.9 KB Directory structure: gitextract_7n7efxfs/ ├── .github/ │ ├── actions/ │ │ └── make4ht-build/ │ │ ├── Dockerfile │ │ ├── action.yml │ │ └── entrypoint.sh │ └── workflows/ │ └── main.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── INSTALL.md ├── Makefile ├── README.md ├── config.cfg ├── domfilters/ │ ├── make4ht-aeneas.lua │ ├── make4ht-booktabs.lua │ ├── make4ht-collapsetoc.lua │ ├── make4ht-fixinlines.lua │ ├── make4ht-idcolons.lua │ ├── make4ht-inlinecss.lua │ ├── make4ht-itemparagraphs.lua │ ├── make4ht-joincharacters.lua │ ├── make4ht-joincolors.lua │ ├── make4ht-mathmlfixes.lua │ ├── make4ht-odtfonts.lua │ ├── make4ht-odtimagesize.lua │ ├── make4ht-odtpartable.lua │ ├── make4ht-odtsvg.lua │ ├── make4ht-sectionid.lua │ ├── make4ht-t4htlinks.lua │ ├── make4ht-tablecaption.lua │ └── make4ht-tablerows.lua ├── extensions/ │ ├── make4ht-ext-common_domfilters.lua │ ├── make4ht-ext-common_filters.lua │ ├── make4ht-ext-copy_images.lua │ ├── make4ht-ext-detect_engine.lua │ ├── make4ht-ext-dvisvgm_hashes.lua │ ├── make4ht-ext-inlinecss.lua │ ├── make4ht-ext-join_colors.lua │ ├── make4ht-ext-latexmk_build.lua │ ├── make4ht-ext-mathjaxnode.lua │ ├── make4ht-ext-mjcli.lua │ ├── make4ht-ext-nodynamicodt.lua │ ├── make4ht-ext-odttemplate.lua │ ├── make4ht-ext-preprocess_input.lua │ ├── make4ht-ext-staticsite.lua │ └── make4ht-ext-tidy.lua ├── filters/ │ ├── make4ht-cleanspan-nat.lua │ ├── make4ht-cleanspan.lua │ ├── make4ht-domfilter.lua │ ├── make4ht-entities-to-unicode.lua │ ├── make4ht-entities.lua │ ├── make4ht-filter.lua │ ├── make4ht-fix-links.lua │ ├── make4ht-fixligatures.lua │ ├── make4ht-hruletohr.lua │ ├── make4ht-mathjaxnode.lua │ ├── make4ht-mjcli.lua │ ├── make4ht-odttemplate.lua │ ├── make4ht-staticsite.lua │ └── make4ht-svg-height.lua ├── formats/ │ ├── make4ht-docbook.lua │ ├── make4ht-html5.lua │ ├── make4ht-jats.lua │ ├── make4ht-odt.lua │ ├── 
make4ht-tei.lua │ └── make4ht-xhtml.lua ├── lapp-mk4.lua ├── make4ht ├── make4ht-aeneas-config.lua ├── make4ht-config.lua ├── make4ht-doc.tex ├── make4ht-dvireader.lua ├── make4ht-errorlogparser.lua ├── make4ht-filterlib.lua ├── make4ht-htlatex.lua ├── make4ht-indexing.lua ├── make4ht-lib.lua ├── make4ht-logging.lua ├── make4ht-xtpipes.lua ├── mkparams.lua ├── mkutils.lua ├── test/ │ ├── dom-test.lua │ └── test-mkparams.lua └── tools/ ├── make_chardata.lua └── make_mathmlchardata.lua ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/actions/make4ht-build/Dockerfile ================================================ FROM debian:unstable-slim LABEL "maintainer"="Michal Hoftich " LABEL "repository"="https://github.com/michal-h21/make4ht" LABEL "homepage"="https://github.com/michal-h21/make4ht" LABEL "com.github.actions.name"="LaTeX to XML" LABEL "com.github.actions.description"="Convert LaTeX documents to XML with make4ht." 
LABEL "com.github.actions.icon"="code" LABEL "com.github.actions.color"="blue" ENV DEBIAN_FRONTEND noninteractive # Install all TeX and LaTeX dependencies RUN apt-get update && \ apt-get install --yes --no-install-recommends \ make luatex texlive-base texlive-luatex texlive-latex-extra context \ tidy \ # texlive-fonts-recommended \ fonts-noto-mono \ texlive-plain-generic \ texlive-latex-recommended \ pandoc latexmk texlive lmodern fonts-lmodern tex-gyre fonts-texgyre \ texlive-lang-english && \ apt-get autoclean && apt-get --purge --yes autoremove && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* ADD entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] ================================================ FILE: .github/actions/make4ht-build/action.yml ================================================ name: "LaTeX to XML" description: "Convert LaTeX documents to XML with make4ht" runs: using: "docker" image: "Dockerfile" ================================================ FILE: .github/actions/make4ht-build/entrypoint.sh ================================================ #!/bin/bash # make4ht -um draft make install SUDO="" make htmldoc cat htmldoc/make4ht-doc.html ================================================ FILE: .github/workflows/main.yml ================================================ name: Build documentation on: push: paths: - README.md - CHANGELOG.md - make4ht-doc.tex jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Run a one-line script run: echo Hello, world! - name: Run a multi-line script run: | echo Add other actions to build, echo test, and deploy your project. 
- name: Generate HTML docs uses: ./.github/actions/make4ht-build - name: Publish the web pages uses: peaceiris/actions-gh-pages@v2.5.0 env: ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEPLOY_KEY }} PUBLISH_BRANCH: gh-pages PUBLISH_DIR: ./htmldoc ================================================ FILE: .gitignore ================================================ *.aux *.lg *.log *.tmp *.xref *.4tc *.4ct *.swp *.idv *.dvi *.fls *.html *.out *.pdf backup build *.toc *.fdb_latexmk doc tags *.css readme.tex changelog.tex make4ht-char-def.lua ================================================ FILE: .travis.yml ================================================ dist: bionic install: # Local - sudo apt-get install -qq luatex texlive-base texlive-luatex luarocks # Global - sudo apt-get install -qq pandoc latexmk texlive texlive-xetex texlive-fonts-recommended fonts-lmodern tex-gyre fonts-texgyre fonts-noto script: # Already runs locally - ./make4ht -v # - luarocks --local install busted - git fetch --tags - make - make justinstall # - sudo ln -s /home/travis/texmf/scripts/lua/make4ht/make4ht /usr/local/bin/make4ht # - make test # Now runs globally - make4ht -v ================================================ FILE: CHANGELOG.md ================================================ # Changelog - 2026/05/11 - added new `mkutils` function, `escape_pattern` - use `escape_pattern` to sanitize output file names https://tex.stackexchange.com/q/762690/2891 - look for images outside of the build dir in the ODT output. https://tex.stackexchange.com/q/762690/2891 - 2026/05/10 - set anchor id in lists to the `
  • ` element in the `itemparagraphs` DOM filter. - 2026/02/24 - version `0.4e` released. - 2026/02/18 - consecutive `` elements in MathML are now merged only within parent elements whose meaning does not depend on the order of their child elements. - 2026/01/31 - remove `#` character from internal links in the JATS output (it needs to be here to distinguish internal and external links). - 2026/01/29 - removed the `}} \Configure{@HEAD}{\HCode{\Hnewline}} \Configure{@HEAD}{\HCode{\Hnewline}} \Configure{@HEAD}{\HCode{\Hnewline}} \Configure{@HEAD}{\HCode{\Hnewline}} \Configure{@HEAD}{\HCode{\Hnewline }} \Css{body{ font-size:18px; font-size-adjust: 0.5; width:65ch; max-width:100\%; margin: 1em auto; }} \Css{p,li,dt{ line-height: calc(1ex / 0.32); text-align: justify; hyphens: auto; }} \Css{div.center p {text-align:center;}} % save the \@title command \begin{document} \makeatletter \Tag{TITLE+}{The make4ht build system} \makeatother \def\TeX{TeX} \def\LaTeX{LaTeX} \EndPreamble ================================================ FILE: domfilters/make4ht-aeneas.lua ================================================ -- DOM filter for Aeneas, tool for automatical text and audio synchronization -- https://github.com/readbeyond/aeneas -- It adds elements with id attributes for text chunks, in sentence length. -- -- local cssquery = require "luaxml-cssquery" local mkutils = require "mkutils" local log = logging.new "aeneas" -- Table of CSS selectors to be skipped. local skip_elements = { "math", "svg"} -- The id attribute format is configurable -- Aeneas must be told to search for the ID pattern using is_text_unparsed_id_regex -- option in Aneas configuration file local id_prefix = "ast" -- Pattern to mach a sentence. It should match two groups, first is actual -- sentence, the second optional interpunction mark. 
local sentence_match = "([^%.^%?^!]*)([%.%?!]?)"

-- Convert a list of CSS selectors to a cssquery object.
-- @param skips  array of selector strings
-- @return cssquery object with every selector registered
local function prepare_selectors(skips)
  local css = cssquery()
  for _, selector in ipairs(skips) do
    css:add_selector(selector)
  end
  return css
end

-- Save the HTML language (and any values passed in `saves`) to the
-- "aeneas-config" filter settings.
-- @param dom    DOM object of the current page
-- @param saves  optional table of additional values to store
local function save_config(dom, saves)
  local get_lang = function(d)
    local html = d:query_selector("html")[1]
    -- a page without <html> element has no language; the previous `or {}`
    -- fallback crashed here, because a plain table has no get_attribute method
    return html and html:get_attribute("lang")
  end
  local saves = saves or {}
  -- NOTE(review): the settings are read under "aeneas_config" but written
  -- under "aeneas-config" below, and the test reads `language` while `lang`
  -- is stored -- confirm which names make4ht-aeneas-config expects
  local config = get_filter_settings "aeneas_config"
  if config.language then return end
  saves.lang = get_lang(dom)
  filter_settings "aeneas-config" (saves)
end

-- Make <span> element with unique id for a sentence.
-- @param id      value of the id attribute
-- @param parent  DOM node used as a factory for the new element
-- @param text    text of the sentence
-- @return the new <span> element, marked as processed
local function make_span(id,parent, text)
  local newobj = parent:create_element("span", {id=id })
  newobj.processed = true -- to disable multiple processing of the node
  local text_node = newobj:create_text_node(text)
  newobj:add_child_node(text_node)
  return newobj
end

-- Make the id attribute value and update the id counter.
-- @return id string, incremented counter
local function make_id(lastid, id_prefix)
  local id = id_prefix .. lastid
  lastid = lastid + 1
  return id, lastid
end

-- Parse text for sentences and make a <span> for each of them.
-- @return array of new <span> elements, updated id counter
local function make_ids(parent, text, lastid, id_prefix)
  local t = {}
  local id
  for chunk, punct in text:gmatch(sentence_match) do
    -- the counter is advanced for every match, including the empty ones
    id, lastid = make_id(lastid, id_prefix)
    local newtext = chunk..punct
    -- the newtext is empty string sometimes. we can skip it then.
    if newtext~="" then
      table.insert(t, make_span(id, parent, newtext))
    end
  end
  return t, lastid
end

-- Test if the DOM element is matched by any of the skipped CSS selectors.
local function is_skipped(el, css)
  local matched = css:match_querylist(el)
  return #matched > 0
end

-- DOM filter entry point. Adds id attributes to short text-only elements and
-- wraps sentences in other text nodes with <span id="..."> elements.
-- @param dom  DOM object of the current page
-- @param par  optional parameters: skip_elements, id_prefix, sentence_match
--             (values from filter settings "aeneas" take precedence)
-- @return dom
local function aeneas(dom, par)
  local par = par or {}
  local id = 1
  local options = get_filter_settings "aeneas"
  local skip_elements = options.skip_elements or par.skip_elements or skip_elements
  local id_prefix = options.id_prefix or par.id_prefix or id_prefix
  local skip_object = prepare_selectors(skip_elements)
  -- this updates the file-level pattern, because make_ids reads it from there
  sentence_match = options.sentence_match or par.sentence_match or sentence_match
  local body = dom:query_selector("body")[1]
  -- process only the document body
  if not body then return dom end
  -- save information for aeneas_config
  save_config(dom, {id_prefix = id_prefix})
  body:traverse_elements(function(el)
    -- skip disabled elements
    if(is_skipped(el, skip_object)) then return false end
    -- skip already processed elements
    if el.processed then return false end
    local newchildren = {} -- this will contain the new elements
    local children = el:get_children()
    local first_child = children[1]
    -- if the element contains only text, doesn't already have an id attribute and the text is short,
    -- the id is set directly on that element.
    if #children == 1
      and first_child:is_text()
      and not el:get_attribute("id")
      and string.len(first_child._text) < 20
      and el._attr
    then
      local idtitle
      idtitle, id = make_id(id, id_prefix)
      log:debug(el._name, first_child._text)
      el:set_attribute("id", idtitle)
      return el
    end
    for _, child in ipairs(children) do
      -- process only non-empty text
      if child:is_text() and child._text:match("%a+") then
        local newnodes
        newnodes, id = make_ids(child, child._text, id, id_prefix)
        for _, node in ipairs(newnodes) do
          table.insert(newchildren, node or {})
        end
      else
        -- insert the current processing element to the new element list
        -- if it isn't only text
        table.insert(newchildren, child or {})
      end
    end
    -- replace element children with the new ones
    if #newchildren > 0 then
      el._children = {}
      for _, c in ipairs(newchildren) do
        el:add_child_node(c)
      end
    end
  end)
  return dom
end

return aeneas

================================================ FILE: domfilters/make4ht-booktabs.lua ================================================

-- Find table rows that contain a cell with the .cmidrule class.
-- @param current_rows  array of <tr> elements of one table
-- @return table indexed by row number; each entry holds the attributes,
--         column position and span of the rule cell, and in `continue` the
--         number of the first row of the current run of cmidrule rows (false
--         for the first row of a run). The `length` field holds the row count.
local function find_cmidrules(current_rows)
  -- save rows with cmidrules here
  local matched_rows = {}
  local continue = false
  for row_no, row in ipairs(current_rows) do
    local columnposition = 1
    local matched_cmidrule = false
    for _, col in ipairs(row:query_selector("td")) do
      -- keep track of columns
      local span = tonumber(col:get_attribute("colspan")) or 1
      local cmidrule = col:query_selector(".cmidrule")
      -- column contains cmidrule
      if #cmidrule > 0 then
        -- remove any child elements, we don't need them anymore
        col._children = {}
        -- only one cmidrule can be on each row, save the position, column span and all attributes
        matched_rows[row_no] = {attributes = col._attr, column = columnposition, span = span, continue = continue}
        matched_cmidrule = true
      end
      columnposition = columnposition + span
    end
    if matched_cmidrule then
      -- save the row number of the first cmidrule on the current row
      continue = continue or row_no
    else
      continue = false
    end
  end
  -- save the table rows count, so we can loop over them sequentially later
  matched_rows.length = #current_rows
  return matched_rows
end

-- Move the rule cell of row `i` to the first row of its run and remove row `i`.
-- @param current_rows  array of <tr> elements
-- @param match         entry made by find_cmidrules for row `i`
-- @param newspan       number of columns between the previous rule and this one
-- @param i             number of the row that is merged and removed
local function update_row(current_rows, match, newspan, i)
  local row_to_update = current_rows[match.continue]
  -- insert spanning column if necessary
  if newspan > 0 then
    -- NOTE(review): span="nazdar" looks like a debugging leftover -- confirm
    -- that nothing depends on it before removing
    local td = row_to_update:create_element("td", {colspan=tostring(newspan), span="nazdar"})
    row_to_update:add_child_node(td)
  end
  -- insert the rule column
  local td = row_to_update:create_element("td", match.attributes)
  row_to_update:add_child_node(td)
  -- remove unnecessary row
  current_rows[i]:remove_node()
end

-- Join runs of subsequent cmidrule rows into the first row of each run.
local function join_rows(matched_rows,current_rows)
  for i = 1, matched_rows.length do
    local match = matched_rows[i]
    if match then
      -- we only need to process rows that place subsequent cmidrules on the same row
      local continue = match.continue
      if continue then
        local prev_row = matched_rows[continue]
        -- find column where the previous cmidrule ends
        local prev_end = prev_row.column + prev_row.span
        local newspan = match.column - prev_end
        update_row(current_rows, match, newspan, i)
        -- update the current row position
        prev_row.column = match.column
        prev_row.span = match.span
      end
    end
  end
end

-- DOM filter entry point: process cmidrules in every table of the page.
local function process_booktabs(dom)
  local tables = dom:query_selector("table")
  for _, tbl in ipairs(tables) do
    local current_rows = tbl:query_selector("tr")
    local matched_rows = find_cmidrules(current_rows)
    join_rows(matched_rows, current_rows)
  end
  return dom
end

return process_booktabs

================================================ FILE: domfilters/make4ht-collapsetoc.lua ================================================

-- mini TOC support for make4ht
local domobject = require "luaxml-domobject"
local filter = require "make4ht-filter"
local log = logging.new "collapsetoc"
-- the local used to be misspelled as `mktuils`, so the rest of the file
-- silently depended on a global `mkutils`
local mkutils = require "mkutils"

-- assign levels to entries in the .4tc file
local toc_levels = {
  tocpart = 1,
  toclikepart = 1,
  tocappendix = 2,
  toclikechapter = 2,
  tocchapter = 2,
  tocsection = 3,
  toclikesection = 3,
  tocsubsection = 4,
  toclikesubsection = 4,
  tocsubsubsection = 5,
  toclikesubsubsection = 5,
  tocparagraph = 6,
  toclikeparagraph = 6,
  tocsubparagraph = 7,
  toclikesubparagraph = 7,
}

-- number of child levels to be kept
-- the depth of 1 ensures that only direct children of the current sectioning command
-- will be kept in TOC
local max_depth = 1

-- debugging function to test correct structure of the TOC tree
local function print_tree(tree, level)
  local level = level or 0
  log:debug(string.rep(" ", level) .. (tree.type or "root"), tree.id)
  for k, v in pairs(tree.children) do
    print_tree(v, level + 2)
  end
end

-- Convert the parsed TOC entries to a tree structure.
-- @param tocentries   array of {type=, id=, level=} tables in document order
-- @param lowestlevel  the lowest level number used in tocentries
-- @param position     optional, defaults to 1
-- @param tree         optional root node; a new root is made by default
-- @return root node; every node has `children` and (except the root) `parent`
local function make_toc_tree(tocentries, lowestlevel, position, tree)
  local position = position or 1
  local tree = tree or {
    level = lowestlevel - 1,
    children = {}
  }
  local stack = {tree}
  if position > #tocentries then return tree, position end
  -- loop over TOC entries and make a tree
  for i = 1, #tocentries do
    -- initialize new child
    local element = tocentries[i]
    element.children = element.children or {}
    local parent = stack[#stack]
    local level_diff = element.level - parent.level
    if level_diff == 0 then -- entry is sibling of parent
      -- current parent is sibling of the current element, true parent is
      -- sibling's parent
      parent = parent.parent
      -- we must replace sibling element with the current element in stack
      -- so the child elements get correct parent
      table.remove(stack)
      table.insert(stack, element)
    elseif level_diff > 0 then -- entry is child of parent
      -- the element is pushed once for each level, so skipped levels still
      -- have an entry in the stack
      for x = 1, level_diff do
        table.insert(stack, element)
      end
    else
      -- we must remove levels from the stack to get the correct parent
      for x =1 , level_diff, -1 do
        if #stack > 0 then
          parent = table.remove(stack)
        end
      end
      -- we must reinsert parent back to stack, place the current element to stack too
      table.insert(stack, parent)
      table.insert(stack, element)
    end
    table.insert(parent.children, element)
    element.parent = parent
  end
  print_tree(tree)
  return tree
end

-- find first sectioning element in the current page
local function find_headers(dom, header_levels)
  -- we need to find id attributes in <a> elements that are children of sectioning elements
  -- @param header_levels  CSS query that selects these elements
  -- @return array of the id values, in the order the query returned them
  local ids = {}
  for _, header in ipairs(dom:query_selector(header_levels)) do
    local id = header:get_attribute "id"
    if id then ids[#ids+1] = id end
  end
  return ids
end

-- process list of ids and find those that should be kept:
-- siblings, children, parents and top level
-- @param ids   array of ids found in the current page
-- @param tree  TOC tree made by make_toc_tree (an empty table is accepted)
-- @return set of ids to keep (id -> true)
local function find_toc_entries_to_keep(ids, tree)
  local tree = tree or {}
  -- all ids in TOC tree that we want to keep are saved in this table
  local ids_to_keep = {}
  -- find current id in the TOC tree
  local function find_id(id, tree)
    if tree.id == id then return tree end
    if not tree.children or #tree.children == 0 then return false end
    for k,v in pairs(tree.children) do
      local found_id = find_id(id, v)
      if found_id then return found_id end
    end
    return false
  end
  -- always keep top level of the hierarchy
  local function keep_toplevel(tree)
    -- the tree has no children when the .4tc file could not be loaded
    for _, el in ipairs(tree.children or {}) do
      if el.id then ids_to_keep[el.id] = true end
    end
  end
  -- we want to keep all children in TOC hierarchy
  local function keep_children(element, depth)
    local depth = depth or 1
    local max_depth = max_depth or 1
    -- stop processing when there are no children
    for _, el in pairs(element.children or {}) do
      if el.id then ids_to_keep[el.id] = true end
      -- by default, we keep just direct children of the current sectioning element
      if depth < max_depth then
        keep_children(el, depth + 1)
      end
    end
  end
  -- also keep all siblings
  local function keep_siblings(element)
    local parent = element.parent
    if not parent then return end
    for k, v in pairs(parent.children or {}) do
      if v.id then ids_to_keep[v.id] = true end
    end
  end
  -- and of course, keep all parents
  local function keep_parents(element)
    local parent = element.parent
    if parent and parent.id then
      ids_to_keep[parent.id] = true
      -- we should keep siblings of all parents as well
      keep_siblings(parent)
      keep_parents(parent)
    end
  end
  -- always keep the top-level TOC hierarchy, even if we cannot find any sectioning element on the page
  keep_toplevel(tree)
  for _, id in ipairs(ids) do
    -- keep the current id
    ids_to_keep[id] = true
    local found_element = find_id(id, tree)
    if found_element then
      keep_children(found_element)
      keep_siblings(found_element)
      keep_parents(found_element)
    end
  end
  return ids_to_keep
end

-- process the .4tc file and convert entries to a tree structure
-- based on the sectioning level
-- @param parameters  make4ht parameters; `input` is used to build the file name
-- @param toc_levels  map from TOC entry type to its level number
-- @return TOC tree; a tree without entries when the file cannot be read
local function parse_4tc(parameters, toc_levels)
  local tcfilename = mkutils.file_in_builddir(parameters.input .. ".4tc", parameters)
  local f = mkutils.file_exists(tcfilename) and io.open(tcfilename, "r")
  if not f then
    log:warning("Cannot find TOC: " .. tcfilename)
    -- return a well-formed empty tree, so callers can iterate over children
    return { children = {} }
  end
  local tocentries = {}
  -- we need to find the lowest level used in the TOC
  local lowestlevel = 999
  for line in f:lines() do
    -- entries looks like: \doTocEntry\tocsubsection{1.2.2}{\csname a:TocLink\endcsname{5}{x5-60001.2.2}{QQ2-5-6}{aaaa}}{7}\relax
    -- we want do extract tocsubsection and x5-60001.2.2
    local toctype, id = line:match("\\doTocEntry\\(.-){.-}{.-{.-}{(.-)}")
    if toctype then
      local level = toc_levels[toctype]
      if not level then
        log:warning("Cannot find TOC level for: " .. toctype)
      else
        lowestlevel = level < lowestlevel and level or lowestlevel
        table.insert(tocentries, {type = toctype, id = id, level = level})
      end
    end
  end
  f:close()
  local toc = make_toc_tree(tocentries, lowestlevel)
  return toc
end

-- remove links that aren't in the TOC hierarchy that should be kept
-- @param toc          DOM element that contains the table of contents
-- @param matched_ids  set of ids to keep (id -> true)
local function remove_levels(toc, matched_ids)
  for _, link in ipairs(toc:query_selector("a")) do
    local href = link:get_attribute("href")
    -- find id in the href; <a> elements without href are left untouched
    local id = href and href:match("#(.+)")
    if id and not matched_ids[id] then
      -- toc links are in <span> elements that can contain the section number
      -- we must remove them too
      local parent = link:get_parent()
      if parent:get_element_name() == "span" then
        parent:remove_node()
      else
        -- if the parent node isn't <span>, remove at least the link itself
        link:remove_node()
      end
    end
  end
end

-- TOC tree parsed from the .4tc file. It is parsed on the first call and
-- shared by all pages (it used to leak as a global variable `toc`).
local toc_cache

-- DOM filter entry point: keep only the TOC entries related to the sections
-- on the current page.
-- @param dom         DOM object of the current page
-- @param parameters  make4ht parameters; may contain toc_query, title_query,
--                    max_depth and toc_levels, which take precedence over the
--                    filter settings "collapsetoc"
-- @return dom
local function collapsetoc(dom, parameters)
  -- set options
  local par = parameters
  local options = get_filter_settings "collapsetoc"
  -- query to find the TOC element in DOM
  local toc_query = par.toc_query or options.toc_query or ".tableofcontents"
  -- query to select sectioning elements with id's
  local title_query = par.title_query or options.title_query or "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a"
  -- level of child levels to be kept in TOC
  max_depth = par.max_depth or options.max_depth or max_depth
  -- set level numbers for particular TOC entry types
  local user_toc_levels = par.toc_levels or options.toc_levels or {}
  -- join user's levels with default
  for k,v in pairs(user_toc_levels) do toc_levels[k] = v end
  -- parse the .4tc file to get TOC tree
  toc_cache = toc_cache or parse_4tc(parameters, toc_levels)
  -- find sections in the current html file
  local ids = find_headers(dom, title_query)
  log:debug("Ids", table.concat(ids, ","))
  local ids_to_keep = find_toc_entries_to_keep(ids, toc_cache)
  local toc_dom = dom:query_selector(toc_query)[1]
  if toc_dom then
    remove_levels(toc_dom, ids_to_keep)
  else
    log:warning("Cannot find TOC element using query: " .. toc_query)
  end
  return dom
end

return collapsetoc

================================================ FILE: domfilters/make4ht-fixinlines.lua ================================================

-- names of elements that must not be direct children of <body>
local inline_elements = {
  a=true,
  b=true,
  big=true,
  i=true,
  small=true,
  tt=true,
  abbr=true,
  acronym=true,
  cite=true,
  code=true,
  dfn=true,
  em=true,
  kbd=true,
  strong=true,
  samp=true,
  time=true,
  var=true,
  bdo=true,
  br=true,
  img=true,
  map=true,
  object=true,
  q=true,
  span=true,
  sub=true,
  sup=true,
  button=true,
  input=true,
  label=true,
  select=true,
  textarea=true,
}

-- DOM filter entry point: wrap runs of inline elements, text and comments
-- that are direct children of <body> in <p> elements.
-- The list of inline elements can be replaced using the `inline_elements`
-- field of filter settings "fixinlines".
local function fix_inlines(obj)
  local settings = get_filter_settings "fixinlines"
  local inline_elements = settings.inline_elements or inline_elements
  local nodes = obj:get_path("html body")
  -- the paragraph that is being filled, nil when no paragraph is open
  local new = nil
  obj:traverse_node_list(nodes, function(jej)
    if jej._type == "ELEMENT" or jej._type == "TEXT" or jej._type == "COMMENT" then
      local name = string.lower(jej._name or "")
      if inline_elements[name]
        or jej._type == "TEXT"
        or jej._type == "COMMENT"
        or (name:match(":?math") and jej:get_attribute("display") == "inline")
      then
        if not new then
          -- start new paragraph
          if (jej._type == "TEXT" and jej._text:match("^%s+$")) or jej._type == "COMMENT" then
            -- ignore parts that contain only whitespace or comments and are placed before
            -- paragraph start
          else
            new = obj:create_element("p" )
            new:add_child_node(obj:copy_node(jej))
            jej:replace_node(new)
          end
        else
          -- paragraph already exists
          new:add_child_node(obj:copy_node(jej))
          jej:remove_node()
        end
      else
        -- close the current paragraph before new block element
        new = nil
      end
    else
      new = nil
    end
  end)
  return obj
end

return fix_inlines

================================================ FILE: domfilters/make4ht-idcolons.lua ================================================

-- non-alphanumeric characters that are kept in ids
local allowed_chars = {
  ["-"] = true,
  ["."] = true
}

-- Replace characters that are not allowed in ids with underscores.
-- @param id  string
-- @return the fixed string (only the string, not the replacement count of gsub)
local function fix_colons(id)
  -- match every non alphanum character
  local fixed = id:gsub("[%W]", function(s)
    -- some characters are allowed, we don't need to replace them
    if allowed_chars[s] then return s end
    -- in other cases, replace with underscore
    return "_"
  end)
  return fixed
end

-- DOM filter entry point.
local function id_colons(obj)
  -- replace non-valid characters in links and ids with underscores
  obj:traverse_elements(function(el)
    local name = string.lower(obj:get_element_name(el))
    if name == "a" then
      local href = el:get_attribute("href")
      -- don't replace colons in external links
      if href and not href:match("[a-z]%://") then
        local base, id = href:match("(.*)%#(.*)")
        if base and id then
          id = fix_colons(id)
          el:set_attribute("href", base .. "#" .. id)
        end
      end
    end
    local id = el:get_attribute("id")
    if id then
      el:set_attribute("id", fix_colons(id))
    end
  end)
  return obj
end

return id_colons

================================================ FILE: domfilters/make4ht-inlinecss.lua ================================================

local cssquery = require "luaxml-cssquery"
local log = logging.new("inlinecss")

-- selector -> joined declarations, for rules removed from the CSS file
local cssrules = {}
local cssobj = cssquery()

-- Parse CSS selector and declarations from one line.
-- They are always on one line in the CSS file produced by TeX4ht.
-- @return selector, declarations without the braces (nil when the line
--         doesn't contain a balanced {...} block)
local function parse_rule(line)
  local selector, values = line:match("%s*(.-)%s*(%b{})")
  if values then
    values = values:sub(2,-2)
  end
  return selector, values
end

-- Correctly join two declaration lists, depending on the ending.
-- @param old  existing declarations or nil
-- @param new  declarations to be added
local function join_values(old, new)
  local separator = ";"
  if not old then return new end
  -- if old already ends with ;, then don't use semicolon as a separator
  if old:match(";%s*$") then separator = "" end
  return old .. separator .. new
end

-- Move rules with id or attribute selectors from the CSS file to cssobj and
-- rewrite the file without them.
-- @param filename  path to the CSS file
-- @return true, or nil and an error message
local function parse_css(filename)
  local css_file = io.open(filename, "r")
  if not css_file then return nil, "cannot load css file: " .. (filename or "") end
  local newlines = {}
  for line in css_file:lines() do
    local selector, value
    -- match lines that contain # or =, as these can be id or attribute selectors
    if line:match("[%#%=].-{") then
      selector, value = parse_rule(line)
    end
    if selector and value then
      -- update attributes for the current selector
      cssrules[selector] = join_values(cssrules[selector], value)
    else
      -- lines that cannot be parsed as a one-line rule stay in the CSS file;
      -- they used to stop the filter with the "table index is nil" error
      newlines[#newlines+1] = line
    end
  end
  css_file:close()
  -- we need to add css rules
  for selector, value in pairs(cssrules) do
    cssobj:add_selector(selector, function(dom) end, {value=value})
  end
  -- write new version of the CSS file, without rules for ids and attributes
  local out_file = io.open(filename, "w")
  if not out_file then return nil, "cannot write css file: " .. filename end
  out_file:write(table.concat(newlines, "\n"))
  out_file:close()
  return true
end

local processed = false

-- process the HTML file and insert inline CSS for id and attribute selectors
return function(dom, par)
  if not processed then
    -- process the CSS file before everything else, but only once
    processed = true
    local css_file = mkutils.file_in_builddir(par.input .. ".css", par)
    local status, msg = parse_css(css_file)
    if not status then log:warning(msg) end
  end
  -- loop over all elements in the current page
  dom:traverse_elements(function(curr)
    -- use CSS object to match if the current element
    -- is matched by id attribute selector
    local matched = cssobj:match_querylist(curr)
    if #matched > 0 then
      -- join possible already existing style attribute with values from the CSS file
      local values = curr:get_attribute("style")
      -- join values of all matched rules
      for _,rule in ipairs(matched) do
        values = join_values(values, rule.params.value)
      end
      curr:set_attribute("style", values)
    end
  end)
  return dom
end

================================================ FILE: domfilters/make4ht-itemparagraphs.lua ================================================

-- TeX4ht puts contents of all \item commands into paragraphs. We are not
-- able to detect if it contain only one paragraph, or more. If just one,
-- we can remove the paragraph and put the contents directly to <li> element.
return function(dom)
  for _, li in ipairs(dom:query_selector("li")) do
    -- count elements and paragraphs that are direct children of <li>
    -- remove the paragraph only if it is the only child element
    local el_count = 0
    local par = {}
    for _, el in ipairs(li._children) do
      if el:is_element() then
        el_count = el_count + 1
        local name = el:get_element_name()
        if name == "p" then
          par[#par+1] = el
        elseif name == "a" and el_count == 1 and el:get_attribute("id") then
          -- if the first element is <a> with id, we can move the id to <li> and
          -- remove the <a> from the list of children, this is needed for nested lists
          local id = el:get_attribute("id")
          if not li:get_attribute("id") then
            li:set_attribute("id", id)
            el:remove_node()
            -- un-count the anchor only when it really was moved; otherwise
            -- it would be dropped when the paragraph is unwrapped below
            el_count = el_count - 1
          end
        end
      end
    end
    if #par == 1 and el_count == 1 then
      -- place paragraph children as direct children of <li>, this
      -- effectively removes <p>
      li._children = par[1]._children
    end
  end
  return dom
end

================================================ FILE: domfilters/make4ht-joincharacters.lua ================================================

local log = logging.new("joincharacters")

-- names of elements that are joined by default
local charclasses = {
  span=true,
  mn = true,
}

-- parents in which it is safe to join subsequent <mn> elements
local safe_mathml_elements = {
  math = true,
  mrow = true,
  mstyle = true,
  mtext = true,
  mtd = true,
}

local function update_mathvariant(curr)
  -- when we join several <mi> elements, they will be rendered incorrectly
  -- we must set the mathvariant attribute
  local parent = curr:get_parent()
  -- set mathvariant only if it haven't been set by the parent element
  if not parent:get_attribute("mathvariant") then
    local mathvariant = "italic"
    -- the joined elements don't have attributes
    curr._attr = curr._attr or {}
    curr:set_attribute("mathvariant", mathvariant)
  end
end

-- count all keys in a table (nil counts as an empty table)
local table_count = function(tbl)
  local tbl = tbl or {}
  local i = 0
  for k,v in pairs(tbl) do
    i = i + 1
  end
  return i
end

-- test if two elements have the same attributes with the same values
local has_matching_attributes = function (el, next_el)
  local el_attr = el._attr or {}
  local next_attr = next_el._attr or {}
  -- if the number of attributes doesn't match, elements don't match
  if table_count(next_attr) ~= table_count(el_attr) then return false end
  for k, v in pairs(el_attr) do
    -- if any attribute doesn't match, elements don't match
    if v~=next_attr[k] then return false end
  end
  return true
end

-- DOM filter entry point.
-- @param obj  DOM object of the current page
-- @param par  optional parameters; `charclasses` replaces the default set of
--             joined elements (filter settings "joincharacters" take precedence)
-- @return obj
local function join_characters(obj,par)
  -- join adjacent span and similar elements inserted by
  -- tex4ht to just one object.
  local par = par or {}
  local options = get_filter_settings "joincharacters"
  local charclasses = options.charclasses or par.charclasses or charclasses

  local get_name = function(curr)
    return string.lower(curr:get_element_name())
  end

  local is_span = function(next_el)
    return charclasses[get_name(next_el)]
  end

  local is_safe_mathml = function(el)
    -- we want to join the <mn> element only when it is safe. for example
    -- <msub><mn>1</mn><mn>2</mn></msub> should be left
    local current_name = get_name(el)
    if current_name == "mn" then
      local parent_name = get_name(el:get_parent())
      return safe_mathml_elements[parent_name]
    end
    return true
  end

  local has_children = function(curr)
    -- don't process spans that have child elements
    local children = curr:get_children() or {}
    -- if there is more than one child, we can be sure that it has child elements
    if #children > 1 then
      return true
    elseif #children == 1 then
      -- test if the child is an element
      return children[1]:is_element()
    end
    return false
  end

  local join_elements = function(el, next_el)
    -- if the following element matches, copy its children to the current element
    for _, child in ipairs(next_el:get_children()) do
      el:add_child_node(child)
    end
    -- remove the next element
    next_el:remove_node()
  end

  local function get_next(curr, class)
    local next_el = curr:get_next_node()
    if next_el and next_el:is_element() and is_span(next_el) then
      return next_el
    -- if the next node is space followed by a matching element, we should add this space
    elseif next_el and next_el:is_text() and get_next(next_el, class) then
      local text = next_el._text
      -- match only text containing just whitespace
      if text:match("^%s+$") then return next_el end
    end
  end

  obj:traverse_elements(function(el)
    -- loop over all elements and test if the current element is in a list of
    -- processed elements (charclasses) and if it doesn't contain children
    if is_span(el) and not has_children(el) and is_safe_mathml(el) then
      local next_el = get_next(el)
      -- loop over the following elements and test whether they are of the same type
      -- as the current one
      while next_el do
        -- save the next element because we will remove it later
        local real_next = get_next(next_el)
        if get_name(el) == get_name(next_el)
          and has_matching_attributes(el,next_el)
          and not el:get_attribute("id")
        then
          join_elements(el, next_el)
        -- add the whitespace
        elseif next_el:is_text() then
          local s = next_el._text
          -- we must create a new node
          el:add_child_node(el:create_text_node(s))
          next_el:remove_node()
        else
          real_next = nil
        end
        -- use the saved element as a next object
        next_el = real_next
      end
    end
  end)

  -- process <mi> elements
  obj:traverse_elements(function(el)
    local function get_next_mi(curr)
      local next_el = curr:get_next_node()
      if next_el and next_el:is_element() then
        return next_el
      end
    end
    local function has_no_attributes(x)
      return table_count(x._attr) == 0
    end

    -- join only subsequent <mi> elements with no attributes
    if get_name(el) == "mi" and has_no_attributes(el) then
      local next_el = get_next_mi(el)
      while next_el do
        local real_next = get_next_mi(next_el)
        if get_name(next_el) == "mi" and has_no_attributes(next_el) then
          join_elements(el, next_el)
          -- set math variant to italic
          -- (if the parent <mi> element doesn't set it to something else)
          update_mathvariant(el)
        else
          -- break the loop otherwise
          real_next = nil
        end
        next_el = real_next
      end
    end
  end)

  -- join text nodes in an element into one
  obj:traverse_elements(function(el)
    -- save the text
    local t = {}
    local children = el:get_children()
    for _, x in ipairs(children) do
      if x:is_text() then
        t[#t+1] = x._text
      else
        return nil
      end
    end
    el._text = table.concat(t)
    return el
  end)
  return obj
end

return join_characters

================================================ FILE: domfilters/make4ht-joincolors.lua ================================================

-- CSS files that have been processed already (href -> true)
local cssfiles = {}
local log = logging.new "joincolors"

-- keep mapping between span ids and colors
local colors = {}
--- Replace the per-span `#textcolor…` id rules of a stylesheet with classes.
-- Every rule matched by the pattern below is deleted from the CSS text. The
-- id is stored in the file-level `colors` table together with a class name
-- derived from the colour value, and one `.class{color:…;}` rule per distinct
-- colour is appended to the end of the stylesheet.
-- @param csscontent string with the contents of a CSS file
-- @return the updated CSS text
local function extract_colors(csscontent)
  local used_colors = {}
  -- delete the color ids and save the used colors
  csscontent = csscontent:gsub("[%a]*%#(textcolor.-)%s*{%s*color%s*%:%s*(.-)%s*%}%s", function(id, color)
    -- convert rgb() function to hex value and generate the span name
    local converted = "textcolor-" .. color:gsub("rgb%((.-),(.-),(.-)%)", function(r, g, b)
      return string.format("%02x%02x%02x", tonumber(r), tonumber(g), tonumber(b))
    end)
    -- remove the # characters from the converted color name
    converted = converted:gsub("%#", "")
    -- save the id and used color
    colors[id] = converted
    used_colors[converted] = color
    -- returning an empty string removes the matched rule from the CSS
    return ""
  end)
  -- add the used colors to css
  local rules = {}
  for class, color in pairs(used_colors) do
    rules[#rules + 1] = string.format(".%s{color:%s;}", class, color)
  end
  -- pairs() has no defined order, sort to get a stable result
  table.sort(rules)
  return csscontent .. table.concat(rules, "\n")
end

--- Rewrite one CSS file in place using extract_colors.
-- @param cssfile path of the CSS file
-- @return true on success; nil and an error message when the file cannot be
-- opened for reading or for writing
local function process_css(cssfile)
  local input = io.open(cssfile, "r")
  if not input then return nil, "Cannot open the CSS file: " .. cssfile end
  local content = input:read("*all")
  input:close()
  -- delete color ids and replace them with joined spans
  local newcontent = extract_colors(content)
  -- save the updated css file; the handle must be checked, io.open returns
  -- nil when the file is not writable
  local output = io.open(cssfile, "w")
  if not output then return nil, "Cannot write the CSS file: " .. cssfile end
  output:write(newcontent)
  output:close()
  return true
end

--- Process every stylesheet linked from the document, each one only once.
-- Files that were already handled are remembered in the file-level
-- `cssfiles` table.
-- @param dom DOM object of the current HTML file
local function process_css_files(dom)
  for _, el in ipairs(dom:query_selector("link")) do
    local href = el:get_attribute("href") or ""
    if not cssfiles[href] and href:match("css$") then
      log:debug("Load CSS file ", href)
      cssfiles[href] = true
      -- report the failure instead of dropping it silently
      local ok, msg = process_css(href)
      if not ok then log:warning(msg) end
    end
  end
end

--- DOM filter: replace `id` of colored spans with the shared color class.
-- @param dom DOM object of the current HTML file
-- @return the same DOM object
local function join_colors(dom)
  -- find css files in the current HTML file and join the colors
  process_css_files(dom)
  for _, span in ipairs(dom:query_selector("span")) do
    local id = span:get_attribute("id")
    -- test if the id is in the saved colors
    local class = id and colors[id]
    if class then
      -- remove the id
      span:set_attribute("id", nil)
      span:set_attribute("class", class)
    end
  end
  return dom
end

return join_colors

================================================
FILE: 
domfilters/make4ht-mathmlfixes.lua ================================================ local log = logging.new("mathmlfixes") local mathml_chardata = require "make4ht-mathml-char-def" -- should be inside , so we don't process it -- even though it is a token element local token = {"mi", "mn", "mo", "mtext", "mspace", "ms"} local token_elements = {} for _, tok in ipairs(token) do token_elements[tok] = true end -- helper functions to support MathML elements with prefixes ( etc). -- local function get_element_name(el) -- return element name and xmlns prefix local name = el:get_element_name() if name:match(":") then local prefix, real_name = name:match("([^%:]+):?(.+)") return real_name, prefix else return name end end local function get_attribute(el, attr_name) -- attributes can have the prefix, but sometimes they don't have it -- so we need to catch both cases local _, prefix = get_element_name(el) prefix = prefix or "" return el:get_attribute(attr_name) or el:get_attribute(prefix .. ":" .. attr_name) end local function get_new_element_name(name, prefix) return prefix and prefix .. ":" .. 
name or name end local function update_element_name(el, name, prefix) local newname = get_new_element_name(name, prefix) el._name = newname end local function create_element(el, name, prefix, attributes) local attributes = attributes or {} local newname = get_new_element_name(name, prefix) return el:create_element(newname, attributes) end local function element_pos(el) local pos, count = 0, 0 for _, node in ipairs(el:get_siblings()) do if node:is_element() then count = count + 1 if node == el then pos = count end end end return pos, count end -- test if element is the first element in the current element list local function is_first_element(el) local pos, count = element_pos(el) return pos == 1 end -- test if element is the last element in the current element list local function is_last_element(el) local pos, count = element_pos(el) return pos == count end local function is_token_element(el) local name, prefix = get_element_name(el) return token_elements[name], prefix end local function fix_token_elements(el) -- find token elements that are children of other token elements if is_token_element(el) then local parent = el:get_parent() local is_parent_token, prefix = is_token_element(parent) if is_parent_token then -- change top element in nested token elements to mstyle update_element_name(parent, "mstyle", prefix) end end end local function fix_nested_mstyle(el) -- the element can be child of token elements -- we must exterminate it local el_name = get_element_name(el) if el_name == "mstyle" then local parent = el:get_parent() if is_token_element(parent) then -- if parent doesn't have the mathvariant attribute copy it from if not parent:get_attribute("mathvariant") then local mathvariant = el:get_attribute("mathvariant") parent._attr = parent._attr or {} parent:set_attribute("mathvariant", mathvariant) end -- copy the contents of to the parent element parent._children = el._children end end end local function fix_mathvariant(el) -- set mathvariant of that is child of 
to have the same value local function find_mstyle(x) -- find if element has parent, and its value of mathvariant if not x:is_element() then return nil elseif get_element_name(x) == "mstyle" then return x:get_attribute("mathvariant") else return find_mstyle(x:get_parent()) end end if get_element_name(el) == "mi" then -- process only that have mathvariant set local oldmathvariant = el:get_attribute("mathvariant") if oldmathvariant then local mathvariant = find_mstyle(el:get_parent()) if mathvariant then el:set_attribute("mathvariant", mathvariant) end end end end local function contains_only_text(el) -- detect if element contains only text local elements = 0 local text = 0 local children = el:get_children() or {} for _ , child in ipairs(children) do if child:is_text() then text = text + 1 elseif child:is_element() then elements = elements + 1 end end return text > 0 and elements == 0 end -- check if element contains direct text. in that case, add -- local function fix_missing_mtext(el) if el:get_element_name() == "mstyle" and contains_only_text(el) then -- add child log:debug("mstyle contains only text: " .. 
el:get_text()) -- copy the current mode, change it's element name to mtext and add it as a child of local copy = el:copy_node() copy._name = "mtext" copy._parent = el el._children = {copy} end end local function is_radical(el) local radicals = {msup=true, msub=true, msubsup=true} return radicals[el:get_element_name()] end local function get_mrow_child(el) local get_first = function(x) local children = x:get_children() return children[1] end local first = get_first(el) -- either return first child, and if the child is , return it's first child if first and first:is_element() then if first:get_element_name() == "mrow" then return get_first(first), first else return first end end end local function fix_radicals(el) if is_radical(el) then local first_child, mrow = get_mrow_child(el) -- if the first child is only one character long, it is possible that there is a problem if first_child and string.len(first_child:get_text()) == 1 then local name = first_child:get_element_name() local siblings = el:get_siblings() local pos = el:find_element_pos() -- it doesn't make sense to do any further processing if the element is at the beginning if pos == 1 then return end if name == "mo" then for i = pos, 1,-1 do end end end end end -- put as child of if it already isn't here local allowed_top_mrow = { math=true } local function top_mrow(math) local children = math:get_children() local put_mrow = false -- don't process elements with one or zero children -- don't process elements that already are mrow local parent = math:get_parent() local parent_name if parent then parent_name = get_element_name(parent) end local current_name, prefix = get_element_name(math) if #children < 2 or not allowed_top_mrow[current_name] or current_name == "mrow" or parent_name == "mrow" then return nil end local mrow_count = 0 for _,v in ipairs(children) do if v:is_element() and is_token_element(v) then put_mrow = true -- break elseif v:is_element() and get_element_name(v) == "mrow" then mrow_count = 
mrow_count + 1 end end if not put_mrow and get_element_name(math) == "math" and mrow_count == 0 then -- put at least one to each put_mrow = true end if put_mrow then local newname = get_new_element_name("mrow", prefix) local mrow = math:create_element(newname) for _, el in ipairs(children) do mrow:add_child_node(el) end math._children = {mrow} end end local function get_fence(el, attr, form) -- convert fence attribute to element -- attr: open | close -- form: prefix | postfix local char = el:get_attribute(attr) local mo if char then local name, prefix = get_element_name(el) local newname = get_new_element_name("mo", prefix) mo = el:create_element(newname, {fence="true", form = form}) mo:add_child_node(mo:create_text_node(char)) end return mo end local function fix_mfenced(el) -- TeX4ht uses in some cases element which is deprecated in MathML. -- Firefox doesn't support it already. local name, prefix = get_element_name(el) if name == "mfenced" then -- we must replace it by startend local open = get_fence(el, "open", "prefix") local close = get_fence(el, "close", "postfix") -- there can be also separator attribute, but it is not used in TeX4ht -- change to and remove all attributes local newname = get_new_element_name("mrow", prefix) el._name = newname el._attr = {} -- open must be first child, close needs to be last if open then el:add_child_node(open, 1) end if close then el:add_child_node(close) end end end local function is_fence(el) return get_element_name(el) == "mo" and el:get_attribute("fence") == "true" end local function fix_mo_to_mfenced(el) -- LibreOffice NEEDS element. so we need to convert -- to . ouch. 
if is_fence(el) then
  -- (body of fix_mo_to_mfenced, the function header is on the previous line)
  local parent = el:get_parent()
  -- text of the fence without whitespace, it is used as the `open` attribute
  local open = el:get_text():gsub("%s*", "")
  -- close needs to be the last element in the sibling list of the current element
  local siblings = el:get_siblings()
  el:remove_node() -- we don't need this element anymore
  local close
  for i = #siblings, 1, -1 do
    -- must be local, a bare assignment would create a global variable
    local last = siblings[i]
    if last:is_element() then
      if is_fence(last) then
        -- set close attribute only if the last element is fence
        close = last:get_text():gsub("%s*", "")
        last:remove_node() -- the closing fence is not needed anymore
      end
      break -- break looping over elements once we find last element
    end
  end
  -- rename the parent to mfenced (keeping its namespace prefix) and replace
  -- all its attributes with open and close
  local _, prefix = get_element_name(parent)
  local newname = get_new_element_name("mfenced", prefix)
  parent._name = newname
  parent._attr = {open = open, close = close}
end
end -- end of fix_mo_to_mfenced

--- Join number, decimal point and the following number into one element.
-- Handles the sequence mn, mo (class "MathClass-punc" with text "."), mn and
-- also moves a minus sign text node that directly precedes the number into it.
-- @param el DOM element; only elements named "mn" are processed
local function fix_numbers(el)
  if get_element_name(el) == "mn" then
    -- sometimes minus sign can be outside of the number element
    local prev = el:get_sibling_node(-1)
    if prev and prev:is_text() and prev:get_text() == "−" then
      el:add_child_node(prev:copy_node(), 1)
      prev:remove_node()
    end
    local n = el:get_sibling_node(1)
    -- test if next element is the decimal point operator
    if n and n:is_element()
      and get_element_name(n) == "mo"
      and get_attribute(n, "class") == "MathClass-punc"
      and n:get_text() == "."
    then
      -- get next element and test if it is a number
      local x = el:get_sibling_node(2)
      if x and x:is_element() and get_element_name(x) == "mn" then
        -- join numbers and set it as text content of the current element
        local newnumber = el:get_text() .. "." .. x:get_text()
        log:debug("Joining numbers: " ..
newnumber) el._children = {} local newchild = el:create_text_node(newnumber) el:add_child_node(newchild) -- remove elements that hold dot and decimal part n:remove_node() x:remove_node() end end end end local function just_operators(list) -- count and return true if list contains just them local mo = 0 for _, x in ipairs(list) do if get_element_name(x) == "mo" then mo = mo + 1 end end return mo end local function fix_operators(x) -- change elements that are only children of any element to -- this fixes issues in LibreOffice with a^{*} -- I hope it doesn't introduce different issues -- process only local el_name, prefix = get_element_name(x) if el_name ~= "mo" then return nil end local siblings = x:get_siblings() -- test if current element list contains only if just_operators(siblings) == #siblings then if #siblings == 1 then if not x:get_attribute("stretchy") then -- one translates to local newname = get_new_element_name("mtext", prefix) x._name = newname log:debug("changing one to : " .. x:get_text()) -- I think we should use , but LO incorrectly renders it in , -- even if we use the mathvariant="normal" attribute. works, so -- we use that instead. -- x:set_attribute("mathvariant", "normal") end else -- multiple translate to local text = {} for _, el in ipairs(siblings) do text[#text+1] = el:get_text() end -- replace first text with concetanated text content -- of all elements x._children = {} local newtext = table.concat(text) local text_el = x:create_text_node(newtext) log:debug("changing to : " .. 
newtext) x:add_child_node(text_el) -- change to local newname = get_new_element_name("mtext", prefix) x._name = newname -- remove subsequent for i = 2, #siblings do siblings[i]:remove_node() end end end end local function get_third_parent(el) local first = el:get_parent() if not first then return nil end local second = first:get_parent() if not second then return nil end return second:get_parent() end local function add_space(el, pos) local parent = el:get_parent() local name, prefix = get_element_name(el) local space = create_element(parent, "mspace", prefix) space:set_attribute("width", "0.3em") parent:add_child_node(space, pos) end local function fix_dcases(el) -- we need to fix spacing in dcases* environments -- when you use something like: -- \begin{dcases*} -- 1 & if $a=b$ then -- \end{dcases*} -- the spaces around $a=b$ will be missing -- we detect if the elements contains spaces that are collapsed by the browser, and add explicit -- elements when necessary if el:get_element_name() == "mtext" then local parent = get_third_parent(el) if parent and parent:get_element_name() == "mtable" and parent:get_attribute("class") == "dcases-star" then local text = el:get_text() local pos = el:find_element_pos() if pos == 1 and text:match("%s$") then add_space(el, 2) elseif text:match("^%s") and not el._used then add_space(el, pos) -- this is necessary to avoid infinite loop, we mark this element as processed el._used = true end end end end local function is_empty_row(el) -- empty row should contain only one local count = 0 if el:get_text():match("^%s*$") then for _, child in ipairs(el:get_children()) do if child:is_element() then count = count + 1 end end else -- row is not empty if it contains any text return false end -- if there is one or zero childrens, then it is empty row return count < 2 end local function delete_last_empty_mtr(el) -- arrays sometimes contain last empty row, which causes rendering issues, -- so we should remove them local el_name, prefix = 
get_element_name(el) if el_name == "mtr" and get_attribute(el, "class") == "array-row" and is_last_element(el) and is_empty_row(el) then el:remove_node() end end local function fix_mtable_hlines(mtable) -- TeX4ht adds for hlines. we need to remove these elements and construct -- correct "rowlines" attribute for horizontal lines local hlines = {} local rowlines = {} local styles = {} local el_name, prefix = get_element_name(mtable) -- process only elements if el_name ~= "mtable" or mtable:get_attribute("rowlines") then -- if rowlines attribute is already set, we don't need to do anything return end local mtrs = mtable:query_selector("mtr") for count, mtr in ipairs(mtrs) do local hline = mtr:get_attribute("class") if hline and hline == "array-hline" then table.insert(hlines, "hline") -- we need to remove elements that represent hlines, hlines will be displayed using the rowlines attribute mtr:remove_node() elseif count == #mtrs and hline == "array-row" and is_empty_row(mtr) then -- ignore empty row that is inserted if \hline is at the end of the array mtr:remove_node() else -- just keep the track of normal lines table.insert(hlines, "") end end -- now we need to construct rowlines attribute for i, el in ipairs(hlines) do if el == "hline" then -- rowlines are used only inside the array. at the start and at the end, we need to use CSS if i == 1 then table.insert(styles, "border-top: 1px solid black;") elseif i == #hlines then table.insert(styles, "border-bottom: 1px solid black;") else table.insert(rowlines, "solid") end else -- we need to detect rows that weren't separated by hlines. in that case, we need to insert none to rowlines if i > 1 and i ~= #hlines then if hlines[i-1] ~= "hline" then table.insert(rowlines, "none") end end end end mtable:set_attribute("rowlines", table.concat(rowlines, " ")) local style = mtable:get_attribute("style") or "" mtable:set_attribute("style", style .. 
table.concat(styles, " ")) end local function fix_rel_mo(el) -- this is necessary for LibreOffice. It has a problem with relative that are -- first childs in an element list. This often happens in equations, where first -- element in a table column is an operator, like non-equal-, less-than etc. local el_name, prefix = get_element_name(el) if el_name == "mo" and not get_attribute(el, "fence") -- ignore fences and not get_attribute(el, "form") -- these should be also ignored and not get_attribute(el, "accent") -- and accents too then local parent = el:get_parent() if is_first_element(el) then local mrow = create_element(parent, "mrow", prefix) parent:add_child_node(mrow, 1) elseif is_last_element(el) then local mrow = create_element(parent, "mrow", prefix) parent:add_child_node(mrow) end end end local uchar = utf8.char local ucodes = utf8.codes -- current version of MathML doesn't support the mathvariant attribute, so we need to replace unicode characters with the corresponding base code for the current font style local function replace_characters(math, current_style) -- recursively loop over all the children of the math element and replace the unicode characters with the corresponding base code for the current font style for _, child in ipairs(math:get_children()) do if child:is_text() then local text = child:get_text() local new_text = {} for _ ,char in ucodes(text) do -- replace the unicode characters with the corresponding base code for the current font style local code = mathml_chardata[char] if code then local new_char = code[current_style] or char table.insert(new_text, uchar(new_char)) else table.insert(new_text, uchar(char)) end end child._text = table.concat(new_text) elseif child:is_element() then local current_style = child:get_attribute("mathvariant") or current_style replace_characters(child, current_style) end end end local function fix_mathml_chars(el) local el_name, _ = get_element_name(el) if el_name == "math" then replace_characters(el, "normal") 
end end local function fix_intent(mrow) -- put the intent or arg attribute on a child element if mrow with these attributes contain only single child node local element_name, _ = get_element_name(mrow) if element_name ~= "mrow" then return nil end local intent = get_attribute(mrow,"intent") local arg = get_attribute(mrow, "arg") if intent or arg then local children = mrow:get_children() local first_child = children[1] -- if there is only one child, we can set the attributes on it and remove mrow if #children == 1 and first_child:is_element() then local parent = mrow:get_parent() -- replace the mrow with its single child local pos = mrow:find_element_pos() parent._children[pos] = first_child -- now set the attributes on the child element first_child:set_attribute("arg", arg) first_child:set_attribute("intent", intent) end end end return function(dom) dom:traverse_elements(function(el) if settings.output_format ~= "odt" then -- LibreOffice needs , but Firefox doesn't fix_mfenced(el) else fix_mo_to_mfenced(el) fix_rel_mo(el) end fix_mtable_hlines(el) fix_radicals(el) fix_token_elements(el) fix_nested_mstyle(el) fix_missing_mtext(el) fix_numbers(el) fix_operators(el) fix_mathvariant(el) if settings.output_format ~= "odt" then -- ODT needs older MathML version fix_mathml_chars(el) end fix_dcases(el) fix_intent(el) top_mrow(el) delete_last_empty_mtr(el) end) return dom end ================================================ FILE: domfilters/make4ht-odtfonts.lua ================================================ return function(dom, params) -- fix ODT style for fonts -- sometimes, fonts have missing size, we need to patch styles local properties = get_filter_settings "odtfonts" or {} local fix_lgfile_fonts = params.patched_lg_fonts or properties.patched_lg_fonts or {} for _, style in ipairs(dom:query_selector "style|style") do local typ = style:get_attribute("style:family") if typ == "text" then -- detect if the style is for font local style_name = 
style:get_attribute("style:name")
      -- (the line above is the right-hand side of the `local style_name =`
      -- assignment that starts on the previous line)
      -- split the style name to the font name and three numeric parts
      local name, size, size2, size3 = style_name:match("(.-)%-(%d*)x%-(%d*)x%-(%d+)")
      if name then
        -- find if the style corresponds to a problematic font (it is set in formats/make4ht-odt.lua)
        local used_name = name .. "-" .. size
        if fix_lgfile_fonts[used_name] then
          -- copy current style and fix the name: the copy is saved under
          -- a name with empty first size
          local new = style:copy_node()
          new:set_attribute("style:name", string.format("%s-x-%sx-%s", name, size2, size3))
          local parent = style:get_parent()
          parent:add_child_node(new)
        end
      end
    end
  end
  return dom
end

================================================
FILE: domfilters/make4ht-odtimagesize.lua
================================================
local log = logging.new "odtimagesize"

--- Set correct dimensions to frames around images.
-- The svg:width and svg:height attributes of the first draw:image found in
-- each draw:frame are copied to the frame.
-- @param dom DOM object
-- @return the same DOM object
return function(dom)
  for _, frame in ipairs(dom:query_selector("draw|frame")) do
    local images = frame:query_selector("draw|image")
    if #images > 0 then
      local image = images[1]
      local width = image:get_attribute("svg:width")
      local height = image:get_attribute("svg:height")
      -- copy only the dimensions that the image really declares
      if width then frame:set_attribute("svg:width", width) end
      if height then frame:set_attribute("svg:height", height) end
      log:debug("image dimensions", width, height)
    end
  end
  return dom
end

================================================
FILE: domfilters/make4ht-odtpartable.lua
================================================
--- Find all tables inside paragraphs, replace the found paragraphs with the child table.
-- @param dom DOM object
-- @return the same DOM object
return function(dom)
  -- the loop variable is not named `table`, so the standard library stays visible
  for _, tbl in ipairs(dom:query_selector("text|p table|table")) do
    -- replace the paragraph by its child element
    local parent = tbl:get_parent()
    parent:replace_node(tbl)
  end
  return dom
end

================================================
FILE: domfilters/make4ht-odtsvg.lua
================================================
-- we need to set dimensions for SVG images produced by \Picture commands
local log = logging.new "odtsvg"

local function get_svg_dimensions(filename)
  local width, height
log:debug("file exists", filename, mkutils.file_exists(filename)) if mkutils.file_exists(filename) then for line in io.lines(filename) do width = line:match("width%s*=%s*[\"'](.-)[\"']") or width height = line:match("height%s*=%s*[\"'](.-)[\"']") or height -- stop parsing once we get both width and height if width and height then break end end end return width, height end -- process return function(dom) for _, pic in ipairs(dom:query_selector("draw|image")) do local imagename = pic:get_attribute("xlink:href") -- update SVG images dimensions log:debug("image", imagename) local parent = pic:get_parent() local width = parent:get_attribute("svg:width") local height = parent:get_attribute("svg:height") -- if width == "0.0pt" then width = nil end -- if height == "0.0pt" then height = nil end if not width or not height then if imagename:match("svg$") then width, height = get_svg_dimensions(imagename) -- or width, height elseif imagename:match("png$") or imagename:match("jpe?g$") then end end log:debug("dimensions", width, height) parent:set_attribute("svg:width", width) parent:set_attribute("svg:height", height) -- if end return dom end ================================================ FILE: domfilters/make4ht-sectionid.lua ================================================ local mkutils = require "mkutils" local log = logging.new("tocid") -- Unicode data distributed with ConTeXt -- defines "characters" table if not mkutils.isModuleAvailable("make4ht-char-def") then log:warning("char-def module not found") log:warning("cannot fix section id's") return function(dom) return dom end end local chardata = require "make4ht-char-def" local toc = nil local function is_letter(info) -- test if character is letter local category = info.category or "" return category:match("^l") end local function is_space(info) local category = info.category or "" return category == "zs" end local function is_number(char) return char >= 48 and char <= 57 end local uchar = utf8.char local function 
normalize_letter(char, result) local info = chardata[char] or {} -- first get lower case of the letter local lowercase = info.lccode or char -- remove accents. the base letter is in the shcode field local lowerinfo = chardata[lowercase] or {} -- when no shcode, use the current lowercase char local shcode = lowerinfo.shcode or lowercase -- shcode can be table if it contains multiple characters -- normaliz it to a table, so we can add all letters to -- the resulting string if type(shcode) ~= "table" then shcode = {shcode} end for _, x in ipairs(shcode) do result[#result+1] = uchar(x) end end local escape_name = function(name) local result = {} -- remove LaTeX commands name = name:gsub("\\[%a]+", "") name = name:gsub("^%s+", ""):gsub("%s+$", "") for _,char in utf8.codes(name) do local info = chardata[char] or {} if is_space(info) then result[#result+1] = " " elseif is_letter(info) then normalize_letter(char, result) elseif is_number(char) then result[#result+1] = uchar(char) end end --- convert table with normalized characters to string local name = table.concat(result) -- remove spaces name = name:gsub("%s+", "-") name = name:gsub("^%-", "") -- ids cannot start with number in HTML 4, so we will add x name = name:gsub("^(%d)", "x%1") return name end local function parse_toc_line(line) -- the section ids and titles are saved in the following format: -- \csname a:TocLink\endcsname{1}{x1-20001}{QQ2-1-2}{Nazdar světe} -- ............................... id ................. title ... local id, name = line:match("a:TocLink.-{.-}{(.-)}{.-}(%b{})") if id then return id, escape_name(name) end end local used = {} local function parse_toc(filename) local toc = {} if not mkutils.file_exists(filename) then return nil, "Cannot open TOC file " .. 
filename end for line in io.lines(filename) do local id, name = parse_toc_line(line) -- if section name doesn't contain any text, it would lead to id which contains only number -- this is invalid in HTML if name == "" then name = "_" end local orig_name = name -- not all lines in the .4tc file contains TOC entries if id then -- test if the same name was used already. user should be notified if used[name] then -- update name = name .. used[name] log:debug("Duplicate id found: ".. orig_name .. ". New id: " .. name) end used[orig_name] = (used[orig_name] or 0) + 1 toc[id] = name end end return toc end -- we don't want to change the original id, as there may be links to it from the outside -- so we will set it to the parent element (which should be h[1-6]) local function set_id(el, id) local section = el:get_parent() local section_id = section:get_attribute("id") if section_id and section_id~=id then -- if it already has id, we don't override it, but create dummy child instead local new = section:create_element("span", {id=id}) section:add_child_node(new,1) else section:set_attribute("id", id) end end -- we want to remove elements from some elements, most notably
    local elements_to_remove = { figure = true, figcaption = true }
-- Names of parent elements whose child anchor with id is removed and the id is
-- moved to the parent. Each name must be written as `name = true`: a bare
-- `figcaption` in the constructor is a read of an undefined global variable,
-- which stores nil and adds no key to the table.

--- Move the id to the parent element and remove the anchor.
-- @param el the anchor element that will be removed
-- @param parent element that receives the id
-- @param id value of the id attribute
local function remove_a(el, parent, id)
  parent:set_attribute("id", id)
  el:remove_node()
end

--- DOM filter: replace generated section ids with ids made from the titles.
-- The mapping is loaded from the .4tc file by parse_toc and cached in the
-- file-level `toc` variable.
-- @param dom DOM object
-- @param par table with parameters; the `input` and `tex4ht_sty_par` fields
-- are read here, and the table is passed to mkutils.file_in_builddir
-- @return the same DOM object
return function(dom, par)
  local msg
  -- `toc or parse_toc(...)` would be truncated to one value and the error
  -- message from parse_toc would be lost, so the call is made separately
  if not toc then
    toc, msg = parse_toc(mkutils.file_in_builddir(par.input .. ".4tc", par))
  end
  -- don't do anything if toc cannot be found
  if not toc then
    log:warning(msg or "Cannot load TOC")
    return dom
  end
  -- if user selects the "notoc" option on the command line, we
  -- will not update href links
  local sty_par = par["tex4ht_sty_par"] or ""
  local notoc = sty_par:match("notoc") ~= nil
  -- the HTML file can already contain ID that we want to assign
  -- we will not set duplicate id from TOC in that case
  local toc_ids = {}
  for _, el in ipairs(dom:query_selector("[id]")) do
    local id = el:get_attribute("id")
    toc_ids[id] = true
  end
  -- process all elements with id attribute and all links with href
  for _, el in ipairs(dom:query_selector "[id],a[href]") do
    local id, href = el:get_attribute("id"), el:get_attribute("href")
    if id then
      local name = toc[id]
      local parent = el:get_parent()
      -- remove unnecessary anchors if the parent doesn't have id yet
      if elements_to_remove[parent:get_element_name()]
        and not parent:get_attribute("id")
        and el:get_element_name() == "a"
      then
        remove_a(el, parent, id)
        -- ids that are not in the TOC have no new name; set_id called with
        -- nil would take its "parent already has an id" branch and create
        -- a span without id
        -- NOTE(review): assumes el:get_parent() still returns the parent
        -- after remove_node() - confirm against LuaXML
        if name then set_id(el, name) end
      -- replace id with new section id
      elseif name and not toc_ids[name] then
        set_id(el, name)
      elseif name then
        log:debug("Document already contains id: " .. name)
      end
    end
    if href and notoc == false then
      -- replace links to sections with new id
      local base, anchor = href:match("^(.*)%#(.+)")
      -- anchor is nil for links without the # part
      local name = anchor and toc[anchor]
      if name then
        el:set_attribute("href", base .. "#" .. name)
      end
    end
  end
  return dom
end

================================================
FILE: domfilters/make4ht-t4htlinks.lua
================================================
-- This filter is used by the ODT output format to fix links

--- Convert t4htlink elements to text:a and text:bookmark elements.
-- @param dom DOM object
-- @return the same DOM object
return function(dom)
  for _, link in ipairs(dom:query_selector("t4htlink")) do
    local name = link:get_attribute("name")
    local href = link:get_attribute("href")
    local children = link:get_children()
    -- print("link", name, href, #link._children, link:get_text())
    if #children > 0 and href then
      -- add a link if it contains any subnodes and has href attribute
      link._name = "text:a"
      -- keep only the part of the address that starts with #
      href = href:gsub("^.+4oo%#", "#")
      link._attr = {["xlink:type"]="simple", ["xlink:href"]=href}
      -- if the link is named, add a bookmark
      if name then
        local bookmark = link:create_element("text:bookmark", {["text:name"] = name})
        link:add_child_node(bookmark)
      end
    elseif name then
      -- add bookmark if element has name
      link._name = "text:bookmark"
      link._attr = {["text:name"] = name}
    else
      -- just remove the link in other cases
      link:remove_node()
    end
  end
  return dom
end

================================================
FILE: domfilters/make4ht-tablecaption.lua
================================================
--- Find the closest ancestor of the caption that is named "table".
-- The caption doesn't need to be a direct child of the table, so the chain
-- of parents is followed until a table is found.
-- @param caption caption element
-- @return the table element, or nil when no ancestor is a table
local function get_parent_table(caption)
  local parent = caption:get_parent()
  while parent do
    if parent:get_element_name() == "table" then
      return parent
    end
    parent = parent:get_parent()
  end
  return nil
end

--- Move captions to the first position in their tables.
-- @param dom DOM object
-- @return the same DOM object
return function(dom)
  -- the caption element must be a first element in table, it cannot be contained inside tr
  for _, caption in ipairs(dom:query_selector("table caption")) do
    -- the variable is not named `table`, so the standard library stays visible
    local tbl = get_parent_table(caption)
    if tbl then
      -- insert caption as the first child of table
      tbl:add_child_node(caption:copy_node(), 1)
      -- remove the original caption
      caption:remove_node()
    end
  end
  return dom
end

================================================
FILE: domfilters/make4ht-tablerows.lua
================================================
-- `logging` is a global that is not defined in this file (presumably set up by make4ht)
local log = logging.new ("tablerows")

-- DOM filter that removes empty table rows, <hr> elements in "hline" rows
-- and the empty last row produced for longtables.
return function(dom)
  -- Detect whether the element contains child elements.
  local has_child_elements = function(child)
    local child_elements = 0
    local children = child:get_children()
    local last_child_pos
    for pos, el in ipairs(children) do
      last_child_pos = pos
      -- log:info("element name", el._name)
      if el:is_element() then
        child_elements = child_elements + 1
      end
    end
    -- longtable has <p> inside empty rows, we regard them as empty
    if child_elements == 1
      and children[last_child_pos]:get_element_name() == "p"
      and child:get_text():gsub("%s", "") == "" then
      child_elements = 0
    end
    return child_elements > 0
  end

  -- A row is empty when it holds exactly one element child and that child
  -- has no child elements and only whitespace text.
  local is_empty_row = function(row)
    local not_empty = false
    local element_count = 0
    -- ignore hline rows
    local row_class = row:get_attribute("class")
    if row_class == "hline" or row_class == "cline" then return false end
    -- detect if the row contain only one empty child
    for _, child in ipairs(row:get_children() or {}) do
      if child:is_element() then
        element_count = element_count + 1
        -- empty rows contain only one element, it is not empty otherwise
        if element_count > 1 or has_child_elements(child) then return false end
        -- detect if it contains only whitespace
        not_empty = child:get_text():gsub("%s","") ~= "" or not_empty
      end
    end
    -- print("element count", element_count, not_empty)
    return element_count == 1 and not_empty == false
  end

  -- Return true when the loaded CSS never mentions the id of the row.
  local is_not_styled = function(row, css)
    local id = row:get_attribute("id")
    if not id then return true end -- no styling without id
    -- escape every punctuation character, so the id is matched literally
    -- (the original escaped only "-", other magic characters could break the search)
    local search_term = "%#" .. id:gsub("%p", "%%%0")
    -- other elements can have an id that starts with the row id, so the match
    -- is deliberately loose: the row is kept whenever the pattern is found
    return not css:match(search_term)
  end

  -- Remove <hr> elements from "hline" rows.
  local hline_hr = function(row)
    for _, hr in ipairs(row:query_selector(".hline hr")) do
      hr:remove_node()
    end
  end

  -- longtable contains last row of empty cells, remove it.
  local longtable_last_row = function(tbl)
    local rows = tbl:query_selector("tr")
    local last_row = rows[#rows]
    if not last_row or last_row:get_attribute("class") == "hline" then return end
    for _, cell in ipairs(last_row:query_selector("td")) do
      -- stop as soon as any cell in the last row has some content
      if has_child_elements(cell) or not cell:get_text():match("^%s*$") then return end
    end
    last_row:remove_node()
  end

  -- Read all CSS files linked from the document head and return their
  -- concatenated contents. The empty rows can be styled using CSS, for
  -- example configuration for Booktabs does that. We shouldn't remove such rows.
  local load_css_files = function()
    local cssfiles = {}
    for _, link in ipairs(dom:query_selector("head link")) do
      local src = link:get_attribute("href")
      if src then
        local f = io.open(src, "r")
        if f then
          local contents = f:read("*all")
          f:close()
          table.insert(cssfiles, contents)
        end
      end
    end
    return table.concat(cssfiles, "\n")
  end

  local css = load_css_files()
  for _, tbl in ipairs(dom:query_selector("table")) do
    -- find the empty rows
    local rows = tbl:query_selector("tr")
    for _, row in ipairs(rows) do
      if is_empty_row(row) and is_not_styled(row, css) then row:remove_node() end
      hline_hr(row)
    end
    local class = tbl:get_attribute("class")
    if class and class:match("longtable") then
      longtable_last_row(tbl)
    end
  end
  return dom
end

================================================
FILE: extensions/make4ht-ext-common_domfilters.lua
================================================
local M = {}

-- this variable will hold the output format name
local current_format

local filter = require "make4ht-domfilter"
-- local process = filter {"fixinlines", "idcolons", "joincharacters" }

-- Remember the requested format; the extension is enabled for every format.
function M.test(format)
  current_format = format
  -- if format == "odt" then return false end
  return true
end

-- Register the common DOM filters for the detected output format.
-- (The unused `count` bookkeeping variable of the original was removed.)
function M.modify_build(make)
  if current_format == "odt" then
    -- some filters doesn't make sense in the ODT format
    local process = filter ({"joincharacters", "mathmlfixes"}, "commondomfilters")
    local charclasses = {mn = true, ["text:span"] = true, mi=true}
    make:match("4oo$", process, {charclasses= charclasses})
    -- match math documents
    make:match("4om$", process, {charclasses= charclasses})
  else
    local process = filter({"fixinlines", "idcolons", "joincharacters", "tablecaption", "mathmlfixes", "tablerows","booktabs", "sectionid", "itemparagraphs"}, "commondomfilters")
    make:match("html?$", process)
  end
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-common_filters.lua
================================================
local M = {}

local filter = require "make4ht-filter"

local process = filter({"cleanspan-nat", "fixligatures", "hruletohr", "entities", "fix-links"}, "commonfilters")

-- filters support only html formats
function M.test(format)
  if format == "odt" then return false end
  return true
end

-- Register the common text filters and move them to the front of the match list.
function M.modify_build(make)
  make:match("html?$", process)
  local matches = make.matches
  -- the filters should be first match to be executed, especially if tidy
  -- should be executed as well
  if #matches > 1 then
    local last = matches[#matches]
    table.insert(matches, 1, last)
    matches[#matches] = nil
  end
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-copy_images.lua
================================================
local M = {}
local mkutils = require "mkutils"
local domfilter = require "make4ht-domfilter"

local copied_images = {}

-- Copy an image to the output directory (optionally into the img_dir subdirectory).
-- Returns nil and a message for external (URL) images, nothing otherwise.
local function image_copy(path, parameters, img_dir)
  if mkutils.is_url(path) then return nil, "External image" end
  -- get image basename
  local basename = path:match("([^/]+)$")
  -- if outdir is empty, keep it empty, otherwise add / separator
  local outdir = parameters.outdir == "" and "" or parameters.outdir .. "/"
  if img_dir ~= "" then
    outdir = outdir .. img_dir .. "/"
  end
  -- handle trailing //
  outdir = outdir:gsub("%/+","/")
  local output_file = outdir .. basename
  if outdir == "" then
    mkutils.cp(path, output_file)
  else
    mkutils.copy(path, output_file)
  end
end

-- filters support only html formats
-- (the original also stored the format in an undeclared global `current_format`
-- that nothing in this file reads; the accidental global was dropped)
function M.test(format)
  if format == "odt" then return false end
  return true
end

-- Rewrite image paths in HTML files and copy the images to the output directory.
function M.modify_build(make)
  -- `get_filter_settings` is a global that is not defined in this file
  local ext_settings = get_filter_settings "copy_images" or {}
  local img_dir = ext_settings.img_dir or ""
  local img_extensions = ext_settings.extensions or {"jpg", "png", "jpeg", "svg"}
  local process = domfilter({
    function(dom, par)
      for _, img in ipairs(dom:query_selector("img")) do
        local src = img:get_attribute("src")
        if src and not mkutils.is_url(src) then
          -- remove path specification
          src = src:match("([^/]+)$")
          if img_dir ~= "" then
            src = img_dir .. "/" .. src
            src = src:gsub("%/+", "/")
          end
          img:set_attribute("src", src)
        end
      end
      return dom
    end
  }, "copy_images")

  -- add matcher for all image extensions
  for _, ext in ipairs(img_extensions) do
    make:match(ext .. "$", function(path, parameters)
      image_copy(path, parameters, img_dir)
      -- prevent further processing of the image
      return false
    end)
  end

  make:match("html$", process, {img_dir = img_dir})
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-detect_engine.lua
================================================
-- support magic comments used by TeXShop and TeXWorks to detect used engine and format
--
local M = {}
local log = logging.new("detect engine")
local htlatex = require "make4ht-htlatex"

-- we must change build sequence when Plain TeX is requested
local change_table = {
  tex = { htlatex = "etex", command = htlatex.httex },
  pdftex = { htlatex = "etex", command = htlatex.httex },
  etex = { htlatex = "etex", command = htlatex.httex },
  luatex = { htlatex = "dviluatex", command = htlatex.httex },
  xetex = { htlatex = "xetex -no-pdf", command = htlatex.httex },
  xelatex = { htlatex = "xelatex -no-pdf", },
  lualatex = { htlatex = "dvilualatex", },
  pdflatex = { htlatex = "latex" },
  harflatex = { htlatex = "lualatex-dev --output-format=dvi" },
  harftex= { htlatex = "harftex --output-format=dvi", command = htlatex.httex }
}

-- Find the magic line containing the program name.
-- Only the leading block of comments and empty lines is inspected.
-- Returns the lowercased program name, or nil and a message.
local function find_magic_program(filename)
  local get_comment = function(line)
    return line:match("%s*%%%s*(.+)")
  end
  local empty_line = function(line)
    return line:match("^%s*$")
  end
  -- io.lines raises an error for unreadable files, open the file explicitly instead
  local f = filename and io.open(filename, "r")
  if not f then
    return nil, "Cannot open file: " .. tostring(filename)
  end
  local program
  for line in f:lines() do
    local comment = get_comment(line)
    -- read line after line from the file, break the processing after first
    -- non comment or non empty line
    if not comment and not empty_line(line) then break end
    comment = comment or "" -- comment is nil for empty lines
    program = comment:match("!%s*[Tt][Ee][Xx].-program%s*=%s*([^%s]+)")
    if program then break end
  end
  f:close()
  if program then return program:lower() end
  return nil, "Cannot find program name"
end

-- update htlatex entries with detected program
local function update_build_sequence(program, build_seq)
  -- handle Plain TeX
  local replaces = change_table[program] or {}
  local is_xetex = program:match("xe") -- we must handle xetex in tex4ht
  for _, entry in ipairs(build_seq) do
    if entry.name == "htlatex" then
      -- handle httex
      entry.command = replaces.command or entry.command
      local params = entry.params or {}
      params.htlatex = replaces.htlatex or params.htlatex
      entry.params = params
    elseif is_xetex and entry.name == "tex4ht" then
      -- tex4ht must process .xdv file if the TeX file was compiled by XeTeX
      -- (guarded: the params table or tex4ht_par may be missing)
      local params = entry.params or {}
      params.tex4ht_par = (params.tex4ht_par or "") .. " -.xdv"
      entry.params = params
    end
  end
end

function M.modify_build(make)
  -- find magic comments in the TeX file
  local build_seq = make.build_seq
  local tex_file = make.params.tex_file
  local program = find_magic_program(tex_file)
  if program then
    log:info("Found program name", program)
    update_build_sequence(program, build_seq)
  else
    log:warning("Cannot find magic line with the program name")
  end
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-dvisvgm_hashes.lua
================================================
local dvireader = require "make4ht-dvireader"
local mkutils = require "mkutils"
local filter = require "make4ht-filter"
local log = logging.new "dvisvgm_hashes"

local dvisvgm_par = {}

local M = {}

-- mapping between tex4ht image names and hashed image names
local output_map = {}
local dvisvgm_options = "-n --exact --embed-bitmaps -c ${scale},${scale}"
local parallel_size = 64
local make_command = "make -j ${process_count} -f ${make_file}"
local test_make_command = "make -v"
-- local parallel_size = 3

-- Build the file name of a hashed SVG image.
local function make_hashed_name(base, hash)
  return base .. "-" ..hash..".svg"
end

-- detect the number of available processors
-- (os.name and os.type are not part of stock Lua; presumably LuaTeX extensions)
local cpu_cnt = 3 -- set a reasonable default for non-Linux systems
if os.name == 'linux' then
  -- the original asserted that /proc/cpuinfo can be opened and aborted the
  -- extension otherwise; now the default is kept when the file is unavailable
  local cpuinfo = io.open('/proc/cpuinfo', 'r')
  if cpuinfo then
    cpu_cnt = 0
    for line in cpuinfo:lines() do
      if line:match('^processor') then
        cpu_cnt = cpu_cnt + 1
      end
    end
    -- set default number of threds if no CPU core have been found
    if cpu_cnt == 0 then cpu_cnt = 1 end
    cpuinfo:close()
  end
elseif os.name == 'cygwin' or os.type == 'windows' then
  -- windows has NUMBER_OF_PROCESSORS environmental value
  -- store it as a number (the original kept the raw string)
  local nop = tonumber(os.getenv('NUMBER_OF_PROCESSORS'))
  if nop then cpu_cnt = nop end
end

-- process output of dvisvgm and find output page numbers and corresponding files
-- `pages` is an optional table that is extended and returned
local function get_generated_pages(output, pages)
  local pages = pages or {}
  if not output then return pages end
  local pos, finish, page = string.find(output, "processing page (%d+)", 1)
  while pos do
    -- `file` was an accidental global in the original
    local file
    pos, finish, file = string.find(output, "output written to ([^\n^\r]+)", finish)
    pages[tonumber(page)] = file
    if not finish then break end
    pos, finish, page = string.find(output, "processing page (%d+)", finish)
  end
  return pages
end

-- Compress a sorted list of page numbers to a list of "start-stop" ranges
-- and single pages.
local function make_ranges(pages)
  local newpages = {}
  local start
  for i=1,#pages do
    local current = pages[i]
    local next_el = pages[i+1] or current + 100 -- just select a big number
    local diff = next_el - current
    if diff == 1 then
      if not start then start = current end
    else
      local element
      if start then
        element = start .. "-" .. current
      else
        element = current
      end
      newpages[#newpages+1] = element
      start = nil
    end
  end
  return newpages
end

-- Return the contents of a dvisvgm log file, or nil and a message.
local function read_log(dvisvgmlog)
  local f = io.open(dvisvgmlog, "rb")
  if not f then return nil, "Cannot read dvisvgm log" end
  local output = f:read("*all")
  f:close()
  return output
end

-- test the existence of GNU Make, which can execute tasks in parallel
local function test_make()
  local make = io.popen(test_make_command, "r")
  -- io.popen is not available on every platform
  if not make then return false end
  local content = make:read("*all")
  make:close()
  -- the missing command can be detected only by checking that the content is empty
  return content~=nil and content ~= ""
end

-- Write text to a file. Returns true, or nil and a message when the file
-- cannot be opened (the original crashed on a nil handle).
local function save_file(filename, text)
  local f, msg = io.open(filename, "w")
  if not f then return nil, msg end
  f:write(text)
  f:close()
  return true
end

-- Create a makefile with one target per page sequence.
-- Returns the make command and the list of dvisvgm log files.
local function make_makefile_command(idvfile, page_sequences)
  local logs = {}
  local all = {} -- list of targets in the "all:" makefile target
  local targets = {}
  local basename = idvfile:gsub("%.idv$", "")
  local makefilename = basename .. "-images" .. ".mk"
  -- build make targets
  for i, ranges in ipairs(page_sequences) do
    local target = basename .. "-" .. i
    local logfile = target .. ".dlog"
    logs[#logs + 1] = logfile
    all[#all+1] = target
    local chunk = target .. ":\n\tdvisvgm -v4 " .. dvisvgm_options .. " -p " .. ranges .. " " .. idvfile .. " 2> " .. logfile .. "\n"
    targets[#targets + 1] = chunk
  end
  -- construct makefile and save it
  local makefile = "all: " .. table.concat(all, " ") .. "\n\n" .. table.concat(targets, "\n")
  local saved, msg = save_file(makefilename, makefile)
  if not saved then
    log:warning("Cannot save makefile: " .. tostring(msg))
  end
  -- the % operator on strings is not stock Lua; presumably template
  -- interpolation installed by mkutils
  local command = make_command % {process_count = cpu_cnt, make_file = makefilename}
  log:debug("Makefile command: " .. command)
  return command, logs
end

-- Prepare the command that converts the requested pages.
-- Returns the command and the list of log files it will produce.
local function prepare_command(idvfile, pages)
  if #pages > parallel_size and test_make() then
    local page_sequences = {}
    for i=1, #pages, parallel_size do
      local current_pages = {}
      for x = i, i+parallel_size -1 do
        current_pages[#current_pages + 1] = pages[x]
      end
      table.insert(page_sequences,table.concat(make_ranges(current_pages), ","))
    end
    return make_makefile_command(idvfile, page_sequences)
  end
  local pagesequence = table.concat(make_ranges(pages), ",")
  -- the stderr from dvisvgm must be redirected and postprocessed
  local dvisvgmlog = idvfile:gsub("idv$", "dlog")
  -- local dvisvgm = io.popen("dvisvgm -v4 -n --exact -c 1.15,1.15 -p " .. pagesequence .. " " .. idvfile, "r")
  local command = "dvisvgm -v4 " .. dvisvgm_options .. " -p " .. pagesequence .. " " .. idvfile .. " 2> " .. dvisvgmlog
  return command, {dvisvgmlog}
end

-- Run dvisvgm and return a table mapping page numbers to generated files,
-- or nil and a message when there is nothing to convert.
local function execute_dvisvgm(idvfile, pages)
  if #pages < 1 then return nil, "No pages to convert" end
  local command, logs = prepare_command(idvfile, pages)
  log:info(command)
  os.execute(command)
  local generated_pages = {}
  for _, dvisvgmlog in ipairs(logs) do
    local output, msg = read_log(dvisvgmlog)
    if output then
      generated_pages = get_generated_pages(output, generated_pages)
    else
      -- a missing log used to crash string.find with a nil argument
      log:warning(msg .. ": " .. dvisvgmlog)
    end
  end
  return generated_pages
end

-- Convert pages of the .idv file that don't have an up-to-date hashed SVG file.
-- Returns nil and a message when nothing was converted, nothing otherwise.
local function get_dvi_pages(arg)
  -- list of pages to convert in this run
  local to_convert = {}
  local idv_file = arg.input .. ".idv"
  -- set extension options
  local extoptions = mkutils.get_filter_settings "dvisvgm_hashes" or {}
  dvisvgm_options = arg.options or extoptions.options or dvisvgm_options
  parallel_size = arg.parallel_size or extoptions.parallel_size or parallel_size
  cpu_cnt = arg.cpu_cnt or extoptions.cpu_cnt or cpu_cnt
  dvisvgm_par.scale = arg.scale or extoptions.scale or 1.4
  dvisvgm_options = dvisvgm_options % dvisvgm_par
  make_command = arg.make_command or extoptions.make_command or make_command
  test_make_command = arg.test_make_command or extoptions.test_make_command or test_make_command
  local f = io.open(idv_file, "rb")
  if not f then
    local msg = "Cannot open idv file: " .. idv_file
    -- the caller discards the returned message, so report it here
    log:warning(msg)
    return nil, msg
  end
  local content = f:read("*all")
  f:close()
  local dvi_pages = dvireader.get_pages(content)
  -- we must find page numbers and output names for the generated images
  local lg = mkutils.parse_lg(arg.input ..".lg", arg.builddir)
  for _, name in ipairs(lg.images) do
    local page = tonumber(name.page)
    local hash = dvi_pages[page]
    if hash then
      local tex4ht_name = name.output
      local output_name = make_hashed_name(arg.input, hash)
      output_map[tex4ht_name] = output_name
      if not mkutils.file_exists(output_name) then
        log:debug("output file: ".. output_name)
        to_convert[#to_convert+1] = page
      end
    else
      -- a missing hash used to crash the string concatenation
      log:debug("no hash for page: " .. tostring(name.page))
    end
  end
  local generated_files, msg = execute_dvisvgm(idv_file, to_convert)
  if not generated_files then
    return nil, msg
  end
  -- rename the generated files to the hashed filenames
  for page, file in pairs(generated_files) do
    os.rename(file, make_hashed_name(arg.input, dvi_pages[page]))
  end
end

function M.test(format)
  -- ODT format doesn't support SVG
  if format == "odt" then return false end
  return true
end

function M.modify_build(make)
  -- this must be used in the .mk4 file as
  -- Make:dvisvgm_hashes {}
  make:add("dvisvgm_hashes", function(arg)
    get_dvi_pages(arg)
  end, { })

  -- insert dvisvgm_hashes command at the end of the build sequence
  -- it needs to be called after t4ht
  make:dvisvgm_hashes {}

  -- replace original image names with hashed names
  local executed = false
  make:match(".*", function(arg)
    if not executed then
      executed = true
      local lgfiles = make.lgfile.files
      for i, filename in ipairs(lgfiles) do
        local replace = output_map[filename]
        if replace then
          lgfiles[i] = replace
        end
      end
      -- tex4ebook process also the images table, so we need to replace
      -- generated filenames here as well
      local lgimages = make.lgfile.images
      for _, image in ipairs(lgimages) do
        local replace = output_map[image.output]
        if replace then
          image.output = replace
        end
      end
    end
  end)

  -- fix src attributes
  local process = filter({
    function(str, filename)
      return str:gsub('src=["\'](.-)(["\'])', function(src, endquote)
        local newname = output_map[src] or src
        log:debug("newname", newname)
        return 'src=' .. endquote .. newname .. endquote
      end)
    end
  }, "dvisvgmhashes")

  make:match("htm.?$", process)

  -- disable the image processing
  for _,v in ipairs(make.build_seq) do
    if v.name == "t4ht" then
      local t4ht_par = v.params.t4ht_par or make.params.t4ht_par or ""
      v.params.t4ht_par = t4ht_par .. " -p"
    end
  end
  make:image(".", function() return "" end)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-inlinecss.lua
================================================
local M = {}

local filter = require "make4ht-domfilter"

-- filters support only html formats
function M.test(format)
  if format:match("html") then return true end
  return false
end

function M.modify_build(make)
  local process = filter({"inlinecss"}, "inlinecss")
  make:match("html?$", process)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-join_colors.lua
================================================
local M = {}

local filter = require "make4ht-domfilter"

-- filters support only html formats
function M.test(format)
  if format == "odt" then return false end
  return true
end

function M.modify_build(make)
  local process = filter({"joincolors"}, "joincolors")
  make:match("html?$", process)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-latexmk_build.lua
================================================
-- use Latexmk in first LaTeX call
-- only in the first call, because we don't need to execute biber, etc. in the subsequent
-- LaTeX calls, these are only for resolving the cross-references
local M = {}

-- the original used `mkutils` without requiring it, relying on a global
local mkutils = require "mkutils"

local htlatex_names = {
  htlatex = true,
  autohtlatex = true,
}

function M.modify_build(make)
  local first
  local build_seq = make.build_seq
  -- find first htlatex call in the build sequence
  for pos,v in ipairs(build_seq) do
    if htlatex_names[v.name] and not first then
      first = pos
    end
  end
  -- we need to save contents of the .tmp file, to prevent extra executions from latexmk
  -- tex4ht command overwrites content that was set by LaTeX with it's own stuff
  local tmp_file
  make:add("save_tmp", function(par)
    local f = io.open(mkutils.file_in_builddir(par.input .. ".tmp", par), "r")
    if f then
      tmp_file = f:read("*all")
      f:close()
    end
    return 0
  end)
  make:add("load_tmp", function(par)
    if tmp_file then
      local f = io.open(mkutils.file_in_builddir(par.input .. ".tmp", par), "w")
      if f then
        f:write(tmp_file)
        -- the handle was never closed in the original, so the restored
        -- contents were not guaranteed to be flushed to the disk
        f:close()
      end
    end
    return 0
  end)
  -- if htlatex was found
  if first then
    -- handle tmp file
    make:load_tmp {}
    make:save_tmp {}
    -- add dummy latexmk call to the build sequence
    make:latexmk {}
    -- replace name, command and type in the first htlatex
    -- call with values from the dummy latexmk call
    local replaced = build_seq[first]
    local latexmk = build_seq[#build_seq]
    replaced.name = latexmk.name
    replaced.command = latexmk.command
    replaced.type = latexmk.type
    -- remove the dummy latexmk
    table.remove(build_seq)
  end
  -- remove htlatex calls from the build sequence, they are unnecessary
  local new_build_seq = {}
  for _, v in ipairs(build_seq) do
    if v.name ~= "htlatex" and v.name ~= "tex4ht" then
      table.insert(new_build_seq, v)
    elseif v.name == "tex4ht" then
      -- insert save_tmp before tex4ht
      table.insert(new_build_seq, build_seq[#build_seq])
      -- remove save_tmp from the end
      table.remove(build_seq)
      -- and now insert tex4ht
      table.insert(new_build_seq, v)
    end
  end
  make.build_seq = new_build_seq
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-mathjaxnode.lua
================================================
local M = {}

local filter = require "make4ht-filter"

function M.test(format)
  if format == "odt" then return false end
  return true
end

-- this extension needs mathml enabled
function M.prepare_parameters(params)
  -- tolerate a missing option string
  params.tex4ht_sty_par = (params.tex4ht_sty_par or "") .. ",mathml"
  return params
end

function M.modify_build(make)
  local mathjax = filter({ "mathjaxnode"}, "mathjaxnode")
  make:match("html?$",mathjax)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-mjcli.lua
================================================
local M = {}

local filter = require "make4ht-filter"

function M.test(format)
  -- this extension works only for formats based on HTML, as it produces
  -- custom HTML tags that would be ilegal in XML
  if not format:match("html5?$") then return false end
  return true
end

-- set by prepare_parameters when LaTeX math input should be kept;
-- declared as local, so it cannot leak to the global environment
local detected_latex = false

function M.prepare_parameters(params)
  -- mjcli supports both MathML and LaTeX math input
  -- LaTeX math is keep if user uses "mathjax" option for make4ht
  -- "mathjax" option used in \Preamble in the .cfg file doesn't work
  local sty_par = params.tex4ht_sty_par or ""
  if sty_par:match("mathjax") then
    detected_latex = true
  else
    params.tex4ht_sty_par = sty_par .. ",mathml"
  end
  return params
end

function M.modify_build(make)
  local mathjax = filter({ "mjcli"}, "mjcli")
  local params = {}
  if detected_latex then
    params.latex = true
  end
  make:match("html?$",mathjax, params)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-nodynamicodt.lua
================================================
local M = {}

-- this extension converts links, tables of contents and other dynamic content
-- in the ODT format to plain text
local filter = require "make4ht-domfilter"

-- this extension only works for the ODT format
M.test = function(format)
  return format=="odt"
end

-- DOM filter that replaces dynamic ODT elements with static ones.
local function nodynamiccontent(dom)
  for _,link in ipairs(dom:query_selector("text|a")) do
    -- change links to spans
    link._name = "text:span"
    -- remove attributes
    link._attr = {}
  end
  for _, bibliography in ipairs(dom:query_selector("text|bibliography")) do
    -- remove links from bibliography
    -- use div instead of bibliography
    bibliography._name = "text:div"
    -- remove bibliography-source elements
    -- the original selector used "text:bibliography-source"; every other selector
    -- in this function uses "|" as the namespace separator, so it is used here too
    for _, source in ipairs(bibliography:query_selector("text|bibliography-source")) do
      source:remove_node()
    end
    for _, index in ipairs(bibliography:query_selector("text|index-body")) do
      -- use div instead of bibliography-entry
      index._name = "text:div"
    end
  end
  for _, toc in ipairs(dom:query_selector("text|table-of-content")) do
    -- remove links from toc
    -- use div instead of table-of-contents
    toc._name = "text:div"
    for _, entry in ipairs(toc:query_selector("text|index-body, text|index-title")) do
      -- use div instead of table-of-contents-entry
      entry._name = "text:div"
    end
  end
  return dom
end

M.modify_build = function(make)
  local process = filter({nodynamiccontent}, "nodynamiccontent")
  -- use the build object passed as the argument (the original called the global `Make`)
  make:match("4oo$",process)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-odttemplate.lua
================================================
local M = {}

local filter = require "make4ht-filter"

-- this extension only works for the ODT format
M.test = function(format)
  return format=="odt"
end

M.modify_build = function(make)
  local process = filter({"odttemplate"}, "odttemplate")
  make:match("4oy$", process)
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-preprocess_input.lua
================================================
-- preprocess R literate sources or Markdown files to LaTeX
local M = {}
local log = logging.new "preprocess_input"
local mkutils = require "mkutils"

local commands = {
  knitr = { command = 'Rscript -e "library(knitr); knit(\'${tex_file}\', output=\'${tmp_file}\')"'},
  pandoc = { command = 'pandoc -f ${input_format} -s -o \'${tmp_file}\' -t latex \'${tex_file}\''},
  render = { command = 'Rscript -e "library(rmarkdown); render(\'${tex_file}\', output_file=\'${tmp_file}\',output_format = \'latex_document\')"'}
}

local filetypes = {
  rnw = {sequence = {"knitr"} },
  rtex = {sequence = {"knitr"}},
  rmd = {sequence = {"render"}},
  rrst = {sequence = {"knitr", "pandoc"}, options = {input_format = "rst"}},
  md = {sequence = {"pandoc"}, options = {input_format = "markdown"}},
  rst = {sequence = {"pandoc"}, options = {input_format = "rst"}},
}

-- Return the name of the temporary file for step `curr` of `length` steps.
-- We don't want to use the temp dir, because graphics would be then generated
-- outside of the directory of the source document, so only the base name of
-- the system temporary name is used for the intermediate steps.
local function get_temp_name(arg, curr, length)
  -- the original compared two undefined globals (`pos == sequence`), which is
  -- always true, so every step of the sequence shared the same file name
  if curr == length then
    -- base tmp_name on the input name in the last step of sequence
    -- so the generated images won't have random names
    return arg.input .. "-preprocess_input"
  end
  local tmp_name = os.tmpname()
  -- os.tmpname may create the file on POSIX systems; only the name is needed
  os.remove(tmp_name)
  return tmp_name:match("([^/\\]+)$")
end

-- Execute the preprocessing commands one after another.
-- Returns the list of generated temporary files, the last one is the TeX file.
local function execute_sequence(sequence, arg, make)
  -- keep track of all generated tmp files
  local temp_files = {}
  -- the temporary file for the current compilation step
  -- should become the tex_file for the next one. It doesn't
  -- matter that it isn't TeX file in some cases
  local previous_temp
  for pos, cmd_name in ipairs(sequence) do
    local tmp_name = get_temp_name(arg,pos, #sequence)
    temp_files[#temp_files+1] = tmp_name
    -- make the temp file name accessible to the executed commands
    arg.tmp_file = tmp_name
    -- the current temporary file should become tex_file in the next step
    -- in the first execution of the compilation sequence we will use the
    -- actual input file name
    arg.tex_file = previous_temp or arg.tex_file
    previous_temp = tmp_name
    -- get the command to execute
    local cmd = commands[cmd_name]
    -- fill the command template with make4ht arguments and execute
    local command = cmd.command % arg
    log:info(command)
    mkutils.execute(command)
  end
  return temp_files
end

-- Find the preprocessing pipeline for the input file.
-- Returns the filetypes entry, or nil and a message.
local function get_preprocessing_pipeline(input_file)
  -- detect the file extension: the text after the last dot of the file name
  -- (the original pattern captured everything after the first dot, so names
  -- like "my.file.rnw" or "./doc.md" were not recognized)
  local extension = input_file:match("%.([^%./\\]+)$")
  if not extension then return nil, "Cannot get extension: " .. input_file end
  -- the table with file actions is case insensitive
  -- the extension is converted to lowercase in order
  -- to support both .rnw and .Rnw
  extension = string.lower(extension)
  local matched = filetypes[extension]
  if not matched then return nil, "Unsupported extension: " .. extension end
  return matched
end

-- join the make4ht params and command options tables
local function make_options(arg, command_options)
  local options = {}
  local command_options = command_options or {}
  for k,v in pairs(arg) do options[k] = v end
  for k,v in pairs(command_options) do options[k] = v end
  return options
end

M.modify_build = function(make)
  -- get access to the main arguments
  local arg = make.params
  -- get the execution sequence for the input format
  local matched, msg = get_preprocessing_pipeline(arg.tex_file)
  if not matched then
    log:error("preprocess_input error: ".. msg)
    -- keep the build usable, the original returned nothing here
    return make
  end
  -- prepare options
  local options = make_options(arg, matched.options)
  -- run the execution sequence
  local temp_files = execute_sequence(matched.sequence or {}, options, make)
  -- the last temporary file contains the actual TeX file
  local last_temp_file = temp_files[#temp_files]
  -- remove the intermediate temp files; the loop is empty for single step
  -- pipelines (the original `> 2` guard left the intermediate file of two
  -- step pipelines behind)
  for i = 1, #temp_files - 1 do
    log:debug("Removing temporary file", temp_files[i])
    os.remove(temp_files[i])
  end
  if last_temp_file then
    -- update all commands in the .mk4 file with the temp file as tex_file
    local update_params = function(cmd)
      local params = cmd.params
      params.tex_file = last_temp_file
      params.is_tmp_file = true
    end
    for _, cmd in ipairs(make.build_seq) do
      update_params(cmd)
    end
    -- also update the main params
    update_params(make)
  end
  return make
end

return M

================================================
FILE: extensions/make4ht-ext-staticsite.lua
================================================
local M = {}
local filter = require "make4ht-filter"
local mkutils = require "mkutils"
local log = logging.new "staticsite"

-- get the published file name
-- Build the slug (output file name) from the publishing time and the settings.
-- The publishing time is stored in the "<input>.published" file, so later
-- compilations reuse it. Also passes the header times to the staticsite filter.
-- `get_filter_settings`, `filter_settings` and `logging` are globals that are
-- not defined in this file.
local function get_slug(settings)
  local published_name = mkutils.remove_extension(settings.tex_file) .. ".published"
  -- the settings may be missing when the extension isn't configured
  local config = get_filter_settings "staticsite" or {}
  local file_pattern = config.file_pattern or "%Y-%m-%d-${input}"
  local time = os.time()

  -- we must save the published date, so the subsequent compilations at different days
  -- use the same name
  if mkutils.file_exists(published_name) then
    local f = io.open(published_name, "r")
    if f then
      local readtime = f:read("*line")
      f:close()
      -- keep the current time when the file is empty or corrupted
      time = tonumber(readtime) or time
    end
    log:info("Already pubslished", os.date("%Y-%m-%d %H:%M", time))
  else
    -- escape
    -- slug must contain the unescaped input name
    local f = io.open(published_name, "w")
    log:info("Publishing article", os.date("%Y-%m-%d %H:%M", time))
    if f then
      f:write(time)
      f:close()
    else
      log:warning("Cannot save the published time: " .. published_name)
    end
  end
  -- set the updated and publishing times
  local updated
  -- the updated time will be set only when it is more than one day from the published time
  local newtime = os.time()
  if (newtime - time) > (24 * 3600) then
    updated = newtime
  end

  filter_settings "staticsite" {
    header = {
      time = time,
      updated = updated
    }
  }

  -- make the output file name in the format YYYY-MM-DD-old-filename.html
  local slug = os.date(file_pattern,time) % settings
  return slug
end

-- it is necessary to set correct -jobname in latex_par parameters field
-- in order to the get correct HTML file name
-- Returns latex_par with the -jobname option replaced by, or extended with, the slug.
local function update_jobname(slug, latex_par)
  local latex_par = latex_par or ""
  -- "%" is special in gsub replacement strings, the slug must be used literally
  local jobname = "-jobname=" .. slug:gsub("%%", "%%%%")
  if latex_par:match("%-jobname") then
    -- the first character of the value; the original pattern had no capture,
    -- so the quote detection never matched
    local firstchar = latex_par:match("%-jobname=(.)")
    local replace_pattern = "%-jobname=[^%s]+"
    if firstchar == "'" or firstchar == '"' then
      -- replace the whole quoted value including the closing quote
      replace_pattern = "%-jobname=" .. firstchar .. "[^" .. firstchar .. "]*" .. firstchar .. "?"
    end
    -- parentheses drop the substitution count returned by gsub
    return (latex_par:gsub(replace_pattern, jobname))
  elseif latex_par == "" then
    return "-jobname=" .. slug
  else
    -- the option must be separated from the previous options by a space
    return latex_par .. " -jobname=" .. slug
  end
end

-- execute the function passed as parameter only once, when the file matching
-- starts
local function insert_filter(make, pattern, fn)
  local insert_executed = false
  table.insert(make.matches, 1, {
    pattern=pattern,
    params = make.params or {},
    command = function()
      if not insert_executed then
        fn()
      end
      insert_executed = true
    end
  })
end

-- use DOM filter to remove \maketitle block
local function remove_maketitle(make)
  local domfilter = require "make4ht-domfilter"
  local process = domfilter({
    function(dom)
      local maketitles = dom:query_selector(".maketitle")
      for _, el in ipairs(maketitles) do
        log:debug("removing maketitle")
        el:remove_node()
      end
      return dom
    end
  }, "staticsite")
  make:match("html$", process)
end

-- Copy a generated file to the site directory, using the destination map
-- from the extension settings.
local function copy_files(filename, par)
  local function prepare_path(dir, subdir)
    local f = filename
    if par.builddir then
      -- strip the build directory prefix using plain string comparison,
      -- directory names can contain pattern magic characters such as "-"
      local prefix = par.builddir .. "/"
      if f:sub(1, #prefix) == prefix then
        f = f:sub(#prefix + 1)
      end
    end
    local path = dir .. "/" .. subdir .. "/" .. f
    return (path:gsub("//", "/"))
  end
  -- get extension settings
  local site_settings = get_filter_settings "staticsite" or {}
  local site_root = site_settings.site_root or par.outdir or ""
  if site_root == "" then site_root = "./" end
  local map = site_settings.map or {}
  -- default path without subdir, will be used if the file is not matched
  -- by any pattern in the map
  local path = prepare_path(site_root, "")
  for pattern, destination in pairs(map) do
    if filename:match(pattern) then
      path = prepare_path(site_root, destination)
      break
    end
  end
  -- it is possible to use string extrapolation in path, for example for slug
  mkutils.copy(filename, path % par)
end

function M.modify_build(make)
  -- it is necessary to insert the filters for YAML header and file copying as last matches
  -- we use an bogus match which will be executed only once as the very first one to insert
  -- the filters
  -- I should make filter from this
  local process = filter({
    "staticsite"
  }, "staticsite")

  -- detect if we should remove maketitle
  local site_settings = get_filter_settings "staticsite" or {}
  -- \maketitle is removed by default, set `remove_maketitle=false` setting to disable that
  if site_settings.remove_maketitle ~= false then
    remove_maketitle(make)
  end

  local settings = make.params
  -- get the published file name
  local slug = get_slug(settings)
  for _, cmd in ipairs(make.build_seq) do
    -- all commands must use the published file name
    cmd.params = cmd.params or {}
    cmd.params.input = slug
    cmd.params.latex_par = update_jobname(slug, cmd.params.latex_par)
  end

  -- run the following code once in the first match on the first file
  insert_filter(make, ".*", function()
    -- for _, match in ipairs(make.matches) do
    --   match.params.outdir = outdir
    --   print(match.pattern, match.params.outdir)
    -- end
    local params = make.params
    params.slug = slug
    make:match("html?$", process, params)
    make:match(".*", copy_files, params)
  end)

  return make
end

return M

================================================
FILE: extensions/make4ht-ext-tidy.lua
================================================
local M = {}

local log = logging.new "tidy"

function M.test(format)
  if format == "odt" then return false end
  return true
end

local empty_elements = {
  area=true,
  base=true,
  br=true,
  col=true,
  embed=true,
  hr=true,
  img=true,
  input=true,
  keygen=true,
  link=true,
  meta=true,
  param=true,
  source=true,
  track=true,
  wbr=true,
}

-- LuaXML cannot read HTML with unclosed tags (void elements without the end slash)
-- Tidy removes end slashes in the HTML output, so
-- this function will add them back
local function close_tags(s)
  return s:gsub("<(%w+)([^>]-)>", function(tag, rest)
    local endslash = ""
    -- don't add the second slash to tags that are already closed
    if empty_elements[tag:lower()] and not rest:match("/%s*$") then
      endslash = " /"
    end
    return string.format("<%s%s%s>", tag, rest, endslash)
  end)
end

function M.modify_build(make)
  make:match("html?$", function(filename, par)
    local settings = get_filter_settings "tidy" or {}
    par.options = par.options or settings.options or "-utf8 -w 512 -ashtml -q"
    local command = "tidy ${options} ${filename}" % par
    log:info("running tidy: ".. command)
    -- os.execute(command)
    local run = io.popen(command, "r")
    -- io.popen can fail, for example on platforms that don't support it
    local result = run and run:read("*all")
    if run then run:close() end
    if not result or result == "" then
      log:warning("Cannot execute Tidy command")
      return nil
    end
    -- parentheses drop the substitution count returned by gsub
    result = (close_tags(result))
    local f = io.open(filename, "w")
    if not f then
      log:warning("Cannot save Tidy output: " .. filename)
      return nil
    end
    f:write(result)
    f:close()
  end)
  return make
end

return M

================================================
FILE: filters/make4ht-cleanspan-nat.lua
================================================
-- cleanspan function submitted by Nat Kuhn
-- http://www.natkuhn.com/

-- Join neighbouring <span> elements that have identical attributes.
local function filter(s)
  -- NOTE(review): the pattern in this copy of the file was damaged (markup
  -- stripped, captures unbalanced); it was reconstructed from the "%1%3"
  -- replacement and should be confirmed against the upstream file
  local pattern = "(<span%s+([^>]+)>[^<]*)</span>(%s*)<span%s+%2>"
  -- `n` was an accidental global in the original
  local n
  repeat
    s, n = s:gsub(pattern, "%1%3")
  until n == 0
  return s
end

return filter

================================================
FILE: filters/make4ht-cleanspan.lua
================================================
-- make4ht-cleanspan4ht.lua -- fixes spurious elements in tex4ht output function filter(input) local parse_args = function(s) local at = {} s:gsub("(%w+)%s*=%s*\"([^\"]-)\"", function(k,w) at[k]=w end) return at end -- local pattern = "(?)" local pattern = "(?)([%s]*)]-)>" local last_class = "" local depth = 0 return input:gsub(pattern, function(tag,space, args) local attr = parse_args(args) or {} local class = attr["class"] or "" if tag == "" then if class == last_class and class~= "" then last_class = class return space .. "" end elseif tag == "" then class="" end last_class = class return tag ..space .. 
'' end) end return filter ================================================ FILE: filters/make4ht-domfilter.lua ================================================ local filter_lib = require "make4ht-filterlib" local dom = require "luaxml-domobject" local mkutils = require "mkutils" local log = logging.new "domfilter" local function load_filter(filtername) return require("domfilters.make4ht-"..filtername) end -- get snippet of the position where XML parsing failed local function get_html_snippet(str, errmsg) -- we can get position in bytes from message like this: -- /home/mint/texmf/scripts/lua/LuaXML/luaxml-mod-xml.lua:175: Unbalanced Tag (/p) [char=1112] local position = tonumber(errmsg:match("char=(%d+)") or "") if not position then return "Cannot find error position" end -- number of bytes around the error position that shoule be printed local error_context = 100 local start = position > error_context and position - error_context or 0 local stop = (position + error_context) < str:len() and position + error_context or str:len() return str:sub(start, stop) end -- save processed names, in order to block multiple executions of the filter -- sequence on a same file local processed = {} local function filter(filters, name) -- because XML parsing to DOM is potentially expensive operation -- this filter will use cache for it's sequence -- all requests to the domfilter will add new filters to the -- one sequence, which will be executed on one DOM object. 
-- it is possible to request a different sequence using -- unique name parameter local name = name or "domfilter" local settings = mkutils.get_filter_settings(name) or {} local sequence = settings.sequence or {} local local_sequence = filter_lib.load_filters(filters, load_filter) for _, filter in ipairs(local_sequence) do table.insert(sequence, filter) end settings.sequence = sequence mkutils.filter_settings (name) (settings) return function(filename, parameters) -- load processed files for the current filter name local processed_files = processed[name] or {} -- don't process the file again if processed_files[filename] then return nil end local input = filter_lib.load_input_file(filename) if not input then return nil, "Cannot load the input file" end -- in pure XML, we need to ignore void_elements provided by LuaXML, because these can exist only in HTML local no_void_elements = {docbook = {}, jats = {}, odt = {}, tei = {} } local void_elements = no_void_elements[parameters.output_format] -- we need to use pcall, because XML error would break the whole build process -- domobject will be error object if DOM parsing failed local status, domobject = pcall(function() return dom.parse(input, void_elements) end) if not status then log:warning("XML DOM parsing of " .. filename .. " failed:") log:warning(domobject) log:debug("Error context:\n" .. (get_html_snippet(input, domobject) or "")) log:debug("Trying HTML DOM parsing") status, domobject = pcall(function() return dom.html_parse(input) end) if not status then log:warning("HTML DOM parsing failed as well") return nil, "DOM parsing failed" else log:warning("HTML DOM parsing OK, DOM filters will be executed") end end for _,f in pairs(sequence) do domobject = f(domobject,parameters) end local output = domobject:serialize() if output then filter_lib.save_input_file(filename, output) else log:warning("DOM filter failed on ".. 
filename) end -- mark the filename as processed processed_files[filename] = true processed[name] = processed_files end end return filter ================================================ FILE: filters/make4ht-entities-to-unicode.lua ================================================ -- convert Unicode characters encoded as XML entities back to Unicode local utfchar = unicode.utf8.char -- list of disabled characters local disabled = { ["&"] = "&", ["<"] = "<", [">"] = ">"} return function(content) local content = content:gsub("%&%#x([A-Fa-f0-9]+);", function(entity) -- convert hexadecimal entity to Unicode local char_number = tonumber(entity, 16) -- fix for non-breaking spaces, LO cannot open file when they are present as Unicode if char_number == 160 then return " " end local newchar = utfchar(char_number) -- we don't want to break XML validity with forbidden characters return disabled[newchar] or newchar end) return content end ================================================ FILE: filters/make4ht-entities.lua ================================================ -- Fix bad entities -- Sometimes, tex4ht produce named xml entities, which are prohobited in epub --  , for example function filter(s) local replaces = { nbsp = "#160" } return s:gsub("&(%w+);",function(x) local m = replaces[x] or x return "&"..m..";" end) end return filter ================================================ FILE: filters/make4ht-filter.lua ================================================ local filter_lib = require "make4ht-filterlib" local function load_filter(filtername) return require("filters.make4ht-"..filtername) end function filter(filters) local sequence = filter_lib.load_filters(filters, load_filter) return function(filename, parameters) if not filename then return false, "filters: no filename" end local input = filter_lib.load_input_file(filename) if not input then return nil, "Cannot load the input file" end for _,f in pairs(sequence) do input = f(input,parameters) end 
filter_lib.save_input_file(filename, input) end end return filter ================================================ FILE: filters/make4ht-fix-links.lua ================================================ -- replace colons in `id` or `href` attributes for local links with underscores -- local function fix_href_colons(s) return s:gsub('(href=".-")', function(a) if a:match("[a-z]%://") then return a end return a:gsub(":","_") end) end local function fix_id_colons(s) return s:gsub('(id=".-")', function(a) return a:gsub(":", "_") end) end return function(s) return fix_id_colons(fix_href_colons(s)) end ================================================ FILE: filters/make4ht-fixligatures.lua ================================================ -- fix ligatures -- replace ligatures produced by tex4ht with their components -- this prevents problems with some readers local gsub = unicode.utf8.gsub function filter(s) local replaces = { ["fi"] = "fi", ["ffi"] = "ffi", ["fl"] = "fl", ["ffl"] = "ffl", ["ff"] = "ff" } return gsub(s, "([fiffiflfflff])",function (x) return replaces[x] or x end) end return filter ================================================ FILE: filters/make4ht-hruletohr.lua ================================================ -- hruletohr -- \hrule primitive is impossible to redefine catching all possible arguments -- with tex4ht, it is converted as series of underscores -- it seems that these underscores are always part of previous paragraph -- this assumption may be wrong, needs more real world testing local hruletohr = function(s) return s:gsub("___+(.-)

    ","%1

    \n
    ") end return hruletohr ================================================ FILE: filters/make4ht-mathjaxnode.lua ================================================ local mkutils = require "mkutils" local log = logging.new("mathjaxnode") -- other possible value is page2svg local mathnodepath = "mjpage" -- options for MathJax command local options = "--output CommonHTML" -- math fonts position -- don't alter fonts if not set local fontdir = nil -- if we copy fonts local fontdest = nil local fontformat = "woff" local cssfilename = "mathjax-chtml.css" local function compile(text) local tmpfile = os.tmpname() log:info("Compile using MathJax") local command = mathnodepath .. " ".. options .. " > " .. tmpfile log:info(command) local commandhandle = io.popen(command,"w") commandhandle:write(text) commandhandle:close() log:info("Result written to: ".. tmpfile) local f = io.open(tmpfile) local content = f:read("*all") f:close() os.remove(tmpfile) return content end -- save the css code from the html page generated by MathJax local function extract_css(contents) local css = "" local filename = cssfilename contents = contents:gsub('', function(style) -- replace only the style for mathjax if style:match "%.mjx%-math" then css = style return '' end end) -- local x = assert(io.open(file, "w")) -- x:write(contents) -- x:close() return filename, contents, css end -- Update the paths to fonts to use the local versions local function use_fonts(css) local family_pattern = "font%-family:%s*(.-);.-%/([^%/]+)%.".. 
fontformat local family_build = "@font-face {font-family: %s; src: url('%s/%s.%s') format('%s')}" local fontdir = fontdir:gsub("/$","") css = css:gsub("(@font%-face%s*{.-})", function(face) if not face:match("url%(") then return face end -- print(face) local family, filename = face:match(family_pattern) log:info("use font: ",family, filename) local newfile = string.format("%s/%s.%s", fontdir, filename, fontformat) Make:add_file(newfile) return family_build:format(family, fontdir, filename, fontformat, fontformat) -- return face end) return css end local function save_css(filename, css) local f = io.open(filename, "w") f:write(css) f:close() end return function(text, arguments) -- if arguments.prg then mathnodepath = arguments.prg end local extoptions = mkutils.get_filter_settings "mathjaxnode" or {} local arguments = arguments or {} mathnodepath = arguments.prg or extoptions.prg or mathnodepath options = arguments.options or extoptions.options or options fontdir = arguments.fontdir or extoptions.fontdir or fontdir -- the following ne is unused ATM fontdest = arguments.fontdest or extoptions.fontdest or fontdest fontformat = arguments.fontformat or extoptions.fontformat or fontformat cssfilename = arguments.cssfilename or extoptions.cssfilename or cssfilename local newtext = compile(text) local cssfile, newtext, css = extract_css(newtext) -- use local font files if fontdir is present if fontdir then css = use_fonts(css) end save_css(cssfile, css) Make:add_file(cssfile) -- print(css) log:info("CSS file: " .. 
cssfile) return newtext end ================================================ FILE: filters/make4ht-mjcli.lua ================================================ local mkutils = require "mkutils" local log = logging.new("mjcli") -- other possible value is page2svg local mathnodepath = "mjcli" -- options for MathJax command local options = "" -- math fonts position -- don't alter fonts if not set local fontdir = nil -- if we copy fonts local fontdest = nil local fontformat = "woff" local cssfilename = "mathjax-chtml.css" local function compile(filename, options) -- local tmpfile = os.tmpname() log:info("Compile using MathJax") local command = mathnodepath .. " ".. options .. " " .. filename log:info(command) local commandhandle, msg = io.popen(command,"r") if not commandhandle then return nil, msg end local content = commandhandle:read("*all") commandhandle:close() return content end local saved_styles = {} local used_styles = {} local function make_css(saved_styles) -- this buffer contains lines of the new CSS file local buffer = {} -- process table with saved CSS rules and make CSS file again for _, rule in ipairs(saved_styles) do buffer[#buffer+1] = rule.selector .. " {" -- save CSS properties for _, line in ipairs(rule.content) do buffer[#buffer+1] = line end buffer[#buffer+1] = "}" buffer[#buffer+1] = "" -- add blank line end return table.concat(buffer, "\n") end -- MathJax generated CSS contains reusable declarations but also -- declarations of fixes for elements in the current file -- the idea is to reuse the common declarations and to save each -- fix. 
local function parse_css(css, file_class) local status = "init" local current = {} local current_selector for line in css:gmatch("([^\n]+)") do if status == "init" then local selector, rest = line:match("%s*(.-)%s*{(.-)") if selector then current_selector = selector -- if the current selector contains class, we must prepend the current file class -- as the joined CSS file could contain multiple rules with the same class otherwise if current_selector:match("%.") then current_selector = "." .. file_class .. " " .. current_selector end status = "record" end elseif status == "record" then -- find end of the CSS rule if line:match("}%s*$") then status = "init" if not used_styles[current_selector] then table.insert(saved_styles, {selector = current_selector, content = current}) end current = {} used_styles[current_selector] = true else table.insert(current, line) end end end -- save combined CSS for all files return make_css(saved_styles) end local function make_file_class(name) -- clean the filename to make it safe as a class name return name:gsub("[%s%p%s]", "_") end -- set class attribute in the body element of the current file -- this is necessary for the updated CSS file local function set_body_class(content, file_class) content = content:gsub("", function(body) if body:match("class") then -- add new class if there already is one body = body:gsub("(class.-[\"'])", "%1" .. file_class .. " ") else body = body .. ' class="' .. file_class .. 
'"' end return "" end) return content end -- save the css code from the html page generated by MathJax local function extract_css(contents, currentfilename) local css local filename = cssfilename local file_class = make_file_class(currentfilename) -- detect all ', function(style) -- replace only the style for mathjax if style:match "mjx%-container" then css = parse_css(style, file_class) return '' end end) contents = set_body_class(contents, file_class) return filename, contents, css end -- Update the paths to fonts to use the local versions local function use_fonts(css) local family_pattern = "font%-family:%s*(.-);.-%/([^%/]+)%.".. fontformat local family_build = "@font-face {font-family: %s; src: url('%s/%s.%s') format('%s')}" local fontdir = fontdir:gsub("/$","") css = css:gsub("(@font%-face%s*{.-})", function(face) if not face:match("url%(") then return face end -- print(face) local family, filename = face:match(family_pattern) log:info("use font: ",family, filename) local newfile = string.format("%s/%s.%s", fontdir, filename, fontformat) Make:add_file(newfile) return family_build:format(family, fontdir, filename, fontformat, fontformat) -- return face end) return css end local function save_css(filename, css) local f = io.open(filename, "w") f:write(css) f:close() end return function(text, arguments) -- if arguments.prg then mathnodepath = arguments.prg end local extoptions = mkutils.get_filter_settings "mjcli" or {} local arguments = arguments or {} mathnodepath = arguments.prg or extoptions.prg or mathnodepath local options = arguments.options or extoptions.options or options fontdir = arguments.fontdir or extoptions.fontdir or fontdir -- the following ne is unused ATM fontdest = arguments.fontdest or extoptions.fontdest or fontdest fontformat = arguments.fontformat or extoptions.fontformat or fontformat cssfilename = arguments.cssfilename or extoptions.cssfilename or arguments.input .. 
"-mathjax.css" local is_latex = arguments.latex or extoptions.latex or false local filename = arguments.filename -- modify options to use LaTeX syntax by MathJax if is_latex then options = options .. " -l" end -- compile current html file with mathjax local newtext, msg = compile(filename, options) if not newtext then log:error(msg) return text end -- save CSS to a standalone file local cssfile, newtext, css = extract_css(newtext, filename) -- use local font files if fontdir is present if fontdir then css = use_fonts(css) end if css then save_css(cssfile, css) Make:add_file(cssfile) -- print(css) log:info("CSS file: " .. cssfile) end return newtext end ================================================ FILE: filters/make4ht-odttemplate.lua ================================================ local mkutils = require "mkutils" local zip = require "zip" local domobject = require "luaxml-domobject" local function get_template_filename(settings) -- either get the template odt filename from tex4ht.sty options (make4ht filename.tex "odttemplate=test.odt") local tex4ht_settings = settings.tex4ht_sty_par local templatefile = tex4ht_settings:match("odttemplate=([^%,]+)") if templatefile then return templatefile end -- read the template odt filename from settings local filtersettings = get_filter_settings "odttemplate" return settings.template or filtersettings.template end local function join_styles(old, new) local old_dom = domobject.parse(old) local new_dom = domobject.parse(new) local template_styles = {} local template_obj -- element, we will add new styles from the generated ODT here -- detect style names in the template file and save them in a table for easy accesss for _, style in ipairs(new_dom:query_selector("office|styles *")) do template_obj = template_obj or style:get_parent() local name = style:get_attribute("style:name") -- get the element if name then template_styles[name] = true end end -- process the generated styles and add ones not used in the template for _, 
style in ipairs(old_dom:query_selector("office|styles *")) do local name = style:get_attribute("style:name") if name and not template_styles[name] then template_obj:add_child_node(style) end end -- return template with additional styles from the generated file return new_dom:serialize() end return function(content, settings) -- use settings added from the Make:match, or default settings saved in Make object local templatefile = get_template_filename(settings) -- don't do anything if the template file doesn't exist if not templatefile or not mkutils.file_exists(templatefile) then return content end local odtfile = zip.open(templatefile) if odtfile then local stylesfile = odtfile:open("styles.xml") -- just break if the styles cannot be found if not stylesfile then return content end local styles = stylesfile:read("*all") local newstyle = join_styles(content, styles) return newstyle end -- just return content in the case of problems return content end ================================================ FILE: filters/make4ht-staticsite.lua ================================================ local domobj = require "luaxml-domobject" local log = logging.new("staticsite") -- save the header settings in YAML format local function make_yaml(tbl, level) local t = {} local level = level or 0 local indent = string.rep(" ", level) -- indentation for multilen strings local str_indent = string.rep(" ", level + 1) local sorted = {} for k, _ in pairs(tbl) do sorted[#sorted+1] = k end table.sort(sorted) for _,k in ipairs(sorted) do local v = tbl[k] if type(v)=="string" then -- detect multiline strings if v:match("\n") then table.insert(t, string.format(indent .. "%s: |", k)) table.insert(t, str_indent .. (v:gsub("\n", "\n".. str_indent))) else v = v:gsub("'", "''") table.insert(t, string.format(indent .. "%s: '%s'", k,v)) end elseif type(v) == "table" then table.insert(t,string.format(indent .. 
"%s:", k)) -- we need to differently process array and hash table -- we don't support mixing types if #v > 0 then for x,y in ipairs(v) do if type(y) == "string" then -- each string can be printed on it's own line table.insert(t, indent .. string.format("- '%s'", y)) else -- subtables need to be indented -- table.insert(t, indent .. "-") local subtable = make_yaml(y, level + 1) -- we must insert dash at a correct place local insert_dash = subtable:gsub("^(%s*)%s%s", "%1- ") table.insert(t, insert_dash) end end else -- print indented table table.insert(t, make_yaml(v,level + 1)) end else -- convert numbers and other values to string table.insert(t, string.format(indent .. "%s: %s", k,tostring(v))) end end return table.concat(t, "\n") end local function update_properties(properties, dom) -- enable properties update from the config or build file local settings = get_filter_settings "staticsite" or {} local header = settings.header or {} -- set non-function properties first for field, rule in pairs(header) do if type(rule) ~="function" then properties[field] = rule end end -- then execute functions. it ensures that all propeties set in header are available for field, rule in pairs(header) do -- it is possible to pass function as a rule, it will be executed with properties as a parameter if type(rule) == "function" then properties[field] = rule(properties, dom) end end return properties end local function get_header(tbl) local yaml = make_yaml(tbl) return "---\n".. yaml.. 
"\n---\n" end return function(s,par) local dom = domobj.parse(s) local properties = {} local head = dom:query_selector("head")[1] properties.title = head:query_selector("title")[1]:get_text() local styles = {} for _, link in ipairs(head:query_selector("link")) do local typ = link:get_attribute("type") if typ == "text/css" then table.insert(styles, link:get_attribute("href")) end end properties.styles = styles local metas = {} for _, meta in ipairs(head:query_selector("meta")) do log:debug("parsed meta: " .. meta:serialize()) table.insert(metas, {charset= meta:get_attribute("charset"), content = meta:get_attribute("content"), property = meta:get_attribute("property"), name = meta:get_attribute("name")}) end properties.meta = metas properties = update_properties(properties, dom) local body = dom:query_selector("body")[1] log:debug(get_header(properties)) -- return s return get_header(properties) .. body:serialize():gsub("", ""):gsub("", "") end ================================================ FILE: filters/make4ht-svg-height.lua ================================================ local log = logging.new("svg-height") -- Make:image("svg$", "dvisvgm -n -a -p ${page} -b preview -c 1.4,1.4 -s ${source} > ${output}") local max = function(a,b) return a > b and a or b end local function get_height(svg) local height = svg:match("height='([0-9%.]+)pt'") return tonumber(height) end local function get_max_height(path,max_number) local coordinates = {} for number in path:gmatch("(%-?[0-9%.]+)") do table.insert(coordinates, tonumber(number)) end for i = 2, #coordinates, 2 do max_number = max(max_number, coordinates[i]) end return max_number end local function update_height(svg, height) return svg:gsub("height='.-pt'", "height='"..height .."pt'") end -- we need to fix the svg height return function(svg) local max_height = 0 local height = get_height(svg) for path in svg:gmatch("path d='([^']+)'") do -- find highest height in all paths in the svg file max_height = get_max_height(path, 
max_height) end -- update the height only if the max_height is larger than height set in the SVG file log:debug("max height and height", max_height, height) if max_height > height then svg = update_height(svg, max_height) end return svg end ================================================ FILE: formats/make4ht-docbook.lua ================================================ local M = {} local mkutils = require "mkutils" local lfs = require "lfs" local os = require "os" local kpse = require "kpse" local filter = require "make4ht-filter" local domfilter = require "make4ht-domfilter" local xtpipeslib = require "make4ht-xtpipes" local log = logging.new "docbook" function M.prepare_parameters(settings, extensions) settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",docbook" settings = mkutils.extensions_prepare_parameters(extensions, settings) return settings end local move_matches = xtpipeslib.move_matches -- call xtpipes from Lua local function call_xtpipes(make) -- we must find root of the TeX distribution local selfautoparent = xtpipeslib.get_selfautoparent() if selfautoparent then local matchfunction = xtpipeslib.get_xtpipes(selfautoparent) make:match("xml$", matchfunction) move_matches(make) else log:warning "Cannot locate xtpipes. Try to set TEXMFROOT variable to a root directory of your TeX distribution" end end function M.modify_build(make) -- use xtpipes to fix some common docbook issues call_xtpipes(make) return make end return M ================================================ FILE: formats/make4ht-html5.lua ================================================ local M = {} local mkutils = require "mkutils" function M.prepare_extensions(extensions) return mkutils.add_extensions("+common_domfilters", extensions) end function M.prepare_parameters(parameters,extensions) parameters.tex4ht_sty_par = parameters.tex4ht_sty_par .. 
",html5" parameters = mkutils.extensions_prepare_parameters(extensions,parameters) return parameters end return M ================================================ FILE: formats/make4ht-jats.lua ================================================ local M = {} local xtpipeslib = require "make4ht-xtpipes" local domfilter = require "make4ht-domfilter" -- some elements need to be moved from the document flow to the document meta local article_meta local elements_to_move_to_meta = {} local function move_to_meta(el) -- we don't move elements immediatelly, because it would prevent them from further -- processing in the filter. so we save them in an array, and move them once -- the full DOM was processed table.insert(elements_to_move_to_meta, el) end local elements_to_move_to_title = {} local function move_to_title_group(el) -- there can be only one title and subtitle local name = el:get_element_name() if not elements_to_move_to_title[name] then elements_to_move_to_title[name] = el end end local elements_to_move_to_contribs = {} local function move_to_contribs(el) table.insert(elements_to_move_to_contribs, el) end local function process_moves() if article_meta then if elements_to_move_to_title["article-title"] and #article_meta:query_selector("title-group") == 0 then -- don't move anything if user added title-group from a config file local title_group = article_meta:create_element("title-group") for _, name in ipairs{ "article-title", "subtitle" } do local v = elements_to_move_to_title[name] if v then title_group:add_child_node(v:copy_node()) v:remove_node() end end article_meta:add_child_node(title_group, 1) end if #elements_to_move_to_contribs > 0 then local contrib_group = article_meta:create_element("contrib-group") for _, el in ipairs(elements_to_move_to_contribs) do contrib_group:add_child_node(el:copy_node()) el:remove_node() end article_meta:add_child_node(contrib_group) end for _, el in ipairs(elements_to_move_to_meta) do -- move elemnt's copy, and remove the original 
-- NOTE: the next lines close a definition that starts before this chunk; its code is kept as-is
      article_meta:add_child_node(el:copy_node())
      el:remove_node()
    end
  end
end

-- Detect whether an element has no content: its text is whitespace only and
-- it has no child elements.
-- @param el DOM element
-- @return true when the element is effectively empty, false otherwise
local function has_no_text(el)
  -- detect if element contains only whitespace
  if el:get_text():match("^%s*$") then
    --- if it contains any elements, it has text
    for _, child in ipairs(el:get_children()) do
      if child:is_element() then return false end
    end
    return true
  end
  return false
end

-- Test for an <xref> that only carries an id: it has the "id" attribute,
-- no "rid" attribute and no content.
-- @param el DOM element
-- @return truthy value for such <xref> elements
local function is_xref_id(el)
  return el:get_element_name() == "xref" and el:get_attribute("id") and el:get_attribute("rid") == nil and has_no_text(el)
end

-- set id to parent element for <xref> elements that contain only id
-- The <xref> is removed only when its id was moved to the parent.
-- @param el the <xref> DOM element
local function xref_to_id(el)
  local parent = el:get_parent()
  -- set id only if it doesn't exist yet
  if parent:get_attribute("id") == nil then
    -- (a leftover debugging print of the serialized parent was removed here)
    parent:set_attribute("id", el:get_attribute("id"))
    el:remove_node()
  end
end

-- Replace all children of the element with one text node that holds the
-- element's text with leading and trailing whitespace removed.
-- @param el DOM element
local function make_text(el)
  local text = el:get_text():gsub("^%s*", ""):gsub("%s*$", "")
  local text_el = el:create_text_node(text)
  el._children = {text_el}
end

-- Test for a <p> element without any content.
-- @param el DOM element
local function is_empty_par(el)
  return el:get_element_name() == "p" and has_no_text(el)
end

-- Fix the target of a link element.
-- @param el DOM element with an optional "rid" attribute
-- @param params table; params.input is used as the base name of the output XML file
local function handle_links(el, params)
  -- we must distinguish between internal links in the document, and external links
  -- to websites etc. these needs to be changed to the <ext-link> element.
  local link = el:get_attribute("rid")
  if link then
    -- try to remove \jobname.xml from the beginning of the link
    -- if the rest starts with #, then it is an internal link
    -- the prefix is compared as a plain string, so characters like "-" or "."
    -- in the job name are not interpreted as Lua pattern magic characters
    local prefix = params.input .. ".xml"
    local local_link = link
    if link:sub(1, #prefix) == prefix then
      local_link = link:sub(#prefix + 1)
    end
    if local_link:match("^%#") then
      -- the rid attribute should not start with #, it must be the exact ID used in the linked element
      local_link = local_link:gsub("^%#", "")
      el:set_attribute("rid", local_link)
    else
      -- change element to ext-link for extenal links
      el._name = "ext-link"
      el:set_attribute("rid", nil)
      el:set_attribute("xlink:href", link)
    end
  end
end

-- Move the useful content of <maketitle> (contributors with their
-- affiliations, dates) to the article header and remove the element.
-- @param el the <maketitle> DOM element
local function handle_maketitle(el)
  -- <maketitle> is special element produced by TeX4ht from LaTeX's \maketitle
  -- we need to pick interesting info from there, and move it to the header
  local function is_empty(selector)
    return #article_meta:query_selector(selector) == 0
  end
  -- move <aff> to <contrib>
  local affiliations = {}
  for _, aff in ipairs(el:query_selector("aff")) do
    local id = aff:get_attribute("id")
    if id then
      for _,mark in ipairs(aff:query_selector("affmark")) do
        mark:remove_node()
      end
      affiliations[id] = aff:copy_node()
    end
  end
  -- contributors are taken from <maketitle> only if the header has none yet
  if is_empty("contrib") then
    for _, contrib in ipairs(el:query_selector("contrib")) do
      for _, affref in ipairs(contrib:query_selector("affref")) do
        local id = affref:get_attribute("rid") or ""
        -- we no longer need this node
        affref:remove_node()
        local linked_affiliation = affiliations[id]
        if linked_affiliation then
          contrib:add_child_node(linked_affiliation)
        end
      end
      for _, string_name in ipairs(contrib:query_selector("string-name")) do
        make_text(string_name)
      end
      move_to_contribs(contrib:copy_node())
      -- we need to remove it from here, even though we remove <maketitle> later
      -- we got double contributors without that
      contrib:remove_node()
    end
  end
  if is_empty("pub-date") then
    for _, date in ipairs(el:query_selector("date")) do
      date._name = "pub-date"
      for _, s in ipairs(date:query_selector("string-date")) do
        make_text(s)
      end
      move_to_meta(date:copy_node())
    end
  end
  el:remove_node()
end

-- Add the "jats" option to the tex4ht.sty parameters and let the extensions
-- update the settings.
function M.prepare_parameters(settings, extensions)
  settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",jats"
  settings = mkutils.extensions_prepare_parameters(extensions, settings)
  return settings
end

-- The JATS format uses the extension list unchanged.
function M.prepare_extensions(extensions)
  return extensions
end

-- Register the DOM post-processing of the generated XML files.
-- @param make the build object
-- @return the same build object
function M.modify_build(make)
  -- NOTE(review): the other calls in this dump use the curried form
  -- filter_settings "name" {options}; confirm that the second argument is
  -- really used in this two-argument call
  filter_settings("joincharacters", {charclasses = {italic=true, bold=true}})
  local process = domfilter {
    function(dom, params)
      dom:traverse_elements(function(el)
        -- some elements need special treatment
        local el_name = el:get_element_name()
        if is_xref_id(el) then
          xref_to_id(el)
        elseif el_name == "article-meta" then
          -- save article-meta element for further processing
          article_meta = el
        elseif el_name == "article-title" then
          move_to_title_group(el)
        elseif el_name == "subtitle" then
          move_to_title_group(el)
        elseif el_name == "abstract" then
          move_to_meta(el)
        elseif el_name == "string-name" then
          make_text(el)
        elseif el_name == "contrib" then
          move_to_contribs(el)
        elseif is_empty_par(el) then
          -- remove empty paragraphs
          el:remove_node()
        elseif el_name == "xref" then
          handle_links(el, params)
        elseif el_name == "maketitle" then
          handle_maketitle(el)
        elseif el_name == "div" and el:get_attribute("class") == "maketitle" then
          el:remove_node()
        end
      end)
      -- move elements that are marked for move
      process_moves()
      return dom
    end, "joincharacters","mathmlfixes", "tablerows","booktabs"
  }
  local charclasses = {["mml:mi"] = true, ["mml:mn"] = true , italic = true, bold=true, roman = true, ["mml:mtext"] = true, mi=true, mn=true}
  make:match("xml$", process, {charclasses = charclasses})
  return make
end

return M

================================================
FILE: formats/make4ht-odt.lua
================================================
local M = {}
local mkutils = require "mkutils"
local lfs = require "lfs"
local os = require "os"
local kpse = require "kpse"
local filter = require "make4ht-filter"
local domfilter = require "make4ht-domfilter"
local domobject = require "luaxml-domobject"
local xtpipeslib = require "make4ht-xtpipes"
local log = logging.new "odt"

-- Add the "ooffice" options to the tex4ht.sty and tex4ht parameters and let
-- the extensions update the settings.
function M.prepare_parameters(settings, extensions)
  settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",ooffice"
  settings.tex4ht_par = settings.tex4ht_par .. " ooffice/! -cmozhtf"
  -- settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes -coo "
  -- settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes "
  settings = mkutils.extensions_prepare_parameters(extensions, settings)
  return settings
end

-- object for working with the ODT file
local Odtfile = {}
Odtfile.__index = Odtfile

-- Create the object and a temporary directory (with the Pictures
-- subdirectory) in the current directory.
-- @param archivename name of the resulting ODT file
-- @return the object, or nil and a message when the directory cannot be created
Odtfile.new = function(archivename)
  local self = setmetatable({}, Odtfile)
  -- create a temporary file
  local tmpname = os.tmpname()
  -- remove a temporary file, we are interested only in the unique file name
  os.remove(tmpname)
  -- get the unique dir name
  tmpname = tmpname:match("([a-zA-Z0-9_%-%.]+)$")
  local status, msg = lfs.mkdir(tmpname)
  if not status then return nil, msg end
  -- make picture dir
  lfs.mkdir(tmpname .. "/Pictures")
  self.archivelocation = tmpname
  self.name = archivename
  return self
end

-- copy a file to the path `dest` inside of the temporary archive directory
function Odtfile:copy(src, dest)
  mkutils.cp(src, self.archivelocation .. "/" .. dest)
end

-- move a file to the path `dest` inside of the temporary archive directory
function Odtfile:move(src, dest)
  mkutils.mv(src, self.archivelocation .. "/" .. dest)
end

-- create a subdirectory of the temporary archive directory
function Odtfile:create_dir(dir)
  local currentdir = lfs.currentdir()
  lfs.chdir(self.archivelocation)
  lfs.mkdir(dir)
  lfs.chdir(currentdir)
end

-- Write the "mimetype" file to the current directory.
-- @return nil and a message when the file cannot be written
function Odtfile:make_mimetype()
  self.mimetypename = "mimetype"
  local m, msg = io.open(self.mimetypename, "w")
  if not m then
    log:error(msg)
    return nil, msg
  end
  m:write("application/vnd.oasis.opendocument.text")
  m:close()
end

-- remove the file created by Odtfile:make_mimetype()
function Odtfile:remove_mimetype()
  os.remove(self.mimetypename)
end

-- Zip the temporary directory to the ODT file (the mimetype file is added by
-- a separate zip call before the rest), copy the result to the build
-- directory and delete the temporary directory.
function Odtfile:pack()
  local currentdir = lfs.currentdir()
  local zip_command = mkutils.find_zip()
  lfs.chdir(self.archivelocation)
  -- make temporary mime type file
  self:make_mimetype()
  mkutils.execute(zip_command .. ' -q0X "' .. self.name .. '" ' .. self.mimetypename)
  -- remove it, so the next command doesn't overwrite it
  self:remove_mimetype()
  mkutils.execute(zip_command ..' -r "' .. self.name .. '" *')
  lfs.chdir(currentdir)
  mkutils.cp(self.archivelocation .. "/" ..
self.name, mkutils.file_in_builddir(self.name, Make.params)) mkutils.delete_dir(self.archivelocation) end --- ************************* -- *** fix picture sizes *** -- ************************* -- local function add_points(dimen) if type(dimen) ~= "string" then return dimen end -- convert SVG dimensions to points if only number is provided if dimen:match("[0-9]$") then return dimen .. "pt" end return dimen end local function get_svg_dimensions(filename) local width, height if mkutils.file_exists(filename) then for line in io.lines(filename) do width = line:match("width%s*=%s*[\"'](.-)[\"']") or width height = line:match("height%s*=%s*[\"'](.-)[\"']") or height -- stop parsing once we get both width and height if width and height then break end end end width = add_points(width) height = add_points(height) return width, height end local function get_xbb_dimensions(filename) local f = io.popen("ebb -x -O " .. filename) if f then local content = f:read("*all") local width, height = content:match("%%BoundingBox: %d+ %d+ (%d+) (%d+)") return add_points(width), add_points(height) end return nil end -- local function fix_picture_sizes(tmpdir) local filename = tmpdir .. 
"/content.xml" local f = io.open(filename, "r") if not f then log:warning("Cannot open ", filename, "for picture size fixes") return nil end local content = f:read("*all") or "" f:close() local status, dom= pcall(function() return domobject.parse(content) end) if not status then log:warning("Cannot parse DOM, the resulting ODT file will be most likely corrupted") return nil end for _, pic in ipairs(dom:query_selector("draw|image")) do local imagename = pic:get_attribute("xlink:href") -- update SVG images dimensions log:debug("image", imagename) local parent = pic:get_parent() local width = parent:get_attribute("svg:width") local height = parent:get_attribute("svg:height") -- if width == "0.0pt" then width = nil end -- if height == "0.0pt" then height = nil end if not width or not height then local imgfilename = tmpdir .. "/" .. imagename if imagename:match("svg$") then width, height = get_svg_dimensions(imgfilename) -- or width, height elseif imagename:match("png$") or imagename:match("jpe?g$") then width, height = get_xbb_dimensions(imgfilename) end end log:debug("new dimensions", width, height) parent:set_attribute("svg:width", width) parent:set_attribute("svg:height", height) -- if end -- save the modified DOM again log:debug("Fixed picture sizes") local domcontent = dom:serialize() local f, msg = io.open(filename, "w") if not f then log:error(msg) return nil, msg end f:write(domcontent) f:close() end -- fix font records in the lg file that don't correct Font_Size record local lg_fonts_processed=false local patched_lg_fonts = {} local function fix_lgfile_fonts(ignored_name, params) -- this function is called from file match. we must use the name of the .lg file local filename = mkutils.file_in_builddir(params.input .. 
".lg", params) if not lg_fonts_processed then local lines = {} -- default font_size local font_size = "10" if mkutils.file_exists(filename) then -- for line in io.lines(filename) do -- default font_size can be set in the .lg file if line:match("Font_Size") then font_size = line:match("Font_Size:%s*(%d+)") elseif line:match("Font%(") then -- match Font record local name, size, size2, size3 = line:match('Font%("([^"]+)","([%d]*)","([%d]+)","([%d]+)"') -- find if the first size is not set, and add the default font_size then if size == "" then line = string.format('Font("%s","%s","%s","%s")', name, font_size, size2, size3) -- we must also save the font name and size for later post-processing, because -- we will need to fix styles in content.xml too patched_lg_fonts[name .. "-" .. font_size] = true end end lines[#lines+1] = line end -- save changed lines to the lg file local f = io.open(filename, "w") for _,line in ipairs(lines) do f:write(line .. "\n") end f:close() end filter_settings "odtfonts" {patched_lg_fonts = patched_lg_fonts} end lg_fonts_processed=true return true end local move_matches = xtpipeslib.move_matches local function insert_lgfile_fonts(make) local params = make.params local first_file = mkutils.file_in_builddir(params.input .. 
".4oo", params) -- find the last file and escape it so it can be used -- in filename match make:match(first_file, fix_lgfile_fonts) move_matches(make) end -- escape string to be used in the gsub search local function escape_file(filename) local quotepattern = '(['..("%^$().[]*+-?"):gsub("(.)", "%%%1")..'])' return filename:gsub(quotepattern, "%%%1") end -- call xtpipes from Lua local function call_xtpipes(make) -- we must find root of the TeX distribution local selfautoparent = xtpipeslib.get_selfautoparent() if selfautoparent then local matchfunction = xtpipeslib.get_xtpipes(selfautoparent) make:match("4oo", matchfunction) make:match("4om", matchfunction) -- move last match to a first place -- we need to move last two matches, for 4oo and 4om files move_matches(make) move_matches(make) -- fix font records in the lg file insert_lgfile_fonts(make) else log:warning "Cannot locate xtpipes. Try to set TEXMFROOT variable to a root directory of your TeX distribution" end end -- sort output files according to their extensions local function prepare_output_files(lgfiles) local groups = {} for _, name in ipairs(lgfiles) do local basename, extension = name:match("(.-)%.([^%.]+)$") local group = groups[extension] or {} table.insert(group, basename) groups[extension] = group log:debug("prepare output file", basename, extension) end return groups end -- execute function on all files in the group -- function fn takes current filename and table with various attributes local function exec_group(groups, name, fn) for _, basename in ipairs(groups[name] or {}) do fn{basename = basename, extension=name, filename = basename .. "." .. name} end end -- remove ", "") end function M.modify_build(make) local executed = false -- execute xtpipes from the build file, instead of t4ht. 
-- this fixes issues with wrong paths
  -- expanded in tex4ht.env in Miktex or Debian
  call_xtpipes(make)
  -- fix the image dimensions wrongly set by xtpipes
  local domfilters = domfilter({"t4htlinks", "odtpartable"}, "odtfilters")
  make:match("4oo$", domfilters)
  -- execute it before xtpipes, because we don't want xtpipes to mess with t4htlink elements
  move_matches(make)
  -- fixes for mathml
  local mathmldomfilters = domfilter({"joincharacters","mathmlfixes"}, "mathmlfilters")
  make:match("4om$", mathmldomfilters)
  -- DOM filters that should be executed after xtpipes
  local latedom = domfilter({"odtfonts"}, "lateodtfilters")
  make:match("4oo$", latedom)
  -- convert XML entities for Unicode characters produced by Xtpipes to characters
  local fixentities = filter {"entities-to-unicode", remove_xtpipes}
  make:match("4oo", fixentities)
  make:match("4om", fixentities)
  -- we must handle outdir. make4ht copies the ODT file before it was packed, so
  -- we will copy it again after packing later in this format file
  local outdir = make.params["outdir"]

  -- build the ODT file. This match must be executed as a last one
  -- this will be executed as a first match, just to find the last filename
  -- in the lgfile
  make:match(".*", function()
    -- execute it only once
    if not executed then
      -- this is list of processed files
      local lgfiles = make.lgfile.files
      -- go from the end of the list: table.remove shifts the following items,
      -- so a forward ipairs loop skipped the item after each removed one
      for k = #lgfiles, 1, -1 do
        if lgfiles[k]:match("odt$") then table.remove(lgfiles, k) end
      end
      -- find the last file and escape it so it can be used
      -- in filename match
      local lastfile = escape_file(lgfiles[#lgfiles]) .."$"
      -- make match for the last file
      -- odt packing will be done here
      make:match(lastfile, function(filename, par)
        local groups = prepare_output_files(make.lgfile.files)
        -- we must remove any path from the basename
        -- local basename = groups.odt[1]:match("([^/]+)$")
        local basename = make.params.input
        local odtname = basename .. ".odt"
        local odt,msg = Odtfile.new(odtname)
        if not odt then
          log:error("Cannot create ODT file: " .. tostring(msg))
          -- all following steps use the odt object, so we must stop here
          return nil, msg
        end
        -- helper function for simple file moving
        local function move_file(group, dest)
          exec_group(groups, group, function(par)
            odt:move("${filename}" % par, dest)
          end)
        end
        -- the document text
        exec_group(groups, "4oo", function(par)
          odt:move("${filename}" % par, "content.xml")
          odt:create_dir("Pictures")
        end)
        -- manifest
        exec_group(groups, "4of", function(par)
          odt:create_dir("META-INF")
          odt:move("${filename}" % par, "META-INF/manifest.xml")
        end)
        -- math
        exec_group(groups, "4om", function(par)
          odt:create_dir(par.basename)
          odt:move("${filename}" % par, "${basename}/content.xml" % par)
          -- copy the settings file to math subdir
          local settings = groups["4os"][1]
          odt:copy(settings .. ".4os", "${basename}/settings.xml" % par)
        end)
        -- these files are created only once, so it doesn't matter that they are
        -- copied to one file
        move_file("4os", "settings.xml")
        move_file("4ot", "meta.xml")
        move_file("4oy", "styles.xml")
        -- pictures
        exec_group(groups, "4og", function(par)
          -- add support for images in the TEXMF tree
          if not mkutils.file_exists(par.basename) then
            -- try to find the file in the directory with the original TeX file
            local without_builddir = par.basename:gsub("^" .. mkutils.escape_pattern(make.params.builddir or "") .. "/", "")
            if mkutils.file_exists(without_builddir) then
              par.basename = without_builddir
            else
              par.basename = kpse.find_file(par.basename, "graphic/figure") or kpse.find_file(without_builddir, "graphic/figure")
              if not par.basename then return nil, "Cannot find picture" end
            end
          end
          -- the Pictures dir is flat, without subdirs
          odt:copy("${basename}" % par, "Pictures")
        end)
        -- fix picture sizes in the content file
        fix_picture_sizes(odt.archivelocation)
        -- remove some spurious file
        exec_group(groups, "4od", function(par)
          os.remove(par.filename)
        end)
        odt:pack()
        local build_filename = mkutils.file_in_builddir(odt.name, make.params)
        if outdir and outdir ~= "" then
          local outfilename = outdir .. "/" .. odt.name
          log:info("Copying ODT file to the output dir: " .. outfilename)
          mkutils.copy(build_filename,outfilename)
        elseif build_filename ~= odt.name then
          mkutils.cp(build_filename, odt.name)
        end
      end)
    end
    executed = true
  end)
  return make
end

return M

================================================
FILE: formats/make4ht-tei.lua
================================================
local M = {}
local xtpipeslib = require "make4ht-xtpipes"
local domfilter = require "make4ht-domfilter"

-- Add the "tei" option to the tex4ht.sty parameters and let the extensions
-- update the settings.
function M.prepare_parameters(settings, extensions)
  settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",tei"
  settings = mkutils.extensions_prepare_parameters(extensions, settings)
  return settings
end

-- The TEI format uses the extension list unchanged.
function M.prepare_extensions(extensions)
  return extensions
end

-- Run the `joincharacters` DOM filter on the generated XML files.
-- @param make the build object
-- @return the same build object
function M.modify_build(make)
  local process = domfilter { "joincharacters" }
  -- we use `hi` elements for characters styled using HTF fonts in TEI
  -- use the `joincharacters` DOM filter to join them
  filter_settings "joincharacters" { charclasses = { hi=true, mn = true} }
  make:match("xml$", process)
  return make
end

return M

================================================
FILE: formats/make4ht-xhtml.lua
================================================
local M = {}
local mkutils = require "mkutils"

-- Add the common_domfilters extension to the extension list.
function M.prepare_extensions(extensions)
  return mkutils.add_extensions("+common_domfilters", extensions)
end

-- Let the extensions update the parameters.
function M.prepare_parameters(parameters,extensions)
  parameters = mkutils.extensions_prepare_parameters(extensions,parameters)
  return parameters
end

return M

================================================
FILE: lapp-mk4.lua
================================================
-- lapp.lua
-- Simple command-line parsing using human-readable specification
-----------------------------
--~ -- args.lua
--~ local args = require ('lapp') [[
--~ Testing parameter handling
--~ -p Plain flag (defaults to false)
--~ -q,--quiet Plain flag with GNU-style optional long name
--~ -o (string) Required string option
--~ -n (number) Required number option
--~ -s (default 1.0) Option that takes a number, but will
--~                  default
--~ <start> (number) Required number argument
--~ <input> (default stdin) A parameter which is an input file
--~ <output> (default stdout) One that is an output file
--~ ]]
--~ for k,v in pairs(args) do
--~ print(k,v)
--~ end
-------------------------------
--~ > args -pq -o help -n 2 2.3
--~ input file (781C1B78)
--~ p true
--~ s 1
--~ output file (781C1B98)
--~ quiet true
--~ start 2.3
--~ o help
--~ n 2
--------------------------------
lapp = {}
local append = table.insert
local usage
local open_files = {}
local parms = {}
local aliases = {}
local parmlist = {}
-- default values that are recognized as files: {file handle, parameter type}
local filetypes = {
  stdin = {io.stdin,'file-in'}, stdout = {io.stdout,'file-out'}, stderr = {io.stderr,'file-out'}
}

-- Write the message (and the usage text, unless no_usage is set) to stderr
-- and exit with status 1.
local function quit(msg,no_usage)
  if msg then io.stderr:write(msg..'\n\n') end
  if not no_usage then io.stderr:write(usage) end
  os.exit(1);
end

-- print the usage text and exit
local function help()
  print(usage)
  os.exit()
end

-- result table returned when the version option is used
local function version()
  return {version = true}
end

-- Quit with a message prefixed by the script name.
-- Note that this local function hides the built-in `error` in the rest of the file.
local function error(msg,no_usage)
  quit(arg[0]:gsub('.+[\\/]','')..':'..msg,no_usage)
end

-- the trim functions return also the number of replacements made by gsub
local function ltrim(line)
  return line:gsub('^%s*','')
end

local function rtrim(line)
  return line:gsub('%s*$','')
end

local function trim(s)
  return ltrim(rtrim(s))
end

-- open a file and remember the handle; quits when the file cannot be opened
local function open (file,opt)
  local val,err = io.open(file,opt)
  if not val then error(err,true) end
  append(open_files,val)
  return val
end

-- quit with the message when the condition is false
local function xassert(condn,msg)
  if not condn then error(msg) end
end

-- check that min <= x <= max
local function range_check(x,min,max,parm)
  xassert(min <= x and max >= x,parm..' out of range')
end

-- tonumber() that quits when the conversion fails
local function xtonumber(s)
  local val = tonumber(s)
  if not val then error("unable to convert to number: "..s) end
  return val
end

local function is_filetype(type)
  return type == 'file-in' or type == 'file-out'
end

-- custom types registered using lapp.add_type
local types = {}

-- Convert the string value from the command line according to the parameter
-- specification and run the constraint check.
-- @param ps parameter specification
-- @param val string value
-- @return converted value
local function convert_parameter(ps,val)
  if ps.converter then val = ps.converter(val) end
  if ps.type == 'number' then
    val = xtonumber(val)
  elseif is_filetype(ps.type) then
    val = open(val,(ps.type == 'file-in' and 'r') or 'w' )
  elseif ps.type == 'boolean' then
    val = true
  end
  if ps.constraint then ps.constraint(val) end
  return val
end

-- Register a custom parameter type.
-- @param name type name used in the specification
-- @param converter conversion function, or name of a built-in type
-- @param constraint optional function that checks the converted value
function lapp.add_type (name,converter,constraint)
  types[name] = {converter=converter,constraint=constraint}
end

local function force_short(short)
  xassert(#short==1,short..": short parameters should be one character")
end

-- Parse the specification and then the global `arg` table.
-- @param str specification (the usage text)
-- @return table with parameter values; {version = true} for the version option
function process_options_string(str)
  local res = {}
  local varargs

  -- remove the trailing "..." and remember if it was present
  local function check_varargs(s)
    local res,cnt = s:gsub('%.%.%.$','')
    varargs = cnt > 0
    return res
  end

  -- save the value; values of varargs parameters are collected in a list
  local function set_result(ps,parm,val)
    if not ps.varargs then
      res[parm] = val
    else
      if not res[parm] then
        res[parm] = { val }
      else
        append(res[parm],val)
      end
    end
  end

  usage = str

  for line in str:gmatch('([^\n]*)\n') do
    -- typespec is declared here, it was an accidental global variable before
    local optspec,optparm,i1,i2,defval,vtype,constraint,typespec
    line = ltrim(line)
    -- flags: either -<short> or -<short>,--<long>
    i1,i2,optspec = line:find('^%-(%S+)')
    if i1 then
      optspec = check_varargs(optspec)
      local short,long = optspec:match('([^,]+),(.+)')
      if short then
        optparm = long:sub(3)
        aliases[short] = optparm
        force_short(short)
      else
        optparm = optspec
        force_short(optparm)
      end
    else
      -- is it <parameter_name>?
      i1,i2,optparm = line:find('(%b<>)')
      if i1 then
        -- so <input file...> becomes input_file ...
        optparm = check_varargs(optparm:sub(2,-2)):gsub('%A','_')
        append(parmlist,optparm)
      end
    end
    if i1 then -- this is not a pure doc line
      local last_i2 = i2
      local sval
      line = ltrim(line:sub(i2+1))
      -- do we have (default <val>) or (<type>)?
      i1,i2,typespec = line:find('^%s*(%b())')
      if i1 then
        typespec = trim(typespec:sub(2,-2)) -- trim the parens and any space
        sval = typespec:match('default%s+(.+)')
        if sval then
          local val = tonumber(sval)
          if val then -- we have a number!
            defval = val
            vtype = 'number'
          elseif filetypes[sval] then
            local ft = filetypes[sval]
            defval = ft[1]
            vtype = ft[2]
          else
            defval = sval
            vtype = 'string'
          end
        else
          local min,max = typespec:match '([^%.]+)%.%.(.+)'
          if min then -- it's (min..max)
            vtype = 'number'
            min = xtonumber(min)
            max = xtonumber(max)
            constraint = function(x)
              range_check(x,min,max,optparm)
            end
          else -- (<type>) just contains type of required parameter
            vtype = typespec
          end
        end
      else -- must be a plain flag, no extra parameter required
        defval = false
        vtype = 'boolean'
      end
      local ps = {
        type = vtype,
        defval = defval,
        required = defval == nil,
        comment = line:sub((i2 or last_i2)+1) or optparm,
        constraint = constraint,
        varargs = varargs
      }
      if types[vtype] then
        local converter = types[vtype].converter
        if type(converter) == 'string' then
          ps.type = converter
        else
          ps.converter = converter
        end
        ps.constraint = types[vtype].constraint
      end
      parms[optparm] = ps
    end
  end
  -- cool, we have our parms, let's parse the command line args
  local iparm = 1
  local iextra = 1
  local i = 1
  local parm,ps,val
  while i <= #arg do
    -- look for a flag, -<short flags> or --<long flag>
    local i1,i2,dash,parmstr = arg[i]:find('^(%-+)(%a.*)')
    if i1 then -- we have a flag
      if #dash == 2 then -- long option
        parm = parmstr
      else -- short option
        if #parmstr == 1 then
          parm = parmstr
        else -- multiple flags after a '-',?
          parm = parmstr:sub(1,1)
          if parmstr:find('^%a%d+') then
            -- a short option followed by a digit? (exception for AW ;))
            -- push ahead into the arg array
            table.insert(arg,i+1,parmstr:sub(2))
          else
            -- push multiple flags into the arg array!
            for k = 2,#parmstr do
              table.insert(arg,i+k-1,'-'..parmstr:sub(k,k))
            end
          end
        end
      end
      if parm == 'h' or parm == 'help' then help() end
      if parm == "v" or parm == "version" then return version() end
      if aliases[parm] then parm = aliases[parm] end
      ps = parms[parm]
      if not ps then error("unrecognized parameter: "..parm) end
      if ps.type ~= 'boolean' then -- we need a value! This should follow
        val = arg[i+1]
        i = i + 1
        xassert(val,parm.." was expecting a value")
      end
    else -- a parameter
      parm = parmlist[iparm]
      if not parm then
        -- extra unnamed parameters are indexed starting at 1
        parm = iextra
        iextra = iextra + 1
        ps = { type = 'string' }
      else
        ps = parms[parm]
      end
      if not ps.varargs then iparm = iparm + 1 end
      val = arg[i]
    end
    ps.used = true
    val = convert_parameter(ps,val)
    set_result(ps,parm,val)
    if is_filetype(ps.type) then set_result(ps,parm..'_name',arg[i]) end
    if lapp.callback then lapp.callback(parm,arg[i],res) end
    i = i + 1
  end
  -- check unused parms, set defaults and check if any required parameters were missed
  for parm,ps in pairs(parms) do
    if not ps.used then
      if ps.required then error("missing required parameter: "..parm) end
      set_result(ps,parm,ps.defval)
    end
  end
  return res
end

-- calling the lapp table parses the given specification
setmetatable(lapp, {
  __call = function(tbl,str) return process_options_string(str) end,
  __index = { open = open, quit = quit, error = error, assert = xassert, }
})

return lapp

================================================
FILE: make4ht
================================================
#!/usr/bin/env texlua
-- Package make4ht.
-- Author Michal Hoftich
-- This package is subject of LPPL license, version 1.3
--
-- Main script: parses the command line, loads the output format, the
-- configuration file, the extensions and the build file, and registers the
-- default steps and file matches of the build.
kpse.set_program_name("luatex")

-- logging should be globally available
logging = require "make4ht-logging"
if os.type == "windows" then logging.use_colors = false end
local log = logging.new("make4ht")
local make4ht = require("make4ht-lib")
local lapp = require("lapp-mk4")
local mkutils = require("mkutils")
local mkparams = require("mkparams")
local mk_config = require("make4ht-config")
-- args string is here just as sample, we dont pass it it to
-- mkparams.get_args() so default args string is used
local args = [[ make4ht - build system for TeX4ht Usage: make4ht [options] filename ["tex4ht.sty op." "tex4ht op." "t4ht op" "latex op"] -c,--config (default xhtml) Custom config file -d,--output-dir (default nil) Output directory -l,--lua Use lualatex for document compilation -s,--shell-escape Enables running external programs from LaTeX -u,--utf8 For output documents in utf8 encoding -x,--xetex Use xelatex for document compilation (string) Input file name ]]

-- set version number. the template should be replaced by the
-- actual version number by the build script
local version = "{{version}}"
mkparams.version_number = version

-- this declaration replaces the sample `args` string declared above
local args = mkparams.get_args()

local parameters = mkparams.process_args(args)

log:status("Conversion started")
log:status("Input file: " .. parameters.tex_file)

-- create the build directory, if it is requested
if parameters.builddir and parameters.builddir ~= "" then
  mkutils.make_path(parameters.builddir)
end

local mode = parameters.mode
local build_file = parameters.build_file

-- handle output formats
local allowed_output_formats = {xhtml = true, html5=true, odt = true, docbook=true, tei=true, jats=true}
-- formatter is Lua library which must provide at least prepare_parameters
-- and process_build_sequence functions
-- NOTE(review): the code below calls prepare_extensions, prepare_parameters
-- and modify_build on the formatter, process_build_sequence is not called here
local formatter
local output_format = parameters.output_format
if allowed_output_formats[ output_format ] then
  formatter = mkutils.load_output_format(output_format)
else
  -- load html5 as default output format
  if output_format then
    log:warning("Cannot load output format: ".. output_format)
  end
  formatter = mkutils.load_output_format("html5")
end
-- find make4ht configuration file
local configname = "make4ht"
local conffile = mk_config.find_config(configname) or mk_config.find_xdg_config(configname)
if conffile then
  log:info("Using configuration file: " .. conffile)
  mkutils.load_config(parameters, conffile)
end
local extensions = formatter.prepare_extensions(parameters.extensions)
extensions = mkutils.load_extensions(extensions, output_format)

-- run extensions with prepare_parameters function
parameters = formatter.prepare_parameters(parameters,extensions)
local make = mkutils.load_config(parameters, build_file)["Make"]
make.params = parameters
-- add the default compilation steps when make:length() reports less than one
if make:length() < 1 then
  if mode == "draft" then
    make:htlatex()
  elseif mode == "clean" then
    make:clean()
    make.no_dvi_process = true
  else
    -- automatically detect and execute number of necessary compilations by default
    make:autohtlatex()
  end
end

if not args["no-tex4ht"] and not make.no_dvi_process then
  make:tex4ht()
end

-- the extension is "xdv" when the xetex option is set, "dvi" otherwise
local ext = args.xetex and "xdv" or "dvi"

if #make.image_patterns > 0 then
  make.params.t4ht_par = make.params.t4ht_par .. " -p"
end

if not make.no_dvi_process then
  make:t4ht {ext = ext}
end

-- run extensions which modify the build sequence
if #extensions > 0 then
  make = mkutils.extensions_modify_build(extensions, make)
end

-- allow output formats to modify the build process at the end
make = formatter.modify_build(make) or make

make:match("tmp$", function(filename,params)
  -- remove the temporary tex file created when the input comes from the standard input
  if params.is_tmp_file then
    log:info("removing temp file", params.tex_file)
    os.remove(params.tex_file)
  end
  -- prevent copying of the temporary file to the outdir
  return false,"tmp file"
end
)

-- copy the output files to the output directory
make:match(".*",function(filename,par)
  local outdir = '' --par["outdir"] and par["outdir"] .."/" or ''
  if par['outdir'] ~= "" then
    outdir = par['outdir'] .. '/'
  else
    -- don't run unnecessary copy without output dir
    log:info("No output directory")
    return true
  end
  log:info("outdir: "..outdir)
  -- remove the build dir from the beginning of the file name
  local outfilename = filename:gsub("^" .. mkutils.escape_pattern(par.builddir or ""), "")
  outfilename = outdir ..
outfilename mkutils.copy(filename,outfilename) return true end) make:run() log:status("Conversion finished") logging.exit_status() ================================================ FILE: make4ht-aeneas-config.lua ================================================ local M = {} local mkutils = require "mkutils" local task_template = [[ ${lang} ${file_desc} ${file_id} ${prefix}${html_file} ${text_type} ${prefix}${audio_file} ${id_sort} ${id_regex} ${sub_file} ${sub_format} ${html_file} ${audio_file} ]] -- get html files local function get_html_files(config) local config = config or {} local files = {} local filematch = config.file_match or "html$" -- this is a trick to get list of files from the LG file for _, file in ipairs(Make.lgfile.files) do if file:match(filematch) then table.insert(files, file) end end return files end -- prepare filename for the audio local function get_audio_file(filename, config) local extension = config.audio_extension or "mp3" local base = mkutils.remove_extension(filename) return base .. "." .. extension end local function get_sub_file(filename, config) local extension = config.sub_format or "smil" local base = mkutils.remove_extension(filename) return base .. "." .. extension end -- create task record for each HTML file local function prepare_tasks(files, configuration) local tasks = {} -- the map can contain info for particular files, otherwise we will interfere default values local map = configuration.map or {} -- task_template should be configurable local task_template = configuration.task_template or task_template for i, filename in ipairs(files) do local filemap = map[filename] if filemap ~= false then filemap = filemap or {} local taskconfig = configuration taskconfig.html_file = filename taskconfig.prefix = filemap.prefix or configuration.prefix taskconfig.file_desc = filemap.description or configuration.description .. " " .. 
i taskconfig.file_id = filemap.id or filename:gsub("[%/%.]", "_") taskconfig.text_type = filemap.text_type or configuration.text_type taskconfig.audio_file = filemap.audio_file or get_audio_file(filename, configuration) taskconfig.sub_file = filemap.sub_file or get_sub_file(filename, configuration) taskconfig.id_sort= filemap.id_sort or configuration.id_sort taskconfig.id_prefix = filemap.id_regex or configuration.id_regex taskconfig.sub_format = filemap.sub_format or configuration.sub_format tasks[#tasks+1] = task_template % taskconfig Make:add_file(taskconfig.audio_file) Make:add_file(taskconfig.sub_file) end end return tasks --table.concat(tasks, "\n") end -- from https://www.readbeyond.it/aeneas/docs/clitutorial.html#xml-config-file-config-xml local config_template = [[ ${lang} ${description} ${tasks} output_example4 zip flat ${prefix} ]] -- check if the config file exists local function is_config(filename) return mkutils.file_exists(filename) end -- prepare Aeneas configuration local function prepare_configuration(parameters) local config = parameters or {} config.lang = parameters.lang config.tasks = table.concat(prepare_tasks(parameters.files, config), "\n") return config end -- write Aeneeas configuration file in the XML format local function write_config(filename, configuration) local cfg = config_template % configuration print(cfg) local f = io.open(filename, "w") f:write(cfg) f:close() end local function make_default_options(options) local configuration = {} local par = get_filter_settings "aeneas-config" configuration.lang = options.lang or par.lang or "en" configuration.description = options.description or par.description or "Aeneas job" configuration.map = options.map or par.map or {} configuration.text_type = options.text_type or par.text_type or "unparsed" configuration.id_sort = options.id_sort or par.id_sort or "numeric" configuration.id_regex = options.id_regex or par.id_regex or par.id_prefix .. 
"[0-9]+" configuration.sub_format = options.sub_format or par.sub_format or "smil" configuration.prefix = options.prefix or par.prefix or "./" configuration.config_name = options.config_name or par.config_name or "config.xml" configuration.keep_config = options.keep_config or par.keep_config return configuration end local function configure_job(options) local configuration = make_default_options(options) local config_name = configuration.config_name -- prepare the configuration in every case configuration.files = get_html_files() local configuration = prepare_configuration(configuration) -- write the configuration only if the config file doesn't exist -- and keep_config option is set to true if is_config(config_name) and configuration.keep_config==true then else write_config(config_name, configuration) end end local function execute_job(options) local par = get_filter_settings "aeneas-config" local configuration = make_default_options(options) configuration.files = get_html_files() -- we need to configure prepare_tasks to return calls to aeneas task convertor configuration.python = options.python or par.python or "python3" configuration.module = options.module or par.module or "aeneas.tools.execute_task" configuration.task_template = '${python} -m "${module}" "${audio_file}" "${html_file}" "is_text_type=${text_type}|os_task_file_smil_audio_ref=${audio_file}|os_task_file_smil_page_ref=${html_file}|task_language=${lang}|is_text_unparsed_id_sort=${id_sort}|is_text_unparsed_id_regex=${id_regex}|os_task_file_format=${sub_format}" "${sub_file}"' local tasks = prepare_tasks(configuration.files, configuration) -- execute the tasks for _, v in ipairs(tasks) do print("task", v) local proc = io.popen(v, "r") local result = proc:read("*all") proc:close() print(result) end end -- the aeneas configuration must be executed at last processed file, after all filters -- have been executed local function get_last_lg_file() local t = Make.lgfile.files for i = #t, 1, -1 do -- find last 
html file or the tmp file
    local x = t[i]
    if x:match "html$" or x:match "tmp$" then
      return x
    end
  end
  return t[#t]
end

-- write Aeneas job configuration file
-- it doesn't execute Aeneas
function M.write_job(par)
  -- configuration table for Aeneas job
  Make:match("tmp$", function()
    configure_job(par)
  end)
end

-- execute Aeneas directly
-- The job is run exactly once, when the last html/tmp file from the lg
-- file is processed.
function M.execute(par)
  Make:match("tmp$", function(current_name)
    -- there may be html files after the .tmp file
    -- the aeneas must be executed after the Aeneas filter inserts the id
    -- attributes, so it is necessary to execute this code as very last one
    local last = get_last_lg_file()
    if current_name == last then
      -- there are no HTML files after the tmp file, execute the job now
      execute_job(par)
    elseif last then
      -- Postpone the job to the last file. The file name is used as a Lua
      -- pattern by Make:match, so magic characters (".", "-", "+", ...) must
      -- be escaped, and the pattern is anchored to match only this file.
      -- The matcher is registered only in this branch: registering it also
      -- when the tmp file is the last one would run the job twice.
      local escaped = (last:gsub("%p", "%%%0"))
      Make:match("^" .. escaped .. "$", function()
        execute_job(par)
      end)
    end
  end)
end

-- only register the audio and smil files as processed files
function M.process_files(par)
  Make:match("tmp$", function()
    local configuration = make_default_options(par)
    local files = get_html_files()
    prepare_tasks(files, configuration)
  end)
end

return M
================================================ FILE: make4ht-config.lua ================================================
local m = {}

local mkutils = require "mkutils"

local file_exists = mkutils.file_exists
-- function file_exists(name)
--   local f=io.open(name,"r")
--   if f~=nil then io.close(f) return true else return false end
-- end

-- join path components (a table of strings) with "/"
local make_name = function(name)
  return table.concat(name, "/")
  -- return name:gsub("//","/")
end

-- find the config file in XDG_CONFIG_HOME or in the HOME directory
-- the XDG tree is looked up first, the $HOME is used only when the file
-- cannot be found in the former
-- returns the path of the first existing file, or nothing
local xdg_config = function(filename, xdg_config_name)
  local dotfilename = "." .. filename
  local xdg_config_name = xdg_config_name or "config.lua"
  local xdg = os.getenv("XDG_CONFIG_HOME") or ((os.getenv("HOME") or "") .. "/.config")
  local home = os.getenv("HOME") or os.getenv("USERPROFILE")
  if xdg then
    -- filename like ~/.config/make4ht/config.lua
    local fn = make_name{ xdg ,filename , xdg_config_name }
    if file_exists(fn) then
      return fn
    end
  end
  if home then
    -- ~/.make4ht
    local fn = make_name{ home, dotfilename }
    if file_exists(fn) then
      return fn
    end
  end
end

-- look for the dot file (".<filename>") in the current directory and then
-- in all of its parents; returns the path of the first hit, or false
local find_config = function(filename)
  local filename = "." .. filename
  local current = lfs.currentdir()
  local path = {}
  current:gsub("([^/]+)", function(s) table.insert(path,s) end)
  local begin = os.type == "windows" and "" or "/"
  for i = #path, 1, -1 do
    local fn =begin .. table.concat(path,"/") .. "/".. filename
    -- print("testing",fn)
    if file_exists(fn) then return fn end
    -- drop the deepest directory and try its parent in the next step
    table.remove(path)
  end
  return false
end

-- Execute a Lua config file in a sandbox.
-- filename: path to the config file
-- default:  optional table used as the environment of the config chunk;
--           a small set of standard libraries is added to it
-- Returns the environment table, which holds everything the config file
-- defined as a global. Raises an error with a readable message when the
-- file cannot be opened or compiled; errors raised while the config runs
-- are propagated unchanged.
local function load_config(filename, default)
  local default = default or {}
  default.table = table
  default.string = string
  default.io = io
  default.os = os
  default.math = math
  default.print = print
  default.ipairs = ipairs
  default.pairs = pairs
  local f, open_err = io.open(filename, "r")
  if not f then
    error("Cannot open config file: " .. tostring(open_err), 2)
  end
  local contents = f:read("*all")
  f:close()
  -- load returns nil and a message on syntax errors, it must not be called then
  local chunk, load_err = load(contents,"sandbox config","bt", default)
  if not chunk then
    error("Cannot parse config file " .. tostring(filename) .. ": " .. tostring(load_err), 2)
  end
  chunk()
  return default
end

--[[
local function load_config(filename, default)
  local default = default or {}
  if ~file_exists(filename) then
    return nil, "Cannot load config file "..filename
  end
  local section = "default"
  local file = io.open(filename, "r")
  if ~file then return nil, "Error opening config file"..filename end
  for line in file:lines() do
    local ts = line:match("")
  end
  file:close()
end
--]]

m.find_config = find_config
m.find_xdg_config = xdg_config
m.load_config = load_config

return m
================================================ FILE: make4ht-doc.tex ================================================
% \documentclass{ltxdoc}
\documentclass{article}
\usepackage[english]{babel}
\usepackage{hyperref}
\newcommand\authormail[1]{\footnote{\textless\url{#1}\textgreater}}
\ifdefined\HCode
\renewcommand\authormail[1]{\space\textless\Link[#1]{}{}#1\EndLink\textgreater}
\fi
\usepackage{fontspec}
\setmainfont{TeX Gyre Schola}
% \setmonofont[Scale=MatchLowercase]{Inconsolatazi4}
\IfFontExistsTF{Noto Sans Mono Regular}{%
  \setmonofont[Scale=MatchLowercase]{Noto Sans Mono Regular}
}{\setmonofont{NotoMono-Regular.ttf}}
\usepackage{upquote}
\usepackage{microtype}
\providecommand\tightlist{\relax}
\title{The \texttt{make4ht} build system}
\author{Michal Hoftich\authormail{michal.h21@gmail.com}}
\date{Version \version\\\gitdate}
\begin{document}
\maketitle
\tableofcontents
\input{readme}
\input{changelog}
\end{document}
================================================ FILE: make4ht-dvireader.lua ================================================
-- This is not actually full DVI reader. It just calculates hash for each page,
-- so it can be detected if it changed between compilations and needs to be
-- converted to image using Dvisvgm or Dvipng
--
-- information about DVI format is from here: https://web.archive.org/web/20070403030353/http://www.math.umd.edu/~asnowden/comp-cont/dvi.html
--
local M

-- the file after post_post is filled with bytes 223
local endfill = 223

-- numbers of bytes for each data type in DVI file
local int = 4
local byte = 1
local sixteen = 2

-- Return the single character at position `pos` of `str`,
-- or nil when `pos` lies behind the end of the string.
local function read_char(str, pos)
  if pos and pos > string.len(str) then
    return nil
  end
  -- exactly one character; the end index used to be pos + 1, which
  -- returned two characters
  return string.sub(str, pos, pos)
end

-- Return the numeric value of the byte at position `pos` of `str`.
local function read_byte(str, pos)
  return string.byte(read_char(str, pos))
end

-- DVI file format uses signed big endian integers. This code doesn't take into account
-- the sign, so it will return incorrect result for negative numbers. It doesn't matter
-- for the original purpose of this library, but it should be fixed for general use.
-- Read four bytes starting at `pos` as an unsigned big endian number.
local function read_integer(str, pos)
  return read_byte(str, pos) * (256 ^ 3)
    + read_byte(str, pos + 1) * (256 ^ 2)
    + read_byte(str, pos + 2) * 256
    + read_byte(str, pos + 3)
end

-- Read two bytes starting at `pos` as an unsigned big endian number.
local function read_sixteen(str, pos)
  return read_byte(str, pos) * 256 + read_byte(str, pos + 1)
end

-- select reader function with number of bytes of an argument
local readers = {
  [byte] = read_byte,
  [int] = read_integer,
  [sixteen] = read_sixteen
}

-- description of the opcodes used by this library: the opcode byte and
-- the list of arguments which follow it
local opcodes = {
  post_post = {
    opcode = 249,
    args = {
      {name="q", type = int}, -- postamble address
      {name="i", type = byte}
    }
  },
  post = {
    opcode = 248,
    args = {
      {name="p", type = int}, -- address of the last page
      {name="num", type = int},
      {name="den", type = int},
      {name="mag", type = int},
      {name="l", type = int},
      {name="u", type = int},
      {name="s", type = sixteen},
      {name="t", type = sixteen},
    }
  },
  bop = {
    opcode = 139,
    args = {
      {name="c0", type=int},
      {name="c1", type=int},
      {name="c2", type=int},
      {name="c3", type=int},
      {name="c4", type=int},
      {name="c5", type=int},
      {name="c6", type=int},
      {name="c7", type=int},
      {name="c8", type=int},
      {name="c9", type=int},
      {name="p", type=int}, -- previous page
    }
  }
}

-- Read all arguments described in `args`, starting at `pos`.
-- Returns a table indexed by the argument names.
local function read_arguments(str, pos, args)
  local values = {}
  for _, arg in ipairs(args) do
    local reader = readers[arg.type]
    values[arg.name] = reader(str, pos)
    -- arg.type is the size of the data type in bytes, so it moves the
    -- position to the next argument
    pos = pos + arg.type
  end
  return values
end

-- Read the arguments of the named opcode, which must start at `pos`.
-- Returns a table with the arguments, or nil and an error message.
local function read_opcode(opcode, str, pos)
  local format = opcodes[opcode]
  if not format then return nil, "Cannot find opcode format: " .. opcode end
  -- the byte at the current position must be the requested opcode
  local op = read_byte(str, pos)
  if op ~= format.opcode then return nil, "Wrong opcode " .. op .. " at position " .. pos end
  return read_arguments(str, pos+1, format.args)
end

-- find the postamble address
local function get_postamble_addr(dvicontent)
  local pos = string.len(dvicontent)
  local last = read_char(dvicontent, pos)
  -- skip endfill bytes at the end of file
  while string.byte(last) == endfill do
    pos = pos - 1
    last = read_char(dvicontent, pos)
  end
  -- first read post_post to get address of the postamble
  local post_postamble, msg = read_opcode("post_post", dvicontent, pos-5)
  if not post_postamble then return nil, msg end
  -- return the postamble address
  return post_postamble.q + 1
  -- return read_opcode("post", dvicontent, post_postamble.q + 1)
end

-- Read one page. `start` is the address stored in the pointer to this
-- page, `stop` is a position behind the page, from which the end of page
-- opcode is searched backwards.
-- Returns the page object (page number and hash of the page contents),
-- the pointer to the previous page and `start`; nil when there is no page.
local function read_page(str, start, stop)
  -- walk backwards from `pos` to the closest end of page opcode (140)
  local function get_end_of_page(str, pos)
    while read_byte(str, pos) ~= 140 do
      pos = pos - 1
    end
    return pos
  end
  -- we reached the end of file
  if start == 2^32-1 then return nil end
  local current_page = read_opcode("bop", str, start + 1)
  if not current_page then return nil end
  local endofpage = get_end_of_page(str, stop)
  -- get the page contents, but skip all parameters, because they can change
  -- (especially pointer to the previous page)
  local page = str:sub(start + 46, endofpage)
  local page_obj = {
    number = current_page.c0, -- the page number
    hash = md5.sumhexa(page) -- hash the page contents
  }
  return page_obj, current_page.p, start
end

-- Return a table which maps page numbers to hashes of the page contents.
local function get_pages(dvicontent)
  local pages = {}
  local postamble_pos = get_postamble_addr(dvicontent)
  local postamble = read_opcode("post", dvicontent, postamble_pos)
  local next_page_pos = postamble.p
  local page, previous_page = nil, postamble_pos
  local page_sequence = {}
  -- pages are linked from the last one to the first one
  while next_page_pos do
    page, next_page_pos, previous_page = read_page(dvicontent, next_page_pos, previous_page)
    page_sequence[#page_sequence+1] = page
  end

  -- reorder pages
  for _, v in ipairs(page_sequence) do
    pages[v.number] = v.hash
  end
  return pages
end

-- if arg[1] then
--   local f = io.open(arg[1], "r")
--   local dvicontent = f:read("*all")
--
--   f:close()
--   local pages = get_pages(dvicontent)
--   for k,v in pairs(pages) do
--     print(k,v)
--   end
-- end

return {
  get_pages = get_pages
}
================================================ FILE: make4ht-errorlogparser.lua ================================================
local m = {}

-- Get the file name from the start of a log chunk.
-- Only names which start with ".", "/" or "\" are accepted.
-- Returns the file name, or false when the chunk doesn't start with one.
local function get_filename(chunk)
  local filename = chunk:match("([^\n^%(]+)")
  if not filename then
    return false, "No filename detected"
  end
  local first = filename:match("^[%./\\]+")
  if first then return filename end
  return false
end

-- Split the log to chunks for particular included files.
-- Returns the list of chunks ({filename, text, children}) and the text
-- which is left when the chunks are removed.
local function get_chunks(text)
  -- parse log for particular included files
  local chunks = {}
  -- each file is enclosed in matching () brackets
  local newtext = text:gsub("(%b())", function(a)
    local chunk = string.sub(a,2,-2)
    -- if no filename had been found in the chunk, it is probably not file chunk
    -- so just return the original text
    local filename = get_filename(chunk)
    if not filename then
      return a
    end
    local children, text = get_chunks(chunk)
    table.insert(chunks, {filename = filename, text = text, children = children})
    return ""
  end)
  return chunks, newtext
end

-- Print the tree of chunks, for debugging.
-- NOTE(review): this function is a global. It looks accidental, but it is
-- kept, because other files may call it -- confirm before making it local.
function print_chunks(chunks, level)
  local level = level or 0
  local indent = string.rep(" ", level)
  for _, v in ipairs(chunks) do
    print(indent .. (v.filename or "?"), string.len(v.text))
    print_chunks(v.children, level + 1)
  end
end

-- Parse an error in the default format ("! message", followed by
-- "l.<number> context"). `i` is the index of the line with the message.
-- Returns the message, the line number (or false) and the error context.
local function parse_default_error(lines, i)
  local line = lines[i]
  -- get the error message "! msg text"
  local err = line:match("^!(.+)")
  -- the next line should contain line number where error happened
  local next_line = lines[i+1] or ""
  local msg = {}
  -- get the line number and first line of the error context
  local line_no, msg_start = next_line:match("^l%.(%d+)(.+)")
  line_no = line_no or false
  msg_start = msg_start or ""
  msg[#msg+1] = msg_start .. " <-"
  -- try to find rest of the error context.
  for x = i+2, i+5 do
    local next_line = lines[x] or ""
    -- break on blank lines
    if next_line:match("^%s*$") then break end
    msg[#msg+1] = next_line:gsub("^%s*", ""):gsub("%s$", "")
  end
  return err, line_no, table.concat(msg, " ")
end

-- Parse an error from a log created with the -file-line-number option
-- ("filename:lineno: message"). `i` is the index of the line with the error.
-- Returns the message, the line number and the error context.
local function parse_linenumber_error(lines, i)
  local line = lines[i]
  -- the file name is matched, but only the line number and message are used
  local _, line_no, err = line:match("^([^%:]+)%:(%d+)%:%s*(.*)")
  local msg = {}
  -- get error context
  for x = i+1, i+2 do
    local next_line = lines[x] or ""
    -- break on blank lines
    if next_line:match("^%s*$") then break end
    msg[#msg+1] = next_line:gsub("^%s*", ""):gsub("%s$", "")
  end
  -- insert mark to the error
  if #msg > 1 then
    table.insert(msg, 2, "<-")
  end
  return err, line_no, table.concat(msg, " ")
end

--- get error messages, linenumbers and contexts from a log file chunk
---@param text string chunk from the log file where we should find errors
---@return table errors error messages
---@return table error_lines error line number
---@return table error_messages error line contents
local function parse_errors(text)
  local lines = {}
  local errors = {}
  local error_lines = {}
  local error_messages = {}
  for line in text:gmatch("([^\n]+)") do
    lines[#lines+1] = line
  end
  for i = 1, #lines do
    local line = lines[i]
    local err, line_no, msg
    if line:match("^!(.+)") then
      err, line_no, msg = parse_default_error(lines, i)
    elseif line:match("^[^%:]+%:%d+%:.+") then
      err, line_no, msg = parse_linenumber_error(lines, i)
    end
    if err then
      errors[#errors+1] = err
      error_lines[#errors] = line_no
      error_messages[#errors] = msg
    end
  end
  return errors, error_lines, error_messages
end

-- Collect the errors from all chunks and their children to one list of
-- tables with the fields filename, error, line and context.
local function get_errors(chunks, errors)
  local errors = errors or {}
  for _, v in ipairs(chunks) do
    local current_errors, error_lines, error_contexts = parse_errors(v.text)
    for i, err in ipairs(current_errors) do
      table.insert(errors, {filename = v.filename, error = err, line = error_lines[i], context = error_contexts[i] })
    end
    errors = get_errors(v.children, errors)
  end
  return errors
end

-- Return names of the .sty and .cls files mentioned in the log for which
-- no .4ht file with the same base name is mentioned.
function m.get_missing_4ht_files(log)
  local used_files = {}
  local used_4ht_files = {}
  local missing_4ht_files = {}
  local pkg_names = {sty=true, cls=true}
  for filename, ext in log:gmatch("[^%s]-([^%/^%\\^%.%s]+)%.([%w][%w]+)") do
    -- the scan stops at the first file with the aux extension
    if ext == "aux" then break end
    if pkg_names[ext] then
      used_files[filename .. "." .. ext] = true
    elseif ext == "4ht" then
      used_4ht_files[filename] = true
    end
  end
  for filename, _ in pairs(used_files) do
    if not used_4ht_files[mkutils.remove_extension(filename)] then
      table.insert(missing_4ht_files, filename)
    end
  end
  return missing_4ht_files
end

-- Parse the log file contents.
-- Returns the list of errors and the tree of chunks.
function m.parse(log)
  local chunks, newtext = get_chunks(log)
  -- save the unparsed text that contains system messages
  table.insert(chunks, {text = newtext, children = {}})
  -- print_chunks(chunks)
  local errors = get_errors(chunks)
  -- for _,v in ipairs(errors) do
  --   print("error", v.filename, v.line, v.error)
  -- end
  return errors, chunks
end

m.print_chunks = print_chunks

return m
================================================ FILE: make4ht-filterlib.lua ================================================
local M = {}

-- the filter module must implement the load_filter function
-- `filters` is a filter name, a filter function, or a table of those.
-- Returns the sequence of loaded filters, or false and an error message
-- for an unsupported argument.
function M.load_filters(filters, load_filter)
  local sequence = {}
  if type(filters) == "string" then
    -- plain assignment keeps only the first value returned by load_filter;
    -- table.insert(sequence, load_filter(...)) would fail if it returned more
    sequence[#sequence+1] = load_filter(filters)
  elseif type(filters) == "table" then
    for _,n in ipairs(filters) do
      if type(n) == "string" then
        sequence[#sequence+1] = load_filter(n)
      elseif type(n) == "function" then
        table.insert(sequence, n)
      end
    end
  elseif type(filters) == "function" then
    table.insert(sequence, filters)
  else
    return false, "Argument to filter must be either\ntable with filter names, or single filter name"
  end
  return sequence
end

-- Read the whole file.
-- Returns the contents, or false and an error message when no file name
-- was given or the file cannot be opened.
function M.load_input_file(filename)
  if not filename then return false, "filters: no filename" end
  local file, err = io.open(filename,"r")
  if not file then
    -- the message from io.open contains the file name
    return false, "filters: " .. tostring(err)
  end
  local input = file:read("*all")
  file:close()
  return input
end
-- Write `input` to the file, replacing its previous contents.
-- Returns true, or nil and an error message when the file cannot be
-- opened for writing.
function M.save_input_file(filename, input)
  local file, err = io.open(filename,"w")
  if not file then
    return nil, "filters: " .. tostring(err)
  end
  file:write(input)
  file:close()
  return true
end

return M
================================================ FILE: make4ht-htlatex.lua ================================================
local log = logging.new "htlatex"
local autolog = logging.new "autohtlatex"

local error_logparser = require("make4ht-errorlogparser")

local Make = Make or {}

-- this function reads the LaTeX log file and tries to detect fatal errors in the compilation
-- it returns 1 when the log cannot be opened or contains a fatal error, 0 otherwise
local function testlogfile(par)
  local logfile = mkutils.file_in_builddir(par.input .. ".log", par)
  local f = io.open(logfile,"r")
  if not f then
    log:warning("Make4ht: cannot open log file "..logfile)
    return 1
  end
  local content = f:read("*a")
  -- test only the end of the log file, no need to run search functions on everything
  local text = content:sub(-1256)
  f:close()
  -- parse log file for all errors in non-interactive modes
  if par.interaction~="errorstopmode" then
    -- the error log parsing can be slow, so detect errors first
    -- detect both default error messages (! msg) and -file-line-number errors (filename:lineno:msg)
    if content:match("\n!") or content:match("[^:]+%:%d+%:.+") then
      local errors = error_logparser.parse(content)
      if #errors > 0 then
        log:error("Compilation errors in the htlatex run")
        log:error("Filename", "Line", "Message")
        for _, err in ipairs(errors) do
          log:error(err.filename or "?", err.line or "?", err.error)
          log:status(err.context)
        end
      end
    end
  end
  -- info about packages with no corresponding .4ht files
  local missing_4ht = error_logparser.get_missing_4ht_files(content)
  for _, filename in ipairs(missing_4ht) do
    log:info("Unsupported file: " .. filename)
  end
  -- test for fatal errors
  if text:match("No pages of output") or text:match("TeX capacity exceeded, sorry") or text:match("That makes 100 errors") or text:match("Emergency stop") then return 1 end
  return 0
end

-- Make this function available in the build files
Make.testlogfile = testlogfile
--env.Make:add("htlatex", "${htlatex} ${latex_par} '\\\makeatletter\\def\\HCode{\\futurelet\\HCode\\HChar}\\def\\HChar{\\ifx\"\\HCode\\def\\HCode\"##1\"{\\Link##1}\\expandafter\\HCode\\else\\expandafter\\Link\\fi}\\def\\Link#1.a.b.c.{\\g@addto@macro\\@documentclasshook{\\RequirePackage[#1,html]{tex4ht}\\let\\HCode\\documentstyle\\def\\documentstyle{\\let\\documentstyle\\HCode\\expandafter\\def\\csname tex4ht\\endcsname{#1,html}\\def\\HCode####1{\\documentstyle[tex4ht,}\\@ifnextchar[{\\HCode}{\\documentstyle[tex4ht]}}}\\makeatother\\HCode '${config}${tex4ht_sty_par}'.a.b.c.\\input ' ${input}")

-- template for calling LaTeX with tex4ht loaded
Make.latex_command = "${htlatex} --interaction=${interaction} ${build_dir_arg} ${latex_par} '\\makeatletter"..
"\\def\\HCode{\\futurelet\\HCode\\HChar}\\def\\HChar{\\ifx\"\\HCode"..
"\\def\\HCode\"##1\"{\\Link##1}\\expandafter\\HCode\\else"..
"\\expandafter\\Link\\fi}\\def\\Link#1.a.b.c.{"..
"\\let\\HCode\\documentstyle\\def\\documentstyle{\\let\\documentstyle"..
"\\HCode\\expandafter\\def\\csname tex4ht\\endcsname{#1,html}\\def"..
"\\HCode####1{\\documentstyle[tex4ht,}\\@ifnextchar[{\\HCode}{"..
"\\documentstyle[tex4ht]}}\\RequirePackage[#1,html]{tex4ht}${packages}}\\makeatother\\HCode ${tex4ht_sty_par}.a.b.c."..
"\\input \"\\detokenize{${tex_file}}\"'"

-- template for calling Plain TeX with tex4ht loaded
Make.plain_command = '${htlatex} --interaction=${interaction} ${build_dir_arg} ${latex_par}' ..
"'\\def\\Link#1.a.b.c.{\\expandafter\\def\\csname tex4ht\\endcsname{\\expandafter\\def\\csname tex4ht\\endcsname{#1,html}\\input tex4ht.sty }}" ..
"\\def\\HCode{\\futurelet\\HCode\\HChar}\\def\\HChar{\\ifx\"\\HCode\\def\\HCode\"##1\"{\\Link##1}\\expandafter\\HCode\\else\\expandafter\\Link\\fi}" ..
"\\HCode ${tex4ht_sty_par}.a.b.c.\\input \"\\detokenize{${tex_file}}\"'"

local m = {}

-- Run LaTeX with tex4ht loaded and test the log file for fatal errors.
-- par:           table with the build parameters; the interaction and
--                build_dir_arg fields are set here
-- latex_command: optional command template, Make.latex_command by default
-- Returns the value returned by Make.testlogfile.
function m.htlatex(par, latex_command)
  -- latex_command can be also plain_command for Plain TeX
  local command = latex_command or Make.latex_command
  local devnull = " > /dev/null 2>&1"
  if os.type == "windows" then
    command = command:gsub("'",'')
    devnull = " > nul 2>&1"
  end
  par.interaction = par.interaction or "batchmode"
  -- a missing builddir is handled like the empty one, otherwise the
  -- unexpanded ${builddir} placeholder would be passed to LaTeX
  if par.builddir and par.builddir~="" then
    par.build_dir_arg = "--output-directory=${builddir}" % par
  else
    par.build_dir_arg = ""
  end
  -- the terminal output is not interesting in the batch mode
  if par.interaction == "batchmode" then
    command = command .. devnull
  end
  command = command % par
  log:info("LaTeX call: "..command)
  os.execute(command)
  return Make.testlogfile(par)
end

-- Run Plain TeX with tex4ht loaded. It works on a copy of the
-- parameters, so the executable name is changed only for this call.
function m.httex(par)
  local newpar = {}
  for k,v in pairs(par) do newpar[k] = v end
  -- change executable name from *latex to *tex
  newpar.htlatex = newpar.htlatex:gsub("latex", "tex")
  -- plain tex command doesn't support etex extensions
  -- which are necessary for TeX4ht. just quick hack to fix this
  if newpar.htlatex == "tex" then newpar.htlatex = "etex" end
  return m.htlatex(newpar, Make.plain_command)
end

local function get_checksum(main_file, extensions, par)
  -- make checksum for temporary files
  local checksum = ""
  local extensions = extensions or {"aux", "4tc", "xref"}
  for _, ext in ipairs(extensions) do
    local filename = mkutils.file_in_builddir(main_file .. "." .. ext, par)
    local f = io.open(filename, "r")
    if f then
      local content = f:read("*all")
      f:close()
      -- make checksum of the file and previous checksum
      -- this way, we will detect change in any file
      checksum = md5.sumhexa(checksum ..
content)
    end
  end
  return checksum
end

-- this function runs htlatex multiple times until the checksum of temporary files doesn't change
-- the number of LaTeX runs, including the first one, is limited by the
-- max_compilations option (5 by default)
Make:add("autohtlatex", function(par)
  -- get checksum of temp files before compilation
  local options = get_filter_settings "autohtlatex"
  local extensions = par.auto_extensions or options.auto_extensions or {"aux", "4tc", "xref"}
  local max_compilations = par.max_compilations or options.max_compilations or 5
  local checksum = get_checksum(par.input, extensions, par)
  local status = m.htlatex(par)
  -- stop processing on error
  if status ~= 0 then return status end
  -- get checksum after compilation
  local newchecksum = get_checksum(par.input, extensions, par)
  -- number of finished compilations, it is needed to prevent possible infinite loops
  local compilation_count = 1
  while checksum ~= newchecksum do
    -- stop processing if we reach maximum number of compilations
    -- (the test used to be ">", which allowed one extra run)
    if compilation_count >= max_compilations then
      autolog:info("Stopping after " .. max_compilations .. " compilations")
      return status
    end
    status = m.htlatex(par)
    -- stop processing on error
    if status ~= 0 then return status end
    checksum = newchecksum
    -- get checksum after compilation
    newchecksum = get_checksum(par.input, extensions, par)
    compilation_count = compilation_count + 1
  end
  return status
end, {correct_exit= 0})

return m
================================================ FILE: make4ht-indexing.lua ================================================
local M = {}
local log = logging.new "indexing"

-- Handle accented characters in files created with \usepackage[utf]{inputenc}
-- this code was originally part of https://github.com/michal-h21/iec2utf/
local enc = {}

-- map from LICR commands to UTF-8 characters, filled by load_encfiles
local licrs = {}
local codepoint2utf = unicode.utf8.char
local used_encodings = {}

-- load inputenc encoding file
-- each \DeclareUnicodeCharacter{hex code}{LICR} declaration adds one
-- record to the licrs table
local function load_encfiles(f)
  local file= io.open(f,"r")
  local encodings = file:read("*all")
  file:close()
  for codepoint, licr in encodings:gmatch('DeclareUnicodeCharacter(%b{})(%b{})') do
    -- both captures contain the surrounding braces, sub(2,-2) removes them
    local codepoint = codepoint2utf(tonumber(codepoint:sub(2,-2),16))
    local licr= licr:sub(2,-2):gsub('@tabacckludge','')
    licrs[licr] = codepoint
  end
end

-- Convert the braced argument of \IeC to the form used for keys of the
-- licrs table: a space is kept only when a letter follows it, and the
-- surrounding braces are removed.
local function sanitize_licr(l)
  return l:gsub(" (.)",function(s)
    if s:match("[%a]") then
      return " "..s
    else
      return s
    end
  end):sub(2,-2)
end

-- Load the Unicode declarations for the given font encodings.
-- enc: optional table with encoding names
local load_enc = function(enc)
  -- use default encodings if the user doesn't provide any
  enc = enc or {"T1","T2A","T2B","T2C","T3","T5", "LGR"}
  for _,e in pairs(enc) do
    local filename = e:lower() .. "enc.dfu"
    -- don't process an enc file multiple times
    if not used_encodings[filename] then
      local dfufile = kpse.find_file(filename)
      if dfufile then
        load_encfiles(dfufile)
      end
    end
    used_encodings[filename] = true
  end
end

-- results of the \IeC conversions, indexed by the \IeC argument
local cache = {}

-- Replace the \IeC{...} commands in the input with UTF-8 characters.
-- Commands which are not known are kept in the output.
local get_utf8 = function(input)
  local output = input:gsub('\\IeC[%s]*(%b{})',function(iec)
    -- remove \protect commands
    local iec = iec:gsub("\\protect%s*", "")
    local code = cache[iec] or licrs[sanitize_licr(iec)] or '\\IeC '..iec
    -- print(iec, code)
    cache[iec] = code
    return code
  end)
  return output
end

-- parse the idx file produced by tex4ht
-- it replaces the document page numbers by index entry number
-- each index entry can then link to place in the HTML file where the
-- \index command had been used
local parse_idx = function(content)
  -- index entry number
  local current_entry = 0
  -- map between index entry number and corresponding HTML file and destination
  local map = {}
  local buffer = {}

  for line in content:gmatch("([^\n]+)") do
    if line:match("^\\beforeentry") then
      -- increment index entry number
      current_entry = current_entry + 1
      local file, dest, locator = line:match("\\beforeentry%s*{(.-)}{(.-)}{(.-)}")
      -- if the third argument to \beforeentry is not empty,
      -- use it as a index entry locator instead of the index counter
      if locator and locator == "" then locator = nil end
      map[current_entry] = {file = file, dest = dest, locator = locator}
    elseif line:match("^\\indexentry") then
      -- replace the page number with the current
      -- index entry number
      local result = line:gsub("%b{}$", "{"..current_entry .."}")
      buffer[#buffer+1] = get_utf8(result)
    else
      buffer[#buffer+1] = line
    end
  end
  -- return table with page to dest map and updated idx file
  return {map = map, idx = table.concat(buffer, "\n")}
end

local previous
-- replace numbers in .ind file with links back to text
local function replace_index_pages(rest, entries)
  -- keep track of the previous page number
  local count = 0
  local delete_coma = false
  return rest:gsub("(%s*%-*%s*)(,?%s*)(%{?)(%[?)(%d+)(%]?)(%}?)", function(dash, coma, lbrace, lbracket, page, rbracket, rbrace)
    if lbracket == "[" and rbracket == "]" then
      -- don't process numbers in brackets, they are not page numbers
      return nil
    end
    local entry = entries[tonumber(page)]
    count = count + 1
    if entry then
      page = entry.locator or page
      if delete_coma then
        -- if the coma was marked for deletion, remove it. this may happen after line breaks in the index
        coma = ""
      end
      -- if the page number is the same as the previous one, don't create a link
      -- this can happen when we use section numbers as locators. for example,
      -- we could get 1.1 -- 1.1, 1.1, so we want to keep only the first one
      if page == previous then
        previous = page
        -- if the first page number on a line is the same as the previous one, we need to delete the coma,
        -- otherwise the coma will be left in the output
        if count == 1 then
          delete_coma = true
        end
        return ""
      else
        previous = page
        -- don't forget to reset the delete_coma flag after page change
        delete_coma = false
        -- construct link to the index entry
        return dash .. coma.. lbrace .. "\\Link[" .. entry.file .."]{".. entry.dest .."}{}" .. page .."\\EndLink{}" .. rbrace
      end
    else
      return dash .. coma .. lbrace .. lbracket .. page .. rbracket ..
rbrace
    end
  end)
end

-- Move the keyword of an xindex subentry from `rest` to `start`, so the
-- keyword cannot be mistaken for a page number.
-- Returns the updated start and rest.
local function fix_subitems(start, rest)
  -- in xindex, subentries start with a comma, so if the subentry itself is number, it would be mistaken for the page number
  -- the start should contain just \subitem -\
  if start:match("%s*\\subitem %-\\$") then
    -- the keyword in this case is the first item in the rest
    local keyword, newrest = rest:match("(,?[^,]+,)(.+)")
    if keyword and newrest then
      -- join the extracted keyword with the start, newrest should contain only actual page numbers
      return start .. keyword, newrest
    end
  end
  return start, rest
end

-- replace page numbers in the ind file with hyperlinks
-- content: contents of the .ind file
-- idxobj:  object returned by parse_idx, its map field is used
local fix_idx_pages = function(content, idxobj)
  local buffer = {}
  local entries = idxobj.map
  for line in content:gmatch("([^\n]+)") do
    local line, count = line:gsub("(%s*\\%a+[^%[^,]+)(.+)$", function(start,rest)
      -- reset the previous page number
      previous = nil
      start, rest = fix_subitems(start, rest)
      -- there is a problem when index term itself contains numbers, like Bible verses (1:2),
      -- because they will be detected as page numbers too. I cannot find a good solution
      -- that wouldn't break something else.
      -- There can be also commands with numbers in braces. These numbers in braces will be ignored,
      -- as they may be not page numbers
      return start .. replace_index_pages(rest, entries)
    end)
    -- longer index entries may be broken over several lines, in that case, we need to process only numbers
    if count == 0 then
      line = line:gsub("(%s*%d+.+)", function(rest)
        return replace_index_pages(rest, entries)
      end)
    end
    buffer[#buffer+1] = line
  end
  return table.concat(buffer, "\n")
end

-- prepare the .idx file produced by tex4ht
-- for use with Xindy or Makeindex
-- Returns the object made by parse_idx and the name of a temporary .idx
-- file, or nil and an error message when a file cannot be opened.
local prepare_idx = function(filename)
  local f = io.open(filename, "r")
  if not f then return nil, "Cannot open file :".. tostring(filename) end
  local content = f:read("*all")
  -- the input handle used to stay open, because the variable was reused
  -- for the output file without closing it
  f:close()
  local idx = parse_idx(content)
  local idxname = os.tmpname()
  local out, err = io.open(idxname, "w")
  if not out then
    return nil, "Cannot write temporary idx file: " .. tostring(err)
  end
  out:write(idx.idx)
  out:close()
  -- return the object with mapping between dummy page numbers
  -- and link destinations in the files, and the temporary .idx file
  -- these can be used for the processing with the index processor
  return idx, idxname
end

-- add links to a index file
-- Returns true, or nil and an error message when the .ind file cannot be
-- opened.
local process_index = function(indname, idx)
  local f = io.open(indname, "r")
  if not f then return nil, "Cannot open .ind file: " .. tostring(indname) end
  local content = f:read("*all")
  f:close()
  local newcontent = fix_idx_pages(content, idx)
  local out, err = io.open(indname,"w")
  if not out then
    return nil, "Cannot write .ind file: " .. tostring(err)
  end
  out:write(newcontent)
  out:close()
  return true
end

-- name of the .idx file: the idxfile parameter, or the input name with
-- the idx extension
local get_idxname = function(par)
  return par.idxfile or par.input .. ".idx"
end

-- Make the temporary .idx file for the index processor.
-- It sets the idxfile and indfile fields of `par`.
-- Returns the name of the temporary file and the index data, or nil and
-- an error message.
local prepare_tmp_idx = function(par)
  par.idxfile = mkutils.file_in_builddir(get_idxname(par), par)
  if not par.idxfile or not mkutils.file_exists(par.idxfile) then return nil, "Cannot load idx file " .. (par.idxfile or "''") end
  -- construct the .ind name, based on the .idx name
  par.indfile = par.indfile or par.idxfile:gsub("idx$", "ind")
  load_enc()
  -- save hyperlinks and clean the .idx file
  local idxdata, newidxfile = prepare_idx(par.idxfile)
  if not idxdata then
    -- if the prepare_idx function returns nil, the second returned value contains error msg
    return nil, newidxfile
  end
  return newidxfile, idxdata
end

-- Split the index entries which name a target file (\indexentry[name])
-- to separate .idx files.
-- Returns the list of the created files, or nil and an error message
-- when the .idx file doesn't exist.
local splitindex = function(par)
  local files = {}
  local idxfiles = {}
  local buffer
  local idxfile = get_idxname(par)
  if not idxfile or not mkutils.file_exists(idxfile) then return nil, "Cannot load idx file " .. (idxfile or "''") end
  for line in io.lines(idxfile) do
    local file = line:match("indexentry%[(.-)%]")
    if file then
      -- generate idx name for the current output file
      file = par.input .. "-" ..file .. ".idx"
      local current = files[file] or {}
      -- remove file name from the index entry
      local indexentry = line:gsub("indexentry%[.-%]", "indexentry")
      -- save the index entry and preceding line to the current buffer
      table.insert(current, buffer)
      table.insert(current, indexentry)
      files[file] = current
    end
    --
    buffer = line
  end
  -- save idx files
  for filename, contents in pairs(files) do
    log:info("Saving split index file: " .. filename)
    idxfiles[#idxfiles+1] = filename
    local f = io.open(filename, "w")
    f:write(table.concat(contents, "\n"))
    f:close()
  end
  return idxfiles
end

-- Run the index processor and insert links to the created index.
-- command: template of the command, it is expanded with `par`
-- par:     build parameters; the newidxfile field is set before the
--          command is expanded
-- When the index is split, the function calls itself for each part.
local function run_indexing_command (command, par)
  -- detect command name from the command. It will be the first word
  local cmd_name = command:match("^[%a]+") or "indexing"
  local xindylog = logging.new(cmd_name)
  -- support split index
  local subindexes = splitindex(par) or {}
  if #subindexes > 0 then
    -- call the command again on all files produced by splitindex
    for _, subindex in ipairs(subindexes) do
      -- make copy of the parameters
      local t = {}
      for k,v in pairs(par) do t[k] = v end
      t.idxfile = subindex
      run_indexing_command(command, t)
    end
    return nil
  end
  local newidxfile, idxdata = prepare_tmp_idx(par)
  if not newidxfile then
    -- the idxdata will contain error message in the case of error
    xindylog:warning(idxdata)
    return false
  end
  par.newidxfile = newidxfile
  xindylog:debug("Prepared temporary idx file: ", newidxfile)
  -- prepare modules
  local xindy_call = command % par
  xindylog:info(xindy_call)
  -- the result of the command was never used: a missing or broken .ind
  -- file is reported by process_index
  mkutils.execute(xindy_call)
  -- insert correct links to the index
  local status, msg = process_index(par.indfile, idxdata)
  if not status then xindylog:warning(msg) end
  -- remove the temporary idx file
  os.remove(newidxfile)
  -- null the indfile, it is necessary in order to support
  -- multiple indices
  par.indfile = nil
end

M.get_utf8 = get_utf8
M.load_enc = load_enc
M.parse_idx = parse_idx
M.fix_idx_pages = fix_idx_pages
M.prepare_idx = prepare_idx
M.process_index = process_index
M.prepare_tmp_idx =
prepare_tmp_idx
M.run_indexing_command = run_indexing_command
return M

================================================
FILE: make4ht-lib.lua
================================================
-- Simple make system for tex4ht
--kpse.set_program_name("luatex")
-- module(...,package.seeall)
local m = {}
local log = logging.new "make4ht-lib"

Make = {}
--Make.params = {}
Make.build_seq = {}
-- Patterns for matching output filenames
Make.matches = {}
Make.image_patterns = {}
Make.run_count = {}

--- Register a build command as `Make[name]`.
-- Calling `Make:name(p, typ)` afterwards appends one record to
-- `self.build_seq`. The record's params are a fresh table built from
-- `self.params`, then `par`, then the call-time table `p` (later wins).
-- @param name  command name, also the method name created on Make
-- @param fn    command (template string or function), stored as `command`
-- @param par   optional default parameters of this command
-- @param rep   optional value stored as `repetition` (checked in Make.run)
Make.add = function(self,name,fn,par,rep)
  local par = par or {}
  self.params = self.params or {}
  Make[name] = function(self,p,typ)
    local params = {}
    for k,v in pairs(self.params) do params[k] = v end
    for k,v in pairs(par) do params[k] = v; log:info("setting param "..k) end
    local typ = typ or "make"
    local p = p or {}
    local fn = fn
    for k,v in pairs(p) do
      params[k]=v
      log:info("Adding: ",k,v)
    end
    -- print( fn % params)
    local command = {
      name=name,
      type=typ,
      command = fn,
      params = params,
      repetition = rep
    }
    table.insert(self.build_seq,command)
  end
end

--- Number of records queued in the build sequence.
Make.length = function(self)
  return #self.build_seq
end

--- Register a command that is run on every output file matching `pattern`.
Make.match = function(self, pattern, command, params)
  local params = params or {}
  table.insert(self.matches,{pattern = pattern, command = command, params = params})
end

--- Run one matcher record `s` on `filename`.
-- A function command is called as `command(filename, params)`; a string
-- command is expanded with `%` and passed to mkutils.execute.
-- @return the command's results, or `false, message` for any other type
Make.run_command = function(self,filename,s)
  local command = s.command
  local params  = s.params
  params["filename"] = filename
  log:info("parse_lg process file: "..filename)
  --for k,v in pairs(params) do print(k..": "..v) end
  if type(command) == "function" then
    return command(filename,params)
  elseif type(command) == "string" then
    local run = command % params
    log:info("Execute: " .. run)
    return mkutils.execute(run)
  end
  return false, "parse_lg: Command is not string or function"
end

--- Register an image conversion command for outputs matching `pattern`.
Make.image = function(self, pattern, command, params)
  local tab = {
    pattern = pattern,
    command = command,
    params = params
  }
  table.insert(self.image_patterns, tab)
end

--- Run the first matching image pattern on every image record.
-- Each record provides `output`, `page` and `source`, which are copied
-- into the parameter table handed to the command.
-- NOTE(review): when self.params exists, every pattern ends up sharing that
-- very table (pattern params are written into it) -- confirm this is intended.
Make.image_convert = function(self, images)
  local image_patterns = self.image_patterns or {}
  for i, r in pairs(image_patterns) do
    local p = self.params or {}
    local v = r.params or {}
    for k,v in pairs(v) do
      p[k]= v
    end
    image_patterns[i].params = p
  end
  for _,i in ipairs(images) do
    local output = i.output
    for _, x in ipairs(image_patterns) do
      local pattern = x.pattern
      if output:match(pattern) then
        local command = x.command
        local p = x.params or {}
        p.output = output
        p.page= i.page
        p.source = i.source
        if type(command) == "function" then
          command(p)
        elseif type(command) == "string" then
          local c = command % p
          log:info("Make4ht convert: "..c)
          mkutils.execute(c)
        end
        -- only the first matching pattern is used for an image
        break
      end
    end
  end
end

--- Run all registered matchers on a list of files.
-- @param files  sequence of file names
-- @return table mapping each file name to the list of statuses returned by
--         the matchers that were run on it
Make.file_matches = function(self, files)
  local statuses = {}
  -- First make params for all matchers
  for k,v in ipairs(self.matches) do
    local v = self.matches[k].params or {}
    local p = self.params or {}
    for i,j in pairs(p) do
      v[i] = j
    end
    self.matches[k].params = v
  end
  -- Loop over files, run command on matched
  for _, file in ipairs(files)do
    statuses[file] = {}
    for _, s in ipairs(self.matches) do
      local pattern= s.pattern
      if file:match(pattern) then
        local status, msg = self:run_command(file,s)
        msg = msg or "No message given"
        table.insert(statuses[file],status)
        -- an explicit false stops the remaining matchers for this file
        if status == false then
          log:info(msg)
          break
        end
      end
    end
  end
  return statuses
end

-- add files from the mk4 file
-- we must add them to the table generated from the lg file, so they can be processed later
--
--- @param filename  file to run the matchers on and to record
--- @return `false, message` when the file was recorded already, nothing otherwise
Make.add_file = function(self, filename)
  -- self.lgfile should be present, as it is created once the lg_file was parsed for the first time
  local lg = self.lgfile or {}
  -- attach the list to `lg` right away: the insert at the end used to index
  -- lg.files directly and failed when the table had no `files` field yet
  local files = lg.files or {}
  lg.files = files
  -- run filters on the file
  local filtertable = {filename}
  -- should we care about return status?
  self:file_matches(filtertable)
  -- break if the file is present already
  -- start at the end, if it was added by a build file, the file will be likely at the end
  for i = #files,1,-1 do
    if files[i] == filename then return false, "File was already added" end
  end
  -- save the added file to the lg_file
  table.insert(files, filename)
  -- NOTE(review): this stores to self.lg although the table is read from
  -- self.lgfile above -- kept as in the original, confirm which is intended
  self.lg = lg
end

--- Execute the queued build sequence, then process the files listed in the
-- lg file (image conversion and file matchers).
-- Exits the process with status 1 when a command's status differs from
-- `params.correct_exit`.
-- @return list of `{name=..., status=...}` records of the executed commands
Make.run = function(self)
  local return_codes = {}
  local params = self.params or {}
  for _,v in ipairs(self.build_seq) do
    --print("sekvence: "..v.name)
    for p,n in pairs(v.params) do params[p] = n end
    --for c,_ in pairs(params) do print("build param: "..c) end
    if type(v.command)=="function" then
      table.insert(return_codes,{name=v.name,status = v.command(params)})
    elseif type(v.command) =="string" then
      local command = v.command % params
      -- Some commands should be executed only limited times, typically once
      -- tex4ht or t4ht for example
      local run_count = self.run_count[v.command] or 0
      run_count = run_count + 1
      self.run_count[v.command] = run_count
      local repetition = v.repetition
      if repetition and run_count > repetition then
        log:warning (command .." can be executed only "..repetition .."x")
      else
        log:info("executing: " .. command)
        local status = mkutils.execute(command)
        table.insert(return_codes,{name=v.name,status=status})
      end
    else
      log:warning("Unknown command type, must be string or function - " ..v.name..": "..type(v.command))
    end
    local correct_exit = params.correct_exit or nil
    if correct_exit then
      local last_return = return_codes[#return_codes] or {}
      local current_status = last_return.status or 0
      if current_status ~= correct_exit then
        local last_name = last_return.name or "unknown"
        log:fatal("Fatal error. Command "..last_name .." returned exit code "..current_status)
        os.exit(1)
      end
    end
  end
  local lgfile = params.input and params.input .. ".lg" or nil
  -- prefix the build dir only when both parts exist; a nil builddir or a
  -- nil lgfile used to raise a concatenation error here
  if lgfile and params.builddir and params.builddir~="" then
    lgfile = params.builddir .. "/" ..
lgfile end if lgfile then self.lgfile = self.lgfile or mkutils.parse_lg(lgfile, params.builddir) local lg = self.lgfile -- First convert images from lg files self:image_convert(lg["images"]) -- Then run file matchers on lg files and converted images local files = lg["files"] for _,v in ipairs(lg["images"]) do local v = v.output -- print(v) table.insert(files,v) end self:file_matches(files) else log:warning("No lg file. tex4ht run failed?") end return return_codes end m.Make = Make return m --[[Make:add("hello", "hello ${world}", {world = "world"}) Make:add("ajaj", "ajaj") Make:hello() Make:hello{world="světe"} Make:hello() Make:run() --]] ================================================ FILE: make4ht-logging.lua ================================================ -- logging system for make4ht -- inspired by https://github.com/rxi/log.lua local logging = {} local levels = {} -- level of bugs that should be shown -- enable querying of current log level logging.show_level = 1 local max_width = 0 local max_status = 0 logging.use_colors = true logging.modes = { {name = "debug", color = 34}, {name = "info", color = 32}, {name = "status", color = 37}, {name = "warning", color = 33}, {name = "error", color = 31, status = 1}, {name = "fatal", color = 35, status = 2} } -- prepare table with mapping between mode names and corresponding levels function logging.prepare_levels(modes) local modes = modes or logging.modes logging.modes = modes for level, mode in ipairs(modes) do levels[mode.name] = level mode.level = level max_width = math.max(string.len(mode.name), max_width) end end -- the logging level is set once function logging.set_level(name) local level = levels[name] or 1 logging.show_level = level end function logging.print_msg(header, message, color) local color = color or 0 -- use format for collors depending on the use_colors option local header = "[" .. header .. 
"]" local color_format = logging.use_colors and string.format("\27[%im%%s\27[0m%%s", color) or "%s%s" -- the padding is maximal mode name width + brackets + space local padded_header = string.format("%-".. max_width + 3 .. "s", header) print(string.format(color_format, padded_header, message)) end -- function logging.new(module) local obj = { module = module, output = function(self, output) -- used for printing of output of commands if logging.show_level <= (levels["debug"] or 1) then print(output) end end } obj.__index = obj -- make a function for each mode for _, mode in ipairs(logging.modes) do local name = mode.name local color = mode.color local status = mode.status or 0 obj[name] = function(self, ...) -- set make4ht exit status max_status = math.max(status, max_status) -- max width is saved in logging.prepare_levels if mode.level >= logging.show_level then -- support variable number of parameters local table_with_holes = table.pack(...) local table_without_holes = {} -- trick used to support the nil values in the varargs -- https://stackoverflow.com/a/7186820/2467963 for i= 1, table_with_holes.n do table.insert(table_without_holes, tostring(table_with_holes[i]) or "") end local msg = table.concat(table_without_holes, "\t") logging.print_msg(string.upper(name), string.format("%s: %s", self.module, msg), color) end end end return setmetatable({}, obj) end -- exit make4ht with maximal error status function logging.exit_status() os.exit(max_status) end -- prepare default levels logging.prepare_levels() -- for _, mode in ipairs(logging.modes) do -- logging.print_msg(mode.name,"xxxx", mode.color) -- end -- local cls = logging.new("sample") -- cls:warning("hello") -- cls:error("world") -- cls:info("set new level") -- logging.set_level("error") -- cls:info("level set") -- cls:error("just print the error") -- return logging ================================================ FILE: make4ht-xtpipes.lua ================================================ local M = {} local 
mkutils = require "mkutils" local log = logging.new "xtpipes" -- find if tex4ht.jar exists in a path local function find_tex4ht_jar(path) local jar_file = path .. "/tex4ht/bin/tex4ht.jar" return mkutils.file_exists(jar_file) end -- return value of TEXMFROOT variable if it exists and if tex4ht.jar can be located inside local function get_texmfroot() -- user can set TEXMFROOT environmental variable as the last resort local root_directories = {kpse.var_value("TEXMFROOT"), kpse.var_value("TEXMFDIST"), os.getenv("TEXMFROOT")} for _, root in ipairs(root_directories) do if root then if find_tex4ht_jar(root) then return root end -- TeX live locates files in texmf-dist subdirectory, but Miktex doesn't local path = root .. "/texmf-dist" if find_tex4ht_jar(path) then return path end end end end -- Miktex doesn't seem to set TeX variables such as TEXMFROOT -- we will try to find the TeX root using trick with locating package in TeX root -- there is a danger that this file is located in TEXMFHOME, the location will fail then local function find_texmfroot() local tex4ht_path = kpse.find_file("tex4ht.sty") if tex4ht_path then local path = tex4ht_path:gsub("/tex/generic/tex4ht/tex4ht.sty$","") if find_tex4ht_jar(path) then return path end end return nil end function M.get_selfautoparent() return get_texmfroot() or find_texmfroot() end local function replace_lg_file() -- xtpipes expects the lg file to be placed in the current working dir, but with the --build option, -- it is saved in the build dir. So we need to copy that file to the place where it is expected. local params = Make.params local basename = params.input local lg_name = basename .. 
".lg" local lg_in_builddir = mkutils.file_in_builddir(lg_name,params) if lg_name ~= lg_in_builddir and mkutils.file_exists(lg_in_builddir) then log:info("Creating temporary lg_file", lg_name) mkutils.cp(lg_in_builddir, lg_name) return true, lg_name end -- don't copy the Lg file if --build_fir option isn't used return false, lg_name end function M.get_xtpipes(selfautoparent) -- make pattern using TeX distro path local pattern = string.format('java -classpath "%s/tex4ht/bin/tex4ht.jar" xtpipes -i "%s/tex4ht/xtpipes/" -o "${outputfile}" "${filename}"', selfautoparent, selfautoparent) -- call xtpipes on a temporary file local matchfunction = function(filename) -- move the matched file to a temporary file, xtpipes will write it back to the original file local basename = mkutils.remove_extension(filename) local tmpfile = basename ..".tmp" local remove, lg_filename = replace_lg_file() mkutils.mv(filename, tmpfile) local command = pattern % {filename = tmpfile, outputfile = filename} log:info("execute: " ..command) local status, output = mkutils.execute(command) -- remove temporary lg file if it was created if remove then os.remove(lg_filename) end if status > 0 then -- if xtpipes failed to process the file, it may mean that it was bad-formed xml -- we can try to make it well-formed using Tidy local tidy_command = 'tidy -utf8 -xml -asxml -q -o "${filename}" "${tmpfile}"' % {tmpfile = tmpfile, filename = filename} log:warning("Xtpipes failed") -- show_level 1 is debug mode, which prints command output as well -- we need this condition to prevent multiple instances of the output if logging.show_level > 1 then print(output) end log:warning("Trying HTML tidy") log:debug(tidy_command) local status, output = os.execute(tidy_command) if status > 0 then -- if tidy failed as well, just use the original file -- it will probably produce corrupted ODT file though log:warning("Tidy failed as well") if logging.show_level > 1 then print(output) end mkutils.mv(tmpfile, filename) end end 
end return matchfunction end -- This function moves the last added file matching function to the first place -- in the execution order. This ensures that filters are executed in the -- correct order. function M.move_matches(make) local matches = make.matches local last = matches[#matches] table.insert(matches, 1, last) matches[#matches] = nil end M.get_texmfroot = get_texmfroot M.find_texmfroot = find_texmfroot M.find_tex4ht_jar = find_tex4ht_jar return M ================================================ FILE: mkparams.lua ================================================ local lapp = require "lapp-mk4" local mkutils = require "mkutils" local m = {} -- use ugly module system for new lua versions support local log = logging.new "mkparams" -- these two variables will be used in the version number -- progname will be set in get_args m.progname = "make4ht" -- set the version number before call to process_args() m.version_number = "v0.1" m.optiontext = [[ ${progname} - build system for TeX4ht Usage: ${progname} [options] filename ["tex4ht.sty op."] ["tex4ht op."] ["t4ht op"] ["latex op"] Available options: -a,--loglevel (default status) Set log level. possible values: debug, info, status, warning, error, fatal -b,--backend (default tex4ht) Backend used for xml generation. 
possible values: tex4ht or lua4ht -c,--config (default xhtml) Custom config file -d,--output-dir (default nil) Output directory -B,--build-dir (default nil) Build directory -e,--build-file (default nil) If build file is different than `filename`.mk4 -f,--format (default html5) Output file format -h,--help Display this message -j,--jobname (default nil) Set the jobname -l,--lua Use lualatex for document compilation -m,--mode (default default) Switch which can be used in the makefile -n,--no-tex4ht Disable dvi file processing with the tex4ht command -s,--shell-escape Enables running external programs from LaTeX -u,--utf8 [obsolete] The document is generated in UTF8 encoding by default -v,--version Display version number -x,--xetex Use xelatex for document compilation ]] -- test if the current command line argument should be passed to tex4ht, t4ht or latex local function is_escapedargument(arg) -- we need to ignore make4ht options which can be used without filename, ie --version and --help local ignored_options = {["-h"]=true, ["--help"]=true, ["-v"] = true, ["--version"]=true} if ignored_options[arg] then return false end -- in other cases, match if the argument starts with "-" character return arg:match("^%-.+") end local function get_args(parameters, optiontext) local parameters = parameters or {} parameters.progname = parameters.progname or "make4ht" parameters.issue_tracker = parameters.issue_tracker or "https://github.com/michal-h21/make4ht/issues" parameters.postparams = parameters.postparams or "" local optiontext = optiontext or m.optiontext parameters.postfile = parameters.postfile or "" optiontext = optiontext .. 
parameters.postparams ..[[ (string) Input file name Positional optional arguments: ["tex4ht.sty op."] Additional parameters for tex4ht.sty ["tex4ht op."] Options for tex4ht command ["t4ht op"] Options for t4ht command ["latex op"] Additional options for LaTeX Documentation: https://tug.org/applications/tex4ht/mn.html Issue tracker for tex4ht bugs: https://puszcza.gnu.org.ua/bugs/?group=tex4ht Issue tracker for ${progname} bugs: ${issue_tracker} ]] .. parameters.postfile -- we can pass arguments for tex4ht and t4ht after filename, but it will confuse lapp, thinking that these -- options are for make4ht. this may result in execution error or wrong option parsing -- as fix, add a space before options at the end (we need to stop to add spaces as soon as we find -- nonempty string which doesn't start with - it will be filename or tex4ht.sty options if #arg > 1 then -- do this only if more than one argument is used for i=#arg,1,-1 do local current = arg[i] if is_escapedargument(arg[i]) then arg[i] = " ".. arg[i] -- empty parameter elseif current == "" then else break end end end --print("--------------\n" .. optiontext .."--------------\n") return lapp(optiontext % parameters) end --- get outptut file format and list of extensions from --format option string local function get_format_extensions(format_string) local format, rest = format_string:match("^([a-zA-Z0-9]+)(.*)") local extensions = {} -- it is possible to pass only the extensions rest = rest or format_string rest:gsub("([%+%-])([^%+^%-]+)",function(typ, name) table.insert(extensions, {type = typ, name = name}) end) return format, extensions end -- try to make safe filename local function escape_filename(input) -- quoting don't work on Windows, so we will just if os.type == "windows" then return '"' .. input .. '"' else -- single quotes are safe in Unix return "'" .. input .. 
"'" end end -- detect if user specified -jobname in arguments to the TeX engine -- or used the --jobname option for make4ht local function handle_jobname(input, args) -- parameters to the TeX engine local latex_params = {} local latex_cli_params = args[4] or "" -- use the jobname as input name if it is specified local jobname = args.jobname ~="nil" and args.jobname or nil if jobname or not latex_cli_params:match("%-jobname") then -- prefer jobname over input input = jobname or input -- we must strip out directories from jobname when full path to document is given input = input:match("([^%/^%\\]+)$") -- input also cannot contain spaces, replace them with underscores input = input:gsub("%s", "_") table.insert(latex_params,"-jobname=".. escape_filename(input)) else -- when user specifies -jobname, we must change name of the input file, -- in order to be able to process correct dvi file with tex4ht and t4ht local newinput -- first contains quotation character or first character of the name local first, rest = latex_cli_params:match("%-jobname%s*=?%s*(.)(.*)") if first=='"' then newinput=rest:match('([^"]+)') elseif first=="'" then newinput=rest:match("([^']+)") elseif type(first)== "string" then -- if the jobname is unquoted, it cannot contain space -- join the first character and rest rest = first.. rest newinput = rest:match("([^ ]+)") end if newinput then input = newinput end end -- table.insert(latex_params, latex_cli_params) return latex_params, input end local function tex_file_not_exits(tex_file) -- try to find the input file, return false if we cannot find it return not (kpse.find_file(tex_file, "tex") or kpse.find_file(tex_file .. ".tex", "tex")) end -- use standard input instead of file if the filename is just `-` -- return the filename and status if it is a tmp name local function handle_input_file(filename) -- return the original file name if it isn't just dash if filename ~= "-" then return filename, false end -- generate the temporary name. 
the added extension is important local tmp_name = os.tmpname() local contents = io.read("*all") local f = io.open(tmp_name, "w") f:write(contents) f:close() return tmp_name, true end local function process_args(args) local function get_inserter(args,tb) return function(key, value) --local v = args[key] and value or "" local v = "" if args[key] then v = value end table.insert(tb,v) end end -- set error log level logging.set_level(args.loglevel) -- the default LaTeX --interaction parameter local interaction = "batchmode" if args.loglevel == "debug" then interaction = "errorstopmode" end if args.version ==true then print(string.format("%s version %s", m.progname, m.version_number)) os.exit() end local outdir = "" local packages = "" if args["output-dir"] ~= "nil" then outdir = args["output-dir"] or "" outdir = outdir:gsub('\\','/') outdir = outdir:gsub('/$','') end local builddir = "" if args["build-dir"] ~= "nil" then builddir = args["build-dir"] or "" builddir = builddir:gsub('\\','/') builddir = builddir:gsub('/$','') end -- make4ht now requires UTF-8 output, because of DOM filters -- numeric entites are expanded to Unicode characters. These -- characters would be displayed incorrectly in 8 bit encodings. args.utf8 = true if args.backend == "lua4ht" then args.lua = true args.xetex = nil args.utf8 = true args["no-tex4ht"] = true packages = packages .."\\RequirePackage{lua4ht}" end local compiler = args.lua and "dvilualatex" or args.xetex and "xelatex --no-pdf" or "latex" local tex_file, is_tmp_file = handle_input_file(args.filename) -- test if the file exists if not is_tmp_file and tex_file_not_exits(tex_file) then log:warning("Cannot find input file: " .. 
tex_file)
  end
  local input = mkutils.remove_extension(tex_file)
  -- the output file name can be influenced using -jobname parameter passed to the TeX engine
  local latex_params, input = handle_jobname(input, args)
  local insert_latex = get_inserter(args,latex_params)
  insert_latex("shell-escape","-shell-escape")
  --table.insert(latex_params,args["shell-escape"] and "-shell-escape")
  local t4sty = args[1] or ""
  -- test if first option is custom config file
  local cfg_tmp = t4sty:match("([^,^ ]+)")
  if cfg_tmp and cfg_tmp ~= args.config then
    local fn = cfg_tmp..".cfg"
    local f = io.open(fn,"r")
    if f then
      args.config = cfg_tmp
      f:close()
    end
  end
  --[[if args[1] and args[1] ~= "" then
  t4sty = args[1]
  else
  --]]
  -- Different behaviour from htlatex
  local utf = args.utf8 and ",charset=utf-8" or ""
  t4sty = args.config .. "," .. t4sty .. utf
  --end
  local tex4ht = ""
  local dvi= args.xetex and "xdv" or "dvi"
  if args[2] and args[2] ~="" then
    tex4ht = args[2]
  else
    tex4ht = args.utf8 and " -cmozhtf -utf8" or ""
  end
  -- set the correct extension for tex4ht if xetex is used
  if args.xetex then tex4ht = tex4ht .. " -.xdv" end
  local t4ht = args[3] or ""
  local mode = args.mode or "default"
  local build_file = input.. ".mk4"
  if args["build-file"] and args["build-file"] ~= "nil" then
    build_file = args["build-file"]
  end
  local outformat, extensions
  -- test the parsed `args` table; the original tested the global `arg`
  -- (raw command line), so the "nil" placeholder was never recognised
  if args["format"] and args["format"] ~= "nil" then
    outformat, extensions = get_format_extensions(args["format"])
  end
  local parameters = {
    htlatex = compiler
    ,input=input
    ,tex_file=tex_file
    ,packages=packages
    ,latex_par=table.concat(latex_params," ")
    --,config=ebookutils.remove_extension(args.config)
    ,tex4ht_sty_par=t4sty
    ,tex4ht_par=tex4ht
    ,t4ht_par=t4ht
    ,mode = mode
    ,dvi = dvi
    ,build_file = build_file
    ,output_format = outformat
    ,extensions = extensions
    ,is_tmp_file = is_tmp_file
    ,interaction = interaction
    --,t4ht_dir_format=t4ht_dir_format
  }
  if outdir then parameters.outdir = outdir end
  if builddir then parameters.builddir = builddir end
  log:info("Output dir: "..outdir)
  log:info("Compiler: "..compiler)
  log:info("Latex options: ".. table.concat(latex_params," "))
  log:info("tex4ht.sty: "..t4sty)
  log:info("tex4ht: "..tex4ht)
  log:info("build_file: ".. build_file)
  -- outformat is nil when no format name was given (for example only
  -- "+extension" parts); concatenating it unguarded raised an error
  if outformat then
    log:info("Output format: ".. outformat)
  end
  for _, ex in ipairs(extensions or {}) do
    log:info("Extension: ".. ex.type .. ex.name)
  end
  return parameters
end

m.get_args = get_args
m.get_format_extensions = get_format_extensions
m.process_args = process_args
return m

================================================
FILE: mkutils.lua
================================================
module(...,package.seeall)

local log = logging.new("mkutils")

local make4ht = require("make4ht-lib")
local mkparams = require("mkparams")
local indexing = require("make4ht-indexing")

--template engine
--- Replace every `${key}` in `s` with `tab[key]`; placeholders without a
-- (truthy) value in `tab` are left untouched.
function interp(s, tab)
  local tab = tab or {}
  return (s:gsub('($%b{})', function(w) return tab[w:sub(3, -2)] or w end))
end
--print( interp("${name} is ${value}", {name = "foo", value = "bar"}) )

--- Append `prop` to `s` separated by a space; returns `s` when `prop` is nil.
function addProperty(s,prop)
  if prop ~=nil then
    return s .." "..prop
  else
    return s
  end
end

-- install the helpers as string operators: `s % tab` and `s + prop`
getmetatable("").__mod = interp
getmetatable("").__add = addProperty

--print( "${name} is ${value}" % {name = "foo", value = "bar"} )
-- Outputs "foo is bar"

--- Truthy when `path` starts with letters followed by "://".
function is_url(path)
  return path:match("^%a+://")
end

-- merge two tables recursively
-- values from t2 overwrite t1, nested tables are merged; returns t1
function merge(t1, t2)
  for k, v in pairs(t2) do
    if (type(v) == "table") and (type(t1[k] or false) == "table") then
      merge(t1[k], t2[k])
    else
      t1[k] = v
    end
  end
  return t1
end

--- Split the string on the characters in `sep` (default ":"); empty
-- fields are not returned.
function string:split(sep)
  local sep, fields = sep or ":", {}
  local pattern = string.format("([^%s]+)", sep)
  self:gsub(pattern, function(c) fields[#fields+1] = c end)
  return fields
end

--- Strip the last ".ext" from `path`; returns `path` unchanged when the
-- last path component has no extension.
function remove_extension(path)
  -- the extension must not contain a path separator, otherwise a dot in a
  -- directory part is taken for it ("../doc" became ".", "dir.d/file" became "dir")
  local found, len, remainder = string.find(path, "^(.*)%.[^%./\\]*$")
  if found then
    return remainder
  else
    return path
  end
end

--
-- check if file exists
function file_exists(file)
  local f = io.open(file, "rb")
  if f then f:close() end
  return f ~= nil
end

-- check if Lua module exists
-- source: https://stackoverflow.com/a/15434737/2467963
function isModuleAvailable(name)
  if package.loaded[name] then
    return true
  else
    for _, searcher in ipairs(package.searchers or package.loaders) do
      local loader = searcher(name)
      if type(loader) == 'function' then
        -- remember the loader so a later require can use it
        package.preload[name] = loader
        return true
      end
    end
    return false
  end
end

-- searching for converted images
--- Parse the lg file `filename`.
-- @param filename  path of the lg file
-- @param builddir  build directory that is prefixed to listed files ("" or nil for none)
-- @return table `{files = ..., images = ...}` and a status that is true
--         when the file was read and nil when it doesn't exist
function parse_lg(filename, builddir)
  log:info("Parse LG")
  -- a nil builddir is treated like the empty one instead of failing on concatenation
  local dir = builddir and builddir~="" and builddir .. "/" or ""
  local outputimages,outputfiles,status={},{},nil
  local fonts, used_fonts = {},{}
  if not file_exists(filename) then
    log:warning("Cannot read log file: "..filename)
  else
    local usedfiles={}
    for line in io.lines(filename) do
      --- needs --- pokus.idv[1] ==> pokus0x.png ---
      -- line:gsub("needs --- (.+?)[([0-9]+) ==> ([%a%d%p%.%-%_]*)",function(name,page,k) table.insert(outputimages,k)end)
      line:gsub("needs %-%-%- (.+)%[([0-9]+)%] ==> (.*) %-%-%-",
        function(file,page,output)
          local rec = {
            source=file,
            page=page,
            output=dir..output
          }
          table.insert(outputimages,rec)
        end
      )
      line:gsub("File: (.*)", function(x)
        -- prefer the file in the build dir, fall back to the name as listed
        local k = dir .. x
        if not file_exists(k) then
          k = x
        end
        -- record every file only once
        if not usedfiles[k] then
          table.insert(outputfiles,k)
          usedfiles[k] = true
        end
      end)
      line:gsub("htfcss: ([^%s]+)(.*)",function(k,r)
        local fields = {}
        r:gsub("[%s]*([^%:]+):[%s]*([^;]+);",function(c,v)
          fields[c] = v
        end)
        fonts[k] = fields
      end)
      -- the literal parenthesis must be escaped, a bare "(" opens a capture
      -- that is never closed
      line:gsub('Font%("([^"]+)","([%d]+)","([%d]+)","([%d]+)"',function(n,s1,s2,s3)
        table.insert(used_fonts,{n,s1,s2,s3})
      end)
    end
    status=true
  end
  return {files = outputfiles, images = outputimages},status
end

--
local cp_func = os.type == "unix" and "cp" or "copy"
-- maybe it would be better to actually move the files
-- in reality it isn't.
-- local cp_func = os.type == "unix" and "mv" or "move"
--- Copy `src` to `dest` with the system copy command. URLs are skipped;
-- a `src` that doesn't exist is looked up with kpse first.
function cp(src,dest)
  if is_url(src) then
    -- method call: with `log.info(...)` the message was passed as `self`
    -- and never printed
    log:info(src .. " is a URL, will leave as is")
    return
  end
  if not file_exists(src) then
    -- try to find file using kpse library if it cannot be found
    src = kpse.find_file(src) or src
  end
  local command = string.format('%s "%s" "%s"', cp_func, src, dest)
  if cp_func == "copy" then command = command:gsub("/",'\\') end
  log:info("Copy: "..command)
  if not file_exists(src) then
    log:error("File " .. src .. " doesn't exist")
  end
  os.execute(command)
end

--- Move `src` to `dest` with the system move command.
function mv(src, dest)
  -- no trailing space in the command name: the format string adds the
  -- separator, and the test against "move" below could never succeed before
  local mv_func = os.type == "unix" and "mv" or "move"
  local command = string.format('%s "%s" "%s"', mv_func, src, dest)
  -- fix windows paths
  if mv_func == "move" then command = command:gsub("/",'\\') end
  log:info("Move: ".. command)
  os.execute(command)
end

--- Remove directory `path` with the system command.
-- NOTE(review): the path is appended unquoted -- confirm callers never pass
-- paths containing spaces.
function delete_dir(path)
  local cmd = os.type == "unix" and "rm -rd " or "rd /s/q "
  os.execute(cmd .. path)
end

local used_dir = {}

--- Split `path` on "/" into its directory components, dropping the last
-- component. A leading "~" is replaced by the parts of $HOME.
-- @return the list of components and the list joined with "/"
function prepare_path(path)
  --local dirs = path:split("/")
  local dirs = {}
  if path:match("^/") then dirs = {""}
  elseif path:match("^~") then
    local home = os.getenv "HOME"
    dirs = home:split "/"
    path = path:gsub("^~/","")
    table.insert(dirs,1,"")
  end
  -- a trailing slash gets a dummy last component, so that the removal
  -- below doesn't drop a real directory
  if path:match("/$")then path = path .. " " end
  for _,d in pairs(path:split "/") do
    table.insert(dirs,d)
  end
  table.remove(dirs,#dirs)
  return dirs,table.concat(dirs,"/")
end

-- Find which part of path already exists
-- and which directories have to be created
-- (changes the current directory with lfs.chdir while probing)
function find_directories(dirs, pos)
  local pos = pos or #dirs
  -- we tried whole path and no dir exist
  if pos < 1 then return dirs end
  local path = ""
  -- in the case of unix absolute path, empty string is inserted in dirs
  if pos == 1 and dirs[pos] == "" then
    path = "/"
  else
    path = table.concat(dirs,"/", 1,pos) .. "/"
  end
  if not lfs.chdir(path) then
    -- recursion until we successfully changed dir
    -- or there are no elements in the dir table
    return find_directories(dirs,pos - 1)
  elseif pos ~= #dirs then
    -- if we successfully changed dir
    -- and we have dirs to create
    local p = {}
    for i = pos+1, #dirs do
      table.insert(p, dirs[i])
    end
    return p
  else
    -- whole path exists
    return {}
  end
end

function mkdirectories(dirs)
  if type(dirs) ~="table" then
    return false, "mkdirectories: dirs is not table"
  end
  local path = ""
  for _,d in ipairs(dirs) do
    path = path .. d ..
"/" local stat,msg = lfs.mkdir(path) if not stat then return false, "makedirectories error: "..msg end end return true end function make_path(path) -- we must create the build dir if it doesn't exist local cwd = lfs.currentdir() -- add dummy /foo dir. it won't be created, but without that, the top-level dir wouldn't be created local parts = mkutils.prepare_path(path .. "/foo") local to_create = mkutils.find_directories(parts) mkutils.mkdirectories(to_create) -- change back to the original dir lfs.chdir(cwd) end function file_in_builddir(filename, par) if par.builddir and par.builddir ~= "" then local newname = par.builddir .. "/" .. filename return newname end return filename end function copy_filter(src,dest, filter) local src_f=io.open(src,"rb") local dst_f=io.open(dest,"w") local contents = src_f:read("*all") local filter = filter or function(s) return s end src_f:close() dst_f:write(filter(contents)) dst_f:close() end function copy(filename,outfilename) local currdir = lfs.currentdir() if filename == outfilename then return true end local parts, path = prepare_path(outfilename) if not used_dir[path] then local to_create, msg = find_directories(parts) if not to_create then log:warning(msg) return false end used_dir[path] = true local stat, msg = mkdirectories(to_create) if not stat then log:warning(msg) end end lfs.chdir(currdir) cp(filename, path) return true end function execute(command) local f = io.popen(command, "r") local output = f:read("*all") -- rc will contain return codes of the executed command local rc = {f:close()} -- the status code is on the third position -- https://stackoverflow.com/a/14031974/2467963 local status = rc[3] -- print the command line output only when requested through -- log level log:output(output) return status, output end -- find the zip command function find_zip() if io.popen("zip -v","r"):close() then return "zip" elseif io.popen("miktex-zip -v","r"):close() then return "miktex-zip" end -- we cannot find the zip command 
return "zip" end -- Config loading local function run(untrusted_code, env) if untrusted_code:byte(1) == 27 then return nil, "binary bytecode prohibited" end local untrusted_function = nil untrusted_function, message = load(untrusted_code, nil, "t",env) if not untrusted_function then return nil, message end if not setfenv then setfenv = function(a,b) return true end end setfenv(untrusted_function, env) return pcall(untrusted_function) end function escape_pattern(str) -- escape all magic characters in the string, so it can be used as a literal pattern return (str:gsub("([%(%)%.%%%+%-%*%?%[%]%^%$])", "%%%1")) end local main_settings = {} main_settings.fonts = {} -- use global environment in the build file -- it used to be sandboxed, but it proved not to be useful at all local env = _G ---{} -- explicitly enale some functions and modules in the sandbox -- Function declarations: env.pairs = pairs env.ipairs = ipairs env.print = print env.split = split env.string = string env.table = table env.copy = copy env.tonumber = tonumber env.tostring = tostring env.mkdirectories = mkdirectories env.require = require env.texio = texio env.type = type env.lfs = lfs env.os = os env.io = io env.math = math env.unicode = unicode env.logging = logging -- it is necessary to use the settings table -- set in the Make environment by mkutils function env.set_settings(par) local settings = env.settings for k,v in pairs(par) do settings[k] = v end end -- Add a value to the current settings function env.settings_add(par) local settings = env.settings for k,v in pairs(par) do local oldval = settings[k] or "" settings[k] = oldval .. 
v end end function env.get_filter_settings(name) local settings = env.settings -- local settings = self.params local filters = settings.filter or {} local filter_options = filters[name] or {} return filter_options end function env.filter_settings(name) -- local settings = Make.params local settings = env.settings local filters = settings.filter or {} local filter_options = filters[name] or {} return function(par) filters[name] = merge(filter_options, par) settings.filter = filters end end env.Font = function(s) local font_name = s["name"] if not font_name then return nil, "Cannot find font name" end env.settings.fonts[font_name] = s end env.Make = make4ht.Make env.Make.params = env.settings env.Make:add("test","test the variables: ${tex4ht_sty_par} ${htlatex} ${input} ${config}") local htlatex = require "make4ht-htlatex" env.Make:add("htlatex", htlatex.htlatex ,{correct_exit=0}) env.Make:add("httex", htlatex.httex, { htlatex = "etex", correct_exit=0 }) env.Make:add("latexmk", function(par) local settings = get_filter_settings "htlatex" or {} par.interaction = par.interaction or settings.interaction or "batchmode" local command = Make.latex_command -- add " %O " after the engine name. it should be filled by latexmk command = command:gsub("%s", " %%O ", 1) par.expanded = command % par -- quotes in latex_command must be escaped, they cause Latexmk error par.expanded = par.expanded:gsub('"', '\\"') local newcommand = 'latexmk -pdf- -ps- -auxdir=${builddir} -outdir=${builddir} -latex="${expanded}" -dvi -jobname=${input} ${tex_file}' % par log:info("LaTeX call: " .. newcommand) os.execute(newcommand) return Make.testlogfile(par) end, {correct_exit= 0}) -- env.Make:add("tex4ht","tex4ht ${tex4ht_par} \"${input}.${dvi}\"", nil, 1) env.Make:add("tex4ht",function(par) -- detect if svg output is used -- if yes, we need to pass the -g.svg option to tex4ht command -- to support svg images for character pictures local logfile = mkutils.file_in_builddir(par.input .. 
".log", par) if file_exists(logfile) then for line in io.lines(logfile) do local options = line:match("TeX4ht package options:(.+)") if options then log:info(options) if options:match("svg") then par.tex4ht_par = (par.tex4ht_par or "") .. " -g.svg" end break end end end local cwd = lfs.currentdir() if par.builddir~="" then lfs.chdir(par.builddir) end local command = "tex4ht ${tex4ht_par} \"${input}.${dvi}\"" % par log:info("executing: " .. command) local status, output = execute(command) lfs.chdir(cwd) return status, output end , nil, 1) env.Make:add("t4ht", function(par) par.ext = "dvi" local cwd = lfs.currentdir() if par.builddir ~= "" then lfs.chdir(par.builddir) end local command = "t4ht ${t4ht_par} \"${input}.${ext}\"" % par log:info("executing: " .. command) execute(command) lfs.chdir(cwd) end ) env.Make:add("clean", function(par) -- remove all functions that process produced files -- we will provide only one function, that remove all of them Make.matches = {} local main_name = mkutils.file_in_builddir( par.input, par) local remove_file = function(filename) if file_exists(filename) then log:info("removing file: " .. filename) os.remove(filename) end end -- try to find if the last converted file was in the ODT format local lg_name = main_name .. ".lg" local lg_file = parse_lg(lg_name, par.builddir) local is_odt = false if lg_file and lg_file.files then for _, x in ipairs(lg_file.files) do is_odt = x:match("odt$") or is_odt end end if is_odt then Make:match("4om$",function(filename) -- math temporary file local to_remove = filename:gsub("4om$", "tmp") remove_file(to_remove) return false end) Make:match("4og$", remove_file) end Make:match("tmp$", function() -- remove temporary and auxilary files for _,ext in ipairs {"aux", "xref", "tmp", "4tc", "4ct", "idv", "lg","dvi", "log", "ncx", "idx", "ind"} do remove_file(main_name .. "." .. 
ext)
    end
  end)
  Make:match(".*", function(filename, par)
    -- remove only files that contain the input file basename
    -- this should prevent removing of images. this also means that
    -- images shouldn't be named after the input file (input-hello.png,
    -- for example)
    -- NOTE: the plain-text find accepts main_name anywhere in the file name,
    -- not only at its start
    if filename:find(main_name, 1,true) then
      -- log:info("Matched file", filename)
      remove_file(filename)
    end
  end)
end)

-- enable extension in the config file
-- the following two functions must be here and not in make4ht-lib.lua
-- because of the access to env.settings
env.Make.enable_extension = function(self,name)
  table.insert(env.settings.extensions, {type="+", name=name})
end

-- disable extension in the config file
env.Make.disable_extension = function(self,name)
  table.insert(env.settings.extensions, {type="-", name=name})
end

--- Load a config (build) file and execute it in the shared environment.
-- Extension requests that are already present in the settings are set aside
-- while the file runs and appended after the requests made by the file.
-- @param settings settings table; main_settings is used when it is nil
-- @param config_name name of the config file; when it is not found directly,
-- it is searched for using kpse in the texmfscripts tree
-- @return the environment table; it is returned also when the file cannot be
-- opened. An error is raised when the code of the file fails.
function load_config(settings, config_name)
  local settings = settings or main_settings
  -- the extensions requested from the command line should take precedence over
  -- extensions enabled in the config file
  -- (an unset list is treated as empty, ipairs(nil) would raise an error)
  local saved_extensions = settings.extensions or {}
  settings.extensions = {}
  env.settings = settings
  env.mode = settings.mode
  if config_name and not file_exists(config_name) then
    config_name = kpse.find_file(config_name, 'texmfscripts') or config_name
  end
  -- io.open raises an error for a nil file name, so test the name first
  local f = config_name and io.open(config_name,"r")
  if not f then
    log:info("Cannot open config file", config_name)
    -- nothing was loaded: hand the saved extension requests back instead of
    -- leaving the list empty
    settings.extensions = saved_extensions
    return env
  end
  log:info("Using build file", config_name)
  local code = f:read("*all")
  -- the handle is not needed once the contents are in memory
  f:close()
  local fn, msg = run(code,env)
  if not fn then log:warning(msg) end
  assert(fn)
  -- reload extensions from command line arguments for the "format" parameter
  for _,v in ipairs(saved_extensions) do
    table.insert(settings.extensions, v)
  end
  return env
end

-- build the index using texindy; encoding, language and modules can be set
-- in the settings of the "xindy" filter or in the parameters
env.Make:add("xindy", function(par)
  local xindylog = logging.new "xindy"
  local settings = get_filter_settings "xindy" or {}
  par.encoding = settings.encoding or par.encoding or "utf8"
  par.language = settings.language or par.language or "english"
  local modules = settings.modules or par.modules or {}
  local t = {}
  for k,v in ipairs(modules) do
xindylog:debug("Loading module: " ..v)
    -- every module becomes one -M option
    table.insert(t, "-M " .. v)
  end
  par.moduleopt = table.concat(t, " ")
  return indexing.run_indexing_command("texindy -L ${language} -C ${encoding} ${moduleopt} -o ${indfile} ${newidxfile}", par)
end, {})

-- build the index using makeindex; options come from the settings of the
-- "makeindex" filter or from the parameters
env.Make:add("makeindex", function(par)
  local makeindex_settings = get_filter_settings "makeindex" or {}
  par.options = makeindex_settings.options or par.options or ""
  par.ilgfile = par.input .. ".ilg"
  -- the extra parentheses keep only the first result of the call
  return (indexing.run_indexing_command("makeindex ${options} -t ${ilgfile} -o ${indfile} ${newidxfile}", par))
end, {})

-- build the index using xindex; options and language come from the settings
-- of the "xindex" filter or from the parameters
env.Make:add("xindex", function(par)
  local xindex_settings = get_filter_settings "xindex" or {}
  par.options = xindex_settings.options or par.options or ""
  par.language = xindex_settings.language or par.language or "en"
  -- the extra parentheses keep only the first result of the call
  return (indexing.run_indexing_command("xindex -l ${language} ${options} -o ${indfile} ${newidxfile}", par))
end, {})

--- Find the file of a Lua module using kpse.
-- The dots in the module name are replaced with slashes.
-- @param name module name in the form used by require
-- @return result of the kpse.find_file call
local function find_lua_file(name)
  local extension_path = name:gsub("%.", "/") ..
".lua" return kpse.find_file(extension_path, "lua") end -- for the BibLaTeX support env.Make:add("biber", "biber ${input}") env.Make:add("bibtex", "bibtex ${input}") env.Make:add("pythontex", "pythontex ${input}") --- load the output format plugins function load_output_format(format_name) local format_library = "make4ht.formats.make4ht-"..format_name local is_format_file = find_lua_file(format_library) if is_format_file then local format = assert(require(format_library)) if format then format.prepare_extensions = format.prepare_extensions or function(extensions) return extensions end format.modify_build = format.modify_build or function(make) return make end end return format end end --- Execute the prepare_parameters function in list of extensions function extensions_prepare_parameters(extensions, parameters) for _, ext in ipairs(extensions) do -- execute the extension only if it contains prepare_parameters function local fn = ext.prepare_parameters if fn then parameters = fn(parameters) end end return parameters end --- Modify the build sequence using extensions -- @param extensions list of extensions -- @make Make object function extensions_modify_build(extensions, make) for _, ext in ipairs(extensions) do local fn = ext.modify_build if fn then make = fn(make) end end return make end --- load one extension -- @param name extension name -- @param format current output format function load_extension(name,format) -- first test if the extension exists local extension_library = "make4ht.extensions.make4ht-ext-" .. name local is_extension_file = find_lua_file(extension_library) -- don't try to load the extension if it doesn't exist if not is_extension_file then return nil, "cannot fint extension " .. name end local extension = nil local local_extension_path = package.searchpath(extension_library, package.path) if local_extension_path then extension = dofile(local_extension_path) else extension = require("make4ht.extensions.make4ht-ext-".. 
name)
  end
  -- extensions can test if the current output format is supported
  local test = extension.test
  if test then
    if test(format) then
      return extension
    end
    -- if the test fail return nil
    return nil, "extension " .. name .. " is not supported in the " .. format .. " format"
  end
  -- if the extension doesn't provide the test function, we will assume that
  -- it supports every output format
  return extension
end

--- load extensions
-- @param extensions table created by mkparams.get_format_extensions function
-- @param format output type format. extensions may support only certain file
-- formats
-- @return list of the loaded extensions, ordered by the first request that
-- enabled each of them
function load_extensions(extensions, format)
  local module_names = {}
  local extension_table = {}
  local extension_sequence = {}
  -- names that already have a place in extension_sequence. it must be kept
  -- apart from module_names: that table is reset when an extension is
  -- disabled, so a sequence like +a-a+a used to queue and load "a" twice
  local queued = {}
  -- process the extension table. it contains type field, which can enable or
  -- disable the extension
  for _, v in ipairs(extensions) do
    local enable = v.type == "+" and true or nil
    -- load extensions in a correct order
    -- don't load extensions multiple times
    if enable and not queued[v.name] then
      queued[v.name] = true
      table.insert(extension_sequence, v.name)
    end
    -- the last extension request can disable it
    module_names[v.name] = enable
  end
  for _, name in ipairs(extension_sequence) do
    -- the extension can be inserted into the extension_sequence, but disabled
    -- later.
    if module_names[name] == true then
      local extension, msg= load_extension(name,format)
      if extension then
        log:info("Load extension", name)
        table.insert(extension_table, extension)
      else
        log:warning("Cannot load extension: ".. name)
        log:warning(msg)
      end
    end
  end
  return extension_table
end

--- add new extensions to a list of loaded extensions
-- @param added string with extensions to be added in the form +ext1+ext2
-- @param extensions list of the existing extension requests, it can be nil
-- @return list with the new requests followed by the existing ones
function add_extensions(added, extensions)
  local _, newextensions = mkparams.get_format_extensions("dummyfmt" ..
added)
  -- insert new extension at the beginning, in order to support disabling using
  -- the -f option
  -- (the existing requests are appended after the newly parsed ones)
  for _, x in ipairs(extensions or {}) do table.insert(newextensions, x) end
  return newextensions
end

-- I don't know if this is clean, but settings functions won't be available
-- for filters and extensions otherwise
-- (every key of env is copied to the global table)
for k,v in pairs(env) do _G[k] = v end
================================================ FILE: test/dom-test.lua ================================================
-- busted tests for the DOM object provided by the luaxml-domobject module
require "busted.runner" ()
kpse.set_program_name "luatex"
local dom = require "luaxml-domobject"
describe("Basic DOM functions", function()
  -- sample document parsed by the tests below
  -- NOTE(review): the markup inside this string seems to be missing from this
  -- copy of the file; verify it against the repository
  local document = [[ pokus

    pokus

    nazdar

    ]]
  -- DOM object shared by all tests in this file
  local obj = dom.parse(document)
  it("It should parse XML", function()
    -- this used to be assert.truthy(type(obj), "table"), which can never
    -- fail: the second argument of truthy is only the failure message
    assert.are.equal("table", type(obj))
    assert.truthy(obj:root_node())
  end)
  it("Path retrieving should work", function()
    local path = obj:get_path("html body")
    assert.truthy(path)
    assert.truthy(#path == 1)
    assert.truthy(path[1]:is_element())
    assert.truthy(#path[1]:get_children() == 5)
  end)
  describe("Basic DOM traversing should work", function()
    -- both variables are updated by the traversal callback and checked by
    -- the test registered after the traversal
    local matched = false
    local count = 0
    obj:traverse_elements(function(el)
      count = count + 1
      if obj:get_element_name(el) == "p" then
        matched = true
        -- these tests are registered from inside the callback, once for
        -- every visited <p> element
        it("Element matching should work", function()
          assert.truthy(el:root_node():get_node_type() == "ROOT")
          assert.truthy(el:is_element())
          assert.truthy(el:get_element_name()== "p")
        end)
        it("Node serializing should work", function()
          local p_serialize = el:serialize()
          assert.truthy(p_serialize == "

    nazdar

    ") end) el:remove_node(el) end end) it("Traverse should find 7 elements and match one

    ", function() assert.truthy(matched) assert.truthy(count == 7) end) end) describe("Modified DOM object serializing", function() local serialized = obj:serialize() assert.truthy(serialized) assert.truthy(type(serialized) == "string") assert.truthy(serialized:match("")) assert.truthy(serialized:match("

    ")== nil) end) -- css selector handling was moved to another module -- describe("CSS selector handling", function() -- local selector = "div#pokus span.ahoj, p, div.ahoj:first-child" -- local objects = obj:prepare_selector(selector) -- assert.truthy(#objects == 3) -- assert.truthy(obj:calculate_specificity(objects[1]) == 112) -- local document = [[ -- -- --

    -- first child -- Pokus --

    Uff

    -- Something different --
    -- -- -- ]]
  -- local newobj = dom.parse(document)
  -- local matchedlist = newobj:get_selector_path(objects)
  -- assert.truthy(#matchedlist == 3)
  -- -- assert.truthy(#obj:prepare_selector(selector)==2)
  -- end)
end)
================================================ FILE: test/test-mkparams.lua ================================================
-- busted tests for parsing of the output format string in mkparams
require "busted.runner" ()
kpse.set_program_name "luatex"
local mkparams = require "mkparams"
describe("Test output format and extensions", function()
  it("Should parse the output formats", function()
    local format, extensions = mkparams.get_format_extensions("html5+latexmk+sample-disabled")
    -- the expected value goes first, so the failure report of luassert
    -- labels the values correctly
    assert.are.equal("html5", format)
    assert.are.equal("table", type(extensions))
    assert.are.equal(3, #extensions)
    assert.are.equal("sample", extensions[2].name)
    assert.are.equal("-", extensions[3].type)
  end)
end)
================================================ FILE: tools/make_chardata.lua ================================================
kpse.set_program_name "luatex"
-- create Lua module from UnicodeData
-- we need mapping to lower case letters and decomposed base letters for accented characters
-- stop early when the data file is missing: io.lines without a file name
-- would iterate over the default input instead of failing
local unicode_data = assert(kpse.find_file("UnicodeData.txt"), "Cannot find UnicodeData.txt")
local chardata = {}
for line in io.lines(unicode_data) do
  local record = line:explode(";")
  local char = tonumber(record[1], 16)
  local category = string.lower(record[3])
  -- only letters (categories starting with "l") and space separators are kept
  if category:match("^l") or category == "zs" then
    -- the decomposed field contains charcode for the base letter and accent
    -- we care only about the base letter
    -- the field can start with a tag like <compat>. the tag must be removed
    -- first, because the unanchored %x+ pattern used before matched the
    -- letters a-f inside of the tag and produced a wrong charcode
    local decomposed = record[6]:gsub("^<[^>]*>%s*", ""):match("^(%x+)")
    decomposed = decomposed and tonumber(decomposed, 16)
    -- the lowercase letter is in the second field from the end
    local lower = record[#record - 1]
    lower = lower and tonumber(lower, 16) or nil
    chardata[#chardata+1] = {
      char = char,
      shcode = decomposed,
      lccode = lower,
      category = category
    }
  end
end

print "return {"

-- append "caption=value" to the list of fields, values that are nil or false
-- are skipped
local function add(fields, caption, value)
  if value then
    fields[#fields+1] = string.format("%s=%s", caption, value)
  end
end

for _, data in
ipairs(chardata) do
  -- one output line per character: [charcode] = {field=value, ...},
  local fields = {}
  -- the category is wrapped in quotes, so it is written as a string
  add(fields, "category", ('"%s"'):format(data.category))
  for _, key in ipairs {"lccode", "shcode"} do
    add(fields, key, data[key])
  end
  print(("[%s] = {%s},"):format(data.char, table.concat(fields, ", ")))
end
print "}"
================================================ FILE: tools/make_mathmlchardata.lua ================================================
-- This file generates Lua table with mapping of Unicode charcodes for different math font styles (bold, italic, bold-italic, etc.)
-- The new version of MathML requires to use different charcodes for different font styles,
-- so we need to replace characters in the MathML output depending on the value of the mathvariant attribute.
kpse.set_program_name "luatex"
local unicode = kpse.find_file("UnicodeData.txt")

--- Convert a Unicode character name to the format used in the mathvariant
-- attribute.
-- @param chartype character name from UnicodeData.txt
-- @return lowercase style name with words joined by hyphens
local function get_chartype(chartype)
  -- substitutions applied in this exact order; they strip everything except
  -- the words that describe the font style
  local substitutions = {
    {"MATHEMATICAL ", ""},
    {"SYMBOL$", ""},
    {"%a+%s*$", ""},
    {"SMALL ", ""},
    {"CAPITAL ", ""},
    {"%s+$", ""},
    {"%s+", "-"},
  }
  local style = chartype
  for _, rule in ipairs(substitutions) do
    style = style:gsub(rule[1], rule[2])
  end
  return style:lower()
end

--- Collect the styled variants of characters from UnicodeData.txt.
-- @param unicode path to the UnicodeData.txt file
-- @return table indexed by the charcode of the base character; each value is
-- a table that maps the style name to the charcode of the styled character
local function parse_unicode(unicode)
  local unicode_data = {}
  for line in io.lines(unicode) do
    -- captured fields: charcode, character name and decomposition mapping
    local codepoint, chartype, decomposition = line:match("^(%x+);([^;]+);[^;]+;[^;]+;[^;]+;([^;]+);")
    -- only the mathematical symbols are interesting for us
    if codepoint and chartype:match("^MATHEMATICAL") then
      -- the decomposition starts with an extra tag; the hexadecimal number at
      -- its end is the charcode of the base character
      local base = tonumber(decomposition:match("(%x+)$"), 16)
      -- keep only the style name
      chartype = get_chartype(chartype)
      local char = tonumber(codepoint, 16)
      if base and char then
        -- every base character gets one entry for each font style
        local area = unicode_data[base] or {}
        area[chartype] = char
unicode_data[base] = area
        -- print("unicode", char, chartype, base)
      end
    end
  end
  return unicode_data
end

-- stop early when the data file is missing: io.lines without a file name
-- would iterate over the default input instead of failing
local unicode_data = parse_unicode(assert(unicode, "Cannot find UnicodeData.txt"))

print "-- This file is autogenerated from tools/make_mathmlchardata.lua"
print "return {"
-- one line of the generated table for each base character
local to_sort = {}
for base, data in pairs(unicode_data) do
  local fields = {}
  for chartype, char in pairs(data) do
    fields[#fields+1] = string.format("['%s']=%s", chartype, char)
  end
  -- the order of pairs is not specified. sort the fields, so the generated
  -- file is the same on every run
  table.sort(fields)
  -- the zero padded charcode makes the string sort below numeric
  to_sort[#to_sort+1] = string.format("[%05i] = {%s},", base, table.concat(fields, ", "))
end
-- sort characters
table.sort(to_sort)
for _, line in ipairs(to_sort) do
  print(line)
end
print "}"